# Setup

## Imports

In [1]:
import numpy as np
import pandas as pd
import math
import pickle
import sys
sys.path.insert(0, '..')

from cosmic_rAI.data_prep import (event_df_from_matrices,
                                  flatten_event_df)
from cosmic_rAI.machine_learning import (split_and_run,
                                         get_labels,
                                         get_flattened_labels)

In [2]:
def get_all_data(list_of_files):
    list_of_dicts = [np.load(filepath).item() for filepath in list_of_files]
    return list_of_dicts

In [5]:
prefixes = ('../data/sim_12360_', '../data/sim_12362_')
files = []
for prefix in prefixes:
    for i in range(20):
        files.append('{0}{1:02d}.npy'.format(prefix,i))

In [6]:
data = event_df_from_matrices(get_all_data(files))

# Different Experiments

## Use only events in a certain azimuth/zenith band with max charge > 6

In [None]:
dir_band_data = flatten_event_df(data[
    data.loc[:, ('dir_MC','zenith')].between(0, .17) & 
    data.loc[:,('dir_MC','azimuth')].between(0,1) & 
    (data['charges'].max(axis=1) > 6)
])

In [None]:
split_and_run(dir_band_data, [x for x in filtered.columns if 'charges' in x], 10000)

## Use only events with max charges > 6

In [31]:
filtered = flatten_event_df(data[
        data['charges'].max(axis=1) > 6
    ].sample(frac=.5))

In [32]:
split_and_run(full_flat, ['core_MC_x', 'core_MC_y', 'dir_MC_azimuth', 'dir_MC_zenith', 'energy_0'] + [x for x in filtered.columns if 'charges' in x], 10000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_num_worker_replicas': 1, '_task_id': 0, '_model_dir': '/tmp/tmp47w5ck8f', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1273322ef0>, '_is_chief': True, '_num_ps_replicas': 0, '_save_checkpoints_secs': 600, '_task_type': 'worker', '_session_config': None, '_log_step_count_steps': 100, '_master': '', '_tf_random_seed': None, '_service': None}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmp47w5ck8f/model.ckpt.
INFO:tensorflow:loss = 1.16419e+07, step = 1
INFO:tensorflow:global_step/sec: 27.1791
INFO:tensorflow:loss = 228705.0, step = 101 (3.681 sec)
INFO:tensorflow:global_step/sec: 31.2833
INFO:tensorflow:loss = 176769.0, step = 201 (3.196 sec)
INFO:tensorflow:global_step/sec: 33.2202
INFO:tensorflow:loss =

INFO:tensorflow:loss = 70.2923, step = 7701 (3.132 sec)
INFO:tensorflow:global_step/sec: 30.6678
INFO:tensorflow:loss = 68.9087, step = 7801 (3.261 sec)
INFO:tensorflow:global_step/sec: 29.4576
INFO:tensorflow:loss = 68.6842, step = 7901 (3.396 sec)
INFO:tensorflow:global_step/sec: 32.0586
INFO:tensorflow:loss = 68.7926, step = 8001 (3.118 sec)
INFO:tensorflow:global_step/sec: 42.1867
INFO:tensorflow:loss = 69.8216, step = 8101 (2.370 sec)
INFO:tensorflow:global_step/sec: 35.112
INFO:tensorflow:loss = 69.2488, step = 8201 (2.848 sec)
INFO:tensorflow:global_step/sec: 27.1328
INFO:tensorflow:loss = 69.1348, step = 8301 (3.687 sec)
INFO:tensorflow:global_step/sec: 14.9765
INFO:tensorflow:loss = 69.3656, step = 8401 (6.677 sec)
INFO:tensorflow:global_step/sec: 33.9118
INFO:tensorflow:loss = 69.1348, step = 8501 (2.949 sec)
INFO:tensorflow:global_step/sec: 34.2948
INFO:tensorflow:loss = 69.0201, step = 8601 (2.915 sec)
INFO:tensorflow:global_step/sec: 27.8962
INFO:tensorflow:loss = 69.8422,

## Use events within a certain energy range 

In [17]:
filtered = flatten_event_df(data[
        (data.loc[:,('energy',0)] < 1000000)
    ])

In [None]:
split_and_run(filtered, [x for x in filtered.columns if 'charges' in x], 10000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_num_worker_replicas': 1, '_task_id': 0, '_model_dir': '/tmp/tmphfaudm36', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1271ca6358>, '_is_chief': True, '_num_ps_replicas': 0, '_save_checkpoints_secs': 600, '_task_type': 'worker', '_session_config': None, '_log_step_count_steps': 100, '_master': '', '_tf_random_seed': None, '_service': None}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmphfaudm36/model.ckpt.
INFO:tensorflow:loss = 103.03, step = 1
INFO:tensorflow:global_step/sec: 26.6577
INFO:tensorflow:loss = 24.2065, step = 101 (3.753 sec)
INFO:tensorflow:global_step/sec: 30.5293
INFO:tensorflow:loss = 15.8193, step = 201 (3.278 sec)
INFO:tensorflow:global_step/sec: 30.259
INFO:tensorflow:loss = 8.58105

## Use only events within certain energy range, azimuth/zenith band

In [17]:
filtered = flatten_event_df(data[
        (data.loc[:,('energy',0)] < 1000000) &
        data.loc[:, ('dir_MC','zenith')].between(0, .17) &
        data.loc[:,('dir_MC','azimuth')].between(0,1) &
        (data['charges'].max(axis=1) > 6)
    ])

In [18]:
split_and_run(filtered, [x for x in filtered.columns if 'charges' in x], 10000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_num_worker_replicas': 1, '_task_id': 0, '_model_dir': '/tmp/tmphfaudm36', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1271ca6358>, '_is_chief': True, '_num_ps_replicas': 0, '_save_checkpoints_secs': 600, '_task_type': 'worker', '_session_config': None, '_log_step_count_steps': 100, '_master': '', '_tf_random_seed': None, '_service': None}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmphfaudm36/model.ckpt.
INFO:tensorflow:loss = 103.03, step = 1
INFO:tensorflow:global_step/sec: 26.6577
INFO:tensorflow:loss = 24.2065, step = 101 (3.753 sec)
INFO:tensorflow:global_step/sec: 30.5293
INFO:tensorflow:loss = 15.8193, step = 201 (3.278 sec)
INFO:tensorflow:global_step/sec: 30.259
INFO:tensorflow:loss = 8.58105

INFO:tensorflow:loss = 0.0169283, step = 7601 (3.310 sec)
INFO:tensorflow:global_step/sec: 34.0982
INFO:tensorflow:loss = 0.0171686, step = 7701 (2.932 sec)
INFO:tensorflow:global_step/sec: 27.4345
INFO:tensorflow:loss = 0.0180109, step = 7801 (3.646 sec)
INFO:tensorflow:global_step/sec: 29.1294
INFO:tensorflow:loss = 0.00812574, step = 7901 (3.432 sec)
INFO:tensorflow:global_step/sec: 28.6994
INFO:tensorflow:loss = 0.0133146, step = 8001 (3.484 sec)
INFO:tensorflow:global_step/sec: 29.7616
INFO:tensorflow:loss = 0.012305, step = 8101 (3.361 sec)
INFO:tensorflow:global_step/sec: 29.3911
INFO:tensorflow:loss = 0.0245767, step = 8201 (3.403 sec)
INFO:tensorflow:global_step/sec: 28.9971
INFO:tensorflow:loss = 0.00682523, step = 8301 (3.448 sec)
INFO:tensorflow:global_step/sec: 32.4645
INFO:tensorflow:loss = 0.00576752, step = 8401 (3.080 sec)
INFO:tensorflow:global_step/sec: 30.4686
INFO:tensorflow:loss = 0.00650614, step = 8501 (3.282 sec)
INFO:tensorflow:global_step/sec: 31.2591
INFO:te

(        charges_0161  charges_0162  charges_0163  charges_0164  charges_0261  \
 172910      1.458593           0.0      1.830331           0.0      0.000000   
 12884       0.000000           0.0      0.000000           0.0      0.000000   
 172898      0.000000           0.0      0.000000           0.0      0.000000   
 115367      0.000000           0.0      0.000000           0.0      0.000000   
 2505        0.000000           0.0      0.000000           0.0      0.000000   
 122420      0.000000           0.0      0.000000           0.0      0.000000   
 152579      0.000000           0.0      0.000000           0.0      0.000000   
 44842       0.000000           0.0      0.000000           0.0      0.000000   
 210814      0.000000           0.0      0.000000           0.0      0.000000   
 258792      0.000000           0.0      0.000000           0.0      0.000000   
 210790      0.000000           0.0      0.000000           0.0      0.000000   
 141094      0.000000       

## Use events within certain energy range, azimuth/zenith band, and take an even split between proton and iron

In [7]:
filtered = flatten_event_df(data[
        (data.loc[:,('energy',0)] < 1000000) &
        data.loc[:, ('dir_MC','zenith')].between(0, .17) &
        data.loc[:,('dir_MC','azimuth')].between(0,1) &
        (data['charges'].max(axis=1) > 6)
    ])

In [8]:
filtered_iron = filtered[filtered['composition_0'] == 'Fe56Nucleus']
filtered_proton = filtered[filtered['composition_0'] == 'PPlus']
min_size = min(filtered_iron.shape[0], filtered_proton.shape[0])
filtered = pd.concat((filtered_iron[:min_size], filtered_proton[:min_size]))

In [9]:
filtered.shape

(396, 333)

In [16]:
results = split_and_run(filtered, [x for x in filtered.columns if 'charges' in x], 10000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_num_worker_replicas': 1, '_task_id': 0, '_model_dir': '/tmp/tmp00_8b1uy', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1270fc5f98>, '_is_chief': True, '_num_ps_replicas': 0, '_save_checkpoints_secs': 600, '_task_type': 'worker', '_session_config': None, '_log_step_count_steps': 100, '_master': '', '_tf_random_seed': None, '_service': None}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmp00_8b1uy/model.ckpt.
INFO:tensorflow:loss = 104.899, step = 1
INFO:tensorflow:global_step/sec: 32.0015
INFO:tensorflow:loss = 6.75452, step = 101 (3.128 sec)
INFO:tensorflow:global_step/sec: 37.6156
INFO:tensorflow:loss = 3.53882, step = 201 (2.659 sec)
INFO:tensorflow:global_step/sec: 33.2971
INFO:tensorflow:loss = 0.947

INFO:tensorflow:global_step/sec: 34.0327
INFO:tensorflow:loss = 0.00202441, step = 7601 (2.938 sec)
INFO:tensorflow:global_step/sec: 37.6921
INFO:tensorflow:loss = 0.00238525, step = 7701 (2.652 sec)
INFO:tensorflow:global_step/sec: 37.5147
INFO:tensorflow:loss = 0.00387498, step = 7801 (2.665 sec)
INFO:tensorflow:global_step/sec: 37.3351
INFO:tensorflow:loss = 0.00365333, step = 7901 (2.680 sec)
INFO:tensorflow:global_step/sec: 37.1694
INFO:tensorflow:loss = 0.00366067, step = 8001 (2.689 sec)
INFO:tensorflow:global_step/sec: 35.8804
INFO:tensorflow:loss = 0.00233, step = 8101 (2.788 sec)
INFO:tensorflow:global_step/sec: 36.8602
INFO:tensorflow:loss = 0.00256682, step = 8201 (2.712 sec)
INFO:tensorflow:global_step/sec: 31.8619
INFO:tensorflow:loss = 0.00359726, step = 8301 (3.140 sec)
INFO:tensorflow:global_step/sec: 40.9897
INFO:tensorflow:loss = 0.00282887, step = 8401 (2.439 sec)
INFO:tensorflow:global_step/sec: 38.1257
INFO:tensorflow:loss = 0.00170409, step = 8501 (2.623 sec)
INF

## Give all the metadata (energy, core, direction)

Take a sample so we don't break tensorflow.

In [30]:
full_flat = flatten_event_df(data).sample(frac=.5)

In [29]:
split_and_run(full_flat, ['core_MC_x', 'core_MC_y', 'dir_MC_azimuth', 'dir_MC_zenith', 'energy_0'] + [x for x in filtered.columns if 'charges' in x], 10000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_num_worker_replicas': 1, '_task_id': 0, '_model_dir': '/tmp/tmpj0xvc5em', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1271a54d30>, '_is_chief': True, '_num_ps_replicas': 0, '_save_checkpoints_secs': 600, '_task_type': 'worker', '_session_config': None, '_log_step_count_steps': 100, '_master': '', '_tf_random_seed': None, '_service': None}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpj0xvc5em/model.ckpt.
INFO:tensorflow:loss = 8.63516e+07, step = 1
INFO:tensorflow:global_step/sec: 23.2362
INFO:tensorflow:loss = 212813.0, step = 101 (4.308 sec)
INFO:tensorflow:global_step/sec: 29.2013
INFO:tensorflow:loss = 423686.0, step = 201 (3.422 sec)
INFO:tensorflow:global_step/sec: 31.0057
INFO:tensorflow:loss =

INFO:tensorflow:loss = 69.5203, step = 7701 (3.224 sec)
INFO:tensorflow:global_step/sec: 27.1727
INFO:tensorflow:loss = 69.1352, step = 7801 (3.679 sec)
INFO:tensorflow:global_step/sec: 26.9698
INFO:tensorflow:loss = 68.7872, step = 7901 (3.709 sec)
INFO:tensorflow:global_step/sec: 26.6814
INFO:tensorflow:loss = 68.8302, step = 8001 (3.747 sec)
INFO:tensorflow:global_step/sec: 40.0316
INFO:tensorflow:loss = 70.3417, step = 8101 (2.497 sec)
INFO:tensorflow:global_step/sec: 38.6315
INFO:tensorflow:loss = 68.6431, step = 8201 (2.589 sec)
INFO:tensorflow:global_step/sec: 24.4879
INFO:tensorflow:loss = 68.8097, step = 8301 (4.085 sec)
INFO:tensorflow:global_step/sec: 14.7464
INFO:tensorflow:loss = 68.7516, step = 8401 (6.781 sec)
INFO:tensorflow:global_step/sec: 32.9731
INFO:tensorflow:loss = 66.6907, step = 8501 (3.031 sec)
INFO:tensorflow:global_step/sec: 30.0713
INFO:tensorflow:loss = 69.8698, step = 8601 (3.326 sec)
INFO:tensorflow:global_step/sec: 32.7968
INFO:tensorflow:loss = 69.2358