In [1]:
%load_ext autoreload
%matplotlib inline
%autoreload 2

In [2]:
import sys
import os
import h5py
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Add the directory two levels up to the import path (once) so the
# short_tank_EDA package below can be imported.
# NOTE(review): '../..' is relative to the kernel's working directory — confirm
# the notebook is always launched from its own folder.
if '../..' not in sys.path:
    sys.path.append('../..')

# NOTE(review): wildcard import hides where later names come from. Presumably
# ProgressBar, Percentage, Bar, ETA, reduce, compute_d_wall and compute_to_wall
# used further down are provided by this module — verify and prefer explicit imports.
from short_tank_EDA.exploratory_data_analysis_utils import *

In [4]:
# Open the events h5 file read-only and copy the event-level datasets into
# in-memory NumPy arrays.
# NOTE(review): the file handle is left open (as in the original); close it
# once no later cell needs to read from data_file.
data_path = "/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_emgp0_E0to1000MeV_digihits.h5"
data_file = h5py.File(data_path, mode="r")

print(data_file.keys())


def _read_dataset(key):
    """Return the full contents of data_file[key] as a NumPy array."""
    return np.array(data_file[key])


angles, energies, positions, labels, root_files = map(
    _read_dataset,
    ('angles', 'energies', 'positions', 'labels', 'root_files'),
)

<KeysViewHDF5 ['angles', 'energies', 'event_hits_index', 'event_ids', 'hit_charge', 'hit_pmt', 'hit_time', 'labels', 'positions', 'root_files', 'veto', 'veto2']>


In [7]:
# Load the train / validation / test index arrays from the split file.
# NOTE(review): allow_pickle=True permits unpickling object arrays, which can
# execute arbitrary code if the file is untrusted — confirm it is required here.
idxs_path = '/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_4_class_3M_emgp0_idxs.npz'
idxs = np.load(idxs_path, allow_pickle=True)

train_idxs, val_idxs, test_idxs = (
    idxs[split_key] for split_key in ('train_idxs', 'val_idxs', 'test_idxs')
)

In [8]:
def _make_split_info(name, split_idxs):
    """Bundle one split's events into an info dict.

    Indexes the full energies / positions / angles / labels arrays with
    ``split_idxs`` and stores the selections, together with the display
    ``name``, under the keys the later cells read.
    """
    return {
        'name': name,
        'energies': energies[split_idxs],
        'positions': positions[split_idxs],
        'angles': angles[split_idxs],
        'labels': labels[split_idxs],
    }


# Training split: build the dict, then expose the individual arrays under
# their own names as well.
train_info = _make_split_info('Training Set', train_idxs)
train_angles, train_energies = train_info['angles'], train_info['energies']
train_positions, train_labels = train_info['positions'], train_info['labels']

print("Done train info")

# Validation split.
val_info = _make_split_info('Validation Set', val_idxs)
val_angles, val_energies = val_info['angles'], val_info['energies']
val_positions, val_labels = val_info['positions'], val_info['labels']

print("Done val info")

# Test split.
test_info = _make_split_info('Test Set', test_idxs)
test_angles, test_energies = test_info['angles'], test_info['energies']
test_positions, test_labels = test_info['positions'], test_info['labels']

print("Done test info")

Done train info
Done val info
Done test info


In [9]:
# For each class label 0..3, print the smallest first-axis index at which
# that label occurs in the test set.
for class_label in (0, 1, 2, 3):
    matching_rows = np.where(test_labels == class_label)[0]
    print(np.min(matching_rows))

2342105
0
1177966
3524482


In [8]:
# Collect the split dicts in train, validation, test order; the distance
# computations below iterate over this list and index it by position when saving.
dataset_info = [train_info, val_info, test_info]

In [9]:
# Compute d_wall for all events
#
# For every split in dataset_info, call compute_d_wall on the first entry of
# each event's position record, store the per-event results (as a list) under
# dset['d_wall'], then save all three splits to ./prep_data/3M_d_wall.npz.
# NOTE(review): compute_d_wall comes from the wildcard import; presumably it
# returns the distance from the vertex to the nearest tank wall — verify.

# Total number of events across all splits, used as the progress-bar maximum.
total_events = sum(dset['positions'].shape[0] for dset in dataset_info)

pbar = ProgressBar(widgets=['Calculating Distance to Wall. Progress: ', Percentage(), ' ',
                            Bar(marker='0', left='[', right=']'), ' ', ETA()],
                   maxval=total_events)

pbar.start()
done = 0  # events processed in the splits already finished
for dset in dataset_info:
    n_events = dset['positions'].shape[0]
    d_wall = []
    for i in range(n_events):
        pbar.update(done + i)
        d_wall.append(compute_d_wall(dset['positions'][i][0]))
    # Store once per split, after the loop: avoids a redundant assignment per
    # event and guarantees the key exists even when a split has no events.
    dset['d_wall'] = d_wall
    done += n_events
pbar.finish()

# Make sure the output directory exists so the save cannot fail after the
# long computation above.
os.makedirs('./prep_data', exist_ok=True)
np.savez('./prep_data/3M_d_wall.npz',
         train_d_wall=dataset_info[0]['d_wall'],
         val_d_wall=dataset_info[1]['d_wall'],
         test_d_wall=dataset_info[2]['d_wall'])

Calculating Distance to Wall. Progress: 100% [000000000000000000] Time: 0:02:35


In [10]:
# Sanity check: reload the saved archive and show the first ten test-split values.
d_wall_archive = np.load('./prep_data/3M_d_wall.npz', allow_pickle=True)
sample_d_wall = d_wall_archive['test_d_wall']
print(sample_d_wall[:10])

[ 64.60852051  67.41827541  60.46169831 176.36711884 187.08835403
  43.50449073 228.2210083   26.492864   169.46258444 134.78059469]


In [11]:
# NOTE(review): this cell repeats the preceding cell verbatim (same file, same
# key, same printed slice) — consider deleting it.
with_pickle = dict(allow_pickle=True)
sample_d_wall = np.load('./prep_data/3M_d_wall.npz', **with_pickle)['test_d_wall']
first_ten = sample_d_wall[0:10]
print(first_ten)

[ 64.60852051  67.41827541  60.46169831 176.36711884 187.08835403
  43.50449073 228.2210083   26.492864   169.46258444 134.78059469]


In [12]:

# Compute d_to_wall for all events
#
# For every split in dataset_info, call compute_to_wall on the first entry of
# each event's position record together with that event's angles, store the
# per-event results (as a list) under dset['d_to_wall'], then save all three
# splits to ./prep_data/3M_d_to_wall.npz.
# NOTE(review): compute_to_wall comes from the wildcard import; presumably it
# returns the distance to the wall along the particle direction — verify.

# Total number of events across all splits, used as the progress-bar maximum.
total_events = sum(dset['positions'].shape[0] for dset in dataset_info)

pbar = ProgressBar(widgets=['Calculating Distance to Wall. Progress: ', Percentage(), ' ',
                            Bar(marker='0', left='[', right=']'), ' ', ETA()],
                   maxval=total_events)

pbar.start()
done = 0  # events processed in the splits already finished
for dset in dataset_info:
    n_events = dset['positions'].shape[0]
    d_to_wall = []
    for i in range(n_events):
        pbar.update(done + i)
        d_to_wall.append(compute_to_wall(dset['positions'][i][0], dset['angles'][i]))
    # Store once per split, after the loop: avoids a redundant assignment per
    # event and guarantees the key exists even when a split has no events.
    dset['d_to_wall'] = d_to_wall
    done += n_events
pbar.finish()

# Make sure the output directory exists so the save cannot fail after the
# long computation above.
os.makedirs('./prep_data', exist_ok=True)
np.savez('./prep_data/3M_d_to_wall.npz',
         train_d_to_wall=dataset_info[0]['d_to_wall'],
         val_d_to_wall=dataset_info[1]['d_to_wall'],
         test_d_to_wall=dataset_info[2]['d_to_wall'])


Calculating Distance to Wall. Progress: 100% [000000000000000000] Time: 0:05:04
