In [1]:
import uproot
import numpy as np
import sys
import h5py
import pickle

In [2]:
if '../WatChMaL' not in sys.path:
    sys.path.append('../WatChMaL')

from watchmal.dataset.DigiTruthMapping import DigiTruthMapping
from watchmal.dataset.h5_dataset import H5TrueDataset
from watchmal.dataset.cnn_mpmt.cnn_mpmt_dataset import CNNmPMTDataset

In [3]:
## Load Dataset

In [5]:
# Import test events from h5 file
data_path = "/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_emg_E0to1000MeV_digihits.h5"
data_file = h5py.File(data_path, "r")

print(data_file.keys())

angles     = np.array(data_file['angles'])
energies   = np.array(data_file['energies'])
positions  = np.array(data_file['positions'])
labels     = np.array(data_file['labels'])
root_files = np.array(data_file['root_files'])
event_ids  = np.array(data_file['event_ids'])
vetos      = np.array(data_file['veto'])

<KeysViewHDF5 ['angles', 'energies', 'event_hits_index', 'event_ids', 'hit_charge', 'hit_pmt', 'hit_time', 'labels', 'positions', 'root_files', 'veto', 'veto2']>


In [6]:
idxs_path = '/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_3M_idxs.npz'
idxs = np.load(idxs_path, allow_pickle=True)

train_idxs = idxs['train_idxs']
val_idxs   = idxs['val_idxs']
test_idxs  = idxs['test_idxs']

In [7]:
print(vetos.shape)

(20613195,)


In [8]:
print(train_idxs.shape)
print(test_idxs.shape)

(4406273,)
(3524482,)


In [9]:
## Load vetos

In [10]:
test_vetos  = vetos[test_idxs]
test_labels = labels[test_idxs]

e_OD_veto      = (test_labels == 1) & (test_vetos)
mu_OD_veto     = (test_labels == 2) & (test_vetos)

In [11]:
## Load True Momenta

In [12]:
momenta = np.load('./fq_comparison_data/3M_momenta.npz', allow_pickle=True)

test_true_momenta = momenta['test_momenta']

In [13]:
## Load to_wall

In [14]:
to_wall = np.load('./fq_comparison_data/3M_d_to_wall.npz', allow_pickle=True)

test_to_wall = to_wall['test_d_to_wall'] / 100

In [15]:
## Load d_wall

In [16]:
d_wall = np.load('./fq_comparison_data/3M_d_wall.npz', allow_pickle=True)

test_d_wall = d_wall['test_d_wall'] / 100

In [17]:
## Load Data Mapping

In [18]:
dtm = DigiTruthMapping(dataset='./fq_comparison_data/data_for_truth.pkl', mcset='./fq_comparison_data/truth_for_data.pkl')
print(dtm.get_data_entry(10000))

9825


In [19]:
## Load fitqun Flags

In [20]:
# Retrieve flags
gamma_file_data = uproot.open('/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_gamma_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir.fiTQun.root')['fiTQun;1']
e_file_data     = uproot.open('/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_e-_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir.fiTQun.root')['fiTQun;1']
mu_file_data    = uproot.open('/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_mu-_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir.fiTQun.root')['fiTQun;1']
# file_data.show()

gamma_file_data['fq1rpcflg'].interpretation

AsJagged(AsDtype("('>i4', (7,))"))

In [21]:
truehits_filepath = '/data/WatChMaL/data/IWCD_mPMT_Short_emg_E0to1000MeV_truehits.h5'
truehits_dataset = H5TrueDataset(h5_path=truehits_filepath)

In [22]:
dtm = DigiTruthMapping(dataset='./fq_comparison_data/data_for_truth.pkl', mcset='./fq_comparison_data/truth_for_data.pkl')
print(dtm.get_data_entry(10000))

9825


In [23]:
print(dtm.get_truth_entry(10000))

10176


In [24]:
# find indices in true dataset for offset to fitqun files
print(truehits_dataset.labels.shape)
gamma_base_idx = np.where(truehits_dataset.labels == 0)[0][0]
e_base_idx     = np.where(truehits_dataset.labels == 1)[0][0]
mu_base_idx    = np.where(truehits_dataset.labels == 2)[0][0]

print(gamma_base_idx)
print(e_base_idx)
print(mu_base_idx)

(21000000,)
3000000
0
6000000


In [25]:
print(np.sort(np.where(truehits_dataset.labels == 2)[0]))

[6000000 6000001 6000002 ... 8999997 8999998 8999999]


In [26]:
digihits_labels = labels

In [27]:
'/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_emg_E0to1000MeV_digihits.h5'
'/data/WatChMaL/data/IWCD_mPMT_Short_emg_E0to1000MeV_truehits.h5'

'/data/WatChMaL/data/IWCD_mPMT_Short_emg_E0to1000MeV_truehits.h5'

In [28]:
sample_idx = 590500
print(digihits_labels[sample_idx])
print(truehits_dataset.labels[dtm.get_truth_entry(sample_idx)])

1
1


In [29]:
sample_idx = 5890000
print(sample_idx)
print(dtm.get_truth_entry(sample_idx))

print(dtm.get_truth_entry(sample_idx) - sample_idx)

5890000
5989020
99020


In [30]:
print(set(dtm.data_for_truth.values()))

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [31]:
print(root_files[sample_idx])
print(truehits_dataset.root_files[dtm.get_truth_entry(sample_idx)])

b'/localscratch/prouse.56965017.0/WCSim/gamma/E0to1000MeV/unif-pos-R400-y300cm/4pi-dir/IWCD_mPMT_Short_gamma_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir_3000evts_996.root'
b'/localscratch/prouse.56965017.0/WCSim/gamma/E0to1000MeV/unif-pos-R400-y300cm/4pi-dir/IWCD_mPMT_Short_gamma_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir_3000evts_996.root'


In [32]:
print(labels[5900828])
print(truehits_dataset.labels[5901000])

2
0


In [33]:
print(dtm.get_data_entry( 5901000 ))

5803179


In [34]:
print(truehits_dataset.root_files.shape)

(21000000,)


In [35]:
gamma_flags = gamma_file_data.arrays('fq1rpcflg')['fq1rpcflg']
e_flags     = e_file_data.arrays('fq1rpcflg')['fq1rpcflg']
mu_flags    = mu_file_data.arrays('fq1rpcflg')['fq1rpcflg']

gamma_fq1rpcflg_1 = np.array(gamma_flags[:, 0, 1])
e_fq1rpcflg_1     = np.array(e_flags[:, 0, 1])
mu_fq1rpcflg_1    = np.array(mu_flags[:, 0, 1])

gamma_fq1rpcflg_2 = np.array(gamma_flags[:, 0, 2])
e_fq1rpcflg_2     = np.array(e_flags[:, 0, 2])
mu_fq1rpcflg_2    = np.array(mu_flags[:, 0, 2])

In [66]:
# Construct mapping from test set to fitqun
gamma_fq_indices = []
e_fq_indices     = []
mu_fq_indices    = []
for idx in test_idxs:
    particle_label = labels[idx]
    truehits_index = dtm.get_truth_entry(idx)
    
    if particle_label == 0:
        gamma_fq_indices.append(truehits_index % gamma_base_idx)
    elif particle_label == 1:
        e_fq_indices.append(truehits_index % e_base_idx)
    elif particle_label == 2:
        mu_fq_indices.append(truehits_index % mu_base_idx)
    
gamma_fq_indices = np.array(gamma_fq_indices)
e_fq_indices     = np.array(e_fq_indices)
mu_fq_indices    = np.array(mu_fq_indices)

  if sys.path[0] == '':


In [37]:
# Retrieve flags associated with test set
test_fq1rpcflg_1_arr = []
test_fq1rpcflg_2_arr = []
for idx in test_idxs:
    particle_label = labels[idx]
    truehits_index = dtm.get_truth_entry(idx)
    
    if particle_label == 0:
        test_fq1rpcflg_1_arr.append(gamma_fq1rpcflg_1[truehits_index % gamma_base_idx])
        test_fq1rpcflg_2_arr.append(gamma_fq1rpcflg_2[truehits_index % gamma_base_idx])
    elif particle_label == 1:
        test_fq1rpcflg_1_arr.append(e_fq1rpcflg_1[truehits_index % e_base_idx])
        test_fq1rpcflg_2_arr.append(e_fq1rpcflg_2[truehits_index % e_base_idx])
    elif particle_label == 2:
        test_fq1rpcflg_1_arr.append(mu_fq1rpcflg_1[truehits_index % mu_base_idx])
        test_fq1rpcflg_2_arr.append(mu_fq1rpcflg_1[truehits_index % mu_base_idx])

test_fq1rpcflg_1 = np.array(test_fq1rpcflg_1_arr) != 0
test_fq1rpcflg_2 = np.array(test_fq1rpcflg_2_arr) != 0

  if sys.path[0] == '':
  del sys.path[0]


In [39]:
## Define cuts

In [40]:
to_wall_cut = (test_labels == 1) & (test_to_wall > 0.63*np.log(test_true_momenta) - 2)
d_wall_cut  = test_d_wall  < 0.5

In [41]:
fq_comparison = to_wall_cut | d_wall_cut | mu_OD_veto | test_fq1rpcflg_1 | test_fq1rpcflg_2

In [42]:
print(np.bincount(test_fq1rpcflg_1))
print(np.bincount(test_fq1rpcflg_2))

[3063140  461342]
[3070147  454335]


In [43]:
print(test_idxs.shape)
print('#######################')
print(np.where(to_wall_cut)[0].shape)
print(np.where(d_wall_cut)[0].shape)
print(np.where(mu_OD_veto)[0].shape)
print(np.where(test_fq1rpcflg_1)[0].shape)
print(np.where(test_fq1rpcflg_2)[0].shape)

(3524482,)
#######################
(799726,)
(1236817,)
(513916,)
(461342,)
(454335,)


In [44]:
print(fq_comparison.shape)
print(np.where(fq_comparison)[0].shape)

(3524482,)
(2315569,)


In [45]:
print(np.delete(test_idxs, np.where(fq_comparison)[0], 0).shape)

(1208913,)


In [46]:
fq_comparison_OD_veto = fq_comparison | e_OD_veto

In [47]:
cuts = {
        'fq_comparison'        : np.where(fq_comparison)[0],
        'fq_comparison_OD_veto': np.where(fq_comparison_OD_veto)[0],
        'to_wall_cut'          : np.where(to_wall_cut)[0],
        'd_wall_cut'           : np.where(d_wall_cut)[0],
        'e_OD_veto'            : np.where(e_OD_veto)[0],
        'mu_OD_veto'           : np.where(mu_OD_veto)[0],
        'fq1rpcflg_1'          : np.where(test_fq1rpcflg_1)[0],
        'fq1rpcflg_2'          : np.where(test_fq1rpcflg_2)[0]
        }

In [48]:
with open('./fq_comparison_data/3M_fitqun_cuts.pickle', 'wb') as handle:
    pickle.dump(cuts, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [67]:
fq_mapping = {
        'gamma_fq_indices': gamma_fq_indices,
        'e_fq_indices'    : e_fq_indices,
        'mu_fq_indices'   : mu_fq_indices,
        }

In [65]:
with open('./fq_comparison_data/3M_fitqun_mapping.pickle', 'wb') as handle:
    pickle.dump(fq_mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [51]:
#np.savez('./fq_comparison_data/3M_fitqun_cuts.npz', cuts=cuts)

In [62]:
fq_test_set = np.concatenate((np.ones_like(np.array(e_fq_indices))*1,
                              np.ones_like(np.array(mu_fq_indices))*2,
                              np.ones_like(np.array(gamma_fq_indices))*0
                             )
                            )

In [54]:
print(fq_test_set.shape)

(3524482,)


In [55]:
print(test_idxs.shape)

(3524482,)


In [63]:
print(set(fq_test_set - labels[test_idxs]))

{0}


In [64]:
print(labels[test_idxs])

[1 1 1 ... 0 0 0]
