In [1]:
import uproot
import numpy as np
import sys
import h5py
import pickle

In [2]:
if '../WatChMaL' not in sys.path:
    sys.path.append('../WatChMaL')

from watchmal.dataset.DigiTruthMapping import DigiTruthMapping
from watchmal.dataset.h5_dataset import H5TrueDataset
from watchmal.dataset.cnn_mpmt.cnn_mpmt_dataset import CNNmPMTDataset

In [3]:
## Load Dataset

In [4]:
# Import test events from h5 file
data_path = "/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_emg_E0to1000MeV_digihits.h5"
data_file = h5py.File(data_path, "r")

print(data_file.keys())

angles     = np.array(data_file['angles'])
energies   = np.array(data_file['energies'])
positions  = np.array(data_file['positions'])
labels     = np.array(data_file['labels'])
root_files = np.array(data_file['root_files'])
event_ids  = np.array(data_file['event_ids'])
vetos      = np.array(data_file['veto'])

<KeysViewHDF5 ['angles', 'energies', 'event_hits_index', 'event_ids', 'hit_charge', 'hit_pmt', 'hit_time', 'labels', 'positions', 'root_files', 'veto', 'veto2']>


In [5]:
idxs_path = '/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_3M_idxs.npz'
idxs = np.load(idxs_path, allow_pickle=True)

train_idxs = idxs['train_idxs']
val_idxs   = idxs['val_idxs']
test_idxs  = idxs['test_idxs']

In [6]:
print(root_files[test_idxs][0:2])

[b'/localscratch/prouse.56905527.0/WCSim/e-/E0to1000MeV/unif-pos-R400-y300cm/4pi-dir/IWCD_mPMT_Short_e-_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir_3000evts_0.root'
 b'/localscratch/prouse.56905527.0/WCSim/e-/E0to1000MeV/unif-pos-R400-y300cm/4pi-dir/IWCD_mPMT_Short_e-_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir_3000evts_0.root']


In [7]:
short_ids    = np.load('/home/jtindall/WatChMaL/outputs/2021-03-16/20-21-15/outputs' + "/ids.npy")
short_labels = np.load('/home/jtindall/WatChMaL/outputs/2021-03-16/20-21-15/outputs' + "/labels.npy")

In [8]:
print(np.bincount(labels[train_idxs]))
print(np.bincount(labels[val_idxs]))
print(np.bincount(labels[test_idxs]))

[1478191 1472353 1455729]
[295626 294315 291204]
[1182377 1177966 1164139]


In [9]:
print(np.bincount(labels[test_idxs]))
print(np.bincount(short_labels))

[1182377 1177966 1164139]
[1182377 1177966 1164139]


In [10]:
print(test_idxs.shape)
print(short_labels.shape)

(3524482,)
(3524482,)


In [11]:
print(set(labels[test_idxs] - short_labels))

{0}


In [12]:
print(set(labels[test_idxs] - short_labels))

{0}


In [13]:
## Load vetos

In [14]:
test_vetos  = vetos[test_idxs]
test_labels = labels[test_idxs]

e_OD_veto   = (test_labels == 1) & (test_vetos)
mu_OD_veto  = (test_labels == 2) & (test_vetos)

In [15]:
## Load True Momenta

In [16]:
momenta = np.load('./fq_comparison_data/3M_momenta.npz', allow_pickle=True)

test_true_momenta = momenta['test_momenta']

In [17]:
## Load to_wall

In [18]:
to_wall = np.load('./fq_comparison_data/3M_d_to_wall.npz', allow_pickle=True)

test_to_wall = to_wall['test_d_to_wall'] / 100

In [19]:
## Load d_wall

In [20]:
d_wall = np.load('./fq_comparison_data/3M_d_wall.npz', allow_pickle=True)

test_d_wall = d_wall['test_d_wall'] / 100

In [21]:
## Load Data Mapping

In [22]:
dtm = DigiTruthMapping(dataset='./fq_comparison_data/data_for_truth.pkl', mcset='./fq_comparison_data/truth_for_data.pkl')
print(dtm.get_data_entry(10000))

FileNotFoundError: [Errno 2] No such file or directory: './fq_comparison_data/data_for_truth.pkl'

In [None]:
## Load fitqun Flags

In [34]:
def remove_indices(array, cut_idxs):
    return np.delete(array, cut_idxs, 0)

with open('./fitqun_comparison_prep/prep_data/4_class_3M_fitqun_cuts.pickle', 'rb') as handle:
    cuts = pickle.load(handle)

cut_to_apply = cuts['fq_comparison']

In [24]:
# Retrieve flags
gamma_file_data   = uproot.open('/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_gamma_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir.fiTQun.root')['fiTQun;1']
e_file_data       = uproot.open('/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_e-_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir.fiTQun.root')['fiTQun;1']
mu_file_data      = uproot.open('/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_mu-_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir.fiTQun.root')['fiTQun;1']
pion_file_data    = uproot.open('/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_pi0_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir.fiTQun.root')['fiTQun;1']
# file_data.show()

gamma_file_data['fq1rpcflg'].interpretation

AsJagged(AsDtype("('>i4', (7,))"))

In [36]:
gamma_fqpi0mom1 = gamma_file_data.arrays('fqpi0mom1')['fqpi0mom1']
gamma_fqpi0mom2 = gamma_file_data.arrays('fqpi0mom2')['fqpi0mom2']
gamma_fqpi0nll  = gamma_file_data.arrays('fqpi0nll')['fqpi0nll']
gamma_fqpi0mass = gamma_file_data.arrays('fqpi0mass')['fqpi0mass']
gamma_fqpi0pcflg= gamma_file_data.arrays('fqpi0pcflg')['fqpi0pcflg']

e_fqpi0mom1 = e_file_data.arrays('fqpi0mom1')['fqpi0mom1']
e_fqpi0mom2 = e_file_data.arrays('fqpi0mom2')['fqpi0mom2']
e_fqpi0nll  = e_file_data.arrays('fqpi0nll')['fqpi0nll']
e_fqpi0mass = e_file_data.arrays('fqpi0mass')['fqpi0mass']
e_fqpi0pcflg= e_file_data.arrays('fqpi0pcflg')['fqpi0pcflg']

mu_fqpi0mom1 = mu_file_data.arrays('fqpi0mom1')['fqpi0mom1']
mu_fqpi0mom2 = mu_file_data.arrays('fqpi0mom2')['fqpi0mom2']
mu_fqpi0nll  = mu_file_data.arrays('fqpi0nll')['fqpi0nll']
mu_fqpi0mass = mu_file_data.arrays('fqpi0mass')['fqpi0mass']
mu_fqpi0pcflg= mu_file_data.arrays('fqpi0pcflg')['fqpi0pcflg']

pion_fqpi0mom1 = pion_file_data.arrays('fqpi0mom1')['fqpi0mom1']
pion_fqpi0mom2 = pion_file_data.arrays('fqpi0mom2')['fqpi0mom2']
pion_fqpi0nll  = pion_file_data.arrays('fqpi0nll')['fqpi0nll']
pion_fqpi0mass = pion_file_data.arrays('fqpi0mass')['fqpi0mass']
pion_fqpi0pcflg= pion_file_data.arrays('fqpi0pcflg')['fqpi0pcflg']

In [41]:
fq_mapping_path = '/home/jtindall/WatChMaL_analysis/fitqun_comparison/fitqun_comparison_prep/prep_data/4_class_3M_fitqun_mapping.pickle'

with open(fq_mapping_path, 'rb') as handle:
        fq_mapping = pickle.load(handle)

gamma_fq_indices = fq_mapping['gamma_fq_indices']
e_fq_indices     = fq_mapping['e_fq_indices']
mu_fq_indices    = fq_mapping['mu_fq_indices']
pion_fq_indices  = fq_mapping['pion_fq_indices']
    
# Load gamma results
gamma_set_nll = gamma_file_data.arrays('fq1rnll')['fq1rnll']

gamma_set_e_nll, gamma_set_mu_nll = gamma_set_nll[:, 0, 1], gamma_set_nll[:, 0, 2]

gamma_fqpi0mom1 = gamma_file_data.arrays('fqpi0mom1')['fqpi0mom1'][:, 0]
gamma_fqpi0mom2 = gamma_file_data.arrays('fqpi0mom2')['fqpi0mom2'][:, 0]
gamma_fqpi0nll  = gamma_file_data.arrays('fqpi0nll')['fqpi0nll'][:, 0]
gamma_fqpi0mass = gamma_file_data.arrays('fqpi0mass')['fqpi0mass'][:, 0] 

gamma_set_discriminator = np.array(gamma_set_e_nll - gamma_fqpi0nll)
#gamma_set_discriminator = np.array(gamma_set_mu_nll - gamma_set_e_nll)

# Load electron results
e_set_nll    = e_file_data.arrays('fq1rnll')['fq1rnll']

e_set_e_nll, e_set_mu_nll = e_set_nll[:, 0, 1], e_set_nll[:, 0, 2]

e_fqpi0mom1 = e_file_data.arrays('fqpi0mom1')['fqpi0mom1'][:, 0]
e_fqpi0mom2 = e_file_data.arrays('fqpi0mom2')['fqpi0mom2'][:, 0]
e_fqpi0nll  = e_file_data.arrays('fqpi0nll')['fqpi0nll'][:, 0]
e_fqpi0mass = e_file_data.arrays('fqpi0mass')['fqpi0mass'][:, 0]

e_set_discriminator = np.array(e_set_e_nll - e_fqpi0nll)
#e_set_discriminator = np.array(e_set_mu_nll - e_set_e_nll)

# Load mu results
mu_set_nll   = mu_file_data.arrays('fq1rnll')['fq1rnll']

mu_set_e_nll, mu_set_mu_nll = mu_set_nll[:, 0, 1], mu_set_nll[:, 0, 2]

mu_fqpi0mom1 = mu_file_data.arrays('fqpi0mom1')['fqpi0mom1'][:, 0]
mu_fqpi0mom2 = mu_file_data.arrays('fqpi0mom2')['fqpi0mom2'][:, 0]
mu_fqpi0nll  = mu_file_data.arrays('fqpi0nll')['fqpi0nll'][:, 0]
mu_fqpi0mass = mu_file_data.arrays('fqpi0mass')['fqpi0mass'][:, 0]

mu_set_discriminator = np.array(mu_set_e_nll - mu_fqpi0nll) 
#mu_set_discriminator = np.array(mu_set_mu_nll - mu_set_e_nll) 

# Load pion results
pion_set_nll   = pion_file_data.arrays('fq1rnll')['fq1rnll']

pion_set_e_nll, pion_set_mu_nll = pion_set_nll[:, 0, 1], pion_set_nll[:, 0, 2]

pion_fqpi0mom1 = pion_file_data.arrays('fqpi0mom1')['fqpi0mom1'][:, 0]
pion_fqpi0mom2 = pion_file_data.arrays('fqpi0mom2')['fqpi0mom2'][:, 0]
pion_fqpi0nll  = pion_file_data.arrays('fqpi0nll')['fqpi0nll'][:, 0]
pion_fqpi0mass = pion_file_data.arrays('fqpi0mass')['fqpi0mass'][:, 0]

pion_set_discriminator = np.array(pion_set_e_nll - pion_fqpi0nll)
#pion_set_discriminator = np.array(pion_set_mu_nll - pion_set_e_nll)

# Construct likelihoods
fq_likelihoods = np.concatenate((e_set_discriminator[e_fq_indices],
                                 mu_set_discriminator[mu_fq_indices],
                                 gamma_set_discriminator[gamma_fq_indices],
                                 pion_set_discriminator[pion_fq_indices]
                                 ))

# Collect scores
fq_scores = np.zeros((fq_likelihoods.shape[0], 3))
fq_scores[:, 1] = fq_likelihoods

# Generate labels
fq_labels = np.concatenate((np.ones_like(e_set_discriminator[e_fq_indices])*1,
                            np.ones_like(mu_set_discriminator[mu_fq_indices])*2,
                            np.ones_like(gamma_set_discriminator[gamma_fq_indices])*0,
                            np.ones_like(pion_set_discriminator[pion_fq_indices])*3
                            ))

# Collect reconstructed momentum values
gamma_set_mom = np.array(gamma_fqpi0mom1 + gamma_fqpi0mom2)
e_set_mom     = np.array(e_fqpi0mom1 + e_fqpi0mom2)
mu_set_mom    = np.array(mu_fqpi0mom1 + mu_fqpi0mom2)
pion_set_mom  = np.array(pion_fqpi0mom1 + pion_fqpi0mom2)

fq_mom = np.concatenate((e_set_mom[e_fq_indices],
                         mu_set_mom[mu_fq_indices],
                         gamma_set_mom[gamma_fq_indices],
                         pion_set_mom[pion_fq_indices]
                         ))

fq_fqpi0pcflg = np.concatenate((np.array(e_fqpi0pcflg[e_fq_indices]),
                                np.array(mu_fqpi0pcflg[mu_fq_indices]),
                                np.array(gamma_fqpi0pcflg[gamma_fq_indices]),
                                np.array(pion_fqpi0pcflg[pion_fq_indices])
                         ))

In [42]:
filtered_fq_mom = remove_indices(fq_mom, cut_to_apply)
filtered_fq_fqpi0pcflg = remove_indices(fq_fqpi0pcflg, cut_to_apply)

In [44]:
idx = np.argmax(filtered_fq_mom)
print(filtered_fq_mom[idx])
print(filtered_fq_fqpi0pcflg[idx])

3618063500000.0
[3 0]


In [43]:
print(np.max(filtered_fq_mom))

3618063500000.0


In [29]:
print(np.array(pion_fqpi0mass[:, 0]).shape)

(1200000,)


In [25]:
truehits_filepath = '/data/WatChMaL/data/IWCD_mPMT_Short_emg_E0to1000MeV_truehits.h5'
truehits_dataset = H5TrueDataset(h5_path=truehits_filepath)

In [26]:
dtm = DigiTruthMapping(dataset='./fq_comparison_data/data_for_truth.pkl', mcset='./fq_comparison_data/truth_for_data.pkl')
print(dtm.get_data_entry(10000))

9825


In [27]:
print(dtm.get_truth_entry(10000))

10176


In [28]:
# find indices in true dataset for offset to fitqun files
print(truehits_dataset.labels.shape)
gamma_base_idx = np.where(truehits_dataset.labels == 0)[0][0]
e_base_idx     = np.where(truehits_dataset.labels == 1)[0][0]
mu_base_idx    = np.where(truehits_dataset.labels == 2)[0][0]

print(gamma_base_idx)
print(e_base_idx)
print(mu_base_idx)

(21000000,)
3000000
0
6000000


In [29]:
print(np.sort(np.where(truehits_dataset.labels == 2)[0]))

[6000000 6000001 6000002 ... 8999997 8999998 8999999]


In [30]:
digihits_labels = labels

In [31]:
'/fast_scratch/WatChMaL/data/IWCD_mPMT_Short_emg_E0to1000MeV_digihits.h5'
'/data/WatChMaL/data/IWCD_mPMT_Short_emg_E0to1000MeV_truehits.h5'

'/data/WatChMaL/data/IWCD_mPMT_Short_emg_E0to1000MeV_truehits.h5'

In [32]:
sample_idx = 590500
print(digihits_labels[sample_idx])
print(truehits_dataset.labels[dtm.get_truth_entry(sample_idx)])

1
1


In [33]:
sample_idx = 5890000
print(sample_idx)
print(dtm.get_truth_entry(sample_idx))

print(dtm.get_truth_entry(sample_idx) - sample_idx)

5890000
5989020
99020


In [34]:
print(set(dtm.data_for_truth.values()))

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [35]:
print(root_files[sample_idx])
print(truehits_dataset.root_files[dtm.get_truth_entry(sample_idx)])

b'/localscratch/prouse.56965017.0/WCSim/gamma/E0to1000MeV/unif-pos-R400-y300cm/4pi-dir/IWCD_mPMT_Short_gamma_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir_3000evts_996.root'
b'/localscratch/prouse.56965017.0/WCSim/gamma/E0to1000MeV/unif-pos-R400-y300cm/4pi-dir/IWCD_mPMT_Short_gamma_E0to1000MeV_unif-pos-R400-y300cm_4pi-dir_3000evts_996.root'


In [36]:
print(labels[5900828])
print(truehits_dataset.labels[5901000])

2
0


In [37]:
print(dtm.get_data_entry( 5901000 ))

5803179


In [38]:
print(truehits_dataset.root_files.shape)

(21000000,)


In [39]:
gamma_flags = gamma_file_data.arrays('fq1rpcflg')['fq1rpcflg']
e_flags     = e_file_data.arrays('fq1rpcflg')['fq1rpcflg']
mu_flags    = mu_file_data.arrays('fq1rpcflg')['fq1rpcflg']

gamma_fq1rpcflg_1 = np.array(gamma_flags[:, 0, 1])
e_fq1rpcflg_1     = np.array(e_flags[:, 0, 1])
mu_fq1rpcflg_1    = np.array(mu_flags[:, 0, 1])

gamma_fq1rpcflg_2 = np.array(gamma_flags[:, 0, 2])
e_fq1rpcflg_2     = np.array(e_flags[:, 0, 2])
mu_fq1rpcflg_2    = np.array(mu_flags[:, 0, 2])

In [40]:
# Construct mapping from test set to fitqun
gamma_fq_indices = []
e_fq_indices     = []
mu_fq_indices    = []
for idx in test_idxs:
    particle_label = labels[idx]
    truehits_index = dtm.get_truth_entry(idx)
    
    if particle_label == 0:
        gamma_fq_indices.append(truehits_index % gamma_base_idx)
    elif particle_label == 1:
        e_fq_indices.append(truehits_index)
    elif particle_label == 2:
        mu_fq_indices.append(truehits_index % mu_base_idx)
    
gamma_fq_indices = np.array(gamma_fq_indices)
e_fq_indices     = np.array(e_fq_indices)
mu_fq_indices    = np.array(mu_fq_indices)

In [41]:
test_fq1rpcflg_1 = np.concatenate((e_fq1rpcflg_1[e_fq_indices],
                                  mu_fq1rpcflg_1[mu_fq_indices],
                                  gamma_fq1rpcflg_1[gamma_fq_indices]
                                )) != 0

test_fq1rpcflg_2 = np.concatenate((e_fq1rpcflg_2[e_fq_indices],
                                  mu_fq1rpcflg_2[mu_fq_indices],
                                  gamma_fq1rpcflg_2[gamma_fq_indices]
                                )) != 0

In [42]:
## Define cuts

In [43]:
to_wall_cut = ((test_labels == 1) | (test_labels == 0)) & (test_to_wall < 0.63*np.log(test_true_momenta) - 2)
d_wall_cut  = test_d_wall < 0.5

In [44]:
print(np.max(test_d_wall))

2.9999810457229614


In [45]:
fq_comparison = to_wall_cut | d_wall_cut | mu_OD_veto | test_fq1rpcflg_1 | test_fq1rpcflg_2

In [46]:
print(np.bincount(to_wall_cut))
print(np.bincount(d_wall_cut))
print(np.bincount(mu_OD_veto))
print(np.bincount(test_fq1rpcflg_1))
print(np.bincount(test_fq1rpcflg_2))
print("####################")

print(np.bincount(fq_comparison))

[2763630  760852]
[2287665 1236817]
[3010566  513916]
[2847338  677144]
[3064010  460472]
####################
[1561425 1963057]


In [47]:
print(test_idxs.shape)
print('#######################')
print(np.where(to_wall_cut)[0].shape)
print(np.where(d_wall_cut)[0].shape)
print(np.where(mu_OD_veto)[0].shape)
print(np.where(test_fq1rpcflg_1)[0].shape)
print(np.where(test_fq1rpcflg_2)[0].shape)

(3524482,)
#######################
(760852,)
(1236817,)
(513916,)
(677144,)
(460472,)


In [48]:
print(fq_comparison.shape)
print(np.where(fq_comparison)[0].shape)

(3524482,)
(1963057,)


In [49]:
print(np.delete(test_idxs, np.where(fq_comparison)[0], 0).shape)

(1561425,)


In [50]:
fq_comparison_OD_veto = fq_comparison | e_OD_veto

In [51]:
cuts = {
        'fq_comparison'        : np.where(fq_comparison)[0],
        'fq_comparison_OD_veto': np.where(fq_comparison_OD_veto)[0],
        'to_wall_cut'          : np.where(to_wall_cut)[0],
        'd_wall_cut'           : np.where(d_wall_cut)[0],
        'e_OD_veto'            : np.where(e_OD_veto)[0],
        'mu_OD_veto'           : np.where(mu_OD_veto)[0],
        'fq1rpcflg_1'          : np.where(test_fq1rpcflg_1)[0],
        'fq1rpcflg_2'          : np.where(test_fq1rpcflg_2)[0]
        }

In [52]:
with open('./fq_comparison_data/3M_fitqun_cuts.pickle', 'wb') as handle:
    pickle.dump(cuts, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [53]:
fq_mapping = {
        'gamma_fq_indices': gamma_fq_indices,
        'e_fq_indices'    : e_fq_indices,
        'mu_fq_indices'   : mu_fq_indices,
        }

In [54]:
with open('./fq_comparison_data/3M_fitqun_mapping.pickle', 'wb') as handle:
    pickle.dump(fq_mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [55]:
np.savez('./fq_comparison_data/3M_fitqun_cuts.npz', cuts=cuts)

In [56]:
fq_test_set = np.concatenate((np.ones_like(np.array(e_fq_indices))*1,
                              np.ones_like(np.array(mu_fq_indices))*2,
                              np.ones_like(np.array(gamma_fq_indices))*0
                             )
                            )