## Preparing the CNN input

In [28]:
import uproot
import awkward as ak
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import os

* Dataframe definition:

In [29]:
class ND280_dataset:
    def __init__(self, path):
        #tree_hattracks = uproot.open(path)['hatdigits']
        tree_hattracks = uproot.open(path)['hatdigits']
        
        df = tree_hattracks.arrays()
        tree = ak.to_dataframe(df)
        keys = tree.index.get_level_values('entry').unique()
        
        self.tree = tree
        self.keys = keys
        
    def __getitem__(self, i):
        key = self.keys[i]
        return self.tree.loc[key]
    
    def __len__(self):
        return len(self.keys)

**Step 1**  
Define path_1 and path_2 dataframes and assign 'npart' column with the information of the number of particles involved.

- path_1 (corresponds to events with a single particle involved):

In [30]:
path_1="/data/neutrinos/common/casado/T2K/HAT-Reco/treemaker_mu_minus_800.root"

In [31]:
ds = ND280_dataset(path_1)

In [32]:
df = pd.DataFrame(ds.tree)

In [33]:
df_unique_entries = df.groupby('entry').first()

In [34]:
df.groupby('entry').first()

Unnamed: 0_level_0,event,hat,plate,fem,fec,asic,channel,time,nsamples,adc,row,col,y,z,qmax,tmax,fwhm
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,0,0,0,2,1,3,9,144,52,246,4,24,849.315002,-1660.115967,292,154,24
1,0,0,0,2,1,3,10,91,68,246,4,25,849.315002,-1648.835938,650,113,22
2,0,0,0,2,1,3,11,86,49,257,4,26,849.315002,-1637.555908,1899,105,14
3,0,0,0,2,1,3,20,134,66,281,5,25,859.505005,-1648.835938,304,158,30
4,0,0,0,2,1,3,21,101,119,253,5,26,859.505005,-1637.555908,311,141,61
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2087925,9999,0,0,7,0,0,64,73,48,227,30,4,773.255005,-1464.715942,820,91,14
2087926,9999,0,0,7,0,0,65,74,61,260,30,3,773.255005,-1475.995972,415,91,18
2087927,9999,0,0,7,0,0,72,79,52,247,31,6,783.445007,-1442.156006,357,97,23
2087928,9999,0,0,7,0,0,73,72,46,249,31,5,783.445007,-1453.435913,996,90,13


In [35]:
print(len(df.groupby('event').first())-1)

9999


In [36]:
result_array = []
len_array = []
event_array = []

for event_value in range(9999):
    df_sel = df_unique_entries[df_unique_entries['event'] == event_value]
    
    data_list = []  
    
    for fem_value in range(8):
        df_sel_fem = df_sel[df_sel['fem'] == fem_value]
        num_inputs = len(df_sel_fem)

        if num_inputs != 0:
            mask_qmax = df_sel_fem['qmax'] != 0
            mask_tmax = df_sel_fem['tmax'] != 0

            qmax_values = df_sel_fem['qmax'][mask_qmax]
            tmax_values = df_sel_fem['tmax'][mask_tmax]
            
            # Apply row and col transformations based on fem_value
            row_offset = 0
            col_offset = 0
            if fem_value == 0:
                row_offset += 32
                col_offset += 0
            elif fem_value == 1:
                row_offset += 32
                col_offset += 36
            elif fem_value == 2:
                row_offset += 32
                col_offset += 72
            elif fem_value == 3:
                row_offset += 32
                col_offset += 108
            elif fem_value == 5:
                col_offset += 36
            elif fem_value == 6:
                col_offset += 72
            elif fem_value == 7:
                col_offset += 108

            row_values = df_sel_fem['row'][mask_qmax] + row_offset
            col_values = df_sel_fem['col'][mask_qmax] + col_offset
            
            for qmax, tmax, row, col in zip(qmax_values, tmax_values, row_values, col_values):
                data_list.append([row, col, qmax, tmax])
            
    result_array.append(data_list)
    len_array.append(len(data_list))
    event_array.append(event_value)

df_result_1 = pd.DataFrame({
    'event': event_array,
    'nhits': len_array,
    '[row, col, qmax, tmax]': result_array})

df_result_1

Unnamed: 0,event,nhits,"[row, col, qmax, tmax]"
0,0,223,"[[36, 96, 292, 154], [36, 97, 650, 113], [36, ..."
1,1,207,"[[36, 116, 293, 64], [32, 113, 671, 48], [32, ..."
2,2,217,"[[34, 88, 300, 81], [34, 89, 450, 56], [33, 88..."
3,3,207,"[[32, 98, 328, 59], [41, 106, 287, 100], [41, ..."
4,4,215,"[[36, 116, 477, 64], [37, 116, 297, 80], [32, ..."
...,...,...,...
9994,9994,223,"[[36, 70, 287, 136], [36, 71, 526, 90], [35, 6..."
9995,9995,210,"[[38, 89, 298, 125], [37, 88, 291, 123], [37, ..."
9996,9996,197,"[[40, 133, 373, 97], [40, 134, 714, 91], [41, ..."
9997,9997,223,"[[36, 98, 300, 113], [32, 95, 628, 90], [32, 9..."


In [37]:
df_result_1['npart'] = 1

In [38]:
df_result_1

Unnamed: 0,event,nhits,"[row, col, qmax, tmax]",npart
0,0,223,"[[36, 96, 292, 154], [36, 97, 650, 113], [36, ...",1
1,1,207,"[[36, 116, 293, 64], [32, 113, 671, 48], [32, ...",1
2,2,217,"[[34, 88, 300, 81], [34, 89, 450, 56], [33, 88...",1
3,3,207,"[[32, 98, 328, 59], [41, 106, 287, 100], [41, ...",1
4,4,215,"[[36, 116, 477, 64], [37, 116, 297, 80], [32, ...",1
...,...,...,...,...
9994,9994,223,"[[36, 70, 287, 136], [36, 71, 526, 90], [35, 6...",1
9995,9995,210,"[[38, 89, 298, 125], [37, 88, 291, 123], [37, ...",1
9996,9996,197,"[[40, 133, 373, 97], [40, 134, 714, 91], [41, ...",1
9997,9997,223,"[[36, 98, 300, 113], [32, 95, 628, 90], [32, 9...",1


- path_2 (corresponds to events with two particles involved):

In [39]:
path_2= "/data/neutrinos/common/casado/T2K/HAT-Reco/treemaker_two_muons_29Aug2023.root"

In [40]:
ds = ND280_dataset(path_2)

In [41]:
df = pd.DataFrame(ds.tree)

In [42]:
df_unique_entries = df.groupby('entry').first()

In [43]:
print(len(df.groupby('event').first())-1)

9999


In [44]:
result_array = []
len_array = []
event_array = []

for event_value in range(9999):
    df_sel = df_unique_entries[df_unique_entries['event'] == event_value]
    
    data_list = []  
    
    for fem_value in range(8):
        df_sel_fem = df_sel[df_sel['fem'] == fem_value]
        num_inputs = len(df_sel_fem)

        if num_inputs != 0:
            mask_qmax = df_sel_fem['qmax'] != 0
            mask_tmax = df_sel_fem['tmax'] != 0

            qmax_values = df_sel_fem['qmax'][mask_qmax]
            tmax_values = df_sel_fem['tmax'][mask_tmax]
            
            # Apply row and col transformations based on fem_value
            row_offset = 0
            col_offset = 0
            if fem_value == 0:
                row_offset += 32
                col_offset += 0
            elif fem_value == 1:
                row_offset += 32
                col_offset += 36
            elif fem_value == 2:
                row_offset += 32
                col_offset += 72
            elif fem_value == 3:
                row_offset += 32
                col_offset += 108
            elif fem_value == 5:
                col_offset += 36
            elif fem_value == 6:
                col_offset += 72
            elif fem_value == 7:
                col_offset += 108

            row_values = df_sel_fem['row'][mask_qmax] + row_offset
            col_values = df_sel_fem['col'][mask_qmax] + col_offset
            
            for qmax, tmax, row, col in zip(qmax_values, tmax_values, row_values, col_values):
                data_list.append([row, col, qmax, tmax])
            
    result_array.append(data_list)
    len_array.append(len(data_list))
    event_array.append(event_value)

df_result_2 = pd.DataFrame({
    'event': event_array,
    'nhits': len_array,
    '[row, col, qmax, tmax]': result_array})

df_result_2

Unnamed: 0,event,nhits,"[row, col, qmax, tmax]"
0,0,297,"[[48, 71, 310, 293], [47, 71, 352, 294], [46, ..."
1,1,397,"[[59, 31, 333, 326], [59, 30, 585, 321], [59, ..."
2,2,370,"[[59, 69, 654, 355], [59, 68, 372, 356], [58, ..."
3,3,421,"[[59, 57, 319, 290], [59, 58, 527, 287], [58, ..."
4,4,347,"[[56, 35, 367, 373], [57, 35, 352, 369], [58, ..."
...,...,...,...
9994,9994,379,"[[51, 53, 472, 376], [50, 53, 673, 375], [49, ..."
9995,9995,381,"[[56, 68, 370, 296], [56, 69, 478, 292], [57, ..."
9996,9996,387,"[[59, 70, 495, 344], [59, 69, 425, 346], [58, ..."
9997,9997,380,"[[56, 32, 400, 360], [56, 33, 842, 356], [56, ..."


In [45]:
df_result_2['npart'] = 2

In [46]:
df_result_2

Unnamed: 0,event,nhits,"[row, col, qmax, tmax]",npart
0,0,297,"[[48, 71, 310, 293], [47, 71, 352, 294], [46, ...",2
1,1,397,"[[59, 31, 333, 326], [59, 30, 585, 321], [59, ...",2
2,2,370,"[[59, 69, 654, 355], [59, 68, 372, 356], [58, ...",2
3,3,421,"[[59, 57, 319, 290], [59, 58, 527, 287], [58, ...",2
4,4,347,"[[56, 35, 367, 373], [57, 35, 352, 369], [58, ...",2
...,...,...,...,...
9994,9994,379,"[[51, 53, 472, 376], [50, 53, 673, 375], [49, ...",2
9995,9995,381,"[[56, 68, 370, 296], [56, 69, 478, 292], [57, ...",2
9996,9996,387,"[[59, 70, 495, 344], [59, 69, 425, 346], [58, ...",2
9997,9997,380,"[[56, 32, 400, 360], [56, 33, 842, 356], [56, ...",2


**Step 2**  
Merge both dataframes into one

In [47]:
df_both_paths=pd.concat([df_result_1, df_result_2])

In [48]:
df_both_paths.reset_index(drop=True, inplace=True)

In [49]:
df_both_paths

Unnamed: 0,event,nhits,"[row, col, qmax, tmax]",npart
0,0,223,"[[36, 96, 292, 154], [36, 97, 650, 113], [36, ...",1
1,1,207,"[[36, 116, 293, 64], [32, 113, 671, 48], [32, ...",1
2,2,217,"[[34, 88, 300, 81], [34, 89, 450, 56], [33, 88...",1
3,3,207,"[[32, 98, 328, 59], [41, 106, 287, 100], [41, ...",1
4,4,215,"[[36, 116, 477, 64], [37, 116, 297, 80], [32, ...",1
...,...,...,...,...
19993,9994,379,"[[51, 53, 472, 376], [50, 53, 673, 375], [49, ...",2
19994,9995,381,"[[56, 68, 370, 296], [56, 69, 478, 292], [57, ...",2
19995,9996,387,"[[59, 70, 495, 344], [59, 69, 425, 346], [58, ...",2
19996,9997,380,"[[56, 32, 400, 360], [56, 33, 842, 356], [56, ...",2


**Step 3**   
Randomize the inputs entries

In [50]:
input_paths=df_both_paths.sample(frac=1)

In [51]:
input_paths.reset_index(drop=True, inplace=True)

In [52]:
input_paths

Unnamed: 0,event,nhits,"[row, col, qmax, tmax]",npart
0,5893,186,"[[39, 125, 286, 156], [38, 125, 370, 106], [37...",1
1,8337,318,"[[59, 35, 1051, 342], [59, 34, 800, 343], [59,...",2
2,7457,379,"[[56, 26, 294, 364], [57, 26, 292, 345], [57, ...",2
3,8259,286,"[[32, 62, 1374, 119], [33, 62, 335, 148], [41,...",1
4,576,224,"[[37, 89, 326, 71], [36, 88, 308, 81], [36, 89...",1
...,...,...,...,...
19993,8691,371,"[[36, 61, 294, 123], [36, 62, 852, 76], [37, 6...",1
19994,7606,361,"[[56, 49, 376, 353], [56, 50, 904, 345], [56, ...",2
19995,8464,210,"[[36, 60, 287, 177], [36, 61, 424, 120], [36, ...",1
19996,7720,208,"[[36, 88, 291, 192], [36, 89, 375, 94], [35, 8...",1


Save Dataframe:

In [53]:
path_pwd='/data/neutrinos/common/mrodrigu/CNN_discriminate_1-2_particles/CNN_bulding/input.csv'

In [54]:
input_paths.to_csv(path_pwd, index=False)