# Notebook to extract only signal events from the given data

In [20]:
import sys,os,os.path
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tables as tb
from tqdm import tqdm

import csv
import traceback

# Make the parent directory importable; presumably this is where the local
# `core` and `quantification` packages imported further down live.
sys.path.append("../")
# NOTE(review): nothing visible in this notebook imports from this location —
# confirm the entry is still required.
sys.path.append(os.path.expanduser('~/code/eol_hsrl_python'))
# ICTDIR has to be defined before `invisible_cities` is imported below.
# `setdefault` respects an ICTDIR that is already exported in the environment
# and only falls back to the original machine-specific checkout when it is
# unset, so the notebook no longer clobbers another user's configuration.
os.environ.setdefault('ICTDIR', '/home/e78368jw/Documents/NEXT_CODE/IC')

from invisible_cities.core.core_functions   import shift_to_bin_centers
from invisible_cities.io.dst_io           import load_dst, load_dsts, df_writer

import core.loading_functions as load_func
import quantification.quantify_functions as quant_func

# Load in the data

In [21]:
# Directory holding the Monte Carlo input files.
full_path = '../data/MC/'

# Particle-level truth, read through the project loader ('MC' / 'particles').
MC_df = load_func.load_data_fast(full_path, 'MC', 'particles')
display(MC_df)

Unnamed: 0,event_id,particle_id,particle_name,primary,mother_id,initial_x,initial_y,initial_z,initial_t,final_x,...,initial_momentum_x,initial_momentum_y,initial_momentum_z,final_momentum_x,final_momentum_y,final_momentum_z,kin_energy,length,creator_proc,final_proc
0,12300000,1,Tl208,True,0,398.527893,401.795593,563.769714,0.000000,398.527893,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,none,RadioactiveDecay
1,12300000,4,e-,False,1,398.527893,401.795593,563.769714,0.000593,398.723450,...,0.866598,-1.133614,0.059521,0.000000,-0.000000,-0.000000,1.005819,0.721643,RadioactiveDecay,NoProcess
2,12300000,3,anti_nu_e,False,1,398.527893,401.795593,563.769714,0.000593,8578.574219,...,0.263275,0.149004,-0.420457,0.263275,0.149004,-0.420457,0.517977,16093.740234,RadioactiveDecay,Transportation
3,12300000,2,Pb208[3475.078],False,1,398.527893,401.795593,563.769714,0.000593,398.527893,...,-1.129872,0.984610,0.360936,-0.000000,0.000000,0.000000,0.000006,0.000000,RadioactiveDecay,RadioactiveDecay
4,12300000,6,gamma,False,2,398.527893,401.795593,563.769714,0.000593,419.103455,...,0.338387,0.497161,0.615531,0.000000,-0.000000,0.000000,0.860554,67.016731,RadioactiveDecay,phot
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265045,12204093,106596,e-,False,100813,387.775604,219.997345,238.799622,2.063528,387.778992,...,0.068059,-0.044950,0.114018,-0.000000,-0.000000,-0.000000,0.018881,0.647196,eIoni,NoProcess
265046,12204093,109494,e-,False,100813,389.590118,225.515747,240.684677,2.104760,389.620636,...,-0.011423,0.118143,0.010405,0.000000,0.000000,0.000000,0.013707,0.404894,eIoni,NoProcess
265047,12204093,110620,e-,False,100813,390.905304,228.111465,239.332016,2.123276,390.938751,...,-0.124717,0.196850,0.104513,0.000000,-0.000000,-0.000000,0.060269,5.201819,eIoni,NoProcess
265048,12204093,100813,e-,False,53,390.975769,205.736023,244.167709,1.944052,393.301849,...,-0.098265,-0.144932,-0.636418,0.000000,-0.000000,-0.000000,0.323752,48.867710,phot,NoProcess


In [22]:
# Step 1: particles whose initial volume is ACTIVE.
in_active = MC_df['initial_volume'] == 'ACTIVE'
active_volume = MC_df.loc[in_active]
print(f'Number of events in active volume: {active_volume.event_id.nunique()}')

# Step 2: of those, keep the positrons.
is_positron = active_volume['particle_name'] == 'e+'
positrons = active_volume.loc[is_positron]
print(f'Number of positron events: {positrons.event_id.nunique()}')

# Step 3: of those, keep positrons whose creator process is 'conv'.
from_conv = positrons['creator_proc'] == 'conv'
conv_positrons = positrons.loc[from_conv]
print(f'Number of positron events with conv creator proc: {conv_positrons.event_id.nunique()}')

# Event ids that survive all three cuts, and every MC particle row of those events.
selected_events = conv_positrons.event_id.unique()
selected_events_MC = MC_df.loc[MC_df['event_id'].isin(selected_events)]


Number of events in active volume: 2806
Number of positron events: 475
Number of positron events with conv creator proc: 475


# Select the RECO and MC hits of the selected events as well

In [23]:
# Both hit tables are read from the same Monte Carlo directory.
full_path = '../data/MC/'

RECO_df = load_func.load_data_fast(full_path, 'RECO', 'Events')  # RECO hits
MChits_df = load_func.load_data_fast(full_path, 'MC', 'hits')    # MC hits

# Show the RECO table first, then the MC hits.
display(RECO_df, MChits_df)

Unnamed: 0,event,time,npeak,Xpeak,Ypeak,nsipm,X,Y,Xrms,Yrms,Z,Q,E,Qc,Ec,track_id,Ep
0,12300000,2.460000e+13,0,317.485685,85.823177,1,335.525,168.675,0.0,0.0,499.824461,5.043478,380.105596,-1.0,0.001002,-1,-1.0
1,12300000,2.460000e+13,0,317.485685,85.823177,1,335.525,184.225,0.0,0.0,499.824461,6.782609,511.176490,-1.0,0.001367,-1,-1.0
2,12300000,2.460000e+13,0,317.485685,85.823177,1,351.075,168.675,0.0,0.0,499.824461,6.144928,463.117140,-1.0,0.001255,-1,-1.0
3,12300000,2.460000e+13,0,317.485685,85.823177,1,351.075,184.225,0.0,0.0,499.824461,9.043478,681.568606,-1.0,0.001880,-1,-1.0
4,12300000,2.460000e+13,0,317.485685,85.823177,1,366.625,184.225,0.0,0.0,499.824461,5.333333,401.950703,-1.0,0.001122,-1,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2669039,12204093,2.440819e+13,0,134.784905,74.657123,1,181.025,91.425,0.0,0.0,524.442797,11.942030,796.636348,-1.0,0.001976,-1,-1.0
2669040,12204093,2.440819e+13,0,134.784905,74.657123,1,181.025,106.975,0.0,0.0,524.442797,5.797101,386.716646,-1.0,0.000960,-1,-1.0
2669041,12204093,2.440819e+13,0,134.784905,74.657123,1,196.575,75.875,0.0,0.0,524.442797,8.463768,564.606291,-1.0,0.001401,-1,-1.0
2669042,12204093,2.440819e+13,0,134.784905,74.657123,1,149.925,75.875,0.0,0.0,527.828347,6.318841,2152.728505,-1.0,0.005323,-1,-1.0


Unnamed: 0,event_id,particle_id,hit_id,x,y,z,time,energy,label
0,12300000,39,0,352.022552,248.544846,508.099854,0.566156,0.000154,ACTIVE
1,12300000,46,0,352.022736,248.544662,508.099945,0.566252,0.000021,ACTIVE
2,12300000,45,0,352.022461,248.544876,508.099915,0.566188,0.000034,ACTIVE
3,12300000,44,0,352.022552,248.545090,508.099792,0.566248,0.000023,ACTIVE
4,12300000,43,0,352.022614,248.544830,508.099670,0.566254,0.000010,ACTIVE
...,...,...,...,...,...,...,...,...,...
3134536,12204093,100813,103,393.287750,227.403854,240.302551,2.187072,0.000048,ACTIVE
3134537,12204093,100813,104,393.291870,227.408783,240.302444,2.187221,0.000278,ACTIVE
3134538,12204093,100813,105,393.296753,227.411469,240.305679,2.187375,0.000011,ACTIVE
3134539,12204093,100813,106,393.305115,227.417709,240.309952,2.187645,0.002771,ACTIVE


In [24]:
# Keep only the rows that belong to the selected signal events.
# `.copy()` turns each selection into an independent DataFrame. Without it
# pandas flags the result as a slice of the full table, and the in-place edits
# made by the later extraction step (the recorded run shows
# `MChits['npeak'] = 1`) emit SettingWithCopyWarning.
selected_events_RECO = RECO_df[RECO_df.event.isin(selected_events)].copy()
selected_events_MChits = MChits_df[MChits_df.event_id.isin(selected_events)].copy()
display(selected_events_RECO)
display(selected_events_MChits)

Unnamed: 0,event,time,npeak,Xpeak,Ypeak,nsipm,X,Y,Xrms,Yrms,Z,Q,E,Qc,Ec,track_id,Ep
6459,12300006,2.460001e+13,0,6.515505,-148.908484,1,-19.125,-155.375,0.0,0.0,40.263651,7.188406,574.714427,-1.0,0.001402,-1,-1.0
6460,12300006,2.460001e+13,0,6.515505,-148.908484,1,-19.125,-139.825,0.0,0.0,40.263651,5.217391,417.131443,-1.0,0.001018,-1,-1.0
6461,12300006,2.460001e+13,0,6.515505,-148.908484,1,-3.575,-155.375,0.0,0.0,40.263651,15.072464,1205.046364,-1.0,0.002943,-1,-1.0
6462,12300006,2.460001e+13,0,6.515505,-148.908484,1,-3.575,-139.825,0.0,0.0,40.263651,9.449275,755.471336,-1.0,0.001847,-1,-1.0
6463,12300006,2.460001e+13,0,6.515505,-148.908484,1,10.975,-155.375,0.0,0.0,40.263651,8.811594,704.488618,-1.0,0.001721,-1,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2662050,12204085,2.440817e+13,1,200.640000,-7.964710,1,288.875,-16.925,0.0,0.0,1028.999786,6.608695,398.112332,-1.0,0.001020,-1,-1.0
2662051,12204085,2.440817e+13,1,200.640000,-7.964710,1,304.425,-16.925,0.0,0.0,1028.999786,5.565217,335.252499,-1.0,0.000870,-1,-1.0
2662052,12204085,2.440817e+13,1,200.640000,-7.964710,1,257.775,-16.925,0.0,0.0,1032.469781,5.855072,1413.585331,-1.0,0.003604,-1,-1.0
2662053,12204085,2.440817e+13,1,200.640000,-7.964710,1,257.775,-1.375,0.0,0.0,1032.469781,5.623188,1357.601767,-1.0,0.003466,-1,-1.0


Unnamed: 0,event_id,particle_id,hit_id,x,y,z,time,energy,label
7317,12300006,32,0,36.513824,-125.199722,105.147636,2.834566,0.000001,ACTIVE
7318,12300006,31,0,36.547150,-125.759644,104.332382,2.629001,0.002518,ACTIVE
7319,12300006,31,1,36.605511,-126.437218,103.599846,2.633066,0.002213,ACTIVE
7320,12300006,31,2,36.760361,-127.064156,102.854980,2.637081,0.002263,ACTIVE
7321,12300006,31,3,37.286903,-127.554703,102.174568,2.641119,0.000403,ACTIVE
...,...,...,...,...,...,...,...,...,...
3125036,12204085,69,52,209.448700,-34.775208,997.616333,2.203594,0.002556,ACTIVE
3125037,12204085,69,53,209.446777,-34.771420,997.600708,2.203996,0.002089,ACTIVE
3125038,12204085,69,54,209.446533,-34.771984,997.601624,2.204033,0.000024,ACTIVE
3125039,12204085,69,55,209.445343,-34.767570,997.604004,2.204250,0.002396,ACTIVE


# EXTRACT TOPOLOGICAL INFORMATION OF EACH SELECTED EVENT

In [None]:
# Run the project's true-MC extraction over the selected MC hits; the result is
# what the save cell writes under 'Tracking' / 'Tracks'.
# NOTE(review): slow — the recorded progress bar shows roughly 4 s per event
# over 475 events, and the captured run stopped at 85/475.
# NOTE(review): the warnings captured from this call (`MChits['npeak'] = 1`)
# suggest the helper modifies the frame it is given — confirm whether
# selected_events_MChits is meant to be altered before it is saved.
mc_out = quant_func.extract_true_MC_info(selected_events_MChits)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  MChits['npeak'] = 1
 18%|█▊        | 85/475 [05:45<27:23,  4.21s/it]

# SAVE

In [None]:
# Write every selected table into one HDF5 file.
output_file = '../data/CLEAN/signal_events.h5'
# Create the target directory up front: opening the file in 'w' mode fails if
# the folder is missing, which would throw away the long extraction step above.
os.makedirs(os.path.dirname(output_file), exist_ok=True)
with tb.open_file(output_file, 'w') as h5out:
    # Each call passes the frame followed by two names, as in the loading cells.
    df_writer(h5out, selected_events_MC, 'MC', 'particles')
    df_writer(h5out, selected_events_RECO, 'RECO', 'Events')
    df_writer(h5out, selected_events_MChits, 'MC', 'hits')
    df_writer(h5out, mc_out, 'Tracking', 'Tracks')
    