### Notebook designed to check whether applying the fiducial cut after certain other cuts alters its efficiency significantly (showing that its current ~20% efficiency isn't as bad as it seems)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import tables as tb
from matplotlib import colors

from typing          import Optional
from typing          import Union
from typing          import Callable

from concurrent.futures import ProcessPoolExecutor

import sys,os,os.path
from pathlib import Path
sys.path.append("/scratch/halmazan/NEXT/IC_alter-blob-centre/IC/")
sys.path.append(os.path.expanduser('~/code/eol_hsrl_python'))
sys.path.append("/scratch/halmazan/NEXT/testing/notebooks/")
os.environ['ICTDIR']='/scratch/halmazan/NEXT/IC_master/IC/'

from invisible_cities.io.dst_io           import load_dst, load_dsts, df_writer
from invisible_cities.io.hits_io          import hits_writer
from invisible_cities.core                import tbl_functions   as tbl
from invisible_cities.core.core_functions import in_range
#from invisible_cities.cities.beersheba    import hitc_to_df_
from invisible_cities.io.hits_io          import hits_from_df
from invisible_cities.evm.nh5             import HitsTable
from invisible_cities.types.symbols       import NormStrategy
from invisible_cities.types.ic_types      import NoneType
from invisible_cities.reco.corrections    import read_maps, get_df_to_z_converter, apply_all_correction
from invisible_cities.evm.event_model     import HitCollection

from tqdm import tqdm


import functions.functions_HE as func


In [2]:
def load_single_file(file_path):
    '''
    Read the /Tracking/Tracks table of one h5 file into a dataframe.

    Any exception raised while loading is reported on stdout and swallowed,
    so that one bad file does not abort a bulk load.

    Args:
        file_path       :       str
                                Path to the h5 file to be loaded.

    Returns:
        tracks_df       :       pandas.DataFrame
                                DataFrame containing the /Tracking/Tracks data
                                (an empty DataFrame if loading failed).
        failed          :       int
                                1 if the file failed to load, 0 otherwise.
    '''
    try:
        loaded = load_dst(file_path, 'Tracking', 'Tracks')
    except Exception as err:
        # Best-effort: report the failure and hand back an empty placeholder
        print(f'File {file_path} broke with error:\n{err}')
        return pd.DataFrame(), 1
    else:
        return loaded, 0

def load_data_fast(folder_path):
    '''
    Load every h5 file in a folder in parallel and concatenate their /Tracking/Tracks tables.

    Each file is read in a worker process through `load_single_file`; a file that
    fails to load contributes an empty DataFrame and is counted as a failure.

    Args:
        folder_path     :       str
                                Path to the folder containing the h5 files.

    Returns:
        tracks          :       pandas.DataFrame
                                Concatenated DataFrame containing the /Tracking/Tracks data from all h5 files
                                (an empty DataFrame if the folder holds no h5 files).
        total_failures  :       int
                                Total number of failed file loads.
    '''
    # Sorted so that the row order of the result does not depend on the
    # (arbitrary) order in which the filesystem lists the directory.
    file_paths = sorted(os.path.join(folder_path, f)
                        for f in os.listdir(folder_path)
                        if f.endswith('.h5'))

    # Nothing to load: unpacking zip(*[]) and pd.concat([]) would both raise,
    # so report an empty result with no failures instead.
    if not file_paths:
        return pd.DataFrame(), 0

    with ProcessPoolExecutor() as executor:
        results = list(executor.map(load_single_file, file_paths))

    # Split the (dataframe, failed) pairs into two parallel tuples
    tracks_list, failures = zip(*results)

    tracks = pd.concat(tracks_list, axis=0, ignore_index=True)

    # Each failure flag is 0 or 1, so the sum is the number of failed files
    total_failures = sum(failures)

    return tracks, total_failures


### select runs and timestamps

In [3]:
# Runs to analyse and the processing timestamp of each run (paired by position)
RUN_NUMBERS = [15589]#, 15590, 15591, 15592]
TIMESTAMPS  = ['355021'] # compare to 557018, and 658021

# Root directory under which the thekla files are located: 'data' or 'scratch'
data_path = 'data'

# Underscore-joined labels built from all selected runs / timestamps
RUN_NUMBER = "_".join(str(run) for run in RUN_NUMBERS)
TIMESTAMP  = "_".join(str(stamp) for stamp in TIMESTAMPS)
CITY = 'thekla'
print(RUN_NUMBERS, TIMESTAMP)

[15589] 355021


### load in

In [4]:
# Load the /Tracking/Tracks tables of every selected (run, timestamp) pair,
# one ldc sub-folder at a time, and merge them into the single dataframe `hdst`.
# Works for a single run as well as for several runs.
hdst = []
errors = 0  # running total of files that failed to load
for q, (RN, TS) in enumerate(zip(RUN_NUMBERS, TIMESTAMPS)):
    print(f'R-{RN}, TS-{TS}')
    # Directory with this run's output for the chosen city and timestamp;
    # `data_path` selects the root ('data' or 'scratch').
    n100_dir = f'/{data_path}/halmazan/NEXT/N100_LPR/{RN}/{CITY}/{TS}/'
    

    # Sub-folders are named ldc1 ... ldc7
    for i in tqdm(range(1,8)):
        print(f"LDC {i}")
        folder_path = n100_dir + 'ldc' + str(i) + '/'
        holder, err = load_data_fast(folder_path)
        r = holder  # alias: `r` and `holder` are the same dataframe object
        errors += err
        # NOTE(review): rescales the event column by 1/(q+1), q being the position
        # of the run in RUN_NUMBERS. For the first run the factor is 1.0 (values
        # unchanged, though an integer column presumably becomes float); for later
        # runs the event numbers are divided. Presumably intended to keep events of
        # different runs apart — confirm this cannot create collisions
        # (e.g. event 2 of the second run becomes 1.0).
        r['event'] *= 1/(q+1)
        hdst.append(r)

# Replace the list of per-folder dataframes by their concatenation
hdst = pd.concat(hdst)

print(f'Number of failed files: {errors}')

R-15589, TS-355021


  0%|                                                                                           | 0/7 [00:00<?, ?it/s]

LDC 1
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc1/run_15589_4746_ldc1_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc1/run_15589_0549_ldc1_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc1/run_15589_3509_ldc1_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc1/run_15589_4285_ldc1_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc1/run_15589_5231_ldc1_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``

File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc1/run_15589_3686_ldc1_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tra

 14%|███████████▊                                                                       | 1/7 [01:12<07:14, 72.43s/it]

LDC 2
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc2/run_15589_0826_ldc2_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc2/run_15589_2406_ldc2_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc2/run_15589_1201_ldc2_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc2/run_15589_4996_ldc2_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc2/run_15589_5383_ldc2_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc2/run_15589_5189_ldc2_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tra

 29%|███████████████████████▋                                                           | 2/7 [02:26<06:07, 73.57s/it]

LDC 3
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc3/run_15589_0930_ldc3_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc3/run_15589_4349_ldc3_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc3/run_15589_1088_ldc3_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc3/run_15589_4247_ldc3_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc3/run_15589_0555_ldc3_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc3/run_15589_3006_ldc3_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Trac

 43%|███████████████████████████████████▌                                               | 3/7 [03:45<05:02, 75.75s/it]

LDC 4
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc4/run_15589_2669_ldc4_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc4/run_15589_2806_ldc4_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc4/run_15589_1521_ldc4_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc4/run_15589_3002_ldc4_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc4/run_15589_2229_ldc4_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc4/run_15589_1240_ldc4_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Trac

 57%|███████████████████████████████████████████████▍                                   | 4/7 [05:07<03:54, 78.24s/it]

LDC 5
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc5/run_15589_1041_ldc5_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc5/run_15589_0172_ldc5_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc5/run_15589_3683_ldc5_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc5/run_15589_5384_ldc5_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc5/run_15589_3538_ldc5_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc5/run_15589_2397_ldc5_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tra

 71%|███████████████████████████████████████████████████████████▎                       | 5/7 [06:21<02:33, 76.86s/it]

LDC 6
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc6/run_15589_2201_ldc6_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc6/run_15589_0545_ldc6_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc6/run_15589_1101_ldc6_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc6/run_15589_3960_ldc6_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc6/run_15589_0457_ldc6_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc6/run_15589_2266_ldc6_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tra

 86%|███████████████████████████████████████████████████████████████████████▏           | 6/7 [07:36<01:16, 76.03s/it]

LDC 7
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc7/run_15589_4607_ldc7_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc7/run_15589_3577_ldc7_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc7/run_15589_2706_ldc7_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc7/run_15589_2072_ldc7_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``

File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc7/run_15589_4472_ldc7_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tracking``
File /data/halmazan/NEXT/N100_LPR/15589/thekla/355021/ldc7/run_15589_2446_ldc7_230725_thekla.h5 broke with error:
group ``/`` does not have a child named ``Tra

100%|███████████████████████████████████████████████████████████████████████████████████| 7/7 [08:51<00:00, 75.98s/it]

Number of failed files: 194





In [5]:
#######################################################################################
#########################   CUT PARAMETERS   #####################################
#######################################################################################
# Thresholds handed to the cut functions further down.
# NOTE(review): units are not stated in this notebook — presumably mm for z / radius
# and MeV for energy; confirm against the cut functions.
low_z, upp_z = 20, 1195     # lower / upper z limit
r_lim        = 450          # radius limit
low_e, upp_e = 1.5, 1.7     # lower / upper energy limit

In [6]:
# Run the full cut chain with the thresholds chosen above and show the
# efficiency table it reports.
cut_kwargs = dict(lower_z = low_z,
                  upper_z = upp_z,
                  r_lim   = r_lim,
                  lower_e = low_e,
                  upper_e = upp_e)
cut_hdst, efficiencies = func.apply_cuts(hdst, **cut_kwargs)

display(efficiencies)

Cutting events around fiducial volume related to:
Z range between 20 and 1195
Radius range < 450
Fiducial track cut
Relative Cut efficiency:
Efficiency: 21.43 %
Absolute Cut efficiency:
Efficiency: 21.43 %
One track cut
Relative Cut efficiency:
Efficiency: 53.57 %
Absolute Cut efficiency:
Efficiency: 22.41 %

Blob overlap cut
Relative Cut efficiency:
Efficiency: 97.07 %
Absolute Cut efficiency:
Efficiency: 22.17 %
Energy cut
Relative Cut efficiency:
Efficiency: 60.18 %
Absolute Cut efficiency:
Efficiency: 29.92 %


Unnamed: 0,Cut,Relative Efficiency,Relative Events,Single Cut Efficiency
0,No cuts,100.0,191184,100.0
1,Fiducial Cuts,21.428571,40968,21.428571
2,One track cut,53.573521,21948,22.413486
3,Blob overlap cuts,97.074904,21306,22.168696
4,Energy cuts,60.184924,12823,29.918299


In [15]:
def cut_bespoke(fnc, df, args = None):
    '''
    Apply a single cut function to a dataframe and report its efficiency.

    fnc  -> function to apply cuts, called as fnc(df) or fnc(df, *args)
    df   -> dataframe to be cut
    args -> iterable of extra positional arguments for fnc (cuts, etc), if none put None

    Returns a tuple (cut_df, ef): the output of fnc and the value returned by
    func.cut_effic(cut_df, df, verbose = True).
    '''
    # Report the cut function actually being applied. `func` is the imported
    # functions_HE module, so printing it would show the same module repr for every cut.
    print(f"Applying {getattr(fnc, '__name__', fnc)} with args:\n{args}")

    if args is None:
        cut_df = fnc(df)
    else:
        cut_df = fnc(df, *args)
    print('Relative cut efficiency:')
    # Efficiency of the cut dataframe relative to the dataframe that was passed in
    ef = func.cut_effic(cut_df, df, verbose = True)

    return (cut_df, ef)

In [17]:
# Apply the one-track cut first, then the fiducial cut on the surviving events,
# so the fiducial efficiency is measured relative to the one-track sample.
oneT_df, oneT_ef = cut_bespoke(func.one_track_cuts, hdst)
# Unpack into two distinct names so the fiducial-cut dataframe is kept alongside its efficiency
fid_df, fid_ef = cut_bespoke(func.fiducial_track_cut_2, oneT_df, (low_z, upp_z, r_lim))

Applying <module 'functions.functions_HE' from '/scratch/halmazan/NEXT/testing/notebooks/functions/functions_HE.py'> with args:
None
Relative cut efficiency:
Efficiency: 22.41 %
Events in reduced dataframe: 42851
Events in initial dataframe: 539034
Applying <module 'functions.functions_HE' from '/scratch/halmazan/NEXT/testing/notebooks/functions/functions_HE.py'> with args:
(20, 1195, 450)
Relative cut efficiency:
Efficiency: 51.22 %
Events in reduced dataframe: 21948
Events in initial dataframe: 42851
