### FOM producer
This FOM producer needs to:
- Read in isaura h5 files from a folder
- Apply the appropriate cuts to isaura h5 files.
    - Need to be modifiable!
- Calculate a true FOM across a folder of isaura h5 files.
- Loop this across multiple folders (tricky!)
- Output FOM plot from this

In [3]:
import sys,os,os.path

sys.path.append("../../")   # cite IC from parent directory
#sys.path.append(os.path.expanduser('~/code/eol_hsrl_python'))
os.environ['ICTDIR']='/home/e78368jw/Documents/NEXT_CODE/IC'

import matplotlib.pyplot as plt
import pandas as pd
import numpy  as np
import tables as tb
import IC.invisible_cities.io.dst_io                           as     dstio
import IC.invisible_cities.io.mcinfo_io as mcio
from    IC.invisible_cities.core.core_functions   import shift_to_bin_centers
import iminuit,probfit

import scipy.special as special
from scipy.stats import skewnorm
from scipy.optimize import curve_fit

import next_misc.cluster_code.bin.fom_functions as func ### POINT ME TO THE CLUSTER-CODE DIRECTORY

Load data

In [5]:
test_data = 'data/isaura_test/'
def full_monty(path, port, output_folder):
    '''
    Will do everything as explained above

    path is directory
    port is port of interest (1a, 1b, 2a, 2b)
    output_folder is filepath and name of folder relative to path

    So for example, you have a folder structure of:
    105_7e-3/PORT_1a/isaura/isaura_1_208Tl.h5
    105_7e-3/PORT_1a/isaura/isaura_2_208Tl.h5
    105_7e-3/PORT_1a/isaura/...
    105_7e-3/PORT_1a/isaura/isaura_300_208Tl.h5
    .
    .
    .
    90_7e-3/PORT_1a/isaura/isaura_1_208Tl.h5
    90_7e-3/PORT_1a/isaura/isaura_2_208Tl.h5
    and so on.

    You input the path to the outer directory, and the port of interest
    and it will collect and process the isaura data within it.

    And output do a output folder respective to the path.
    '''
    ####################################
    # Change parameters for cutting here:
    ####################################

    # FIDUCIAL
    lower_z = 20
    upper_z = 1170
    r_lim = 415

    # ENERGY CUTS
    lower_e = 1.5
    upper_e = 1.7

    # SATELLITE REMOVAL
    energy_limit = 0.05

    ####################################
    # FOM cut list
    ####################################
    cut_list = np.linspace(0,0.6,61)

    print("Opening files...")
    # load data from path
    dire = path + "PORT_" + str(port) + "/isaura/"
    data = func.load_data(dire)

    tracks      = data[0]
    particles   = data[1]
    eventmap    = data[2]

    print("Applying cuts")

    # removing satellite tracks
    #low_e_cut_tracks = func.remove_low_E_events(tracks, energy_limit)
    low_e_cut_tracks = tracks[tracks.energy > 0.05] # just get rid of the satellites

    # apply cuts
    cut_output = func.apply_cuts(low_e_cut_tracks, lower_z, upper_z, r_lim, lower_e, upper_e)
    cut_data = cut_output[0]
    efficiencies = cut_output[1]

    print("Calculating FOM")

    # calculate FOM
    fom_output = func.apply_FOM(dire, cut_data, cut_list)

    # apply them to the efficiencies
    efficiencies.loc[len(efficiencies.index)] = ['pos_evt - all_evt', fom_output[0], len(cut_data), 0]
    efficiencies.loc[len(efficiencies.index)] = ['FOM_MAX - blob2_E_val (MeV)', fom_output[1], fom_output[2], 0]


    # write to respective directories
    out_dir = path+output_folder
    if not (os.path.isdir(out_dir)):
        os.mkdir(out_dir)
    efficiencies.to_csv(str(out_dir) + '/efficiency.csv')
    # Save the data to a h5 file
    cut_data.to_hdf(str(out_dir) + '/post_cuts.h5', key='cut_data', mode = 'w')
    print("Data written")
    np.save(np.unique(cut_data.event.to_numpy()), 'post_cut_events.npy')
    return (efficiencies)

In [7]:
#full_monty('data/cluster_DFtesting/105_7e-3/', '1a', 'testing_output')
full_monty('../FOM_merge&fit/12_12_18/', '1a', 'testing_output')


Opening files...
50
100
150
200
250
300
Applying cuts
Cutting events around fiducial volume related to:
Z range between 20 and 1170
Radius range < 415
Fiducial track cut
Relative Cut efficiency:
Efficiency: 45.85 %
Absolute Cut efficiency:
Efficiency: 45.85 %
One track cut
Relative Cut efficiency:
Efficiency: 69.16 %
Absolute Cut efficiency:
Efficiency: 62.26 %
Blob overlap cut
Relative Cut efficiency:
Efficiency: 88.49 %
Absolute Cut efficiency:
Efficiency: 79.53 %
Energy cut
Relative Cut efficiency:
Efficiency: 9.08 %
Absolute Cut efficiency:
Efficiency: 6.57 %
Calculating FOM


  fom.append(e[i]/np.sqrt(b[i]))
  fom.append(e[i]/np.sqrt(b[i]))


FOM values:
[1.00000000e+000 1.00000000e+000 1.01393877e+000 1.05708300e+000
 1.13092269e+000 1.24766681e+000 1.39698519e+000 1.51484650e+000
 1.66272349e+000 1.77710987e+000 1.95327908e+000 2.05830909e+000
 2.09457612e+000 2.14397098e+000 2.17314843e+000 2.23943953e+000
 2.37731884e+000 2.38369803e+000 2.41191742e+000 2.44730724e+000
 2.39436165e+000 2.41707970e+000 2.47982218e+000 2.60031893e+000
 2.49111470e+000 2.55764841e+000 2.76414314e+000 2.38666104e+000
 2.39733333e+000 2.29582493e+000 2.13536124e+000 1.79094814e+000
 1.42931438e+000 1.29220336e+000 9.52646270e-001 1.16325494e+000
 1.28654545e+000 9.91272727e-001 6.32727273e-001 1.79769313e+308
 1.79769313e+308 1.79769313e+308 1.79769313e+308 1.79769313e+308
 1.79769313e+308 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+

TypeError: expected str, bytes or os.PathLike object, not numpy.ndarray

need now to try and iterate over the outer folders in cluster_DFtesting

In [20]:
try:
    file_names = [f for f in os.listdir('data/cluster_DFtesting/')]
except:
    print("File path incorrect, please state the correct file path\n(but not any particular folder!)")


In [21]:
print(file_names)

['90_5e-3', '105_5e-3', '105_7e-3', '105_6e-3']


In [31]:
monty_output = []
# wipe out the previous output_test file
if os.path.exists('output_test.h5'):
    os.remove('output_test.h5')
for i in range(len(file_names)):
    path_ = 'data/cluster_DFtesting/' + str(file_names[i]) + "/"
    monty_output.append(full_monty(path_, '1a', 'loop_testing'))
    monty_output[i].to_hdf('output_test.h5', key=str(file_names[i]), mode = 'a', format='table', data_columns=True)


Opening files...
Applying cuts


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  remove_low_E['numb_of_tracks'] = remove_low_E['event'].map(event_counts)


Cutting events around fiducial volume related to:
Z range between 20 and 1195
Radius range < 472
Fiducial track cut
Relative Cut efficiency:
Efficiency: 65.36 %
Absolute Cut efficiency:
Efficiency: 65.36 %
One track cut
Relative Cut efficiency:
Efficiency: 72.18 %
Absolute Cut efficiency:
Efficiency: 65.99 %
Blob overlap cut
Relative Cut efficiency:
Efficiency: 54.15 %
Absolute Cut efficiency:
Efficiency: 46.24 %
Energy cut
Relative Cut efficiency:
Efficiency: 0.00 %
Absolute Cut efficiency:
Efficiency: 0.00 %
No events left in ROI... jobs done!
Calculating FOM


  if ((i%chunker) == 0):
  if ((i%chunker) == 0):


Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
FOM values:
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Data written
Opening files...


  fom.append(e[i]/np.sqrt(b[i]))
  check_attribute_name(name)


Applying cuts
Cutting events around fiducial volume related to:
Z range between 20 and 1195
Radius range < 472
Fiducial track cut
Relative Cut efficiency:
Efficiency: 61.57 %
Absolute Cut efficiency:
Efficiency: 61.57 %
One track cut
Relative Cut efficiency:
Efficiency: 78.91 %
Absolute Cut efficiency:
Efficiency: 70.69 %
Blob overlap cut
Relative Cut efficiency:
Efficiency: 58.47 %
Absolute Cut efficiency:
Efficiency: 47.94 %
Energy cut
Relative Cut efficiency:
Efficiency: 0.00 %
Absolute Cut efficiency:
Efficiency: 0.00 %
No events left in ROI... jobs done!
Calculating FOM
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 

  check_attribute_name(name)


Applying cuts
Cutting events around fiducial volume related to:
Z range between 20 and 1195
Radius range < 472
Fiducial track cut
Relative Cut efficiency:
Efficiency: 64.53 %
Absolute Cut efficiency:
Efficiency: 64.53 %
One track cut
Relative Cut efficiency:
Efficiency: 75.76 %
Absolute Cut efficiency:
Efficiency: 70.11 %
Blob overlap cut
Relative Cut efficiency:
Efficiency: 57.71 %
Absolute Cut efficiency:
Efficiency: 46.65 %
Energy cut
Relative Cut efficiency:
Efficiency: 0.00 %
Absolute Cut efficiency:
Efficiency: 0.00 %
No events left in ROI... jobs done!
Calculating FOM
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 

  check_attribute_name(name)


Applying cuts
Cutting events around fiducial volume related to:
Z range between 20 and 1195
Radius range < 472
Fiducial track cut
Relative Cut efficiency:
Efficiency: 67.89 %
Absolute Cut efficiency:
Efficiency: 67.89 %
One track cut
Relative Cut efficiency:
Efficiency: 73.85 %
Absolute Cut efficiency:
Efficiency: 69.45 %
Blob overlap cut
Relative Cut efficiency:
Efficiency: 57.29 %
Absolute Cut efficiency:
Efficiency: 50.65 %
Energy cut
Relative Cut efficiency:
Efficiency: 0.00 %
Absolute Cut efficiency:
Efficiency: 0.00 %
No events left in ROI... jobs done!
Calculating FOM
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 0
Zero-div error, appending 

  check_attribute_name(name)


In [32]:
display(monty_output[1])

Unnamed: 0,Cut,Relative Efficiency,Relative Events,Single Cut Efficiency
0,No cuts,100.0,778.0,100.0
1,Fiducial Cuts,61.568123,479.0,61.568123
2,One track cut,78.914405,378.0,70.694087
3,Blob overlap cuts,58.465608,221.0,47.943445
4,Energy cuts,0.0,0.0,0.0
5,pos_evt - all_evt,0.0,0.0,0.0
6,FOM_MAX - blob2_E_val (MeV),0.0,0.0,0.0


In [None]:
# save each dataframe as a page in h5

