# Performance plots

In [1]:
%load_ext autoreload
%autoreload 2

import math
import os
from itertools import product

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import uproot
from matplotlib.colors import LogNorm

from calorimetry_likelihood.calo_likelihood import caloLikelihood
from calorimetry_likelihood.general_functions import particle_for_test, selection_for_test, selection_planes, compute_pid, add_norm_variable
from calorimetry_likelihood.loaders import load_data
from calorimetry_likelihood.hard_coded import branches_lookup
from calorimetry_likelihood.initializers import pdgid2name, initialisation_for_caloriemtry_data_mc

matplotlib.rcParams.update({'font.size': 12})

## Define filename and type of separation

In [2]:
# Input sample and the value passed to load_data as `fraction`.
input_filename = '~/Desktop/MicroBooNE/calorimetry_likelihood/v08/bnb_nu/out_mc_skimmed.root'
test_fraction = 0.95

# Pair of PDG codes to separate. In the ROC section the first entry labels the
# mis-identification axis and the second one the identification-efficiency axis.
# Alternative pair, kept for quick switching:
# pdg_codes = [321, 2212]
pdg_codes = [13, 2212]

# Lookup table dumped for this particle pair; the file name is built from the
# particle names returned by pdgid2name.
lookup_calo_filename = f'../dumped_objects/{pdgid2name[pdg_codes[0]]}_{pdgid2name[pdg_codes[1]]}_lookup.dat'

In [3]:
# Root folder for every figure written by this notebook (trailing slash is
# required: sub-folders and file names are appended with `+` further down).
# NOTE(review): user-specific absolute path - adjust before running elsewhere.
output_plot_folder = '/home/nic/Desktop/MicroBooNE/calorimetry_likelihood/trial/'

# Pure-Python replacement for `!mkdir -p $output_plot_folder`: no shell
# interpolation of the path, and it works on any platform.
os.makedirs(output_plot_folder, exist_ok=True)

## Load data

In [4]:
# Load the branches listed in branches_lookup from the input file.
# NOTE(review): `fraction`/`before` presumably select which part of the sample
# is used for testing - confirm against calorimetry_likelihood.loaders.load_data.
array, pot = load_data(
    filename=input_filename,
    branches=branches_lookup,
    fraction=test_fraction,
    before=False,
)

# Project-specific initialisation applied to the loaded array.
initialisation_for_caloriemtry_data_mc(array)

start loading ~/Desktop/MicroBooNE/calorimetry_likelihood/v08/bnb_nu/out_mc_skimmed.root
Done!


  return 0.3 / cos


## Load lookup table and compute PID

In [5]:
# Likelihood object initialised from the dumped lookup table.
caloLike_test = caloLikelihood()
caloLike_test.load(lookup_calo_filename)

# Attach the loaded array together with the track-level and per-plane
# quality masks computed from it.
track_quality_mask = selection_for_test(array)
plane_quality_masks = selection_planes(array)
caloLike_test.setArray(
    array,
    quality_mask=track_quality_mask,
    quality_masks_planes=plane_quality_masks,
)

# Compute the PID variables on the array held by the likelihood object,
# using the same lookup table file.
compute_pid(caloLike_test.array, lookup_calo_filename)

## Plot variable distributions

In [6]:
# Sub-folder for the variable distributions, ROC curves and AUC scans
# (trailing slash required: file names are appended with `+`).
output_plot_folder_performance = output_plot_folder + 'performance_plots/'

# Pure-Python replacement for `!mkdir -p ...`: no shell interpolation of the
# path, and it works on any platform.
os.makedirs(output_plot_folder_performance, exist_ok=True)

In [7]:
from calorimetry_likelihood.hard_coded import variable_labels, variable_labels_fancy

# PID variables compared in the performance plots. Commented entries are
# other available variables, kept so they can be switched on quickly.
plot_variables = [
    'trk_pid_chipr_y',
    # 'trk_pid_chimu_y',
    # 'trk_bragg_p_y',
    # 'trk_bragg_mu_y',
    # 'trk_bragg_p_three_planes',
    # 'llr_sum_0',
    # 'llr_sum_1',
    'llr_sum_2',
    # 'llr_01',
    'llr_012',
]

# Add a normalised copy of every variable (stored under `<name>_n`, which is
# what the plotting cells read). The scale depends on the variable family:
# 100 for log-likelihood ratios, 1 for Bragg variables, 20 for everything else.
for variable in plot_variables:
    scale = 100 if 'llr' in variable else (1 if 'bragg' in variable else 20)
    add_norm_variable(caloLike_test.array, variable, scale)

  array[var+'_n'] = 2/np.pi*np.arctan(array[var]/scale)


In [8]:
# Histogram settings shared by all normalised (`_n`) variables.
bins = 41
# Deliberately not called `range`: a global with that name shadows the builtin
# and breaks any later `for i in range(...)` in the session.
plot_range = [-1, 1]
kwargs = {'histtype': 'step'}

for var in plot_variables:
    variable = var + '_n'

    # One step histogram per particle species under study.
    for pdg in pdg_codes:
        caloLike_test.plotVariableMC(variable, bins, plot_range, function_mask=particle_for_test,
                                     kwargs_function_mask={'pdgid': pdg}, quality_mask=True,
                                     label=pdgid2name[pdg], **kwargs)

    # Tracks selected with pdgid 0 are drawn with the "fancy" plotter and
    # labelled as cosmic.
    caloLike_test.plotVariableMCFancy(variable, bins, plot_range, function_mask=particle_for_test,
                                      kwargs_function_mask={'pdgid': 0}, quality_mask=True,
                                      label='cosmic', **kwargs)

    plt.legend(loc='upper left')
    plt.xlabel(variable_labels[var])
    plt.tight_layout()
    plt.savefig(output_plot_folder_performance + variable + '.png', dpi=250)
    plt.savefig(output_plot_folder_performance + variable + '.pdf')
    # Close the figure so the next variable starts from a clean canvas.
    plt.close()

# ROC curves

In [9]:
# Draw one ROC curve per PID variable on the same figure and keep whatever
# rocCurve returns for later inspection.
out = []
# Set to an x value (mis-identification rate) to mark it with a vertical line.
with_vertical_line = None
for variable in plot_variables:
    out.append(caloLike_test.rocCurve(variable=variable+'_n',
                                      pdg_codes=pdg_codes,
                                      plot=True,
                                      variable_label=variable_labels[variable]))
    # Optional diagnostic (uses the entry just appended, i.e. out[-1]):
    # for mis_id in [0.01, 0.05, 0.1, 0.15, 0.2]:
    #     print(f"var = {variable} - cut_value {np.interp(mis_id, out[-1][1][::-1], out[-1][3][::-1]):.3g}, mis-id {100*mis_id}%, eff {100*np.interp(mis_id, out[-1][1][::-1], out[-1][2][::-1]):.3g}%")

if with_vertical_line is not None:
    plt.vlines(with_vertical_line, 0., plt.gca().get_ylim()[1], 'purple')
plt.title('MicroBooNE Simulation', loc='left')
plt.title('', loc='right')
plt.xlabel(f'{pdgid2name[pdg_codes[0]]} mis-identification rate')
plt.ylabel(f'{pdgid2name[pdg_codes[1]]} identification efficiency')

# Same file stem for both formats; only the suffix depends on the vertical line.
roc_basename = 'roc_curves_with_vline' if with_vertical_line is not None else 'roc_curves'
# Both formats use bbox_inches='tight' so the PNG is cropped exactly like the PDF
# (previously only the PDF was).
plt.savefig(output_plot_folder_performance + roc_basename + '.pdf', bbox_inches='tight')
plt.savefig(output_plot_folder_performance + roc_basename + '.png', dpi=250, bbox_inches='tight')

plt.close()

In [10]:
# roc curves 1d
def selection_function(array, parameter, bin_range):
    """Mask selecting entries whose `parameter` lies in a half-open bin.

    Parameters
    ----------
    array : mapping
        Container indexed by variable name (``array[parameter]``).
    parameter : str
        Name of the variable to cut on.
    bin_range : sequence
        ``bin_range[0]`` is the inclusive lower edge, ``bin_range[1]`` the
        exclusive upper edge.

    Returns
    -------
    Element-wise boolean mask, true where ``lower <= value < upper``.
    """
    lower_edge = bin_range[0]
    upper_edge = bin_range[1]
    above_lower = array[parameter] >= lower_edge
    below_upper = array[parameter] < upper_edge
    return above_lower & below_upper

# One entry per scanned track parameter: (branch name, bin edges, axis label).
# Keeping the three pieces together avoids relying on the position in three
# separate parallel lists.
scan_parameters = [
    ('trk_theta', np.linspace(0, np.pi, 20), r'Track $\theta$'),
    ('trk_phi', np.linspace(-np.pi, np.pi, 20), r'Track $\phi$'),
    ('trk_len', np.linspace(0, 100, 20), 'Track length [cm]'),
]

for name, bin_edges, label in scan_parameters:
    # Overlay the scan of every PID variable on the same figure.
    for variable in plot_variables:
        caloLike_test.auc1D(variable+'_n',
                            pdg_codes,
                            selection_function,
                            name,
                            bin_edges,
                            legend_label=variable_labels[variable])

    plt.xlabel(label)
    plt.legend()
    plt.title(f'{pdgid2name[pdg_codes[0]]}/{pdgid2name[pdg_codes[1]]} separation\nSimulated tracks', loc='left')
    plt.tight_layout()
    plt.savefig(output_plot_folder_performance+'auc1d_'+name+'.png', dpi=250)
    plt.savefig(output_plot_folder_performance+'auc1d_'+name+'.pdf')
    # Close the figure before moving on to the next parameter.
    plt.close()

In [11]:
# roc curves 2d
def selection_function(array, parameters_names, bin_ranges):
    """Mask selecting entries that fall inside a 2D half-open bin.

    Parameters
    ----------
    array : mapping
        Container indexed by variable name.
    parameters_names : sequence of str
        Names of the two variables to cut on (only the first two are used).
    bin_ranges : sequence
        ``bin_ranges[k]`` holds the inclusive lower edge and the exclusive
        upper edge applied to ``parameters_names[k]``.

    Returns
    -------
    Element-wise boolean mask, true where both variables are inside their bin.
    """
    in_first_bin = (array[parameters_names[0]] >= bin_ranges[0][0]) & \
                   (array[parameters_names[0]] < bin_ranges[0][1])
    in_second_bin = (array[parameters_names[1]] >= bin_ranges[1][0]) & \
                    (array[parameters_names[1]] < bin_ranges[1][1])
    return in_first_bin & in_second_bin

# Variables defining the 2D bins and their edges, in the same order.
parameters_names = ['trk_theta', 'trk_phi']
parameter_bin_edges = (np.linspace(0, np.pi, 6), np.linspace(-np.pi, np.pi, 10))

# The title does not depend on the PID variable, so build it once.
separation_title = f'{pdgid2name[pdg_codes[0]]}/{pdgid2name[pdg_codes[1]]} separation\nSimulated tracks'

# One figure per PID variable.
for variable in plot_variables:
    caloLike_test.auc2D(
        variable+'_n',
        pdg_codes,
        selection_function,
        parameters_names,
        parameter_bin_edges,
    )
    plt.xlabel(r'Track $\theta$')
    plt.ylabel(r'Track $\phi$')
    plt.title(separation_title, loc='left')
    plt.tight_layout()

    auc2d_stem = output_plot_folder_performance+'auc2d'+variable
    plt.savefig(auc2d_stem+'.png', dpi=250)
    plt.savefig(auc2d_stem+'.pdf')
    plt.close()

# 2D plots dedx vs rr

In [23]:
# Sub-folder for the dE/dx vs residual range maps, with one directory per
# plane index (plane0, plane1, plane2) as used by the file names written below.
output_plot_folder_dedx_vs_rr = output_plot_folder + 'dedx_vs_rr/'

# Pure-Python replacement for three `!mkdir -p ...` shell calls: no shell
# interpolation of the path, and it works on any platform.
for plane_index in (0, 1, 2):
    os.makedirs(os.path.join(output_plot_folder_dedx_vs_rr, f'plane{plane_index}'), exist_ok=True)

In [24]:
# Pitch slices (consecutive edges, in cm as quoted in the plot title) and
# 2D histogram settings: x is the residual range, y is dE/dx.
pitch_binning = [0.3, 0.6, 1, 1.5, 3, 30]
binning = [50, 50]
# Deliberately not called `range`: a global with that name shadows the builtin.
hist_range = [[0, 40], [0, 20]]

array = caloLike_test.array
selection_test = caloLike_test.quality_mask
selection_test_planes = caloLike_test.quality_masks_planes

plane_suffixes = ['_u', '_v', '_y']
plane_labels = ['First induction', 'Second induction', 'Collection']

for i, (plane, plane_label) in enumerate(zip(plane_suffixes, plane_labels)):
    for pitch_low, pitch_high in zip(pitch_binning[:-1], pitch_binning[1:]):
        # Half-open pitch slice [pitch_low, pitch_high) on this plane.
        pitch_mask = (array['pitch'+plane] >= pitch_low) & (array['pitch'+plane] < pitch_high)

        for pdg in pdg_codes:
            pdg_label = pdgid2name[pdg]
            # Particles and anti-particles are treated together.
            pdg_mask = np.abs(array['backtracked_pdg'])==pdg
            total_mask = selection_test & selection_test_planes[i] & pitch_mask & pdg_mask

            # NOTE(review): .flatten() suggests per-track collections of hits
            # being merged into one flat list - confirm the array type.
            dedx = array['dedx'+plane][total_mask].flatten()
            rr = array['rr'+plane][total_mask].flatten()

            plt.hist2d(rr, dedx, bins=binning, range=hist_range)
            plt.title('MicroBooNE In Progress', loc='right')
            plt.title('Simulated tracks backtracked to {}\n'.format(pdg_label)+
                      'fully contained, completeness, purity > 90%\n'+
                      'only hits with {:.2g} cm <= pitch < {:.2g} cm\n'.format(pitch_low, pitch_high)+
                      '{} plane'.format(plane_label), loc='left')
            plt.xlabel('Residual range [cm]')
            plt.ylabel('dE/dx [MeV/cm]')
            plt.tight_layout()

            # Build the file stem once. Dots are stripped from the formatted
            # part only (0.3 -> "03"), never from the folder path.
            file_stem = 'plane{}/pdg_{}_{:.1f}_pitch_{:.1f}'.format(i, pdg, pitch_low, pitch_high).replace('.', '')
            plt.savefig(output_plot_folder_dedx_vs_rr+file_stem+'.pdf')
            plt.savefig(output_plot_folder_dedx_vs_rr+file_stem+'.png', dpi=250)
            plt.close()