In [None]:
%load_ext autoreload
%autoreload 2

In [2]:
import simweights
import pickle
import os, sys
import numpy as np
import matplotlib as mat
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.colors as colors
import matplotlib.gridspec as gridspec
import pandas as pd
import tables
import h5py
import math
from scipy.stats import mstats
import matplotlib as mpl
import matplotlib.font_manager as font_manager
from fonts import *



In [3]:
sys.path.append("/data/user/tvaneede/GlobalFit/EventGenerator/performance")
from weights import *
from utils import *
from files import files
from selections import create_selections

In [4]:
# version and plotting path
version = "v0"
files = files[version]


In [5]:
# open the files
for flavor in files:
    files[flavor]['hdf_file'] = pd.HDFStore(files[flavor]['file_path'],'r')
    files[flavor]['variables'] = get_variables( files[flavor]['hdf_file'] )
    files[flavor] = Append_Weights( files[flavor] )


In [25]:
# Collect table data
table_rows = []
total_files = 0
total_events = 0
for simulation in files:
    energy = files[simulation]['variables']['PrimaryNeutrinoEnergy']
    nfiles = files[simulation]['nfiles']
    nevents = len(energy)
    events_per_file = nevents / nfiles
    simulation = simulation.replace("_E2","").replace("_E3","")
    total_files += nfiles
    total_events += nevents
    table_rows.append(f"{simulation} & {np.log10(min(energy)):.1f}  & {np.log10(max(energy)):.1f} & {nfiles} & {nevents} & {events_per_file:.1f} \\\\")

table_rows.append(f"\hline")
table_rows.append(f"All &  & & {total_files} & {total_events} & {total_events/total_files:.1f} \\\\")


# Create LaTeX table
latex_table = r"""\begin{table}[h]
\centering
\begin{tabular}{l|c|c|c|c|c}
\hline
Simulation & $\log_{10}(E_\nu^{\rm min})$ & $\log_{10}(E_\nu^{\rm max})$ & nfiles & evts & evts/file \\
\hline
""" + "\n".join(table_rows) + r"""
\hline
\end{tabular}
\caption{Summary of event statistics for different simulations.}
\label{tab:events_summary}
\end{table}"""

# Print LaTeX table
print(latex_table)


\begin{table}[h]
\centering
\begin{tabular}{l|c|c|c|c|c}
\hline
Simulation & $\log_{10}(E_\nu^{\rm min})$ & $\log_{10}(E_\nu^{\rm max})$ & nfiles & evts & evts/file \\
\hline
NuMu & 4.3  & 6.0 & 675 & 7128 & 10.6 \\
NuMu & 6.0  & 8.0 & 1000 & 3109 & 3.1 \\
NuE & 4.2  & 6.0 & 1000 & 20938 & 20.9 \\
NuE & 6.0  & 8.0 & 995 & 2900 & 2.9 \\
NuTau & 4.3  & 6.0 & 1000 & 11184 & 11.2 \\
NuTau & 6.0  & 8.0 & 999 & 4195 & 4.2 \\
\hline
All &  & & 5669 & 49454 & 8.7 \\
\hline
\end{tabular}
\caption{Summary of event statistics for different simulations.}
\label{tab:events_summary}
\end{table}


In [6]:
keys_to_merge = {
    "NuE" : ["NuE_E2", "NuE_E3"],
    "NuMu" : ["NuMu_E2", "NuMu_E3"],
    "NuTau" : ["NuTau_E2", "NuTau_E3"],
    "NuAll" : ['NuE', "NuMu", "NuTau"],
}

for new_key in keys_to_merge:
    files[new_key] = {}
    files[new_key]['variables'] = {}

    for key in keys_to_merge[new_key]:
        for variable in files[key]['variables']:
            if variable not in files[new_key]['variables']:
                files[new_key]['variables'][variable] = []
            files[new_key]['variables'][variable].append( pd.Series(files[key]['variables'][variable]) )
    
    for variable in files[new_key]['variables']:
        files[new_key]['variables'][variable] = pd.concat(files[new_key]['variables'][variable])


In [6]:
from selections import create_selections

In [7]:
selections = create_selections( files["NuAll"] )

KeyError: 'NuAll'

In [None]:



# check energy range files, number of events, files
for simulation in files:
    energy = files[simulation]['variables']['PrimaryNeutrinoEnergy']
    recoenergy = files[simulation]['variables']['RecoETot']
    print(20*"-", simulation)
    # print("PrimaryNeutrinoEnergy", "min", np.log10(min(energy)), "max", np.log10(max(energy)) )
    # print("RecoETot", "min", np.log10(min(recoenergy)), "max", np.log10(max(recoenergy)) )
    print("nfiles", files[simulation]['nfiles'])
    print("evts", len(energy), "evts/file", len(energy)/files[simulation]['nfiles'])

-------------------- NuMu_E2
nfiles 675
evts 7128 evts/file 10.56
-------------------- NuMu_E3
nfiles 1000
evts 3109 evts/file 3.109
-------------------- NuE_E2
nfiles 1000
evts 20938 evts/file 20.938
-------------------- NuE_E3
nfiles 995
evts 2900 evts/file 2.9145728643216082
-------------------- NuTau_E2
nfiles 1000
evts 11184 evts/file 11.184
-------------------- NuTau_E3
nfiles 999
evts 4195 evts/file 4.1991991991991995
-------------------- NuE


KeyError: 'nfiles'

In [22]:
# check true double cascades

for selection_name in ["TrueDoubleCascade", "DoubleCascade_TrueDoubleCascade", "DoubleCascadeEvtGen_TrueDoubleCascade"]:

    selection = selections[selection_name]

    true_type = files["NuAll"]['variables']["PrimaryNeutrinoType"][selection]
    print(20*"-", selection_name)
    print( len(true_type), sum( true_type == -16.0 ), sum( true_type == 16.0 ) )
    print( sum( true_type == -16.0 ) + sum( true_type == 16.0 ) == len(true_type) )


-------------------- TrueDoubleCascade
9207 4494 4713
True
-------------------- DoubleCascade_TrueDoubleCascade
2181 1078 1103
True
-------------------- DoubleCascadeEvtGen_TrueDoubleCascade
2145 1057 1088
True
