In [1]:
import numpy as np
import matplotlib.pyplot as plt
import uproot3 as uproot
import pandas as pd

# Variables 

In [2]:
# Branch names to load, grouped by the wcpselection tree that
# create_dataframe reads them from.

# T_KINEvars
kine_vars = [
    'kine_reco_Enu',
]

# T_BDTvars
bdt_vars = [
    'numu_cc_flag',
    'nue_score',
]

# T_pot
pot_vars = [
    'pot_tor875',
]

# T_PFeval
pfeval_vars = [
    'truth_corr_nuvtxX',
    'truth_corr_nuvtxY',
    'truth_corr_nuvtxZ',
]

# T_eval
eval_vars = [
    'truth_isCC',
    'truth_nuPdg',
    'truth_vtxInside',
    'weight_spline',
    'weight_cv',
]

# Functions

In [3]:
def create_dataframe(file, family):
    """Load the wcpselection trees of one ROOT file into a single DataFrame.

    The branches listed in kine_vars, pfeval_vars, bdt_vars and eval_vars are
    read from their respective trees and concatenated column-wise. Three
    groups of derived columns are then added:

    * ``cos_theta``     - cosine of the angle between the reconstructed shower
                          momentum and the beam direction (see below);
    * ``weight_genie``  - ``weight_cv * weight_spline`` after invalid weights
                          have been reset to 1;
    * ``weight``        - ``weight_genie`` times the per-family normalisation
                          (currently 1 for both families);
    * ``original_file`` - 0 for 'NUE', 1 for 'MC', used to tell the two
                          samples apart later on.

    Parameters
    ----------
    file : str
        Path of the ROOT file to read (passed to ``uproot.open``).
    family : str
        Sample tag, either ``'NUE'`` or ``'MC'``.

    Returns
    -------
    df : pandas.DataFrame
        The merged per-entry DataFrame with the derived columns.
    POT : float
        Sum of the ``pot_tor875`` branch of the T_pot tree.

    Raises
    ------
    ValueError
        If ``family`` is neither ``'NUE'`` nor ``'MC'``.
    """

    # Value stored in 'original_file' for each supported family. Checking the
    # tag here, before any I/O, replaces the late NameError an unknown family
    # used to trigger once the weights were computed.
    family_tags = {'NUE': 0, 'MC': 1}
    if family not in family_tags:
        raise ValueError("family must be 'NUE' or 'MC', got %r" % (family,))

    # --- import trees and variables (the file is opened once and reused)
    root_file = uproot.open(file)

    T_pot = root_file['wcpselection/T_pot']
    df_pot = T_pot.pandas.df(pot_vars, flatten=False)

    T_KINE = root_file['wcpselection/T_KINEvars']
    df_KINE = T_KINE.pandas.df(kine_vars, flatten=False)

    T_BDT = root_file['wcpselection/T_BDTvars']
    df_BDT = T_BDT.pandas.df(bdt_vars, flatten=False)

    T_PFeval = root_file['wcpselection/T_PFeval']
    df_PFeval = T_PFeval.pandas.df(pfeval_vars, flatten=False)

    T_eval = root_file['wcpselection/T_eval']
    df_eval = T_eval.pandas.df(eval_vars, flatten=False)

    # --- merge dataframes
    df = pd.concat([df_KINE, df_PFeval, df_BDT, df_eval], axis=1)

    # -------------------------------------------------- #
    #     calculate cos_theta wrt the beam direction     #
    # -------------------------------------------------- #

    df_shower = T_PFeval.pandas.df("reco_showerMomentum", flatten=False)

    # The beam direction is taken as minus the unit vector of v_targ_uboone.
    # NOTE(review): presumably the beam-target position in detector
    # coordinates - confirm the origin and units of these numbers.
    v_targ_uboone = np.array([-31387.58422, -3316.402543, -60100.2414])
    beam_direction = -v_targ_uboone / np.linalg.norm(v_targ_uboone)

    # (n_events, 3) array holding the first three momentum components.
    momentum_cols = ['reco_showerMomentum[%d]' % i for i in range(3)]
    shower_momentum = np.asarray(df_shower[momentum_cols], dtype=float)

    # Normalise each event by its OWN momentum magnitude. Calling
    # np.linalg.norm without an axis returns one norm for the whole array,
    # which rescales every event by the same constant instead of producing
    # unit vectors.
    shower_norm = np.linalg.norm(shower_momentum, axis=1)

    # Events with zero shower momentum have no defined direction; the 0/0
    # below leaves NaN for them rather than a misleading finite value.
    with np.errstate(divide='ignore', invalid='ignore'):
        cos_theta = shower_momentum.dot(beam_direction) / shower_norm

    df['cos_theta'] = cos_theta

    # ------------------- #
    #    calculate POT    #
    # ------------------- #

    # Accumulate in double precision whatever the dtype of the branch is.
    POT = df_pot['pot_tor875'].astype('float64').sum()

    # ----------------- #
    #    fix weights    #
    # ----------------- #

    # --- make sure weights are valid numbers
    # A weight is reset to 1 when it is NaN, non-positive or larger than 30.
    # +inf is caught by the "> 30" test and -inf by "<= 0". NaN needs isna()
    # because "== np.nan" is never true.
    for weight_col in ('weight_cv', 'weight_spline'):
        weights = df[weight_col]
        invalid = weights.isna() | (weights <= 0) | (weights > 30)
        df.loc[invalid, weight_col] = 1

    # --- calculate weight
    # Both families currently use a unit normalisation; a POT/POT_NUE scaling
    # for the 'MC' family was left disabled in the original code.
    W_ = 1

    df['weight_genie'] = df['weight_cv'] * df['weight_spline']
    df['weight'] = W_ * df['weight_genie']

    # ------------------------------------------------------ intrinsic nue/overlay tag

    # variable created to classify signal and background dataframes
    df['original_file'] = family_tags[family]

    return df, POT

# Open files

In [4]:
# Input ROOT files, given relative to the notebook's working directory.
filename_nue = '../rootfiles/checkout_prodgenie_numi_intrinsic_nue_overlay_run1_OFFSETFIXED2.root'
filename_overlay = '../rootfiles/checkout_prodgenie_numi_overlay_run1.root'

# Build one DataFrame per sample; each call also returns that file's summed POT.
df_intrinsic_nue, POT_NUE = create_dataframe(file=filename_nue, family='NUE')
df_overlay, POT_MC = create_dataframe(file=filename_overlay, family='MC')

In [6]:
def define_signal(df):
    """Return the rows of ``df`` that count as signal.

    A row is kept when its truth information says it is an electron
    (anti)neutrino (``truth_nuPdg`` equal to 12 or -12), a CC interaction
    (``truth_isCC == 1``) and has ``truth_vtxInside == 1``. The survivors are
    then passed through ``apply_gen_nu_selection`` and ``apply_vtx_quality``,
    both defined elsewhere in the notebook.
    """
    # Truth-level conditions, evaluated on the full input frame.
    is_nue = (df.truth_nuPdg == -12) | (df.truth_nuPdg == 12)
    is_cc = df.truth_isCC == 1
    vtx_inside = df.truth_vtxInside == 1

    truth_selected = df[is_nue & is_cc & vtx_inside]

    # Reco-level steps: generic neutrino selection first, vertex quality second.
    after_generic = apply_gen_nu_selection(truth_selected)
    return apply_vtx_quality(after_generic)
    
def define_background(df):
    """Return the rows of ``df`` that count as background.

    A row is kept when ``truth_nuPdg`` is neither 12 nor -12, while still
    having ``truth_isCC == 1`` and ``truth_vtxInside == 1``. The survivors are
    then passed through ``apply_gen_nu_selection``, defined elsewhere in the
    notebook.
    """
    # Truth-level conditions, evaluated on the full input frame.
    not_nue = (df.truth_nuPdg != -12) & (df.truth_nuPdg != 12)
    is_cc = df.truth_isCC == 1
    vtx_inside = df.truth_vtxInside == 1

    truth_selected = df[not_nue & is_cc & vtx_inside]

    return apply_gen_nu_selection(truth_selected)