## Setup

In [None]:
# Plot event-rate variations, fractional uncertainties, and data/MC comparisons
# for all sources of systematic error.
# Also consider the effect of potential NuMI oscillations on the event rate.
# Make sure to update plots_path here and in the backend function scripts before saving.

In [None]:
import sys
import selection_functions as sf

import importlib

import uproot
import matplotlib.pylab as pylab
import numpy as np
import math
from sklearn.model_selection import train_test_split
import pickle
import xgboost as xgb


import awkward
import matplotlib.pyplot as plt
import pandas as pd

import ROOT


In [None]:
from datetime import datetime
import time

# Stamp the run with the wall-clock date AND time so saved outputs can be
# matched to a run. The previous "%H:%M:%S" format printed only the time even
# though the label promises both.
now = datetime.now()
date_time = now.strftime("%Y-%m-%d %H:%M:%S")
print("date and time:",date_time)

In [None]:
# Beamline-geometry reweighting helper. The module is reloaded so that edits to
# NuMIGeoWeights.py are picked up without restarting the kernel.
import NuMIGeoWeights
importlib.reload(NuMIGeoWeights)

# the default option is FHC, RHC needs different arguments
# NOTE(review): the object is always built with the defaults, and this cell runs
# before ISRUN3 is defined — confirm the RHC arguments are supplied for Run 3.
numiBeamlineGeoWeights = NuMIGeoWeights.NuMIGeoWeights() 

In [None]:
# Detector-systematics helper. The module is reloaded so that edits to
# NuMIDetSys.py are picked up without restarting the kernel.
import NuMIDetSys
importlib.reload(NuMIDetSys)

# Instance used further down to build the detector-variation histograms
# (makehist_detsys).
NuMIDetSysWeights = NuMIDetSys.NuMIDetSys()

In [None]:
# Reload the backend module, then pull all of its public names into the
# notebook namespace.
# NOTE(review): addAngles, plot_mc, plot_data, calcSysError, calcDetSysError and
# out_fv_query are used below without being defined in this notebook, so they
# presumably come from this star import — confirm, and consider importing them
# by name.
importlib.reload(sf)
from selection_functions import *

In [None]:
# Doing Run3??
# Run-period switch: False selects the Run 1 (FHC) paths and constants below,
# True selects the Run 3 (RHC) ones.
ISRUN3 = False


In [None]:
# use nue intrinsic? 
# When True, the dedicated intrinsic-nue sample is loaded and later replaces
# the overlay nue CC events inside the active volume.
NUE_INTRINSIC = True

In [None]:
# Output directory for saved figures: the RHC folder for Run 3, the FHC folder otherwise.
plots_path = (
    "/uboone/data/users/kmiller/searchingfornues_v33/v08_00_00_33/plots/rhc/"
    if ISRUN3 else
    "/uboone/data/users/kmiller/searchingfornues_v33/v08_00_00_33/plots/fhc/"
)


In [None]:
# POT normalization factors
# (the trailing v5/v6/v7 tags are the sample versions noted by the author)

if ISRUN3:
    # RHC
    overlay_pot = 1.98937E21 # v7
    dirt_pot = 1.03226E21 # v6
    beamon_pot = 5.0E20 # v5

    # trigger counts used to normalise beam-off to beam-on
    beamon_ntrig = 10363728.0 # v5
    beamoff_ntrig = 32878305.25 # v5

    if NUE_INTRINSIC:
        nue_intrinsic_pot = 2.5345E22 # v7

else:
    # FHC
    overlay_pot = 2.33652E21 # v7
    dirt_pot = 1.67392E21 # david's file
    beamon_pot = 2.0E20 # v5

    # trigger counts used to normalise beam-off to beam-on
    beamon_ntrig = 5268051.0 # v5 (EA9CNT_wcut)
    beamoff_ntrig = 9199232.74 # v5 (EXT_NUMIwin_FEMBeamTriggerAlgo)

    if NUE_INTRINSIC:
        nue_intrinsic_pot = 2.37838E22 # v7


In [None]:
# Location of the selection tree inside every ntuple file.
fold = "nuselection"
tree = "NeutrinoSelectionFilter"

# Directory and file stems (without ".root") of the samples for the chosen run
# period. NUE stays empty when the intrinsic sample is not used.
if ISRUN3:

    # Run 3 RHC
    path = '/uboone/data/users/kmiller/ntuples/run3b/nuepresel/'

    OVRLY = 'neutrinoselection_filt_run3b_overlay_v7'
    DATA = 'neutrinoselection_filt_run3b_beamon_beamgood_v5'
    EXT = 'neutrinoselection_filt_run3b_beamoff_v5'
    DRT = 'neutrinoselection_filt_run3b_dirt_overlay_v6'
    NUE = 'neutrinoselection_filt_run3b_overlay_intrinsic_v7' if NUE_INTRINSIC else ""

else:

    # Run 1 FHC (reduced with opening angle)
    path = '/uboone/data/users/kmiller/ntuples/run1/qualcuts/'#nuepresel/'

    OVRLY = 'neutrinoselection_filt_run1_overlay_v7'
    DATA = 'neutrinoselection_filt_run1_beamon_beamgood_v5'
    EXT = 'neutrinoselection_filt_run1_beamoff_v5'
    DRT = 'prodgenie_numi_uboone_overlay_dirt_fhc_mcc9_run1_v28_all_snapshot'
    NUE = 'neutrinoselection_filt_run1_overlay_intrinsic_v7' if NUE_INTRINSIC else ""



In [None]:
def _open_tree(stem):
    """Return the `fold`/`tree` object from the ntuple file path + stem + '.root'."""
    return uproot.open(path+stem+".root")[fold][tree]

# Open the selection tree of every sample. uproot_v keeps the tree handles in the
# order [overlay, data, ext, dirt(, nue)]; they are needed again later, after
# the sample names have been rebound to DataFrames.
overlay = _open_tree(OVRLY)
data = _open_tree(DATA)
ext = _open_tree(EXT)
dirt = _open_tree(DRT)

uproot_v = [overlay, data, ext, dirt]

if NUE_INTRINSIC:
    nue = _open_tree(NUE)
    uproot_v += [nue]


In [None]:
# Branches read for every sample (beam-on data, beam-off data and MC).
# The order of the list is kept exactly as before.
variables = [
    "selected",
    "nu_pdg",
    "shr_theta",
    "true_e_visible",
    "trk_score_v",
    "shr_tkfit_dedx_Y",
    "ccnc",
    "n_tracks_contained",
    "NeutrinoEnergy2",
    "reco_nu_vtx_sce_x", "reco_nu_vtx_sce_y", "reco_nu_vtx_sce_z",
    "shrsubclusters0", "shrsubclusters1", "shrsubclusters2",  # number of sub-clusters in shower
    "trkshrhitdist2",
    "nproton",
    "nu_e",
    "n_showers_contained",
    "nu_purity_from_pfp",
    "shr_phi", "trk_phi", "trk_theta",
    "shr_score",
    "trk_energy",
    "tksh_distance", "tksh_angle",
    "npi0",
    "shr_energy_tot_cali",
    "nslice",
    "contained_fraction",
    "true_nu_vtx_x", "true_nu_vtx_y", "true_nu_vtx_z",
    "npion",
    "shr_energy_cali",
    "shrmoliereavg",
    "shr_px", "shr_py", "shr_pz",
]


In [None]:
# MC only variables: central-value weights, truth kinematics, software trigger
# flag and the PPFX central-value flux weight
mc_var = [
    "weightTune", "weightSpline", "weightSplineTimesTune",
    "true_nu_px", "true_nu_py", "true_nu_pz",
    "elec_e", "proton_e",
    "mc_px", "mc_py", "mc_pz",
    "elec_px", "elec_py", "elec_pz",
    "swtrig_pre", "ppfx_cv", "mc_pdg",
]  #, "opening_angle"]

# branches holding the multi-universe systematic weights
sys_genie = ["weightsGenie", "weightsReint"]
sys_flux = ["weightsPPFX"]


## Create pandas dataframes

In [None]:
# Read the common, MC-only, GENIE/reinteraction and PPFX branches of the overlay
# tree into a DataFrame. The name `overlay` is rebound from the tree to the
# DataFrame (the tree handle stays available in uproot_v), so this cell cannot
# simply be re-run without re-opening the files.
overlay = overlay.pandas.df(variables + mc_var + sys_genie + sys_flux, flatten=False)

In [None]:
# Divide the stored multi-universe weights by 1000 (presumably the ntuples store
# them scaled up by that factor — confirm against the ntuple producer).
for col in ('weightsPPFX', 'weightsReint', 'weightsGenie'):
    overlay[col] = overlay[col]/1000

In [None]:
# Read the dirt tree into a DataFrame; no PPFX branch is requested for dirt.
# NOTE(review): overlay and nue weightsGenie/weightsReint are divided by 1000
# after loading, but no such division is applied to dirt in this notebook —
# confirm the dirt ntuple stores these weights unscaled.
dirt = dirt.pandas.df(variables + mc_var + sys_genie, flatten=False)

In [None]:
# no flux weights exist for dirt (yet?): give every dirt event a unit weight per
# universe, with as many universes as the first overlay event has
dirt['weightsPPFX'] = [[1]*len(overlay['weightsPPFX'].iloc[0]) for _ in range(len(dirt))]

In [None]:
# Load the intrinsic-nue sample with the same branches as the overlay and apply
# the same /1000 rescaling to its multi-universe weights.
if NUE_INTRINSIC:
    nue = nue.pandas.df(variables + mc_var + sys_genie + sys_flux, flatten=False)
    for col in ('weightsPPFX', 'weightsGenie', 'weightsReint'):
        nue[col] = nue[col]/1000

In [None]:
# Beam-on data: only the common branches are read (no MC-only or systematic-weight branches).
data = data.pandas.df(variables, flatten=False) 

In [None]:
# Beam-off (EXT) data: only the common branches are read.
ext = ext.pandas.df(variables, flatten=False)

In [None]:
# make dataframes equal # of columns:
# the MC-only and systematic-weight columns are added to data/ext as NaN
for frame in (data, ext):
    for var in mc_var+sys_genie+sys_flux:
        frame[var] = np.nan

In [None]:
# LLR-PID (log likelihood ratio particle ID) of the "track candidate"
# (proton for nue selection, muon for numu); the same recipe can be used for
# any per-track variable. Code from Giuseppe!

df_v = [overlay, data, ext, dirt] + ([nue] if NUE_INTRINSIC else [])

# df_v and uproot_v list the samples in the same order, so pair them directly
for df, up in zip(df_v, uproot_v):
    pid_scores = up.array('trk_llr_pid_score_v')
    cand_idx = up.array('trk_id')-1 # I think we need this -1 to get the right result

    # score of the candidate track, or 9999. when the index is past the end
    picked = []
    for pidv, tid in zip(pid_scores, cand_idx):
        if tid < len(pidv):
            picked.append(pidv[tid])
        else:
            picked.append(9999.)
    df['trkpid'] = awkward.fromiter(picked)

    # total number of shower sub-clusters, summed over the three sub-cluster branches
    df['subcluster'] = df['shrsubclusters0'] + df['shrsubclusters1'] + df['shrsubclusters2']

    # NeutrinoEnergy2 divided by 1000 (presumably MeV -> GeV, per the column name)
    df['NeutrinoEnergy2_GeV'] = df['NeutrinoEnergy2']/1000


In [None]:
# Add truth level theta & phi angles (detector & beam coordinates)
# addAngles is not defined in this notebook (presumably from selection_functions);
# the 'thbeam' column used for the geometry weights below is expected from it.
overlay = addAngles(overlay)

In [None]:
# Same truth-angle columns for the dirt sample.
dirt = addAngles(dirt)

In [None]:
# Same truth-angle columns for the intrinsic-nue sample, when it is in use.
if NUE_INTRINSIC: 
    nue = addAngles(nue)

In [None]:
# All simulated samples, in the order overlay, dirt(, nue).
mc_df = [overlay, dirt] + ([nue] if NUE_INTRINSIC else [])

In [None]:
# add beamline geometry weights

def _beamline_geo_weight(event):
    """Geometry weight(s) for one event row, from its nu_pdg, nu_e and thbeam."""
    return numiBeamlineGeoWeights.calculateGeoWeight(event['nu_pdg'], event['nu_e'], event['thbeam'])

for df in mc_df:
    # evaluated row by row
    df['weightsNuMIGeo'] = df.apply(_beamline_geo_weight, axis=1)
    

In [None]:
# make dataframes equal # of columns:
# truth-angle and geometry-weight columns exist only for MC, so add them to
# data/ext as NaN

nan_var = [
    'thdet', 'phidet',
    'true_nu_px_beam', 'true_nu_py_beam', 'true_nu_pz_beam',
    'thbeam', 'phibeam',
    'weightsNuMIGeo',
]

for frame in (data, ext):
    for var in nan_var:
        frame[var] = np.nan

In [None]:
# np.setdiff1d(ext.columns,overlay.columns)
# ext.columns == overlay.columns

## Weights

In [None]:
# cuts applied for bad weights
#
# Scalar central-value weights (weightSplineTimesTune, weightTune): values that
# are non-positive, larger than 100, or not finite (NaN, +-inf) are reset to 1.
#
# Per-universe weights (weightsGenie, weightsReint): every event holds one array
# of weights. Entries that are negative, larger than 60, or not finite are reset
# to 1; an event with no variations gets an all-ones array (600 GENIE universes,
# 1000 reinteraction universes).
#
# Changes with respect to the previous per-event loop:
#   * NaN entries are now really replaced: `array == np.nan` is always False,
#     so np.isfinite is used instead;
#   * the cleaned arrays are written back with a whole-column assignment instead
#     of the chained `df[col].iloc[i] = ...`, which is not guaranteed to modify
#     the DataFrame;
#   * each column is traversed once instead of doing several Series look-ups
#     per event.

def _sanitize_universe_weights(weights, n_universes, max_weight=60):
    """Return a cleaned copy of one event's per-universe weights.

    weights     : array-like with one weight per universe for a single event
    n_universes : length of the all-ones array returned when `weights` is empty
    max_weight  : entries above this value are treated as bad

    Entries that are negative, above `max_weight` or not finite are set to 1.
    """
    cleaned = np.array(weights)  # copy; the array stored in the frame is left untouched
    if cleaned.size == 0:
        # if no variations exist for the event
        return np.ones(n_universes)
    bad = (cleaned > max_weight) | (cleaned < 0) | ~np.isfinite(cleaned)
    cleaned[bad] = 1.
    return cleaned


for df in mc_df:
    for wcol in ('weightSplineTimesTune', 'weightTune'):
        w = df[wcol]
        df.loc[(w <= 0) | (w > 100) | ~np.isfinite(w), wcol] = 1.

    for wcol, n_universes in (('weightsGenie', 600), ('weightsReint', 1000)):
        df[wcol] = [_sanitize_universe_weights(w, n_universes) for w in df[wcol]]

In [None]:
# pot scaling weights
#
# Three normalisations are attached to every sample as constant columns:
#   pot_scale          -> beam-on POT
#   pot_scale_overlay  -> standard overlay POT
#   pot_scale_proj     -> total projected beam-on POT

# applied tunes
dirt_tune = 0.35
ext_tune = .94 if ISRUN3 else .98   # Run 3 / Run 1

##############################################
# SCALE  TO BEAM ON POT
overlay_scale_to_data = beamon_pot/overlay_pot
dirt_scale_to_data = dirt_tune*(beamon_pot/dirt_pot)
beamoff_scale_to_data = ext_tune*(beamon_ntrig/beamoff_ntrig) # scale factor to beam on POT

overlay['pot_scale'] = overlay_scale_to_data
dirt['pot_scale'] = dirt_scale_to_data
ext['pot_scale'] = beamoff_scale_to_data
data['pot_scale'] = 1

if NUE_INTRINSIC:
    nue_scale_to_data = beamon_pot/nue_intrinsic_pot
    nue['pot_scale'] = nue_scale_to_data

##############################################
# SCALE TO OVERLAY
dirt_scale_to_overlay = dirt_tune*(overlay_pot/dirt_pot)
beamoff_scale_to_overlay = ext_tune*((overlay_pot/beamon_pot)*(beamon_ntrig/beamoff_ntrig))

overlay['pot_scale_overlay'] = 1
dirt['pot_scale_overlay'] = dirt_scale_to_overlay
ext['pot_scale_overlay'] = beamoff_scale_to_overlay
data['pot_scale_overlay'] = 1

if NUE_INTRINSIC:
    nue_scale_to_overlay = overlay_pot/nue_intrinsic_pot
    nue['pot_scale_overlay'] = nue_scale_to_overlay

##############################################
# SCALE TO PROJECTED
proj_pot = 11.95E20 if ISRUN3 else 9.23E20   # RHC / FHC

overlay_scale_to_proj = proj_pot/overlay_pot
dirt_scale_to_proj = dirt_tune*(proj_pot/dirt_pot)

# first scale to beamon, then scale to projected
beamoff_scale_to_proj = (ext_tune*(beamon_ntrig/beamoff_ntrig)) * (proj_pot/beamon_pot)

overlay['pot_scale_proj'] = overlay_scale_to_proj
dirt['pot_scale_proj'] = dirt_scale_to_proj
ext['pot_scale_proj'] = beamoff_scale_to_proj
data['pot_scale_proj'] = 1

if NUE_INTRINSIC:
    nue_scale_to_proj = proj_pot/nue_intrinsic_pot
    nue['pot_scale_proj'] = nue_scale_to_proj

    
    

In [None]:
# total weights
#
# Every MC event gets POT scale * PPFX central-value flux weight * GENIE
# spline-times-tune weight, in three normalisations:
#   totweight          scales to BEAMON
#   totweight_overlay  scales to STANDARD OVERLAY
#   totweight_proj     scales to TOTAL PROJECTED BEAM ON
# ext gets POT weight only, so it is not touched here.

flux_weight = 'ppfx_cv'

mc_samples = [overlay, dirt]
if NUE_INTRINSIC:
    mc_samples.append(nue)

for sample in mc_samples:
    cv_flux = sample[flux_weight]
    xsec_tune = sample['weightSplineTimesTune']

    sample['totweight'] = sample['pot_scale']*cv_flux*xsec_tune

    if sample is overlay:
        # the overlay is its own reference, so no POT factor is applied
        sample['totweight_overlay'] = cv_flux*xsec_tune
    else:
        sample['totweight_overlay'] = sample['pot_scale_overlay']*cv_flux*xsec_tune

    sample['totweight_proj'] = sample['pot_scale_proj']*cv_flux*xsec_tune


In [None]:
# to keep the number of columns the same:
# data and ext carry the total-weight columns as NaN
new_var = ['totweight', 'totweight_overlay', 'totweight_proj']

for df in (data, ext):
    for var in new_var:
        df[var] = np.nan

## Categories

In [None]:
# Fiducial-volume box, applied once to the true vertex and once to the
# reconstructed (sce) vertex; {0} is the branch-name prefix.
_fv_box = "10<={0}x<=246 and -106<={0}y<=106 and 10<={0}z<=1026"

in_fv_query = _fv_box.format("true_nu_vtx_")
reco_in_fv_query = _fv_box.format("reco_nu_vtx_sce_")

In [None]:
# replace overlay nue CC events with nue intrinsic sample
#
# NOTE: this cell modifies `overlay` in place and then appends `nue`, so running
# it twice appends the intrinsic sample twice.

if NUE_INTRINSIC:
    # intrinsic sample contains in AV TPC events ONLY, & only CC events (overlay is entire cryo)
    in_AV_query = "-1.55<=true_nu_vtx_x<=254.8 and -116.5<=true_nu_vtx_y<=116.5 and 0<=true_nu_vtx_z<=1036.8"
    nueCC_query = 'abs(nu_pdg)==12 and ccnc==0 and '+in_AV_query

    nuecc_in_av = overlay.query(nueCC_query)
    print("# of nueCC in AV in overlay sample = "+str(len(nuecc_in_av)))

    n_before = len(overlay)
    overlay.drop(nuecc_in_av.index, inplace=True)
    print("# of nueCC in AV dropped in overlay = "+str(n_before-len(overlay)))

    overlay = pd.concat([overlay,nue], ignore_index=True)

    # from here on out everything else should be the same.


In [None]:
# apply SW trigger, combine overlay + dirt as MC
sw_triggered = [sample.query('swtrig_pre==1') for sample in (overlay, dirt)]
mc = pd.concat(sw_triggered, ignore_index=True)

In [None]:
# separate by in/out FV & cosmic
# infv: true vertex inside the FV and nu_purity_from_pfp above 0.5
infv = mc.query(in_fv_query+' and nu_purity_from_pfp>0.5')

In [None]:
# cosmic: true vertex inside the FV but nu_purity_from_pfp at or below 0.5
cosmic = mc.query(in_fv_query+' and nu_purity_from_pfp<=0.5')

In [None]:
# outfv: events selected by out_fv_query.
# NOTE(review): out_fv_query is not defined anywhere in this notebook (only
# in_fv_query and reco_in_fv_query are); presumably it is provided by
# `from selection_functions import *` and selects true vertices outside the FV —
# confirm, otherwise this line raises NameError on a fresh kernel.
outfv = mc.query(out_fv_query)

In [None]:

# check that everything is accounted for:
# every MC event should fall into exactly one of infv / cosmic / outfv
n_categorized = len(infv)+len(cosmic)+len(outfv)
print(len(mc)==n_categorized)

if len(mc) != n_categorized:
    # number of events missing from (or double counted in) the three categories
    d = len(mc) - n_categorized
    print(d)

    # mc index labels that did not end up in any category
    m = pd.concat([infv, cosmic, outfv])
    diff = np.setdiff1d(list(mc.index),list(m.index))

    #for i in range(d):
        #print(mc.loc[diff[i], 'nu_purity_from_pfp'])
        #print(mc.loc[diff[i], 'nslice'])


In [None]:
# 5 main categories: 

# infv - overlay & dirt events with truth vtx in FV and nu_purity_from_pfp > 0.5
# outfv - overlay & dirt events selected by out_fv_query (presumably truth vtx outside the FV - confirm)
# cosmic - overlay & dirt events with truth vtx in FV and nu_purity_from_pfp <= 0.5
# ext - beam OFF data
# data - beam ON data 

# The order of this list is kept in datasets_bdt, which is what the plotting calls receive.
datasets = [infv, outfv, cosmic, ext, data]

## Apply BDT Model

In [None]:
# Pre-selection applied before the BDT: one slice, reconstructed vertex in the
# FV, contained event with at least one track and exactly one shower, and a
# minimum calibrated shower energy.
BDT_PRE_QUERY = ' and '.join([
    'nslice==1',
    reco_in_fv_query,
    'contained_fraction>0.9',
    'n_tracks_contained>0',
    'n_showers_contained==1',
    'shr_energy_tot_cali>0.07',
    #'trk_energy>0.04', # 40 MeV reco pion/proton cut on leading track - what about non-leading tracks?
])

In [None]:
# Loose cuts applied on top of the pre-selection before the BDT is evaluated.
BDT_LOOSE_CUTS = ' and '.join([
    BDT_PRE_QUERY,
    'shr_score<0.3',
    'trkpid<0.35',
    'shrmoliereavg<15',
    'shr_tkfit_dedx_Y<7',
    'tksh_distance<12',
])

In [None]:
# load bdt model 
# The Booster is created with 4 threads and the trained model is read from a
# file given relative to the notebook's working directory.
bdt_model = xgb.Booster({'nthread': 4})
bdt_model.load_model('bdt_model_feb2021.model')

In [None]:
# apply the saved model to our df
#
# For every category in `datasets` (same order is kept in `datasets_bdt`):
#   1. keep only events passing BDT_LOOSE_CUTS,
#   2. replace unphysical placeholder values (|value| > 1e37) of the BDT inputs
#      by NaN,
#   3. evaluate the BDT and store the result in a new 'BDT_score' column.

datasets_bdt = []

# input features of the BDT, passed to the model in this order
varlist = [
    "shr_score", "shrmoliereavg", "trkpid",
    "n_showers_contained", "shr_tkfit_dedx_Y", "tksh_distance",
    "tksh_angle", "subcluster", "trkshrhitdist2"]

for df in datasets:

    # apply cuts first, then copy: only the selected rows are duplicated, and
    # the in-place writes below act on an independent frame rather than on the
    # result of a selection (avoids pandas' SettingWithCopy ambiguity)
    df = df.query(BDT_LOOSE_CUTS).copy()

    # clean datasets
    for column in varlist:
        df.loc[(df[column] < -1.0e37) | (df[column] > 1.0e37), column] = np.nan

    # create testing dmatrix
    df_test = xgb.DMatrix(data=df[varlist])

    # apply the bdt selection
    preds = bdt_model.predict(df_test)

    # add columns for plotting
    df['BDT_score'] = preds

    datasets_bdt.append(df)

## Systematic Variations & Event Rates

### Create ROOT file with BDT-selected detector variations 

In [None]:
# skip this step if it is already created

In [None]:
# Detector-variation samples, keyed by variation name.
# The numbers are presumably the POT of each variation sample — confirm against
# the sample bookkeeping.
variations = dict(
    LYAttenuation=7.51336E20,
    LYRayleigh=7.60573E20,
    LYDown=7.43109E20,
    SCE=7.39875E20,
    Recomb2=7.59105E20,
    WireModX=7.64918E20,
    WireModYZ=7.532E20,
    WireModThetaXZ=7.64282E20,
    WireModThetaYZ_withSigmaSplines=7.64543E20,
    WireModThetaYZ_withoutSigmaSplines=7.5783E20,
    CV=7.59732E20,
)

# Same variations for the intrinsic-nue samples (keys carry an "_intrinsic" suffix).
intrinsic_variations = dict(
    LYAttenuation_intrinsic=2.3837E22,
    LYRayleigh_intrinsic=2.38081E22,
    LYDown_intrinsic=2.24505E22,
    SCE_intrinsic=2.39023E22,
    Recomb2_intrinsic=2.38193E22,
    WireModX_intrinsic=2.38318E22,
    WireModYZ_intrinsic=2.38416E22,
    WireModThetaXZ_intrinsic=2.31518E22,
    WireModThetaYZ_withSigmaSplines_intrinsic=2.31421E22,
    WireModThetaYZ_withoutSigmaSplines_intrinsic=2.31755E22,
    CV_intrinsic=2.37261E22,
)

In [None]:
# Build the BDT-selected histograms for each standard detector-variation sample.
for v in variations:
    NuMIDetSysWeights.makehist_detsys(v, "BDT_score>0.575", intrinsic=False)

In [None]:
# Build the BDT-selected histograms for each intrinsic-nue detector-variation sample.
# NOTE(review): the call passes intrinsic=False even though the keys are the
# "_intrinsic" samples — this looks like it may have been copied from the cell
# above; confirm against NuMIDetSys.makehist_detsys whether it should be True.
for v in intrinsic_variations:
    NuMIDetSysWeights.makehist_detsys(v, "BDT_score>0.575", intrinsic=False)

### Plotting

In [None]:
# Plot configuration: track-shower opening angle.
# NOTE: the other plot-configuration cells in this section assign the same
# names; run only the one you want (on Run All the last one executed wins).
xvar, x_label = "tksh_angle", "Opening Angle (cos $\\theta_{ep}$)"
bins = [-1, -0.6, -0.2, 0.2, 0.6, 1]
xlow, xhigh = -1, 1
data_pot = "$2.0\\times10^{20}$ POT"

In [None]:
# Plot configuration: total deposited energy.
# NOTE: the other plot-configuration cells in this section assign the same
# names; run only the one you want (on Run All the last one executed wins).
xvar, x_label = "NeutrinoEnergy2_GeV", "Total Deposited Energy [GeV]"
bins = [0.19, .4, .65, .85, 1.15, 1.5, 4]
xlow, xhigh = 0, 4
data_pot = "$2.0\\times10^{20}$ POT"

In [None]:
# Plot configuration: calibrated shower energy.
# NOTE: the other plot-configuration cells in this section assign the same
# names; run only the one you want (on Run All the last one executed wins).
xvar, x_label = "shr_energy_cali", "Shower Energy [GeV]"
bins = [0.09, 0.4, 0.65, 1, 3]
xlow, xhigh = 0.09, 3
data_pot = "$2.0\\times10^{20}$ POT"

In [None]:
# Plot configuration: number of contained tracks.
# NOTE: the other plot-configuration cells in this section assign the same
# names; on Run All this one, being the last, is the one the plots below use.
xvar, x_label = "n_tracks_contained", "Track Multiplicity"
bins = [1, 2, 3, 7]
xlow, xhigh = 1, 7
data_pot = "$2.0\\times10^{20}$ POT"

In [None]:
# MC-only prediction for the currently configured variable, after the
# BDT_score>0.575 cut, with plt_norm='proj' and the 9.23e20 POT label; sys=False
# and save=False are passed. plot_mc is not defined in this notebook (presumably
# from selection_functions). The result is kept in `x`, which later cells overwrite.
x = plot_mc(xvar, bins, xlow, xhigh, 'BDT_score>0.575', datasets_bdt, ISRUN3, 
            plt_norm='proj', pot='$9.23\\times10^{20}$', ymax=450, sys=False, x_label=x_label, 
            save=False, save_label="proj")

#### Flux Systematics

In [None]:
# PPFX (hadron production) flux systematics from the 'weightsPPFX' column; 600 is
# presumably the number of universes — confirm against calcSysError.
x = calcSysError(xvar, bins, xlow, xhigh, 'BDT_score>0.575', datasets_bdt, 'weightsPPFX', 600, 
                 plot=True, save=False, axis_label=x_label, pot=data_pot)

In [None]:
# Beamline-geometry flux systematics from the 'weightsNuMIGeo' column; 20 is
# presumably the number of variations — confirm against calcSysError.
x = calcSysError(xvar, bins, xlow, xhigh, 'BDT_score>0.575', datasets_bdt, 'weightsNuMIGeo', 20, 
                 plot=True, save=False, axis_label=x_label, pot=data_pot)

#### GENIE systematics

In [None]:
# GENIE multi-universe systematics from the 'weightsGenie' column; 600 matches the
# number of all-ones entries used when an event has no GENIE variations.
x = calcSysError(xvar, bins, xlow, xhigh, 
             'BDT_score>0.575', datasets_bdt, 'weightsGenie', 600, plot=True, save=False, 
                axis_label=x_label, pot=data_pot)

In [None]:
## NEED GENIE UNISIMS HERE ##

#### GEANT4 systematics

In [None]:
# GEANT4 reinteraction systematics from the 'weightsReint' column; 1000 matches the
# number of all-ones entries used when an event has no reinteraction variations.
x = calcSysError(xvar, bins, xlow, xhigh, 
             'BDT_score>0.575', datasets_bdt, 'weightsReint', 1000, plot=True, save=False, 
                axis_label=x_label, pot=data_pot)

#### Detector Systematics

In [None]:
# nueCC 
# Detector systematics with intrinsic=True. The next cell reads x[4] as the
# per-variation lists of fractional uncertainties.
x = calcDetSysError(xvar, bins, plot=True, plot_cov=False, save=False, axis_label=x_label, 
                    intrinsic=True, pot=data_pot)


In [None]:
# break down of the detector systematics (nue CC events)
# x[4] holds one list of per-bin fractional uncertainties per variation, assumed
# to be in the same order as intrinsic_variations

# legend labels: variation names with the trailing "_intrinsic" (10 characters) removed
keys = [name[:-10] for name in intrinsic_variations]

fig = plt.figure(figsize=(8, 5))

for idx, frac_unc in enumerate(x[4]):
    # leading 0 gives plt.step one value per bin edge
    plt.step(bins, [0]+frac_unc, label=keys[idx])

plt.title("Detector Variations ($\\nu_{e}$ CC events)", fontsize=14)
plt.xlabel(x_label, fontsize=14)
plt.ylabel("Fractional Uncertainty", fontsize=14)

plt.xlim(xlow,xhigh)
plt.ylim(0, 0.3)

plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

plt.legend(frameon=False, ncol=2, loc='upper left')
#plt.savefig(plots_path+xvar+"_DetFracUncertainty_Intrinsic.pdf", transparent=True, bbox_inches='tight')
plt.show()


In [None]:
# non nueCC backgrounds
# Detector systematics with intrinsic=False. The next cell reads x[4] as the
# per-variation lists of fractional uncertainties.
x = calcDetSysError(xvar, bins, plot=True, plot_cov=False, save=False, axis_label=x_label, 
                    intrinsic=False, pot=data_pot)

In [None]:
# break down of the detector systematics (non-nue CC events)
# x[4] holds one list of per-bin fractional uncertainties per variation, assumed
# to be in the same order as variations

keys = list(variations)

fig = plt.figure(figsize=(8, 5))

for idx, frac_unc in enumerate(x[4]):
    # leading 0 gives plt.step one value per bin edge
    plt.step(bins, [0]+frac_unc, label=keys[idx])

plt.title("Detector Variations (non-$\\nu_{e}$ CC events)", fontsize=14)
plt.xlabel(x_label, fontsize=14)
plt.ylabel("Fractional Uncertainty", fontsize=14)

plt.xlim(xlow,xhigh)
plt.ylim(0)

plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

plt.legend(frameon=False, ncol=2)
#plt.savefig(plots_path+xvar+"_DetFracUncertainty.pdf", transparent=True, bbox_inches='tight')
plt.show()


In [None]:
# Data/MC comparison with systematics switched on (sys=True), normalised with
# plt_norm='pot'. The two returned objects are used by the next cell:
# percent_errors is indexed 0..5 there (statistical, PPFX, beamline geometry,
# GENIE, reinteraction, detector) and tot_percent_error is drawn as the total.
# The lower x limit is taken from bins[0] rather than xlow.
percent_errors, tot_percent_error = plot_data(xvar, bins, bins[0], xhigh, 'BDT_score>0.575', 
          datasets_bdt, ISRUN3, plt_norm='pot', bdt_scale=None, ymax=85, 
          x_label=x_label, sys=True, save=False, save_label="Final")

In [None]:
## plot the fractional systematic uncertainty
# one step curve per uncertainty source, plus the total

fig = plt.figure(figsize=(8, 5))

# (per-bin fractional uncertainties, legend label, colour)
curves = [
    (percent_errors[0], "Statistical", 'darkgreen'),
    (percent_errors[1], "PPFX", 'tab:red'),
    (percent_errors[2], "Beamline Geometry", 'orange'),
    (percent_errors[3], "GENIE", 'peru'),
    (percent_errors[4], "Re-Interaction", 'violet'),
    (percent_errors[5], "Detector ($\\nu_{e}$ CC events)", 'lightskyblue'),
]

for frac_unc, source, colour in curves:
    # leading 0 gives plt.step one value per bin edge
    plt.step(bins, [0]+frac_unc, label=source, color=colour)

plt.step(bins, [0]+tot_percent_error, label="Total", color='black',linewidth=2)

plt.xlabel(x_label, fontsize=14)
plt.ylabel("Fractional Uncertainty", fontsize=14)

plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

plt.xlim(bins[0], xhigh)
plt.ylim(0, .5)

plt.legend(fontsize=12, frameon=False, ncol=2)

#plt.savefig(plots_path+xvar+"_FracUncertainty.pdf", transparent=True, bbox_inches='tight')

plt.show()

## NuMI Oscillations (3+1 Model)

In [None]:
# Rebuild the detector-variation histograms with ["nu_e"] passed as an extra
# argument (presumably the list of variables to histogram — confirm against
# NuMIDetSys.makehist_detsys).
for v in variations:
    NuMIDetSysWeights.makehist_detsys(v, "BDT_score>0.575", ["nu_e"], intrinsic=False)

In [None]:
# Plot configuration: true neutrino energy (used for the oscillation studies).
xvar, x_label = "nu_e", "True Neutrino Energy [GeV]"
bins = [0.19, .4, .65, .85, 1.15, 1.5, 4]
xlow, xhigh = 0, 4
data_pot = "$2.0\\times10^{20}$ POT"

In [None]:
# Projected MC rate with osc='machado_bestfit.csv' passed to plot_mc, in 0.05-wide
# bins from 0 to 0.7. The binning and axis range are given inline here; the
# `bins`, `xlow` and `xhigh` set in the configuration cell above are not used by
# this call.
x = plot_mc(xvar, [round(0.01*x, 2) for x in range(0, 75, 5)], 0, 0.7, 
        'BDT_score>0.575', datasets_bdt, ISRUN3, 
        plt_norm='proj', pot='$9.23\\times10^{20}$', ymax=30,
 sys=True, x_label='True Neutrino Energy [GeV]', save=False, save_label='bestfit',
       osc='machado_bestfit.csv')

In [None]:
# Same plot as the best-fit case, but with osc='biggest_variation.csv'.
x = plot_mc(xvar, [round(0.01*x, 2) for x in range(0, 75, 5)], 0, 0.7, 
        'BDT_score>0.575', datasets_bdt, ISRUN3, 
        plt_norm='proj', pot='$9.23\\times10^{20}$', ymax=30,
 sys=True, x_label='True Neutrino Energy [GeV]', save=False, save_label='biggest',
       osc='biggest_variation.csv')

In [None]:
# full range 
# smaller binning [round(0.01*x, 2) for x in range(0, 455, 5)]
# Projected MC rate over 0-4.5 in 0.5-wide bins; no `osc` argument is passed here.

x = plot_mc(xvar, [0, 0.5, 1, 1.50, 2, 2.5, 3, 3.5, 4, 4.5], 0, 4.5, 
        'BDT_score>0.575', datasets_bdt, ISRUN3, 
        plt_norm='proj', pot='$9.23\\times10^{20}$', #ymax=30,
        sys=True, x_label='True Neutrino Energy [GeV]', save=False)

In [None]:
# if we want to save the dictionary from plot_MC output
# NOTE(review): `d` is not assigned anywhere before this cell in the notebook (it
# is only created by the json.load cell below, and the plot_mc results are stored
# in `x`). Unless the star import provides it, a fresh Restart & Run All
# presumably raises NameError here after the output file has already been opened
# for writing — confirm which object should be dumped.
# The file name is a placeholder and must be edited before use.

import json

with open('Insert_File_Name_Here.json', 'w') as f:
    json.dump(d, f)

In [None]:
# to load a stored dictionary 
# Reads a previously saved JSON file into `d`; the path is relative to the
# notebook's working directory.
with open('FHC_Projected_TrueNeutrinoEnergy.json') as f:
    d = json.load(f)