## Setup

In [None]:
# plot event rate variations, fractional uncertainties, & data/MC comparisons 
# for all sources of systematic error
# also consider potential NuMI oscillations on the event rate 
# make sure to update the plots_path here & in backend function scripts before saving

In [None]:
import sys

sys.path.insert(0, 'backend_functions')

import selection_functions as sf

import importlib

import uproot
import matplotlib.pylab as pylab
import numpy as np
import math
from sklearn.model_selection import train_test_split
import pickle
import xgboost as xgb


import awkward
import matplotlib.pyplot as plt
import pandas as pd

import ROOT

import top 
from top import *

import uncertainty_functions 
from uncertainty_functions import *

import xsec_functions 
from xsec_functions import smear_matrix

from ROOT import TH1D, TH2D, TDirectory, TH1F, TH2F

from selection_functions import *


In [None]:
from datetime import datetime
import time

# Record when this run of the notebook started. The printed label promises a
# date and a time, so the format carries both (the previous "%H:%M:%S" format
# silently dropped the date).
now = datetime.now()
date_time = now.strftime("%Y-%m-%d %H:%M:%S")
print("date and time:",date_time)

In [None]:
# Local helper module; reloaded so that edits to it are picked up without
# restarting the kernel.
import NuMIGeoWeights
importlib.reload(NuMIGeoWeights)

# the default option is FHC, RHC needs different arguments
# NOTE(review): the helper is built with its defaults regardless of the run
# flag set further down -- confirm the RHC arguments are passed for Run 3.
numiBeamlineGeoWeights = NuMIGeoWeights.NuMIGeoWeights() 

In [None]:
# Local helper module; reloaded so that edits to it are picked up without
# restarting the kernel.
import NuMIDetSys
importlib.reload(NuMIDetSys)

# Instance used below to build and plot the detector-variation histograms
# (makehist_detsys / plot_variations).
NuMIDetSysWeights = NuMIDetSys.NuMIDetSys()

In [None]:
# Run selector: False -> Run 1 FHC ntuples, True -> Run 3 RHC ntuples
# (the file-name cell below branches on this flag).
ISRUN3 = False


In [None]:
# Use the dedicated intrinsic nue sample? When True it is loaded alongside the
# standard overlay and later replaces the overlay's nue CC events.
NUE_INTRINSIC = True

In [None]:
# Plot output path from the per-run parameter table. `parameters` is not
# defined in this notebook; presumably it comes from one of the star imports.
plots_path = parameters(ISRUN3)['plots_path']

In [None]:
# POT normalization factors
# All values are read from the per-run parameter table; they feed the POT /
# trigger scale factors computed in the "Weights" section.


overlay_pot =  parameters(ISRUN3)['overlay_pot'] #2.33652E21  # v7       
dirt_pot = parameters(ISRUN3)['dirt_pot'] # david's file
beamon_pot = parameters(ISRUN3)['beamon_pot'] # v5
    
#proj_pot = parameters(ISRUN3)['proj_pot'] # FHC Runs 1-5: 9.23E20, FHC Runs 1-3: 4.125E20 

# Trigger counts: beam-off (EXT) is normalised by triggers rather than POT.
beamon_ntrig =  parameters(ISRUN3)['beamon_ntrig'] # v5 (EA9CNT_wcut)
beamoff_ntrig = parameters(ISRUN3)['beamoff_ntrig']  # v5 (EXT_NUMIwin_FEMBeamTriggerAlgo)
    
# Only defined when the intrinsic nue sample is in use.
if NUE_INTRINSIC: 
    nue_intrinsic_pot = parameters(ISRUN3)['intrinsic_pot'] # v7
    

In [None]:
# Location of the selection tree inside every ntuple file.
fold = "nuselection"
tree = "NeutrinoSelectionFilter"

# slimmed with opening angle
path = parameters(ISRUN3)['cv_ntuple_path']
print('path = ', path)

# File stems (".root" is appended when the files are opened). NUE stays an
# empty string unless the intrinsic nue sample is requested.
NUE = ""

if ISRUN3:

    # Run 3 RHC
    OVRLY = 'neutrinoselection_filt_run3b_overlay_v7'
    DATA = 'neutrinoselection_filt_run3b_beamon_beamgood_v5'
    EXT = 'neutrinoselection_filt_run3b_beamoff_v5'
    DRT = 'neutrinoselection_filt_run3b_dirt_overlay_v6'

    if NUE_INTRINSIC:
        NUE = 'neutrinoselection_filt_run3b_overlay_intrinsic_v7'

else:

    # Run 1 FHC
    OVRLY = 'neutrinoselection_filt_run1_overlay_v7'
    DATA = 'neutrinoselection_filt_run1_beamon_beamgood_v5'
    EXT = 'neutrinoselection_filt_run1_beamoff_v5'
    DRT = 'prodgenie_numi_uboone_overlay_dirt_fhc_mcc9_run1_v28_all_snapshot'

    if NUE_INTRINSIC:
        NUE = 'neutrinoselection_filt_run1_overlay_intrinsic_v7'



In [None]:
# Open the selection tree of each sample. The list order
# [overlay, data, ext, dirt, (nue)] is relied on later, where the trees are
# matched to their DataFrames by position.
uproot_v = [uproot.open(path+stem+".root")[fold][tree] for stem in (OVRLY, DATA, EXT, DRT)]
overlay, data, ext, dirt = uproot_v

if NUE_INTRINSIC:
    nue = uproot.open(path+NUE+".root")[fold][tree]
    uproot_v.append(nue)


In [None]:
# Branches read from every sample (MC, beam-on and beam-off).
variables = [
    "trk_score_v", 
    "shr_tkfit_dedx_Y", 
    "n_tracks_contained", 
    "NeutrinoEnergy2",
    "run","sub","evt",
    "reco_nu_vtx_sce_x","reco_nu_vtx_sce_y","reco_nu_vtx_sce_z",
    "shrsubclusters0","shrsubclusters1","shrsubclusters2",
    "trkshrhitdist2",
    "n_showers_contained", 
    "shr_phi", "trk_phi", "trk_theta",
    "shr_score", 
    "trk_energy", 
    "tksh_distance", "tksh_angle",
    "shr_energy_tot_cali", "shr_energy_cali", 
    "nslice", 
    "contained_fraction",
    "shrmoliereavg", "shr_px", "shr_py", "shr_pz"
]

# MC only variables
mc_var = ["nu_pdg", "shr_theta", "true_e_visible", "ccnc", 
          "nproton", "nu_purity_from_pfp", "nu_e", "npi0", "npion",
          "true_nu_vtx_x", "true_nu_vtx_y" , "true_nu_vtx_z", 
          "weightTune", "weightSpline", "weightSplineTimesTune", 
          "true_nu_px", "true_nu_py", "true_nu_pz", 
          "elec_e", "proton_e", "mc_px", "mc_py", "mc_pz", "elec_px", "elec_py", "elec_pz", 
          "swtrig_pre", "ppfx_cv", "mc_pdg", "opening_angle"]

# GENIE / re-interaction systematic branches. The ordering matters: later cells
# slice this list by position -- [0:2] are the per-universe weight vectors,
# [2:] are the up/down knobs, and the last four entries ([-4:]) are the knobs
# that are not read for the dirt sample.
sys_genie = ["weightsGenie", "weightsReint", 
             "knobRPAup", "knobRPAdn", 
             "knobCCMECup", "knobCCMECdn", 
             "knobAxFFCCQEup", "knobAxFFCCQEdn", 
             "knobVecFFCCQEup", "knobVecFFCCQEdn", 
             "knobDecayAngMECup", "knobDecayAngMECdn", 
             "knobThetaDelta2Npiup", "knobThetaDelta2Npidn", 
             "knobThetaDelta2NRadup", "knobThetaDelta2NRaddn", 
             #"knobRPA_CCQE_Reducedup", "knobRPA_CCQE_Reduceddn", 
             "knobNormCCCOHup", "knobNormCCCOHdn", 
             "knobNormNCCOHup", "knobNormNCCOHdn",    
             "knobxsr_scc_Fv3up", "knobxsr_scc_Fv3dn", 
             "knobxsr_scc_Fa3up", "knobxsr_scc_Fa3dn"]

# Flux (PPFX) universe weights.
sys_flux = ['weightsPPFX']

## Create pandas dataframes

In [None]:
# Rebinds `overlay` from the uproot tree to a DataFrame (the tree itself stays
# reachable through uproot_v[0]). Not re-runnable without re-opening the file.
overlay = overlay.pandas.df(variables + mc_var + sys_genie + sys_flux, flatten=False)

In [None]:
# The dirt sample is read without the last four GENIE knobs
# (knobxsr_scc_Fv3/Fa3 up/dn); they are zero-padded when the unisim vector is built.
dirt = dirt.pandas.df(variables + mc_var + sys_genie[:-4] + sys_flux, flatten=False)

In [None]:
# Intrinsic nue sample: same branch set as the standard overlay.
if NUE_INTRINSIC: 
    nue = nue.pandas.df(variables + mc_var + sys_genie + sys_flux, flatten=False)

In [None]:
# Beam-on data: reco branches only (no truth or systematic weights).
data = data.pandas.df(variables, flatten=False) 

In [None]:
# Beam-off (EXT) data: reco branches only (no truth or systematic weights).
ext = ext.pandas.df(variables, flatten=False)

In [None]:
# Give the two data frames NaN placeholders for every MC-only column so that
# all samples share the same column names.
for var in mc_var+sys_genie+sys_flux:
    for frame in (data, ext):
        frame[var] = np.nan

In [None]:
# is dirt bool
# True only for the dirt sample; data / EXT get NaN because the flag is
# not meaningful for them.

overlay['isDirt'] = False
dirt['isDirt'] = True

if NUE_INTRINSIC: 
    nue['isDirt'] = False
    
data['isDirt'] = np.nan
ext['isDirt'] = np.nan

In [None]:
# how to get the LLR-PID value for the "track candidate" 
# (proton for nue selection, muon for numu)
# can be done for any variable
# code from Giuseppe!
#LLR-PID : log likelihood ratio particle ID 

# Same ordering as uproot_v: frame i is paired with tree i below.
df_v = [overlay,data,ext,dirt]

if NUE_INTRINSIC: 
    df_v.append(nue)
    
for i,df in enumerate(df_v):
    up = uproot_v[i]
    # per-event vector of PID scores and the index of the track candidate
    trk_llr_pid_v = up.array('trk_llr_pid_score_v')
    trk_id = up.array('trk_id')-1 # I think we need this -1 to get the right result
    # Pick the candidate's score; 9999. marks events whose index is past the
    # end of the score vector.
    # NOTE(review): a negative tid (trk_id == 0) passes the tid<len(pidv) test
    # and wraps around to the last score -- confirm trk_id is always >= 1.
    trk_llr_pid_v_sel = awkward.fromiter([pidv[tid] if tid<len(pidv) else 9999. for pidv,tid in zip(trk_llr_pid_v,trk_id)])
    df['trkpid'] = trk_llr_pid_v_sel
    # total shower sub-cluster count, summed over the three shrsubclusters branches
    df['subcluster'] = df['shrsubclusters0'] + df['shrsubclusters1'] + df['shrsubclusters2']
    
    # divided by 1000 to give the GeV column (the input is presumably in MeV)
    df['NeutrinoEnergy2_GeV'] = df['NeutrinoEnergy2']/1000


In [None]:
# Simulated samples. Later cells rely on dirt sitting at index 1.
mc_df = [overlay, dirt] + ([nue] if NUE_INTRINSIC else [])
    

In [None]:
# Length of the GENIE unisim vector (every knob after the two multisim branches).
n_unisim = len(sys_genie[2:])

for i, df in enumerate(mc_df):

    # --- signal flag: SW trigger + neutrino-matched slice, nue CC with
    #     >=1 proton and no pions, true vertex inside the fiducial box
    passes_trigger = (df.swtrig_pre == 1) & (df.nu_purity_from_pfp>0.5)
    nue_cc_np_0pi = ((df.nu_pdg==12) & (df.ccnc==0) & (df.nproton>0)
                     & (df.npion==0) & (df.npi0==0))
    true_vtx_in_fv = ((10 <= df.true_nu_vtx_x) & (df.true_nu_vtx_x <= 246)
                      & (-106 <= df.true_nu_vtx_y) & (df.true_nu_vtx_y <= 106)
                      & (10 <= df.true_nu_vtx_z) & (df.true_nu_vtx_z <= 1026))
    df['is_signal'] = np.where(passes_trigger & nue_cc_np_0pi & true_vtx_in_fv, True, False)

    # --- truth-level theta & phi angles (detector & beam coordinates);
    #     'thbeam' from this step is needed by the geometry weight below
    df = addAngles(df)

    # --- multisim weight vectors are rescaled by 1/1000
    for weight_col in ('weightsPPFX', 'weightsReint', 'weightsGenie'):
        df[weight_col] = df[weight_col]/1000

    # --- beamline geometry weights, evaluated row by row
    df['weightsNuMIGeo'] = df.apply(
        lambda row: numiBeamlineGeoWeights.calculateGeoWeight(row['nu_pdg'],row['nu_e'],row['thbeam']),
        axis=1)

    # --- GENIE unisim vector: one entry per knob. The dirt sample (index 1)
    #     lacks the last 4 knobs, so its rows are zero-padded to full length.
    is_dirt = (i == 1)
    knob_columns = sys_genie[2:-4] if is_dirt else sys_genie[2:]

    universes = []
    for evt in df[knob_columns].values:
        if np.all(evt == 1):
            # don't include CV neutrinos
            universes.append( [0]*n_unisim )
        elif is_dirt:
            universes.append( list(evt) + [0, 0, 0, 0] )
        else:
            universes.append( evt )

    df['weightsGenieUnisim'] = universes

    # for easier handling: every row becomes a numpy array
    df['weightsGenieUnisim'] = df['weightsGenieUnisim'].apply(lambda u: np.array(u))
  

In [None]:
# make dataframes equal # of columns

data['is_signal'] = np.nan
ext['is_signal'] = False

# truth-angle and weight columns that only exist for the MC frames
nan_var = ['thdet', 'phidet', 'true_nu_px_beam', 'true_nu_py_beam', 'true_nu_pz_beam',
           'thbeam', 'phibeam','weightsNuMIGeo', 'weightsGenieUnisim']

# NaN placeholders on beam-on and beam-off data for every MC-only column
for var in mc_var+sys_genie+sys_flux+nan_var:
    for frame in (data, ext):
        frame[var] = np.nan

In [None]:
# np.setdiff1d(ext.columns,overlay.columns)
# ext.columns == overlay.columns

In [None]:
# check is_signal boolean 

In [None]:
# some checks
# Cross-check the is_signal flag against the `signal` / `not_signal` query
# strings (not defined in this notebook; presumably from the star imports).
# Both lines should print True. `nue` only exists when the intrinsic sample is
# loaded, so the check is skipped otherwise instead of raising a NameError.
if NUE_INTRINSIC:
    print(len(nue.query('is_signal==True'))==len(nue.query(signal)))
    print(len(nue.query('is_signal==False'))==len(nue.query(not_signal)))
else:
    print('NUE_INTRINSIC is False: no intrinsic nue sample loaded, skipping is_signal check')

## Weights

In [None]:
# clean bad weights & values

def _clean_universe_weights(weights, n_universes, max_weight=60.):
    """Return a sanitised copy of one event's vector of universe weights.

    Entries that are NaN, infinite, negative or larger than ``max_weight`` are
    reset to 1. An event with no variations at all (empty vector) gets a vector
    of ``n_universes`` ones.

    The previous row-by-row version tested NaN with ``== np.nan`` (never true)
    and wrote back through chained ``df[col].iloc[i] = ...`` assignment; this
    helper builds a fresh array instead.
    """
    cleaned = np.array(weights, dtype=float)  # np.array copies: the input is left untouched

    # if no variations exist for the event
    if cleaned.size == 0:
        return np.ones(n_universes)

    bad = ~np.isfinite(cleaned) | (cleaned > max_weight) | (cleaned < 0)
    cleaned[bad] = 1.
    return cleaned


for i,df in enumerate(mc_df):

    # bad central-value weights: non-positive, non-finite or > 100 -> 1
    for cv_col in ('weightSplineTimesTune', 'weightTune'):
        bad_cv = (df[cv_col] <= 0) | (df[cv_col] > 100) | ~np.isfinite(df[cv_col])
        df.loc[bad_cv, cv_col] = 1.

    # per-universe weight vectors, with the number of universes expected when
    # an event carries none: GENIE multisims, re-interaction weights, GENIE unisims
    for vec_col, n_universes in (('weightsGenie', 600),
                                 ('weightsReint', 1000),
                                 ('weightsGenieUnisim', len(sys_genie[2:]))):
        df[vec_col] = df[vec_col].apply(
            lambda w, n=n_universes: _clean_universe_weights(w, n))
            
        

In [None]:
# pot scaling weights

dirt_tune = parameters(ISRUN3)['dirt_tune']
ext_tune = parameters(ISRUN3)['ext_tune']

##############################################
# SCALE TO BEAM ON POT
overlay_scale_to_data = beamon_pot/overlay_pot
dirt_scale_to_data = dirt_tune*(beamon_pot/dirt_pot)
beamoff_scale_to_data = ext_tune*(beamon_ntrig/beamoff_ntrig) # scale factor to beam on POT

##############################################
# SCALE TO OVERLAY
dirt_scale_to_overlay = dirt_tune*(overlay_pot/dirt_pot)
beamoff_scale_to_overlay = ext_tune*((overlay_pot/beamon_pot)*(beamon_ntrig/beamoff_ntrig))

# (frame, scale to beam-on POT, scale to standard overlay)
pot_scales = [
    (overlay, overlay_scale_to_data, 1),
    (dirt, dirt_scale_to_data, dirt_scale_to_overlay),
    (ext, beamoff_scale_to_data, beamoff_scale_to_overlay),
    (data, 1, 1),
]

if NUE_INTRINSIC:
    nue_scale_to_data = beamon_pot/nue_intrinsic_pot
    nue_scale_to_overlay = overlay_pot/nue_intrinsic_pot
    pot_scales.append((nue, nue_scale_to_data, nue_scale_to_overlay))

# one constant per sample, broadcast to every row
for frame, to_data, to_overlay in pot_scales:
    frame['pot_scale'] = to_data
    frame['pot_scale_overlay'] = to_overlay
    
    

In [None]:
# total weights

# combined genie * POT weight * flux weight
# ext gets POT weight only

weighted_samples = [overlay, dirt] + ([nue] if NUE_INTRINSIC else [])

for sample in weighted_samples:

    # totweight_data scales to BEAMON (tuned)
    sample['totweight_data'] = sample['pot_scale']*sample['ppfx_cv']*sample['weightSplineTimesTune']

    # totweight_overlay scales to STANDARD OVERLAY (tuned);
    # pot_scale_overlay is 1 for the overlay itself, so it drops out there
    sample['totweight_overlay'] = sample['pot_scale_overlay']*sample['ppfx_cv']*sample['weightSplineTimesTune']


In [None]:
# to keep the number of columns the same 
# The MC frames carry 'totweight_data' and 'totweight_overlay'. Previously only
# 'totweight' and 'totweight_overlay' were padded here, so data / EXT never got
# a 'totweight_data' column. 'totweight' is kept in case other code still
# expects it on these frames.
new_var = ['totweight', 'totweight_data', 'totweight_overlay']

for var in new_var: 
    for df in [data, ext]: 
        df[var] = np.nan

## Categories

In [None]:
# replace overlay nue CC events with nue intrinsic sample
# NOTE: not re-runnable -- a second execution appends the intrinsic sample again.

if NUE_INTRINSIC:

    # intrinsic sample contains in AV TPC events ONLY, & only CC events (overlay is entire cryo)
    in_AV_query = "-1.55<=true_nu_vtx_x<=254.8 and -116.5<=true_nu_vtx_y<=116.5 and 0<=true_nu_vtx_z<=1036.8"
    nueCC_query = 'abs(nu_pdg)==12 and ccnc==0 and '+in_AV_query

    # rows of the standard overlay that the intrinsic sample supersedes
    idx = overlay.query(nueCC_query).index
    print("# of nueCC in AV in overlay sample = "+str(len(idx)))

    len1 = len(overlay)
    # in-place on purpose: mc_df[0] refers to this same frame
    overlay.drop(idx, inplace=True)
    len2 = len(overlay)
    print("# of nueCC in AV dropped in overlay = "+str(len1-len2))

    overlay = pd.concat([overlay,nue], ignore_index=True)

    # from here on out everything else should be the same.

    # from here on out everything else should be the same. 


In [None]:
# apply SW trigger, combine overlay + dirt as MC 
mc = pd.concat([overlay.query('swtrig_pre==1'),dirt.query('swtrig_pre==1')], ignore_index=True, sort=True)

# separate by in/out FV & cosmic
# in_fv_query / out_fv_query are not defined in this notebook; presumably query
# strings provided by the star imports.
infv = mc.query(in_fv_query+' and nu_purity_from_pfp>0.5')
#cosmic = mc.query(in_fv_query+' and nu_purity_from_pfp<=0.5')
# NOTE(review): with the cosmic category disabled, in-FV events with
# nu_purity_from_pfp<=0.5 belong to neither infv nor outfv.
outfv = mc.query(out_fv_query)

In [None]:

# check that everything is accounted for 
# Prints True when infv + outfv together cover every MC event.
print(len(mc)==len(infv)+len(outfv))#+len(cosmic))

if not (len(mc)==len(infv)+len(outfv)):#+len(cosmic)): 
    # d = number of MC events that fall in no category
    d = len(mc) - (len(infv)+len(outfv))#+len(cosmic))
    print(d)
    
     
    # diff = index labels of the uncategorised events (for inspection)
    m = pd.concat([infv, outfv]) #pd.concat([infv, cosmic, outfv])
    diff = np.setdiff1d(list(mc.index),list(m.index))

    #for i in range(d):
        #print(mc.loc[diff[i], 'nu_purity_from_pfp'])
        #print(mc.loc[diff[i], 'nslice'])


In [None]:
# Sum of totweight_data (beam-on normalisation) over MC events passing the
# `signal` and in_fv_query selections; NaN weights are ignored by nansum.
tot_signal_weighted = np.nansum(mc.query(signal+' and '+in_fv_query)['totweight_data'])
print('total signal events in FV = '+ str(tot_signal_weighted))

In [None]:
# main categories (the cosmic category is currently disabled): 

# infv - overlay & dirt events passing in_fv_query with nu_purity_from_pfp > 0.5
# outfv - overlay & dirt events passing out_fv_query
# cosmic - (disabled) overlay & dirt events passing in_fv_query with nu_purity_from_pfp <= 0.5
# ext - beam OFF data
# data - beam ON data 

datasets = {
    "infv": infv, 
    "outfv": outfv, 
    #"cosmic": cosmic, 
    "ext": ext, 
    "data": data
}

# [infv, outfv, cosmic, ext, data]

# Apply BDT Model 

In [None]:
# Pre-selection applied before the BDT, assembled as one pandas query string.
BDT_PRE_QUERY = ' and '.join([
    # quality cuts
    'nslice==1',
    reco_in_fv_query,
    'contained_fraction>0.9',
    # signal definition - shower constraints
    'n_showers_contained==1',
    'shr_energy_tot_cali>0.07',
    # signal definition - track constraints
    'n_tracks_contained>0',
    'trk_energy>0.04',
])


In [None]:
# Pre-selection plus the loose cuts applied before BDT scoring.
BDT_LOOSE_CUTS = ' and '.join([
    BDT_PRE_QUERY,
    # loose shower constraints
    'shr_score<0.3',
    'shrmoliereavg<15',
    'shr_tkfit_dedx_Y<7',
    # loose track constraints
    'trkpid<0.35',
    'tksh_distance<12',
])

In [None]:
# Column names fed to the BDT (used below to build the xgboost DMatrix).
training_parameters = parameters(ISRUN3)['bdt_training_parameters']

In [None]:
# load bdt model 
# The model location comes from the per-run parameter table.
bdt_model = xgb.Booster({'nthread': 4})
bdt_model.load_model(parameters(ISRUN3)['bdt_model'])

In [None]:
# Apply the loose pre-selection to every category and attach the BDT score.
datasets_bdt = {}

for name, sample in datasets.items():

    # Select first, then copy: only the surviving rows are duplicated, and the
    # frame modified below is an independent copy rather than a query result
    # (the previous copy-then-query order mutated the query result, which
    # triggers pandas' SettingWithCopy warning).
    df = sample.query(BDT_LOOSE_CUTS).copy()

    # clean datasets: values beyond +-1e37 are treated as missing
    for column in training_parameters:
        df.loc[(df[column] < -1.0e37) | (df[column] > 1.0e37), column] = np.nan

    # create testing dmatrix
    df_test = xgb.DMatrix(data=df[training_parameters])

    # apply the bdt selection
    preds = bdt_model.predict(df_test)

    # add columns for plotting
    df['BDT_score'] = preds

    datasets_bdt[name] = df

  
    

In [None]:
# BDT score threshold from the per-run parameter table.
bdt_score_cut = parameters(ISRUN3)['bdt_score_cut']
    
print("BDT SCORE THRESHOLD = "+str(bdt_score_cut))

# Full selection = loose cuts + BDT score cut; the signal variant additionally
# requires the truth-level is_signal flag.
selected_query = BDT_LOOSE_CUTS + ' and BDT_score>'+str(bdt_score_cut)
selected_signal_query = selected_query + ' and is_signal==True'

In [None]:
# generated_signal is not defined in this notebook (presumably a star-imported
# helper); its summed output is scaled from overlay POT to beam-on POT.
print("GENERATED SIGNAL (DATA NORMALIZED) = "+str(sum(generated_signal(ISRUN3, 'nu_e', 1, 0, 20))*overlay_scale_to_data))

In [None]:
# stat only errors 
# Reconstructed neutrino energy after the BDT cut (20 bins, 0-5 GeV).
# NOTE(review): the POT label is a hard-coded string -- confirm it matches beamon_pot.

x = plot_mc('NeutrinoEnergy2_GeV', 20, 0, 5, 'BDT_score>'+str(bdt_score_cut), datasets_bdt, 
            ISRUN3, x_label="Reco $\\nu$ Energy [GeV]", norm='data', pot='$2.0\\times10^{20}$')

# Systematics

In [None]:
# True neutrino energy range spanned by the selected signal events.
selected_signal_nu_e = datasets_bdt['infv'].query(selected_signal_query).nu_e
print('max selected neutrino energy [true, GeV] =', max(selected_signal_nu_e))
print('min selected neutrino energy [true, GeV] =', min(selected_signal_nu_e))

In [None]:
# Variable for which the systematic uncertainties are evaluated.
xvar = 'shr_energy_cali'

# Binning, truth counterpart and axis label for xvar come from the backend.
xvar_dict = xsec_variables(xvar, ISRUN3)

bins = xvar_dict['bins']
true_var = xvar_dict['true_var']
x_label = xvar_dict['x_label']
# NOTE(review): this rebinds the notebook-level beamon_pot that was used for the
# POT scale factors above -- confirm both sources hold the same kind of value
# before re-running earlier cells.
beamon_pot = xvar_dict['beamon_pot']
xlow = xvar_dict['xlow']
xhigh = xvar_dict['xhigh']


In [None]:
# Switches read by the cells below: xsec_units selects cross-section
# normalisation in the response-matrix section; plot toggles the optional
# figures.
xsec_units = False
plot = False

# Number of targets and integrated flux (flux per POT x beam-on POT), both
# from the per-run parameter table.
n_target = parameters(ISRUN3)['n_target']
flux = parameters(ISRUN3)['integrated_flux_per_pot'] * parameters(ISRUN3)['beamon_pot']

In [None]:
# stat only -- SCALES TO DATA
# Same plot as for the reconstructed energy, now in the chosen xvar binning.
# Called with save=False, sys=False.

x = plot_mc(xvar, bins, xlow, xhigh, 'BDT_score>'+str(bdt_score_cut), datasets_bdt, ISRUN3, 
            norm='data', sys=False, x_label='Reco '+x_label, save=False, save_label="wide", 
            pot="$2.0 x 10^{20}$")


### PPFX

In [None]:
# Reload the helper module and refresh the star-imported names so that edits to
# uncertainty_functions.py take effect without restarting the kernel.
importlib.reload(uncertainty_functions)
from uncertainty_functions import *

In [None]:
# Flux (PPFX) multisims: vary the selected sample with the 'weightsPPFX'
# vector (600 passed as the universe argument), then build the covariance of
# the variations about the central value ncv.
ncv, ppfx_variations = plotSysVariations(true_var, xvar, bins, xlow, xhigh, selected_query, datasets_bdt, 'weightsPPFX',600, 
                                         ISRUN3, plot=plot, axis_label='Reco '+x_label, pot='$2.0 x 10^{20}$ POT', 
                                              background_subtraction=True)

ppfx_dict = calcCov(xvar, bins, ncv, ppfx_variations, 'weightsPPFX', plot=plot, save=False, 
                    axis_label='Reco '+x_label, pot='$2.0 x 10^{20}$ POT', isrun3=ISRUN3)

### Beamline Geometry

In [None]:
# ordered by beamline variation run number
# [+1sigma run #, -1sigma run #]

beamline_runs = {
    'HornCurrent' : [1, 2], 
    'xHorn1' : [3, 4], 
    'yHorn1' : [5, 6], 
    'BeamSpotSize' : [7, 8], 
    'xHorn2' : [9, 10], 
    'yHorn2' : [11, 12], 
    'WaterOnHorns' : [13, 14], 
    'xBeamShift' : [15, 16], 
    'yBeamShift' : [17, 18], 
    'zTargetPosition' : [19, 20]    
}

# one calcCov result per beamline variation, keyed by variation name
beamline_cov = {}

# index in weightsNuMIGeo are offset by -1

for variation in beamline_runs.keys(): 
    
    # run numbers (1-based) -> positions in the weightsNuMIGeo vector (0-based)
    idx = [i-1 for i in beamline_runs[variation]]
    
    # here the universe argument is the list of positions for this variation
    ncv, beamline_variations = plotSysVariations(true_var, xvar, bins, xlow, xhigh, selected_query, datasets_bdt, 'weightsNuMIGeo', 
                                                 idx, ISRUN3, plot=plot, axis_label='Reco '+x_label, pot='$2.0 x 10^{20}$ POT', 
                                                  background_subtraction=True, title=variation)
    
    # calc covariance 
    beamline_cov[variation] = calcCov(xvar, bins, ncv, beamline_variations, 'weightsNuMIGeo', plot=plot, save=False, 
                    axis_label='Reco '+x_label, pot='$2.0 x 10^{20}$ POT', isrun3=ISRUN3)
    



In [None]:
# compute total covariance, correlation, & uncertainty 
# The total covariance is the element-wise sum of the per-variation covariance
# matrices. The previous version wrapped the element loop in an extra loop over
# the variations that recomputed the identical sum once per variation; a single
# pass gives the same matrices.

n_bins = len(bins)-1

cov = [ [0]*n_bins for _ in range(n_bins) ]
frac_cov = [ [0]*n_bins for _ in range(n_bins) ]
cor = [ [0]*n_bins for _ in range(n_bins) ]

for i in range(n_bins): 
    for j in range(n_bins):

        cov[i][j] = sum(beamline_cov[v]['cov'][i][j] for v in beamline_cov)

        # fractional covariance relative to the central value; left at 0 when
        # either bin is empty. ncv is the central value returned by the last
        # plotSysVariations call in the beamline loop.
        if ncv[i]*ncv[j] != 0: 
            frac_cov[i][j] = cov[i][j]/(ncv[i]*ncv[j])

# correlation needs the complete diagonal of cov, hence the second pass
for i in range(n_bins): 
    for j in range(n_bins):

        if np.sqrt(cov[i][i])*np.sqrt(cov[j][j]) != 0: 
            cor[i][j] = cov[i][j] / (np.sqrt(cov[i][i])*np.sqrt(cov[j][j]))

beamline_dict = {
    'cov' : cov, 
    'frac_cov' : frac_cov,
    'cor' : cor,
    'fractional_uncertainty' : np.sqrt(np.diag(frac_cov))
} 

### GENIE multisims

In [None]:
# GENIE multisims: vary the selected sample with the 'weightsGenie' vector
# (600 passed as the universe argument) and build the covariance about ncv.
ncv, genie_variations = plotSysVariations(true_var, xvar, bins, xlow, xhigh, selected_query, datasets_bdt, 'weightsGenie', 600, 
                                         ISRUN3, plot=plot, axis_label='Reco '+x_label, pot='$2.0 x 10^{20}$ POT', 
                                              background_subtraction=True)

genie_dict = calcCov(xvar, bins, ncv, genie_variations, 'weightsGenie', plot=plot, save=False, 
                    axis_label='Reco '+x_label, pot='$2.0 x 10^{20}$ POT', isrun3=ISRUN3)

### GENIE unisims -- need to fix

In [None]:
### TODO: the GENIE unisim variations still need to be fixed for the background

In [None]:
# GENIE unisim variations of the selected signal only (22 = number of knobs in
# sys_genie[2:]), plotted without background subtraction.
# NOTE(review): this rebinds `signal`, which earlier cells use as a query
# string (e.g. mc.query(signal+...)). Those cells fail if re-run after this
# one -- consider a different name.
signal = plotSysVariations(true_var, xvar, bins, xlow, xhigh, selected_signal_query, datasets_bdt, 'weightsGenieUnisim', 22, 
                                      ISRUN3, plot=True, axis_label='Reco '+x_label, pot='$2.0 x 10^{20}$ POT', 
                                              background_subtraction=False)

In [None]:
# Same unisim variations for the selected events that are NOT flagged as signal.
background = plotSysVariations(true_var, xvar, bins, xlow, xhigh, selected_query+' and is_signal==False', 
                               datasets_bdt, 'weightsGenieUnisim', 22, 
                                      ISRUN3, plot=True, axis_label='Reco '+x_label, pot='$2.0 x 10^{20}$ POT', 
                                              background_subtraction=False)

### GEANT4 

In [None]:
# Re-interaction (GEANT4) multisims: vary with the 'weightsReint' vector
# (1000 passed as the universe argument) and build the covariance about ncv.
ncv, geant4_variations = plotSysVariations(true_var, xvar, bins, xlow, xhigh, selected_query, datasets_bdt, 'weightsReint', 1000, 
                                         ISRUN3, plot=plot, axis_label='Reco '+x_label, pot='$2.0 x 10^{20}$ POT', 
                                              background_subtraction=True)

geant4_dict = calcCov(xvar, bins, ncv, geant4_variations, 'weightsReint', plot=plot, save=False, 
                    axis_label='Reco '+x_label, pot='$2.0 x 10^{20}$ POT', isrun3=ISRUN3)

### Detector Systematics

#### Create ROOT file with BDT-selected detector variations 

In [None]:
# Set to True to (re)build the detector-variation ROOT file in the next cell.
recreate_file = False

In [None]:
# skip this step if it is already created
# should manually delete the file first 
# (located here: /uboone/data/users/kmiller/uBNuMI_CCNp/ntuples/runX/systematics/detvar/)

# scales to the det sys CV POT (standard overlay)

# NOTE(review): the variation list (detvar_run1_fhc) and the output file name
# are the Run 1 FHC ones regardless of ISRUN3 -- confirm before running Run 3.

if recreate_file: 
    for v in list(detvar_run1_fhc.keys()): 
        NuMIDetSysWeights.makehist_detsys(v, ISRUN3, "NuMI_FHC_BDT_DetectorVariations_MARCH2022.root", xvar, 
                                          bins, cut=selected_query, useBDT=True)
        


In [None]:
# Read the detector-variation histograms back from the ROOT file. The result is
# used below as a mapping from variation name to bin contents, including a 'CV' entry.
detector_variations = NuMIDetSysWeights.plot_variations(xvar, bins, "NuMI_FHC_BDT_DetectorVariations_MARCH2022.root", ISRUN3, 
                                                        axis_label='Reco '+x_label, plot=True, background_subtraction=True)

In [None]:
# compute covariance (N=1 for each variation)

# one calcCov result per detector variation, keyed by variation name
detsys_cov = {}

# each variation is a single universe compared against the 'CV' histogram

for variation in detector_variations.keys(): 
    
    # the central value is the reference, not a variation
    if variation=='CV': 
        continue
    
    # calc covariance for each unisim 
    detsys_cov[variation] = calcCov(xvar, bins, detector_variations['CV'], [detector_variations[variation]], 'Detector', 
                                    plot=False, save=False, axis_label='Reco '+x_label, pot='$2.0 x 10^{20}$ POT', isrun3=ISRUN3,
                                   title=variation)

In [None]:
# compute total covariance, correlation, & uncertainty 
# The total covariance is the element-wise sum of the per-variation covariance
# matrices. The previous version wrapped the element loop in an extra loop over
# the variations that recomputed the identical sum once per variation; a single
# pass gives the same matrices.

n_bins = len(bins)-1
detsys_cv = detector_variations['CV']

cov = [ [0]*n_bins for _ in range(n_bins) ]
frac_cov = [ [0]*n_bins for _ in range(n_bins) ]
cor = [ [0]*n_bins for _ in range(n_bins) ]

for i in range(n_bins): 
    for j in range(n_bins):

        cov[i][j] = sum(detsys_cov[v]['cov'][i][j] for v in detsys_cov)

        # fractional covariance relative to the detector-systematics CV;
        # left at 0 when either bin is empty
        if detsys_cv[i]*detsys_cv[j] != 0: 
            frac_cov[i][j] = cov[i][j]/(detsys_cv[i]*detsys_cv[j])

# correlation needs the complete diagonal of cov, hence the second pass
for i in range(n_bins): 
    for j in range(n_bins):

        if np.sqrt(cov[i][i])*np.sqrt(cov[j][j]) != 0: 
            cor[i][j] = cov[i][j] / (np.sqrt(cov[i][i])*np.sqrt(cov[j][j]))

detsys_dict = {
    'cov' : cov, 
    'frac_cov' : frac_cov,
    'cor' : cor,
    'fractional_uncertainty' : np.sqrt(np.diag(frac_cov))
} 

### Stat Uncertainty (MC)

In [None]:
# if GENIE closure test : uncertainty on the full estimated event rate 
# if fake/real data : uncertainty on the MC background only  

In [None]:
# Central-value prediction for the full selection, normalised to data.
d = plot_mc(xvar, bins, xlow, xhigh, selected_query, datasets_bdt, ISRUN3, 
            norm='data', sys=False, x_label='Reco '+x_label, pot="$2.0 x 10^{20}$")

# per-bin totals, background, and their difference
ncv_total = d['CV']
ncv_bkgd = d['background_counts']
ncv_bkgd_subtracted = [a-b for a,b in zip(ncv_total, ncv_bkgd)] 


In [None]:
# do not include EXT 
# Histogram the selected beam-off events (weighted by pot_scale) only to get the
# bin counts; the figure itself is discarded.

ext_counts = plt.hist(datasets_bdt['ext'].query(selected_query)[xvar], bins, color='lightgrey', 
                      weights=datasets_bdt['ext'].query(selected_query).pot_scale)[0]
plt.close()

# per-bin total prediction minus the EXT counts
ncv_mc = [a-b for a,b in zip(ncv_total,ext_counts)]

In [None]:
# takes the sum of the weights squared for MC counting error -- on the full event rate (for closure test)

print('Make sure to update for full event rate, background-subtracted, or background only !')

mc_stat_cov = [ [0]*(len(bins)-1) for x in range(len(bins)-1) ]
mc_frac_stat_cov = [ [0]*(len(bins)-1) for x in range(len(bins)-1) ]

s = 0  # not used in this cell; kept in case a later cell relies on it

# The selected MC sample does not depend on the bin, so it is built once rather
# than once per bin.
# NOTE: as before, this rebinds the notebook-level infv / outfv names to the
# BDT-selected subsets.
infv = datasets_bdt['infv'].query(selected_query)
outfv = datasets_bdt['outfv'].query(selected_query)
#cosmic = datasets_bdt['cosmic'].copy().query(selected_query)

ncv_df = pd.concat([infv, outfv], ignore_index=True) #pd.concat([infv, outfv, cosmic], ignore_index=True)

for i in range(len(bins)-1):

    # the last bin includes its upper edge, matching histogram conventions
    if i==len(bins)-2: 
        bin_query = xvar+' >= '+str(bins[i])+' and '+xvar+' <= '+str(bins[i+1])
    else: 
        bin_query = xvar+' >= '+str(bins[i])+' and '+xvar+' < '+str(bins[i+1])

    # MC statistical variance of a weighted bin = sum of squared weights
    mc_stat_cov[i][i] = sum(ncv_df.query(bin_query).totweight_data ** 2) 

    # fractional variance; left at 0 for an empty bin instead of dividing by
    # zero (same convention as the other covariance cells)
    if ncv_mc[i] != 0:
        mc_frac_stat_cov[i][i] = mc_stat_cov[i][i]/(ncv_mc[i]*ncv_mc[i])

mc_stat_percent_error = np.sqrt(np.diag(mc_frac_stat_cov))

In [None]:
# Optional heat maps of the MC statistical covariance (absolute, then fractional).
if plot:

    for cov_matrix, cov_title in ((mc_stat_cov, 'MC Statistical Covariance'),
                                  (mc_frac_stat_cov, 'MC Fractional Statistical Covariance')):

        fig = plt.figure(figsize=(10, 6))

        plt.pcolor(bins, bins, cov_matrix, cmap='OrRd', edgecolors='k' )

        cbar = plt.colorbar()
        cbar.ax.tick_params(labelsize=14)

        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)

        plt.xlabel("Reco "+x_label, fontsize=15)
        plt.ylabel("Reco "+x_label, fontsize=15)

        plt.title(cov_title, fontsize=15)

        plt.show()


### Stat Uncertainty (DATA) -- beam on & EXT

In [None]:
# TODO: the beam-on / EXT statistical uncertainty is not implemented yet; this
# cell only prints a reminder.
print('need to add in DATA & EXT uncertainty!')

In [None]:
# diagonals are sqrt(N), everything else is 0
# this will be for DATA later on 

# number of events fed into the signal channel 
# n, b, p = plt.hist(datasets_bdt['infv'].query(selected_signal_query)['shr_energy_cali'], bins, histtype='bar', range=[xlow, xhigh], 
#                  weights=datasets_bdt['infv'].query(selected_signal_query).totweight_data)
# plt.close()

# stat_cov = [ [0]*(len(bins)-1) for x in range(len(bins)-1) ]

# for i in range(len(bins)-1): 

    #stat_cov[i][i] = 
    
    #if xsec_units: 
    #    stat_cov[i][i] = (n[i]/(n_target*flux))#**2
    
    #else: 
    #    stat_cov[i][i] = n[i]#**2
    
#stat_cov

### MC Response Matrix 

In [None]:

# selected signal events, copied so that the columns added here never touch datasets_bdt
selected_signal_df = datasets_bdt['infv'].query(selected_signal_query).copy()

# per-event seed built from the run / sub / evt columns
selected_signal_df['seed'] = selected_signal_df.apply(
    lambda row: ConcatRunSubRunEvent(row['run'], row['sub'], row['evt']),
    axis=1,
)

# per-event output of PoissonRandomNumber (mean=1.0, size=1000) drawn from that seed
selected_signal_df['weightsPoisson'] = selected_signal_df.apply(
    lambda row: PoissonRandomNumber(row['seed'], mean=1.0, size=1000),
    axis=1,
)


In [None]:
fig = plt.figure(figsize=(8, 5))

# bin contents of every universe, one array per universe
uni_counts = []

for universe in range(1000):

    # entry `universe` of each event's weightsPoisson, multiplied by the event's CV weight
    poisson_wgt = selected_signal_df['weightsPoisson'].str.get(universe)
    selected_signal_df['weight_sys'] = [
        uni_w*cv_w for uni_w, cv_w in zip(poisson_wgt, selected_signal_df['totweight_data'])
    ]

    # in cross-section units the weights are scaled by 1E39/(flux*n_target) for the drawing
    if xsec_units:
        hist_weights = [ (1E39) * wgt/(flux*n_target) for wgt in selected_signal_df['weight_sys'] ]
    else:
        hist_weights = selected_signal_df['weight_sys']

    counts, edges, patches = plt.hist(selected_signal_df[xvar], bins, histtype='step', weights=hist_weights,
                                      linewidth=0.5, color='cornflowerblue')

    # the stored counts have the 1E39 plotting factor removed again
    uni_counts.append(counts/1E39 if xsec_units else counts)


# central value on top of the universes, drawn with the same unit convention
if xsec_units:
    cv_weights = [ (1E39) * wgt/(flux*n_target) for wgt in selected_signal_df['totweight_data']]
else:
    cv_weights = selected_signal_df['totweight_data']

ncv, bcv, pcv = plt.hist(selected_signal_df[xvar], bins, histtype='step',
                         weights=cv_weights, linewidth=2, color='black')

if xsec_units:
    ncv = ncv/1E39

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

plt.xlabel('Reco '+x_label, fontsize=15)

# the event-rate label is used only when xsec_units compares equal to False
y_title = ("$\\nu$ / $2 \\times 10^{20}$ POT" if xsec_units==False
           else "$\\tilde{\\sigma}$ [$10^{-39}$cm$^{2}$/nucleon]")
plt.ylabel(y_title, fontsize=15)

plt.title('weightsPoisson', fontsize=16)

plt.show()

In [None]:
# run calcCov on the Poisson universes and their CV histogram; the 'frac_cov' entry of the
# result is what enters the combined uncertainty dictionary
response_matrix_uncertainty = calcCov(
    xvar, bins, ncv, uni_counts, 'weightsPoisson',
    isrun3=ISRUN3,
    plot=True,
    save=False,
    axis_label='Reco '+x_label,
    pot=parameters(ISRUN3)['beamon_pot'],
)

### POT Counting (2%)

In [None]:
# the POT-counting and dirt uncertainties are evaluated on the background-subtracted
# event rate, like the other systematics, so keep the 'background_counts' entry of plot_mc

ncv_bkgd = plot_mc(
    xvar, bins, xlow, xhigh, selected_query, datasets_bdt, ISRUN3,
    norm='data',
    sys=False,
    x_label='Reco '+x_label,
    pot="$2.0 x 10^{20}$",
)['background_counts']

In [None]:
# POT-counting unisim evaluated with a 0.02 (2%) variation
pot_counting = pot_unisims(
    xvar, ncv_total, bins, 0.02, ISRUN3,
    plot=True,
    x_label=None,
    bkgd_cv_counts=ncv_bkgd,
)

### Dirt (100%)  

In [None]:
# 100%
# vary the dirt interactions by 100% (1 unisim) on the background-subtracted selected event rate


In [None]:
# 'CV' entry of plot_mc for events that are flagged as dirt and pass the BDT score cut
selected_dirt = plot_mc(
    xvar, bins, xlow, xhigh,
    'isDirt==1 and BDT_score>'+str(bdt_score_cut),
    datasets_bdt, ISRUN3,
    norm='data',
    sys=False,
    x_label='Reco '+x_label,
    pot="$2.0 x 10^{20}$",
)['CV']


In [None]:
# dirt unisim evaluated with a 1.0 (100%) variation of the selected dirt prediction
dirt_uncertainty = dirt_unisim(
    xvar, bins, ncv_total, selected_dirt, 1.0, ISRUN3,
    plot=True,
    x_label=None,
    title=None,
    bkgd_cv_counts=ncv_bkgd,
)

## All Sources of Uncertainty

In [None]:

# fractional covariance of every systematic source, keyed by a short source name
# (the GENIE unisim entry is currently left out)
frac_cov_dict = dict(
    ppfx=ppfx_dict['frac_cov'],
    beamline=beamline_dict['frac_cov'],
    genie_ms=genie_dict['frac_cov'],
    # genie_us=genie_unisim_dict['frac_cov'],
    geant4=geant4_dict['frac_cov'],
    detector=detsys_dict['frac_cov'],
    pot_counting=pot_counting['frac_cov'],
    dirt=dirt_uncertainty['frac_cov'],
    mc_stat=mc_frac_stat_cov,
    response_matrix=response_matrix_uncertainty['frac_cov'],
)



In [None]:
# total fractional and absolute covariance returned by plotFullCov for all sources in frac_cov_dict
tot_frac_cov, tot_abs_cov = plotFullCov(
    frac_cov_dict, xvar, ncv_bkgd_subtracted, bins, xlow, xhigh,
    save=False,
    axis_label='Reco '+x_label,
    isrun3=ISRUN3,
    pot='$2.0 x 10^{20}$ POT',
)

In [None]:
# combined flux entry: element-wise sum of the ppfx and beamline fractional covariances
# (summing covariances corresponds to adding the uncertainties in quadrature)
# NOTE(review): this adds a 'flux' key to frac_cov_dict; if plotFullCov sums every entry,
# re-running that cell afterwards would presumably count the flux twice -- confirm
frac_cov_dict['flux'] = [
    [ppfx_ij + beam_ij for ppfx_ij, beam_ij in zip(ppfx_row, beam_row)]
    for ppfx_row, beam_row in zip(frac_cov_dict['ppfx'], frac_cov_dict['beamline'])
]


In [None]:
# add genie in quadrature
# frac_cov_dict['genie_all'] = [ [x+y for x,y in zip(a,b)] for a,b in zip(frac_cov_dict['genie_ms'], frac_cov_dict['genie_us'])]


In [None]:
bincenters = 0.5*(np.array(bins)[1:]+np.array(bins)[:-1])
#colors = ['#6699CC', '#004488', '#EECC66', '#994455', '#997700', '#EE99AA', 'lightskyblue']

fig = plt.figure(figsize=(8, 5))

# every curve is sqrt(diagonal) of a fractional covariance, drawn as a step histogram
# with one entry per bin centre

# total uncertainty first, in black with a thicker line
plt.hist(bincenters, bins, histtype='step', range=[bins[0], bins[-1]], label="Total",
         weights=np.sqrt(np.diagonal(tot_frac_cov)), linewidth=1.5, color='black')

# individual sources, in legend order, using the default colour cycle
for legend_name, source in [("Flux", 'flux'),
                            ("GENIE", 'genie_ms'),
                            ("GEANT4", 'geant4'),
                            ("Detector", 'detector'),
                            ("POT counting", 'pot_counting'),
                            ("Dirt", 'dirt'),
                            ("Response matrix", 'response_matrix'),
                            ("MC stat", 'mc_stat')]:

    plt.hist(bincenters, bins, histtype='step', range=[bins[0], bins[-1]], label=legend_name,
             weights=np.sqrt(np.diagonal(frac_cov_dict[source])))


plt.xticks(fontsize=13)
plt.yticks(fontsize=13)

plt.xlabel('Reco '+x_label, fontsize=15)
plt.ylabel("Fractional Uncertainty", fontsize=15)

plt.xlim(bins[0], xhigh)
plt.ylim(0, .75)

plt.legend(fontsize=13, frameon=False, ncol=2)

#plt.savefig(plots_path+xvar+"_FracUncertainty.pdf", transparent=True, bbox_inches='tight')

plt.show()

## Save covariance to unfolding file 

In [None]:
# 2D histogram of the total absolute covariance, binned in `bins` on both axes
hcov = TH2D("hcov_tot", "Covariance Matrix vs. Reco "+x_label,
            len(bins)-1, np.array(bins), len(bins)-1, np.array(bins))

# matrix row -> y axis, matrix column -> x axis; each cell is filled once at its bin centre
for row, y_center in enumerate(bincenters):
    for col, x_center in enumerate(bincenters):
        hcov.Fill(x_center, y_center, tot_abs_cov[row][col])


In [None]:
# open the unfolding file for this variable in "UPDATE" mode
# NOTE(review): absolute, user-specific path with a hard-coded FHCRUN1 tag -- confirm it
# matches the ISRUN3 setting and the machine this notebook runs on
f = ROOT.TFile.Open("/uboone/data/users/kmiller/unfolding/WSVD_"+xvar+"_FHCRUN1_MARCH12.root", "UPDATE")

In [None]:
# select the opened ROOT file, write the covariance histogram, then close the file
f.cd()
hcov.Write()
f.Close()

## Data/MC Comparisons -- TK

## NuMI Oscillations (3+1 Model)

In [None]:
# outdated: plot_mc call for the projected oscillation plot, kept for reference
# (bin edges run from 0.0 to 0.7 in steps of 0.05)

x = plot_mc(
    xvar,
    [round(0.01*edge, 2) for edge in range(0, 75, 5)],
    0, 0.7,
    'BDT_score>0.575',
    datasets_bdt, ISRUN3,
    plt_norm='proj',
    pot='$9.23\\times10^{20}$',
    ymax=30,
    x_label='True Neutrino Energy [GeV]',
    osc='machado_bestfit.csv',
)

# osc='biggest_variation.csv'

#### Create projected oscillation dictionary 

In [None]:
import json

In [None]:
# load a previously stored dictionary from JSON; the following cell reads its 'bins' entry
# NOTE: this rebinds `f`, the name used above for the ROOT file handle
with open('outdated/FHC_Projected_TrueNeutrinoEnergy.json') as f:
    d = json.load(f)

In [None]:
# NOTE: rebinds the global `bins` to the binning stored in the JSON file; cells above this
# point that use `bins` will see this binning if they are re-run without restarting the kernel
bins = d['bins']

In [None]:
# plot_mc output for 'nu_e' in the loaded binning; its 'CV' entry is scaled further down
x = plot_mc(
    'nu_e', bins, 0, 5, selected_query, datasets_bdt, ISRUN3,
    x_label="Reco $\\nu$ Energy [GeV]",
    norm='data',
    pot='$2.0\\times10^{20}$',
)

In [None]:
# container for the binning, scaled CV and fractional covariances that are written to JSON at the end
oscillation_dict = {}

In [None]:
# binning shared by every histogram and covariance stored in this dictionary
oscillation_dict['bins'] = bins

In [None]:
# scale factor from the beam-on POT to 9.23E20 POT (quoted in the setup cell as the FHC Runs 1-5 projection)
pot_scale = 9.23E20/parameters(ISRUN3)['beamon_pot']
print(pot_scale)

In [None]:
# CV bin contents from plot_mc, scaled bin by bin to the projected POT
oscillation_dict['CV'] = [bin_count*pot_scale for bin_count in x['CV']]

In [None]:
# PPFX: 600 'weightsPPFX' universes of the 'nu_e' distribution, without background subtraction
ncv, ppfx_variations = plotSysVariations(
    'nu_e', 'nu_e', bins, bins[0], bins[-1], selected_query, datasets_bdt, 'weightsPPFX', 600, ISRUN3,
    plot=True,
    axis_label='True Neutrino Energy [GeV]',
    pot='$2.0 x 10^{20}$ POT',
    background_subtraction=False,
)

# covariance of those universes about the CV returned above
ppfx_dict = calcCov(
    'nu_e', bins, ncv, ppfx_variations, 'weightsPPFX',
    plot=True,
    save=False,
    axis_label='True Neutrino Energy [GeV] ',
    pot='$2.0 x 10^{20}$ POT',
    isrun3=ISRUN3,
    title='Hadron Production',
)

In [None]:
# store the PPFX fractional covariance
oscillation_dict['ppfx_cov_frac'] = ppfx_dict['frac_cov']

In [None]:
# GENIE: 600 'weightsGenie' universes of the 'nu_e' distribution, without background subtraction
ncv, genie_variations = plotSysVariations('nu_e', 'nu_e', bins, bins[0], bins[-1], selected_query, datasets_bdt, 'weightsGenie',600, 
                                         ISRUN3, plot=True, axis_label='True Neutrino Energy [GeV]', pot='$2.0 x 10^{20}$ POT', 
                                              background_subtraction=False)

# covariance of those universes about the CV returned above
# (title corrected: it was copy-pasted as 'Hadron Production' from the PPFX cell)
genie_dict = calcCov('nu_e', bins, ncv, genie_variations, 'weightsGenie', plot=False, save=False, 
                    axis_label='True Neutrino Energy [GeV] ', pot='$2.0 x 10^{20}$ POT', isrun3=ISRUN3, title='GENIE')

In [None]:
# store the GENIE fractional covariance
oscillation_dict['genie_cov_frac'] = genie_dict['frac_cov']

In [None]:
# GEANT4 reinteractions: 1000 'weightsReint' universes of the 'nu_e' distribution,
# without background subtraction
ncv, geant4_variations = plotSysVariations('nu_e', 'nu_e', bins, bins[0], bins[-1], selected_query, datasets_bdt, 'weightsReint',1000, 
                                         ISRUN3, plot=True, axis_label='True Neutrino Energy [GeV]', pot='$2.0 x 10^{20}$ POT', 
                                              background_subtraction=False)


# covariance of those universes about the CV returned above
# (title corrected: it was copy-pasted as 'Hadron Production' from the PPFX cell)
geant4_dict = calcCov('nu_e', bins, ncv, geant4_variations, 'weightsReint', plot=False, save=False, 
                    axis_label='True Neutrino Energy [GeV] ', pot='$2.0 x 10^{20}$ POT', isrun3=ISRUN3, title='GEANT4')

In [None]:
# store the GEANT4 reinteraction fractional covariance
oscillation_dict['reint_cov_frac'] = geant4_dict['frac_cov']

In [None]:
## detector variations -- make new file 
# when True, the next cell calls makehist_detsys for every detector variation
recreate_file=True

In [None]:
if recreate_file:
    # one makehist_detsys call per key of detvar_run1_fhc, all targeting the same ROOT file name
    for variation_name in list(detvar_run1_fhc.keys()):
        NuMIDetSysWeights.makehist_detsys(
            variation_name, ISRUN3,
            "NuMI_FHC_BDT_DetectorVariations_OscillationAnalysis.root",
            'nu_e', bins,
            cut=selected_query,
            useBDT=True,
        )

In [None]:
# detector-variation histograms from the file above; the result is keyed by variation
# name and also holds a 'CV' entry
detector_variations = NuMIDetSysWeights.plot_variations(
    'nu_e', bins,
    "NuMI_FHC_BDT_DetectorVariations_OscillationAnalysis.root",
    ISRUN3,
    axis_label='True Neutrino Energy',
    plot=True,
    background_subtraction=False,
)

In [None]:
# one covariance per detector variation: each variation is passed to calcCov as a
# single universe (N=1) together with the detector CV histogram

detsys_cov = {}

for variation in detector_variations.keys():

    # 'CV' is the reference histogram, not a variation
    if variation != 'CV':
        detsys_cov[variation] = calcCov(
            'nu_e', bins, detector_variations['CV'], [detector_variations[variation]], 'Detector',
            plot=False,
            save=False,
            pot='$2.0 x 10^{20}$ POT',
            isrun3=ISRUN3,
            title=variation,
        )

In [None]:
# compute total detector covariance, fractional covariance, correlation, & uncertainty
#
# The total covariance is the element-wise sum of the per-variation covariances.
# The former outer loop over the variations was redundant: every pass recomputed the
# same full sum, so it has been removed without changing the result.

n_detsys_bins = len(bins)-1
detsys_cv = detector_variations['CV']

cov = [ [0]*n_detsys_bins for _ in range(n_detsys_bins) ]
frac_cov = [ [0]*n_detsys_bins for _ in range(n_detsys_bins) ]
cor = [ [0]*n_detsys_bins for _ in range(n_detsys_bins) ]

for i in range(n_detsys_bins): 
    for j in range(n_detsys_bins):

        cov[i][j] = sum(detsys_cov[name]['cov'][i][j] for name in detsys_cov)

        # leave the fractional covariance at 0 where either CV bin is empty
        cv_product = detsys_cv[i]*detsys_cv[j]
        if cv_product != 0: 
            frac_cov[i][j] = cov[i][j]/cv_product


for i in range(n_detsys_bins): 
    for j in range(n_detsys_bins):

        # leave the correlation at 0 where either bin has zero variance
        sigma_product = np.sqrt(cov[i][i])*np.sqrt(cov[j][j])
        if sigma_product != 0: 
            cor[i][j] = cov[i][j] / sigma_product

detsys_dict = {
    'cov' : cov, 
    'frac_cov' : frac_cov,
    'cor' : cor,
    'fractional_uncertainty' : np.sqrt(np.diag(frac_cov))
} 

In [None]:
# store the summed detector fractional covariance
oscillation_dict['det_cov_frac'] = detsys_dict['frac_cov']

In [None]:
# total fractional covariance = element-wise sum of every individual source
tot_frac_cov = [ [0]*(len(bins)-1) for _ in range(len(bins)-1) ]

# pick the sources by key name instead of by position in the dictionary: this does not
# depend on 'bins' and 'CV' having been inserted first, and it never adds a previously
# stored 'tot_cov_frac' back into the sum when this cell is re-run
cov_sources = [key for key in oscillation_dict
               if key.endswith('_cov_frac') and key != 'tot_cov_frac']

for source in cov_sources: 
    tot_frac_cov = [ [x+y for x,y in zip(a,b)] for a,b in zip(tot_frac_cov, oscillation_dict[source])]

In [None]:
# store the summed fractional covariance alongside the individual sources
oscillation_dict['tot_cov_frac'] = tot_frac_cov

In [None]:
# save this dictionary 
# NOTE(review): json.dump needs plain lists / Python numbers -- presumably the stored
# covariances are not numpy arrays; confirm before relying on this cell

with open('mun/FHC_Projected_TrueNeutrinoEnergy_March2022.json', 'w') as f:
    json.dump(oscillation_dict, f)