In [None]:
# Python notebook for producing upstream data/mc comparisons
# by creating slimmed versions of the pandas dataframes

# Run selector: True takes the "Run 3 / RHC" branches of the cells below,
# False takes the "Run 1 / FHC" branches.
ISRUN3 = True

# Load systematic universes? Each flag adds the matching multisim branch to the
# MC branch list and enables its clean-up cell further down.
# NOTE(review): the PPFX multisim cells near the end of the notebook read
# 'weightsPPFX' unconditionally, so they only work with isPPFX = True.
isPPFX = False   # 'weightsPPFX'
isGENIE = False  # 'weightsGenie'
isGEANT = False  # 'weightsReint'

In [None]:
import sys

sys.path.insert(0, 'backend_functions')

import importlib

import uproot
import numpy as np
import math

import matplotlib.pyplot as plt
import pandas as pd

import top 
importlib.reload(top)
from top import *



import selection_functions as sf
importlib.reload(sf)
from selection_functions import *

In [None]:
# Text labels for the selected run: chi2 annotation, output-file tag and the
# beam-on POT string handed to plot_data as y_label.
chi2_label, save_label, beamon_pot_str = (
    ("RHC RUN 3", "rhcrun3", "5.0 $\\times 10^{20}$")
    if ISRUN3 else
    ("FHC RUN 1", "fhcrun1", "2.0 $\\times 10^{20}$")
)
    

In [None]:
import NuMIGeoWeights
importlib.reload(NuMIGeoWeights)

# Beam-mode tag handed to the geometry-weight helper: "RHC" for Run 3, "FHC" otherwise.
current = "RHC" if ISRUN3 else "FHC"

numiBeamlineGeoWeights = NuMIGeoWeights.NuMIGeoWeights(current=current)

In [None]:
# Use the dedicated intrinsic-nue sample? When True the intrinsic file is opened
# alongside the other samples and, after POT scaling, replaces the overlay rows
# selected by nueCC_query.
NUE_INTRINSIC = True

In [None]:
from datetime import datetime
import time
# Record when this run of the notebook started. The stamp now carries the
# calendar date as well, so the printed "date and time" label is accurate.
now = datetime.now()
date_time = now.strftime("%Y-%m-%d %H:%M:%S")
print("date and time:",date_time)

In [None]:
# Path prefix for saved figures, looked up from parameters() for the selected
# run (parameters is not defined in this notebook; presumably provided by `top`).
plots_path = parameters(ISRUN3)['plots_path']
plots_path

In [None]:
# Directory and tree name inside every ntuple file.
fold, tree = "nuselection", "NeutrinoSelectionFilter"

# File stems (without ".root"); filled in per run by the next cell.
DATA = EXT = OVRLY = DRT = NUE = ""

In [None]:
# Central-value ntuple directory and file stems for the selected run.
if ISRUN3:
    # Run 3 RHC
    path = "/uboone/data/users/kmiller/uBNuMI_CCNp/ntuples/run3b/cv/"
    OVRLY, DATA, EXT, DRT = (
        'neutrinoselection_filt_run3b_overlay_v7',
        'neutrinoselection_filt_run3b_beamon_beamgood_v5',
        'neutrinoselection_filt_run3b_beamoff_v5',
        'neutrinoselection_filt_run3b_dirt_overlay_v6',
    )
    _nue_stem = 'neutrinoselection_filt_run3b_overlay_intrinsic_v7'
else:
    # Run 1 FHC
    path = "/uboone/data/users/kmiller/uBNuMI_CCNp/ntuples/run1/cv/"
    OVRLY, DATA, EXT, DRT = (
        'neutrinoselection_filt_run1_overlay_v7',
        'neutrinoselection_filt_run1_beamon_beamgood_v5',
        'neutrinoselection_filt_run1_beamoff_v5',
        'prodgenie_numi_uboone_overlay_dirt_fhc_mcc9_run1_v28_all_snapshot',
    )
    _nue_stem = 'neutrinoselection_filt_run1_overlay_intrinsic_v7'

print('path = ', path)

# NUE keeps its previous value unless the intrinsic sample is requested.
if NUE_INTRINSIC:
    NUE = _nue_stem


In [None]:
def _open_tree(stem):
    """Open <path><stem>.root and return the [fold][tree] object inside it."""
    return uproot.open(path+stem+".root")[fold][tree]


# Same opening order as before: overlay, beam-on data, beam-off (EXT), dirt.
overlay, data, ext, dirt = [_open_tree(stem) for stem in (OVRLY, DATA, EXT, DRT)]

uproot_v = [overlay, data, ext, dirt]

if NUE_INTRINSIC:
    nue = _open_tree(NUE)
    uproot_v.append(nue)

In [None]:
# Branches read for every sample ("nslice" and "contained_fraction" are
# currently switched off).
variables = ["reco_nu_vtx_sce_x", "reco_nu_vtx_sce_y", "reco_nu_vtx_sce_z",
             "run", "flash_time"]

# MC only variables
mc_var = ["nu_pdg", "ccnc",
          "nproton", "npi0", "npion",
          "true_nu_vtx_x", "true_nu_vtx_y", "true_nu_vtx_z",
          "weightSplineTimesTune", "weightTune", "ppfx_cv", "swtrig_pre",
          'nu_e',
          "true_nu_px", "true_nu_py", "true_nu_pz"]

# Multisim weight branches are appended only for the systematics switched on,
# in the fixed order PPFX, GENIE, GEANT.
mc_var += [branch
           for enabled, branch in ((isPPFX, 'weightsPPFX'),
                                   (isGENIE, 'weightsGenie'),
                                   (isGEANT, 'weightsReint'))
           if enabled]

# GENIE unisim knob branches. The position of each name in this list is used
# later to index the per-event 'weightsGenieUnisim' vector.
sys_genie_unisim = [
    "knobRPAup", "knobRPAdn",
    "knobCCMECup",
    "knobAxFFCCQEup",
    "knobVecFFCCQEup",
    "knobDecayAngMECup",
    "knobThetaDelta2Npiup",
    "knobThetaDelta2NRadup",
    "knobNormCCCOHup",
    "knobNormNCCOHup",
    "knobxsr_scc_Fv3up",  # these are supposed to be multisims - 10 universes each -- map to pull out
    "knobxsr_scc_Fa3up",
]



In [None]:
# Echo the run flag as the cell output (check of the active configuration).
ISRUN3

### create slim pandas dataframes

In [None]:
# Read the overlay tree into a DataFrame (common + MC-truth + GENIE unisim
# branches), printing wall-clock stamps around the read.
# NOTE(review): `overlay` is rebound from the uproot tree to a DataFrame, so
# this cell cannot be re-run without re-opening the files.
overlay_branches = variables+mc_var+sys_genie_unisim

print("start:",datetime.now().strftime("%H:%M:%S"))
overlay = overlay.pandas.df(overlay_branches, flatten=False)
print("end:",datetime.now().strftime("%H:%M:%S"))

In [None]:
# Dirt sample: read the common + MC-truth branches and every GENIE unisim knob
# except the last two entries of sys_genie_unisim (the two xsr_scc knobs).
# NOTE(review): rebinds `dirt` from the uproot tree to a DataFrame, so this
# cell cannot be re-run without re-opening the file.
dirt = dirt.pandas.df(variables+mc_var+sys_genie_unisim[:-2], flatten=False)

In [None]:
# The two SCC knobs are not read from the dirt file; give dirt a unit weight
# for both so it has the same knob columns as the other MC frames.
for scc_knob in ('knobxsr_scc_Fv3up', 'knobxsr_scc_Fa3up'):
    dirt[scc_knob] = 1

In [None]:
if isGEANT:
    # Overwrite the dirt re-interaction weights with 1000 universes that all
    # hold the value 1000 (becomes 1 after the later division by 1000).
    dirt['weightsReint'] = [np.full(1000, 1000) for _ in range(len(dirt['weightsReint']))]

In [None]:
# Intrinsic-nue sample: same branch list as the overlay sample.
# NOTE(review): rebinds `nue` from the uproot tree to a DataFrame (not re-runnable on its own).
if NUE_INTRINSIC: 
    nue = nue.pandas.df(variables+mc_var+sys_genie_unisim, flatten=False)

In [None]:
# Beam-on data: only the common reconstructed branches are read (no MC-truth or weight branches).
data = data.pandas.df(variables, flatten=False) 

In [None]:
# Beam-off (EXT) sample: only the common reconstructed branches are read.
ext = ext.pandas.df(variables, flatten=False)

In [None]:
# Give the data and EXT frames every MC-only and GENIE-knob column, filled with
# NaN, so all samples share one column layout.
for var in mc_var + sys_genie_unisim:
    for frame in (data, ext):
        frame[var] = np.nan

In [None]:
# Tag each simulated sample: only the dirt frame is flagged as dirt.
for frame, is_dirt in ((overlay, False), (dirt, True)):
    frame['isDirt'] = is_dirt

if NUE_INTRINSIC:
    nue['isDirt'] = False


In [None]:
# The dirt flag has no meaning for detector data; fill it with NaN.
for frame in (data, ext):
    frame['isDirt'] = np.nan

In [None]:
# Simulated frames that the weight clean-up loops below iterate over.
mc_df = [overlay, dirt] + ([nue] if NUE_INTRINSIC else [])

In [None]:
# Attach beamline-geometry weights to every simulated frame.
for i,df in enumerate(mc_df):
    
    print(i)
    
    # is signal bool (currently disabled)
    #df['is_signal'] = np.where((df.swtrig_pre == 1) 
   #                          & (df.nu_pdg==12) & (df.ccnc==0) & (df.nproton>0) & (df.npion==0) & (df.npi0==0)
   #                          & (10 <= df.true_nu_vtx_x) & (df.true_nu_vtx_x <= 246)
   #                          & (-106 <= df.true_nu_vtx_y) & (df.true_nu_vtx_y <= 106)
   #                          & (10 <= df.true_nu_vtx_z) & (df.true_nu_vtx_z <= 1026), True, False)
    
    # add beamline geometry weights
    # NOTE(review): this rebinds the loop variable only. If addAngles returns a
    # new frame instead of modifying its argument, the 'weightsNuMIGeo' column
    # added below never reaches overlay / dirt / nue -- confirm against addAngles.
    # 'thbeam' is not read from the ntuples; presumably addAngles creates it.
    df = addAngles(df)
    # Row-by-row evaluation (axis=1): one calculateGeoWeight call per event.
    df['weightsNuMIGeo'] = df.apply( lambda x: numiBeamlineGeoWeights.calculateGeoWeight(x['nu_pdg'],x['nu_e'],x['thbeam']) , axis=1)
  

In [None]:
#print(len(nue.query('is_signal==True'))==len(nue.query(signal)))
#print(len(nue.query('is_signal==False'))==len(nue.query(not_signal)))

In [None]:
#len(nue.query('is_signal==False'))

In [None]:
#len(nue.query(not_signal))

In [None]:
# Reset unusable central-value weights to 1: non-positive, +inf, above 60 or NaN.
for frame in mc_df:
    for weight_col in ('weightSplineTimesTune', 'weightTune'):
        weight = frame[weight_col]
        unusable = (weight <= 0) | (weight == np.inf) | (weight > 60) | np.isnan(weight)
        frame.loc[unusable, weight_col] = 1.


In [None]:
if isPPFX:
    # Sanitise the per-event PPFX universe weights. They are stored scaled by
    # 1000 (a later cell divides by 1000), so the nominal value here is 1000.
    #
    # The cleaned arrays are collected and written back with one column
    # assignment. This replaces the chained `df[col].iloc[i] = ...` writes, and
    # the empty-event fill is now an array rather than a plain list, which the
    # later `/1000` could not divide.
    n_ppfx_universes = 600

    for df in mc_df:
        cleaned = []

        for weights in df['weightsPPFX']:
            weights = np.asarray(weights)

            if weights.size == 0:
                # no variations exist for the event: nominal weight in every universe
                weights = np.full(n_ppfx_universes, 1000)
            else:
                if not weights.flags.writeable:
                    weights = weights.copy()
                # NaN, above 60000, negative or +inf -> nominal
                unusable = (np.isnan(weights) | (weights > 60000) |
                            (weights < 0) | (weights == np.inf))
                weights[unusable] = 1000.

            cleaned.append(weights)

        df['weightsPPFX'] = cleaned

In [None]:
# Build the per-event GENIE unisim weight vector ('weightsGenieUnisim', one
# entry per knob in sys_genie_unisim order) and clean the scalar knob columns.
for df in mc_df:

    # Snapshot of the raw knob values, taken before the scalar columns are
    # cleaned: one row per event, one column per knob.
    universes = np.array(df[sys_genie_unisim].values)

    # CLEAN GENIE UNISIM WEIGHTS (scalar columns): non-positive, +inf, > 60 or NaN -> 1
    for v in sys_genie_unisim:
        unusable = (df[v] <= 0) | (df[v] == np.inf) | (df[v] > 60) | np.isnan(df[v])
        df.loc[unusable, v] = 1.

    # CREATE WEIGHTSGENIEUNISIM LIST, cleaning the whole 2-D snapshot at once
    # instead of event by event through chained indexing. NaNs are replaced
    # first; the former `== np.nan` comparison was always False and is dropped.
    # NOTE(review): exactly-zero weights are kept here (`< 0`) while the scalar
    # columns above reset them (`<= 0`); thresholds are left as they were.
    universes[np.isnan(universes)] = 1.
    universes[(universes > 60) | (universes < 0) | (universes == np.inf)] = 1.

    df['weightsGenieUnisim'] = list(universes)
    
        

In [None]:
if isGEANT:
    # Sanitise the per-event GEANT4 re-interaction universe weights (stored
    # scaled by 1000; a later cell divides by 1000).
    #
    # Cleaned arrays are collected and written back with one column assignment,
    # replacing the chained `df[col].iloc[i] = ...` / `.loc[i] = ...` writes.
    # Empty events are detected before any comparison is attempted.
    n_reint_universes = 1000

    for i,df in enumerate(mc_df):

        print(i)
        print("start:",datetime.now().strftime("%H:%M:%S"))

        cleaned = []

        for weights in df['weightsReint']: # loop over the rows (events)

            # turn everything into an array
            weights = np.asarray(weights)

            if weights.size == 0:
                # no variations exist for the event: nominal weight in every universe
                weights = np.full(n_reint_universes, 1000)
            else:
                if not weights.flags.writeable:
                    weights = weights.copy()
                # NaN, above 60000, negative or +inf -> nominal
                unusable = (np.isnan(weights) | (weights > 60000) |
                            (weights < 0) | (weights == np.inf))
                weights[unusable] = 1000.

            cleaned.append(weights)

        df['weightsReint'] = cleaned


In [None]:
if isGENIE:
    # Sanitise the per-event GENIE multisim universe weights (stored scaled by
    # 1000; a later cell divides by 1000).
    #
    # Cleaned arrays are collected and written back with one column assignment.
    # This replaces the chained `df[col].iloc[i] = ...` writes, and the
    # empty-event fill is now an array rather than a plain list, which the
    # later `/1000` could not divide.
    n_genie_universes = 600

    for df in mc_df:
        cleaned = []

        for weights in df['weightsGenie']:
            weights = np.asarray(weights)

            if weights.size == 0:
                # no variations exist for the event: nominal weight in every universe
                weights = np.full(n_genie_universes, 1000)
            else:
                if not weights.flags.writeable:
                    weights = weights.copy()
                # NaN, above 60000, negative or +inf -> nominal
                unusable = (np.isnan(weights) | (weights > 60000) |
                            (weights < 0) | (weights == np.inf))
                weights[unusable] = 1000.

            cleaned.append(weights)

        df['weightsGenie'] = cleaned

In [None]:
# get right order of magnitude for multiverses: the enabled universe weights
# are divided by 1000.
# NOTE(review): re-running this cell divides again.
for frame in mc_df:
    for enabled, weight_col in ((isPPFX, 'weightsPPFX'),
                                (isGENIE, 'weightsGenie'),
                                (isGEANT, 'weightsReint')):
        if enabled:
            frame[weight_col] = frame[weight_col]/1000

In [None]:
# Keep only simulated events with swtrig_pre == 1.
swtrig_cut = 'swtrig_pre==1'

overlay = overlay.query(swtrig_cut)
dirt = dirt.query(swtrig_cut)

if NUE_INTRINSIC:
    nue = nue.query(swtrig_cut)


In [None]:
#print(len(nue.query('is_signal==True'))==len(nue.query(signal)))
#print(len(nue.query('is_signal==False'))==len(nue.query(not_signal)))

In [None]:
# Echo the run flag before the POT-normalisation cells.
ISRUN3

### POT Normalization 

In [None]:
importlib.reload(top)
from top import * 

In [None]:
# Passed as `tune=` to every pot_scale() call and used to pick the
# "tune" / "notune" title and file name of the flash-time plots.
tune = True

In [None]:
# Scale the beam-off sample. pot_scale is defined outside this notebook;
# presumably it adds the 'pot_scale' column that the plots read as ext['pot_scale'].
ext = pot_scale(ext, 'ext', ISRUN3, tune=tune)

In [None]:
# Beam-on data is the normalisation reference: unit scale for every event.
data['pot_scale'] = 1

In [None]:
# Beam-on POT for the selected run; used later to build the "<pot> POT" plot labels.
beamon_pot = parameters(ISRUN3)['beamon_pot'] 

# Scale each simulated sample (pot_scale is defined outside this notebook;
# presumably it adds the 'pot_scale' column read by the next cell).
overlay = pot_scale(overlay, 'overlay', ISRUN3, tune=tune)
dirt = pot_scale(dirt, 'dirt', ISRUN3, tune=tune)


if NUE_INTRINSIC: 
    nue = pot_scale(nue, 'intrinsic', ISRUN3, tune=tune)



In [None]:
# totweight_data scales to BEAMON: POT scale x PPFX central value x GENIE
# spline-times-tune weight, per event.
for frame in [overlay, dirt] + ([nue] if NUE_INTRINSIC else []):
    frame['totweight_data'] = frame['pot_scale']*frame['ppfx_cv']*frame['weightSplineTimesTune']

In [None]:
# No simulation weight exists for detector data; keep the column but fill it with NaN.
for frame in (data, ext):
    frame['totweight_data'] = np.nan

In [None]:
# Show the query string used below to pick the overlay events that get replaced
# (nueCC_query is not defined in this notebook; presumably it comes from a star import).
nueCC_query

In [None]:
# replace nueCC events: drop the overlay rows matching nueCC_query and append
# the intrinsic-nue sample in their place.

if NUE_INTRINSIC:

    nuecc_in_overlay = overlay.query(nueCC_query)
    print("# of nueCC in AV in overlay sample = "+str(len(nuecc_in_overlay)))

    n_before = len(overlay)
    overlay = overlay.drop(nuecc_in_overlay.index)
    print("# of nueCC in AV dropped in overlay = "+str(n_before-len(overlay)))

    overlay = pd.concat([overlay,nue], ignore_index=True)

    # from here on out everything else should be the same.

In [None]:
# apply SW trigger, combine overlay + dirt as MC 
# (ignore_index renumbers the rows; sort=True orders the combined columns).
mc = pd.concat([overlay.query('swtrig_pre==1'),dirt.query('swtrig_pre==1')], ignore_index=True, sort=True)

In [None]:
# Split the combined MC with the two fiducial-volume query strings
# (in_fv_query / out_fv_query are not defined in this notebook; presumably
# they come from a star import). The next cell checks the split is complete.
infv = mc.query(in_fv_query)
outfv = mc.query(out_fv_query)

In [None]:
# check that everything is accounted for: every MC row must be in exactly one
# of infv / outfv.
split_is_complete = len(mc) == len(infv)+len(outfv)
print(split_is_complete)

if not split_is_complete:
    # number of rows missing from the split ...
    d = len(mc) - (len(infv)+len(outfv))
    print(d)

    # ... and the index labels of the rows that ended up in neither subset
    m = pd.concat([infv, outfv])
    diff = np.setdiff1d(list(mc.index),list(m.index))


In [None]:
#tot_signal_weighted = np.nansum(mc.query('is_signal==True')['totweight_data'])
#print('total signal events in FV = '+ str(tot_signal_weighted))

In [None]:
# Samples handed to the plotting / systematics helpers, keyed by role.
datasets = dict(infv=infv, outfv=outfv, ext=ext, data=data)

### Flash time plot

In [None]:
# 0.359 us shift between beam on & beam off hardware trigger 
#
# The unshifted values are kept in 'flash_time_raw' and the shift is always
# applied to that copy, so re-running this cell no longer subtracts the offset
# a second time.
# NOTE(review): the 'infv' / 'outfv' frames in `datasets` were built before this
# cell and are not shifted, while datasets['ext'] is this same `ext` object.
FLASH_TIME_SHIFT = 0.359

for frame in (overlay, dirt, ext):
    if 'flash_time_raw' not in frame.columns:
        frame['flash_time_raw'] = frame['flash_time']
    frame['flash_time'] = frame['flash_time_raw'] - FLASH_TIME_SHIFT

In [None]:
# Echo the run flag before plotting.
ISRUN3

In [None]:
# Echo the tune flag: it selects the "tune" / "notune" plot title and file name below.
tune

In [None]:
# Flash-time distribution over 0-25 us (50 bins): stacked EXT + overlay + dirt
# prediction against beam-on data, with a data/prediction ratio panel.
# (`gridspec` and `get_ratio_err` are not imported in this notebook's own
# cells; presumably they come from the star imports.)

# Beam-on counts per bin; the scratch figure plt.hist draws on is discarded.
n_data, b_data, p_data = plt.hist(data['flash_time'], 50, range=[0, 25])
plt.close()
data_bins = 0.5*(b_data[1:]+b_data[:-1])
x_err = np.diff(b_data)/2          # half bin width, drawn as the horizontal bar

gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])
fig = plt.figure(figsize=(8, 7))
ax1, ax2 = plt.subplot(gs[0]), plt.subplot(gs[1])

for panel in (ax1, ax2):
    panel.tick_params(axis = 'both', which = 'major', labelsize = 14)

# Prediction: EXT carries only its POT scale, MC carries PPFX CV x GENIE tune x POT scale.
stack_values = [ext['flash_time'], overlay['flash_time'], dirt['flash_time']]
stack_weights = [ext['pot_scale'],
                 overlay['ppfx_cv']*overlay['weightSplineTimesTune']*overlay['pot_scale'],
                 dirt['ppfx_cv']*dirt['weightSplineTimesTune']*dirt['pot_scale']]
n = ax1.hist(stack_values, 50, range=[0, 25], stacked=True,
             weights=stack_weights,
             color=['navajowhite', 'limegreen', 'peru'],
             label=['EXT', 'In Cryo MC', 'Dirt'])[0]
prediction = n[-1]                 # top of the stack = total prediction per bin

ax1.errorbar(data_bins, n_data, yerr=np.sqrt(n_data), xerr=x_err,
             color="black", fmt='o', markersize=3, label='DATA')
ax1.legend(fontsize=12)

# Ratio panel
ax2.axhline(1, color='black', lw=1, linestyle='--')
ax2.set_ylim(0.8, 1.2)
ax2.errorbar(data_bins, n_data/prediction,
             yerr=get_ratio_err(n_data, prediction), xerr=x_err,
             color="black", fmt='.')

ax1.set_ylabel('Events / 5$\\times10^{20}$ POT' if ISRUN3 else 'Events / 2$\\times10^{20}$ POT',
               fontsize=15)
ax2.set_xlabel('Flash Time [$\\mu$s]', fontsize=15)

for panel in (ax1, ax2):
    panel.set_xlim(0, 25)

plt.tight_layout()

# Title and output file depend on the run and on whether the tune is applied.
title_and_file = {
    (True, True): ("RHC Run 3: Flash Time (98% EXT Tune and 45% Dirt Tune)",
                   "RHCRUN3_flashtime_full_tune.pdf"),
    (True, False): ("RHC Run 3: Flash Time (No Dirt/EXT Tune)",
                    "RHCRUN3_flashtime_full_notune.pdf"),
    (False, True): ("FHC Run 1: Flash Time (98% EXT Tune and 65% Dirt Tune)",
                    "FHCRUN1_flashtime_full_tune.pdf"),
    (False, False): ("FHC Run 1: Flash Time (No Dirt/EXT Tune)",
                     "FHCRUN1_flashtime_full_notune.pdf"),
}
plot_title, pdf_name = title_and_file[(bool(ISRUN3), bool(tune))]

ax1.set_title(plot_title, fontsize=15)
plt.savefig(parameters(ISRUN3)['plots_path']+pdf_name, transparent=True, bbox_inches='tight')

plt.show()

In [None]:
# Show the path prefix the figures above were saved under.
parameters(ISRUN3)['plots_path']

In [None]:
# Flash-time distribution restricted to 1-5.5 us (9 bins): same stacked
# prediction vs. beam-on data layout, with the mean data/prediction ratio
# quoted in the ratio-panel legend.
# (`gridspec` and `get_ratio_err` are not imported in this notebook's own
# cells; presumably they come from the star imports.)

# Beam-on counts per bin; the scratch figure plt.hist draws on is discarded.
n_data, b_data, p_data = plt.hist(data['flash_time'], 9, range=[1, 5.5])
plt.close()
data_bins = 0.5*(b_data[1:]+b_data[:-1])
x_err = np.diff(b_data)/2          # half bin width, drawn as the horizontal bar

gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])
fig = plt.figure(figsize=(8, 7))
ax1, ax2 = plt.subplot(gs[0]), plt.subplot(gs[1])

for panel in (ax1, ax2):
    panel.tick_params(axis = 'both', which = 'major', labelsize = 14)

# Prediction: EXT carries only its POT scale, MC carries PPFX CV x GENIE tune x POT scale.
stack_values = [ext['flash_time'], overlay['flash_time'], dirt['flash_time']]
stack_weights = [ext['pot_scale'],
                 overlay['ppfx_cv']*overlay['weightSplineTimesTune']*overlay['pot_scale'],
                 dirt['ppfx_cv']*dirt['weightSplineTimesTune']*dirt['pot_scale']]
n = ax1.hist(stack_values, 9, range=[1, 5.5], stacked=True,
             weights=stack_weights,
             color=['navajowhite', 'limegreen', 'peru'],
             label=['EXT', 'In Cryo MC', 'Dirt'])[0]
prediction = n[-1]                 # top of the stack = total prediction per bin

ax1.errorbar(data_bins, n_data, yerr=np.sqrt(n_data), xerr=x_err,
             color="black", fmt='o', markersize=3, label='DATA')
ax1.legend(fontsize=13)

# Ratio panel
ax2.axhline(1.0, color='black', lw=1, linestyle='--')
ax2.set_ylim(0.8, 1.2)
ratio = n_data/prediction
ax2.errorbar(data_bins, ratio,
             yerr=get_ratio_err(n_data, prediction), xerr=x_err,
             color="black", fmt='.', label='Average = '+str( round(np.average(ratio), 2)))
ax2.legend(fontsize=13)

ax1.set_ylabel('Events / 5$\\times10^{20}$ POT' if ISRUN3 else 'Events / 2$\\times10^{20}$ POT',
               fontsize=15)
ax2.set_xlabel('Flash Time [$\\mu$s]', fontsize=15)

for panel in (ax1, ax2):
    panel.set_xlim(1, 5.5)

plt.tight_layout()

# Title and output file depend on the run and on whether the tune is applied.
title_and_file = {
    (True, True): ("RHC Run 3: Flash Time (98% EXT Tune and 45% Dirt Tune)",
                   "RHCRUN3_flashtime_ext_tune.pdf"),
    (True, False): ("RHC Run 3: Flash Time (No Dirt/EXT Tune)",
                    "RHCRUN3_flashtime_ext_notune.pdf"),
    (False, True): ("FHC Run 1: Flash Time (98% EXT Tune and 65% Dirt Tune)",
                    "FHCRUN1_flashtime_ext_tune.pdf"),
    (False, False): ("FHC Run 1: Flash Time (No Dirt/EXT Tune)",
                     "FHCRUN1_flashtime_ext_notune.pdf"),
}
plot_title, pdf_name = title_and_file[(bool(ISRUN3), bool(tune))]

ax1.set_title(plot_title, fontsize=15)
plt.savefig(parameters(ISRUN3)['plots_path']+pdf_name, transparent=True, bbox_inches='tight')

plt.show()

In [None]:
# Single-bin comparison over 5.64-15.44 us: stacked prediction vs. beam-on
# data, with the data/prediction ratio of that one bin quoted in the legend.
# (`gridspec` and `get_ratio_err` are not imported in this notebook's own
# cells; presumably they come from the star imports.)

# Beam-on count in the window; the scratch figure plt.hist draws on is discarded.
n_data, b_data, p_data = plt.hist(data['flash_time'], 1, range=[5.64, 15.44])
plt.close()
data_bins = 0.5*(b_data[1:]+b_data[:-1])
x_err = np.diff(b_data)/2          # half bin width, drawn as the horizontal bar

gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])
fig = plt.figure(figsize=(8, 7))
ax1, ax2 = plt.subplot(gs[0]), plt.subplot(gs[1])

for panel in (ax1, ax2):
    panel.tick_params(axis = 'both', which = 'major', labelsize = 14)

# Prediction: EXT carries only its POT scale, MC carries PPFX CV x GENIE tune x POT scale.
stack_values = [ext['flash_time'], overlay['flash_time'], dirt['flash_time']]
stack_weights = [ext['pot_scale'],
                 overlay['ppfx_cv']*overlay['weightSplineTimesTune']*overlay['pot_scale'],
                 dirt['ppfx_cv']*dirt['weightSplineTimesTune']*dirt['pot_scale']]
n = ax1.hist(stack_values, 1, range=[5.64, 15.44], stacked=True,
             weights=stack_weights,
             color=['navajowhite', 'limegreen', 'peru'],
             label=['EXT', 'In Cryo MC', 'Dirt'])[0]
prediction = n[-1]                 # top of the stack = total prediction

ax1.errorbar(data_bins, n_data, yerr=np.sqrt(n_data), xerr=x_err,
             color="black", fmt='o', markersize=3, label='DATA')
ax1.legend(fontsize=13)

# Ratio panel
ax2.axhline(1.0, color='black', lw=1, linestyle='--')
ax2.set_ylim(0.8, 1.2)
ratio = n_data/prediction
ax2.errorbar(data_bins, ratio,
             yerr=get_ratio_err(n_data, prediction), xerr=x_err,
             color="black", fmt='.', label='Average = '+str(round(ratio[0], 2)))

ax1.set_ylabel('Events / 5$\\times10^{20}$ POT' if ISRUN3 else 'Events / 2$\\times10^{20}$ POT',
               fontsize=15)
ax2.set_xlabel('Flash Time [$\\mu$s]', fontsize=15)

ax2.legend(fontsize=13)
for panel in (ax1, ax2):
    panel.set_xlim(5.64, 15.44)

plt.tight_layout()

# Title and output file depend on the run and on whether the tune is applied.
title_and_file = {
    (True, True): ("RHC Run 3: Flash Time (98% EXT Tune and 45% Dirt Tune)",
                   "RHCRUN3_window_tune.pdf"),
    (True, False): ("RHC Run 3: Flash Time (No Dirt/EXT Tune)",
                    "RHCRUN3_window_notune.pdf"),
    (False, True): ("FHC Run 1: Flash Time (98% EXT Tune and 65% Dirt Tune)",
                    "FHCRUN1_window_tune.pdf"),
    (False, False): ("FHC Run 1: Flash Time (No Dirt/EXT Tune)",
                     "FHCRUN1_window_notune.pdf"),
}
plot_title, pdf_name = title_and_file[(bool(ISRUN3), bool(tune))]

ax1.set_title(plot_title, fontsize=15)
plt.savefig(parameters(ISRUN3)['plots_path']+pdf_name, transparent=True, bbox_inches='tight')

plt.show()

In [None]:
# Echo the run flag.
ISRUN3

In [None]:
# Display the bin edges (element [1] of the hist return value) of the
# 50-bin, 0-25 us stacked histogram.
# NOTE(review): this draws the whole stack again onto `ax1`, which by now is the
# top panel of the last figure created above; only the edges are used.
ax1.hist([ext['flash_time'], 
              overlay['flash_time'], 
              dirt['flash_time']], 50, range=[0, 25], stacked=True, 
            weights=[ ext['pot_scale'], 
                 overlay['ppfx_cv']*overlay['weightSplineTimesTune']*overlay['pot_scale'], 
                 dirt['ppfx_cv']*dirt['weightSplineTimesTune']*dirt['pot_scale']], 
         color=['navajowhite', 'limegreen', 'peru'], 
         label=['EXT', 'In Cryo MC', 'Dirt'])[1]

### Early Selection Variables

In [None]:
# Echo the run flag before the systematics section.
ISRUN3

In [None]:
# Variable, binning and axis label used by every systematics / chi2 cell below.
# Exactly one (xvar, bins, x_label) triple should be active; the commented
# triples are the alternatives.
xvar = 'flash_time'
bins = [x*0.5 for x in range(51)]   # 51 edges: 0.0, 0.5, ..., 25.0
x_label = 'Flash Time'

#xvar = 'nslice'
#bins = [-0.5, 0.5, 1.5]
#x_label = "Pandora Slice ID"

#xvar = "reco_nu_vtx_sce_x"
#bins = [x*10 for x in range(27)]
#x_label = 'Reconstructed Interaction Vertex (X) [cm]'

#xvar = "reco_nu_vtx_sce_y"
#bins = [-120, -110, -100, -90, -80, -70, -60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120]
#x_label = 'Reconstructed Interaction Vertex (Y) [cm]'

#xvar = 'reco_nu_vtx_sce_z'
#bins = [x*40 for x in range(27)]
#x_label = 'Reconstructed Interaction Vertex (Z) [cm]'

#xvar = 'contained_fraction'
#bins = [0, .10, .20, .30, .40, .50, .60, .70, .80, .90, 1]
#x_label = "Contained Fraction"

# Passed through to plotSysVariations; left empty here.
true_var = ''
# Plot range = first and last bin edge.
xlow = bins[0]
xhigh = bins[-1]

# Selection query applied by the cells below; the empty string means "no cut".
# NOTE(review): DataFrame.query does not accept an empty expression, so cells
# that call .query(q) directly need a non-empty q.
q = ""#"nslice==1"# and "+reco_in_fv_query


In [None]:
# Central-value prediction per bin, taken from the 'CV' entry of plot_mc's
# return value; used below as the denominator of every fractional uncertainty.
ncv_total = plot_mc(xvar, bins, xlow, xhigh, q, datasets, ISRUN3, norm='data')['CV']


### PPFX, GENIE, GEANT4 multisims

In [None]:
import uncertainty_functions 
importlib.reload(uncertainty_functions)
from uncertainty_functions import plotSysVariations

In [None]:
# Inspect the PPFX universe weights of the overlay sample.
# NOTE(review): the 'weightsPPFX' branch is only loaded when isPPFX is True;
# with the default isPPFX = False this lookup raises KeyError.
overlay['weightsPPFX']

In [None]:
# Run one multisim systematic through plotSysVariations, with wall-clock stamps
# around it. Only the PPFX call (600 universes) is active; the GENIE and GEANT4
# calls are kept commented for reference.
# NOTE(review): needs isPPFX = True, otherwise 'weightsPPFX' was never loaded.
# NOTE(review): the first commented call passes (true_var, xvar, ...) while the
# others pass (xvar, true_var, ...) -- check the order before re-enabling it.

#ncv, geant4_variations = plotSysVariations(true_var, xvar, bins, xlow, xhigh, '', datasets, 'weightsReint', 1000, 
#                                         ISRUN3, plot=False, background_subtraction=False)

print("start:",datetime.now().strftime("%H:%M:%S"))

ncv, ppfx_variations = plotSysVariations(xvar, true_var, bins, xlow, xhigh, q, datasets, 
                                           'weightsPPFX', 600, ISRUN3, plot=True)

#ncv, genie_variations = plotSysVariations(xvar, true_var, bins, xlow, xhigh, q, datasets, 'weightsGenie', 600, 
#                                         ISRUN3, plot=True, background_subtraction=False)

#ncv, geant4_variations = plotSysVariations(xvar, true_var, bins, xlow, xhigh, q, datasets, 'weightsReint', 1000, 
#                                         ISRUN3, plot=True, background_subtraction=False)

print("end:",datetime.now().strftime("%H:%M:%S"))

In [None]:
# Per-bin fractional uncertainty from the PPFX universes ('fractional_uncertainty'
# entry of calcCov's result), echoed as the cell output.
frac = calcCov(xvar, bins, ncv, ncv_total, ppfx_variations, plot=False, save=False,isrun3=ISRUN3)['fractional_uncertainty']
frac

In [None]:
# PPFX covariance matrix ('cov' entry of the same calcCov call as the previous
# cell; calcCov is evaluated a second time here).
cov = calcCov(xvar, bins, ncv, ncv_total, ppfx_variations, plot=False, save=False,isrun3=ISRUN3)['cov']

In [None]:
# Display the covariance matrix as a numpy array.
np.array(cov)

### GENIE unisims

In [None]:
# divide the tune weight out of everything except SCC variations
# don't divide the tune weight out of SCC variations 
# (that handling is not visible in this cell; presumably it happens inside plotSysVariations)

genie_unisim_variations = ['RPA',
                           'CCMEC', 'AxFFCCQE', 'VecFFCCQE', 'DecayAngMEC',
                           'ThetaDelta2Npi', 'ThetaDelta2NRad',
                           'NormCCCOH', 'NormNCCOH',
                           'xsr_scc_Fv3', 'xsr_scc_Fa3']

# One covariance result per knob.
genie_unisim_cov = {}

reco_axis_label = 'Reco '+x_label
pot_label = str(beamon_pot)+" POT"

for knob in genie_unisim_variations:

    # RPA has an "up" and a "dn" variation; every other knob only has "up".
    knob_columns = ['knobRPAup', 'knobRPAdn'] if knob == 'RPA' else ['knob'+knob+'up']

    # Positions of those knobs inside each event's 'weightsGenieUnisim' vector.
    idx = [sys_genie_unisim.index(column) for column in knob_columns]

    ncv_nu, variations = plotSysVariations(xvar, true_var, bins, xlow, xhigh, q, datasets,
                                           'weightsGenieUnisim', idx, ISRUN3, plot=True,
                                           axis_label=reco_axis_label, pot=pot_label,
                                           background_subtraction=False, title=knob)

    # calc covariance
    genie_unisim_cov[knob] = calcCov(xvar, bins, ncv_nu, ncv_total, variations, save=False,
                                     axis_label=reco_axis_label, pot=pot_label, isrun3=ISRUN3)

In [None]:
# compute total covariance, correlation, & uncertainty 
# (element-wise sum of every knob's matrices)

n_unisim_bins = len(bins)-1

cov = [[0]*n_unisim_bins for _ in range(n_unisim_bins)]
frac_cov = [[0]*n_unisim_bins for _ in range(n_unisim_bins)]
cor = [[0]*n_unisim_bins for _ in range(n_unisim_bins)]

for knob_result in genie_unisim_cov.values():
    for i in range(n_unisim_bins):
        for j in range(n_unisim_bins):
            cov[i][j] += knob_result['cov'][i][j]
            frac_cov[i][j] += knob_result['frac_cov'][i][j]

# Correlation = covariance / (sigma_i * sigma_j); entries with a vanishing
# denominator keep their initial 0.
for i in range(n_unisim_bins):
    for j in range(n_unisim_bins):
        sigma_product = np.sqrt(cov[i][i])*np.sqrt(cov[j][j])
        if sigma_product != 0:
            cor[i][j] = cov[i][j] / sigma_product

genie_unisim_dict = dict(cov=cov,
                         frac_cov=frac_cov,
                         cor=cor,
                         fractional_uncertainty=np.sqrt(np.diag(frac_cov)))

### beamline geometry

In [None]:
# ordered by beamline variation run number
# [+1sigma run #, -1sigma run #]
beamline_runs = {
    'HornCurrent': [1, 2],
    'xHorn1': [3, 4],
    'yHorn1': [5, 6],
    'BeamSpotSize': [7, 8],
    'xHorn2': [9, 10],
    'yHorn2': [11, 12],
    'WaterOnHorns': [13, 14],
    'xBeamShift': [15, 16],
    'yBeamShift': [17, 18],
    'zTargetPosition': [19, 20],
}

# One covariance result per beamline variation.
beamline_cov = {}

reco_axis_label = 'Reco '+x_label
pot_label = str(beamon_pot)+" POT"

for variation, run_numbers in beamline_runs.items():

    # index in weightsNuMIGeo are offset by -1
    idx = [run_number-1 for run_number in run_numbers]
    print(idx)

    ncv_nu, beamline_variations = plotSysVariations(xvar, true_var, bins, xlow, xhigh, q, datasets,
                                                    'weightsNuMIGeo', idx, ISRUN3, plot=True,
                                                    axis_label=reco_axis_label, pot=pot_label,
                                                    background_subtraction=False)

    # calc covariance
    beamline_cov[variation] = calcCov(xvar, bins, ncv_nu, ncv_total,
                                      beamline_variations, save=False, isrun3=ISRUN3)
    


In [None]:
# compute total covariance, correlation, & uncertainty 
# (element-wise sum of every beamline variation's matrices)

n_beamline_bins = len(bins)-1

cov = [[0]*n_beamline_bins for _ in range(n_beamline_bins)]
frac_cov = [[0]*n_beamline_bins for _ in range(n_beamline_bins)]
cor = [[0]*n_beamline_bins for _ in range(n_beamline_bins)]

for variation_result in beamline_cov.values():
    for i in range(n_beamline_bins):
        for j in range(n_beamline_bins):
            cov[i][j] += variation_result['cov'][i][j]
            frac_cov[i][j] += variation_result['frac_cov'][i][j]

# Correlation = covariance / (sigma_i * sigma_j); entries with a vanishing
# denominator keep their initial 0.
for i in range(n_beamline_bins):
    for j in range(n_beamline_bins):
        sigma_product = np.sqrt(cov[i][i])*np.sqrt(cov[j][j])
        if sigma_product != 0:
            cor[i][j] = cov[i][j] / sigma_product

beamline_dict = dict(cov=cov,
                     frac_cov=frac_cov,
                     cor=cor,
                     fractional_uncertainty=np.sqrt(np.diag(frac_cov)))

beamline_dict['fractional_uncertainty']

### stat uncertainty, POT counting, dirt uncertainty

In [None]:

# Reminder: the cells in this section use the selection query `q`; set it before running them.
print("Make sure to update query!")

In [None]:
# MC statistical covariance: diagonal, sum of squared event weights per bin.
# doesn't include EXT uncertainty 

print("Make sure to update query!")

n_stat_bins = len(bins)-1

mc_stat_cov = [[0]*n_stat_bins for _ in range(n_stat_bins)]
mc_frac_stat_cov = [[0]*n_stat_bins for _ in range(n_stat_bins)]

# DataFrame.query rejects an empty expression, so the selection is applied only
# when q is set (same convention as the chi2 cell further down).
mc_parts = [datasets['infv'], datasets['outfv']]
if q != "":
    mc_parts = [part.query(q) for part in mc_parts]

ncv = pd.concat(mc_parts, ignore_index=True)

for i in range(n_stat_bins):

    low_edge, high_edge = bins[i], bins[i+1]

    # Bins are half-open [low, high), except the last one, which also includes
    # its upper edge.
    if i == n_stat_bins-1:
        in_bin = (ncv[xvar] >= low_edge) & (ncv[xvar] <= high_edge)
    else:
        in_bin = (ncv[xvar] >= low_edge) & (ncv[xvar] < high_edge)

    mc_stat_cov[i][i] = sum(ncv.loc[in_bin, 'totweight_data'] ** 2)
    # fractional with respect to the full central-value prediction
    mc_frac_stat_cov[i][i] = mc_stat_cov[i][i]/ ncv_total[i]**2

mc_stat_percent_error = np.sqrt(np.diag(mc_frac_stat_cov))


In [None]:
print("MAKE SURE TO UPDATE QUERY!")

# selected EXT uncertainty 
# POT-scaled EXT counts per bin after the selection query. The query is applied
# to the frame (a Series has no .query) and skipped when q is empty, because
# DataFrame.query rejects an empty expression.
ext_selected = datasets['ext'] if q == "" else datasets['ext'].query(q)

selected_ext = plt.hist(ext_selected[xvar],
                        bins,
                        weights=ext_selected['pot_scale'],
                        color='gainsboro')[0]
plt.show()
#selected_ext

In [None]:
# take fractional with respect to the full event rate

n_ext_bins = len(bins)-1

ext_frac_stat_cov = [[0]*n_ext_bins for _ in range(n_ext_bins)]
ext_stat_cov = [[0]*n_ext_bins for _ in range(n_ext_bins)]

# Diagonal only: the (scaled) EXT count of each bin is used as its variance;
# bins with no EXT events keep 0.
for i, ext_count in enumerate(selected_ext):
    if ext_count == 0:
        continue
    ext_stat_cov[i][i] = ext_count
    ext_frac_stat_cov[i][i] = ext_count/(ncv_total[i]**2)

ext_stat_percent_error = np.sqrt(np.diag(ext_frac_stat_cov))


In [None]:
print(" make sure to update query !! ")

# POT-scaled dirt counts per bin, taken from the out-of-FV MC rows flagged isDirt.
outfv_dirt = datasets['outfv'].query('isDirt==1')

selected_dirt = plt.hist(outfv_dirt[xvar],
                         bins,
                         weights=outfv_dirt['pot_scale'],
                         color='orchid')[0]

# Dirt unisim (the 1.0 argument is passed straight to dirt_unisim, which is
# defined outside this notebook).
dirt_uncertainty = dirt_unisim(xvar, bins, ncv_total, selected_dirt, 1.0, ISRUN3, plot=True,
                               x_label=None, title=None)



In [None]:
# pot counting
# (the 0.02 argument is passed straight to pot_unisims, which is defined
# outside this notebook; presumably it is the fractional POT uncertainty)
pot_dict = pot_unisims(xvar, ncv_total, bins, 0.02, ISRUN3, plot=True, x_label=None)

In [None]:
# Echo the run flag before the final plotting section.
ISRUN3

### Plotting 

In [None]:
importlib.reload(sf)
from selection_functions import *

In [None]:
import json

In [None]:
# covariance dictionary: load the stored covariance matrices for this run and variable

cov_file_prefix = 'covariances/rhc_' if ISRUN3 else 'covariances/fhc_'

with open(cov_file_prefix+xvar+'_123022.json') as f_cov:
    cov_dict = json.load(f_cov)

# need to compute beam-on stat uncertainty 

cov_dict.keys()


In [None]:
## total covariance 
# Element-wise sum of every source in cov_dict. A 'total' entry that is already
# present (this cell was run before, or the JSON was written by the last cell
# of this notebook, which dumps cov_dict including 'total') is skipped so it is
# not added to itself.
n_cov_bins = len(bins)-1
tot_sim_cov = [[0]*n_cov_bins for _ in range(n_cov_bins)]

for key, source_cov in cov_dict.items():
    if key == 'total':
        continue
    tot_sim_cov = [[x+y for x, y in zip(a, b)] for a, b in zip(tot_sim_cov, source_cov)]

cov_dict['total'] = tot_sim_cov


In [None]:
# total fractional covariance: cov[i][j] / (prediction_i * prediction_j)
n_frac_bins = len(bins)-1

tot_sim_frac_cov = [
    [tot_sim_cov[i][j]/(ncv_total[i]*ncv_total[j]) for j in range(n_frac_bins)]
    for i in range(n_frac_bins)
]

# per-bin fractional uncertainty
np.sqrt(np.diagonal(tot_sim_frac_cov))

In [None]:
# Per-bin fractional uncertainty of every covariance source (on the simulation
# only): sqrt(diagonal) relative to the central-value prediction.
frac_unc_dict = {
    key: [np.sqrt(source_cov[i][i]/(ncv_total[i]*ncv_total[i])) for i in range(len(bins)-1)]
    for key, source_cov in cov_dict.items()
}

np.array(frac_unc_dict['total'])

In [None]:
## compute the chi2 

# Beam-on counts per bin; the selection is applied only when q is non-empty.
beamon_selected = datasets['data'].copy()
if q != "":
    beamon_selected = beamon_selected.query(q)
selected_data = plt.hist(beamon_selected[xvar], bins)[0]
plt.close()

n_chi2_bins = len(bins)-1

#  make sure to include the beam on stat covariance! 
# Diagonal fractional variance N/N^2 per bin; empty bins keep 0.
beamon_frac_stat_cov = [[0]*n_chi2_bins for _ in range(n_chi2_bins)]
for i, n_observed in enumerate(selected_data):
    if n_observed != 0:
        beamon_frac_stat_cov[i][i] = n_observed/(n_observed**2)

# Fractional -> absolute: scale element (i, j) by prediction_i * prediction_j.
tot_cov = (np.array(tot_sim_frac_cov)+np.array(beamon_frac_stat_cov)) * np.outer(ncv_total, ncv_total)

tot_inverse_cov = np.linalg.inv(tot_cov)

## check: cov x inverse should display as the identity
plt.pcolor(bins, bins, np.matmul(tot_cov, tot_inverse_cov), cmap='OrRd', edgecolors='k')
plt.xlim(xlow,xhigh)
plt.ylim(xlow,xhigh)
cbar = plt.colorbar()
plt.show()

# chi2 = sum_ij (pred_i - data_i) * inv_cov_ij * (pred_j - data_j)
residual = [ncv_total[k]-selected_data[k] for k in range(n_chi2_bins)]

chi2 = 0
for i in range(n_chi2_bins):
    for j in range(n_chi2_bins):
        chi2 = chi2 + (residual[i]*tot_inverse_cov[i][j]*residual[j])
chi2

In [None]:
# Reminder before saving: show the tag that is passed to plot_data as save_label.
print("make sure to update save label!")
print("save label = ", save_label)

In [None]:
# Final data/MC comparison with the total systematic band and a chi2 annotation.
plot_options = dict(save=True, save_label=save_label, x_label=x_label, ncol=3,
                    y_label=beamon_pot_str,
                    sys=frac_unc_dict['total'],
                    text=chi2_label+"\n$\\chi^{2}$/n = "+str(round(chi2, 1))+"/"+str(len(bins)-1))

if xvar=="nslice":
    # the slice-ID plot gets fixed ticks, a fixed y maximum and its own text position
    plot_options.update(x_ticks=[0,1], ymax=900000, xtext=1.4, ytext=400000)
else:
    plot_options.update(xtext=245, ytext=7000)

d = plot_data(xvar, bins, xlow, xhigh, q, datasets, ISRUN3, **plot_options)


## create json file 

In [None]:
import os
import json
from datetime import date

In [None]:
# Write cov_dict (including its 'total' entry) to covariances/<fhc|rhc>_<xvar>_<mmddyy>.json
cov_out_prefix = 'covariances/rhc_' if ISRUN3 else 'covariances/fhc_'

with open(cov_out_prefix+xvar+"_"+date.today().strftime("%m%d%y")+".json", 'w') as f:
    json.dump(cov_dict, f)