# Analysis for Differential Jet Mass in Z+Jets events with NanoAODv9

In [1]:
import awkward as ak
import numpy as np
import time
import coffea
import uproot
import hist
import vector
print("awkward version ", ak.__version__)
print("coffea version ", coffea.__version__)
from coffea import util, processor
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema, BaseSchema
from collections import defaultdict
import pickle
import matplotlib.pyplot as plt

awkward version  1.10.1
coffea version  0.7.19


### Automatically reload any modules used

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from smp_utils import *
from response_maker_nanov9_lib import *
from response_maker_nanov9 import *

### To regenerate the histograms, uncomment one of the lines below. Otherwise leave them commented out and just read the existing pkl files.

In [4]:
#response_maker_nanov9(testing=False, do_gen=False, client=None, prependstr="/mnt/data/cms")
#response_maker_nanov9(testing=False, do_gen=True, client=client)

### Open pkl file for the output histograms

In [5]:
# Load the pickled output dictionary for the "gen" run into `output_gen`.
# The path is relative, so the file must be in the current working directory;
# the recorded run of this cell failed with FileNotFoundError because it was not.
# NOTE(review): pickle.load can execute arbitrary code while unpickling -- only
# open pkl files that you produced yourself or otherwise trust.
with open("qjetmass_zjets_gen.pkl", "rb") as f:
    output_gen = pickle.load( f )

FileNotFoundError: [Errno 2] No such file or directory: 'qjetmass_zjets_gen.pkl'

In [None]:
# Load the pickled output dictionary for the "reco" run into `output_reco`.
# The path is relative, so the file must be in the current working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling -- only
# open pkl files that you produced yourself or otherwise trust.
with open("qjetmass_zjets_reco.pkl", "rb") as f:
    output_reco = pickle.load( f )

In [None]:
# Display the "cutflow" entry of the gen output (a mapping keyed by dataset).
output_gen["cutflow"]

In [None]:
# Display the "cutflow" entry of the reco output (a mapping keyed by dataset).
output_reco["cutflow"]

In [None]:
# The dataset names are the keys of each output's "cutflow" mapping;
# materialise them as lists so they can be iterated repeatedly.
datasets_gen = list(output_gen["cutflow"])
datasets_reco = list(output_reco["cutflow"])

In [None]:
# Key views over the two output dictionaries. They include the "cutflow"
# entry as well as the histogram names, so loops over them must skip "cutflow".
histnames_gen = output_gen.keys()
histnames_reco = output_reco.keys()

In [None]:
# Print the available keys of the gen output, a separator line, then the keys
# of the reco output.
print(histnames_gen)
print("========")
print(histnames_reco)

### Normalize each histogram, dataset by dataset, to that dataset's number of events

In [None]:
# Divide each dataset's slice of every gen histogram by that dataset's
# 'all events' cutflow count.
# NOTE: the division happens in place, so re-running this cell without
# reloading the pkl file divides the histograms a second time.
for ihistname in histnames_gen:
    if ihistname == "cutflow":
        continue
    hgen = output_gen[ihistname]
    # The emptiness check does not depend on the dataset, so evaluate it once
    # per histogram rather than once per (histogram, dataset) pair.
    if not np.sum(hgen.values(flow=True)) > 0:
        continue
    # Assumes (as the positional indexing did before) that the first axis is
    # the dataset category axis -- TODO confirm for every histogram.
    categories = list(hgen.axes[0])
    for dataset in datasets_gen:
        nevents = output_gen["cutflow"][dataset]['all events']
        # Find the slice by category name instead of by the position of the
        # key in the cutflow dict: the two orderings are not guaranteed to
        # agree, and a dataset may be missing from this histogram altogether.
        # A zero event count is skipped to avoid filling the slice with inf/nan.
        if dataset not in categories or not nevents:
            continue
        hgen.view(flow=True)[categories.index(dataset)] /= nevents

In [None]:
# Divide each dataset's slice of every reco histogram by that dataset's
# 'trigsel' cutflow count.
# NOTE: the division happens in place, so re-running this cell without
# reloading the pkl file divides the histograms a second time.
for ihistname in histnames_reco:
    if ihistname == "cutflow":
        continue
    hreco = output_reco[ihistname]
    # The emptiness check does not depend on the dataset, so evaluate it once
    # per histogram rather than once per (histogram, dataset) pair.
    if not np.sum(hreco.values(flow=True)) > 0:
        continue
    # Assumes (as the positional indexing did before) that the first axis is
    # the dataset category axis -- TODO confirm for every histogram.
    categories = list(hreco.axes[0])
    for dataset in datasets_reco:
        nevents = output_reco["cutflow"][dataset]['trigsel']
        # Find the slice by category name instead of by the position of the
        # key in the cutflow dict: the two orderings are not guaranteed to
        # agree, and a dataset may be missing from this histogram altogether.
        # A zero event count is skipped to avoid filling the slice with inf/nan.
        if dataset not in categories or not nevents:
            continue
        hreco.view(flow=True)[categories.index(dataset)] /= nevents

# Make plots

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the "ptgen" projection of the 'ptjet_mjet_u_gen' histogram, drawn as a
# density (density=True), with a logarithmic y axis.
output_gen['ptjet_mjet_u_gen'].project("ptgen").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Plot the "ptgen" projection of 'ptjet_mjet_u_gen' without the density
# option, i.e. showing the stored bin contents, with a logarithmic y axis.
output_gen['ptjet_mjet_u_gen'].project("ptgen").plot()
plt.yscale("log")
plt.show()

In [None]:
# Plot the "mgen" projection of the 'ptjet_mjet_u_gen' histogram as a density,
# with a logarithmic y axis.
output_gen['ptjet_mjet_u_gen'].project("mgen").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Plot the "mgen" projection of the 'ptjet_mjet_g_gen' histogram as a density,
# with a logarithmic y axis.
output_gen['ptjet_mjet_g_gen'].project("mgen").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Pull the bin contents of the two response-matrix histograms out as arrays,
# projected onto the four axes "ptreco", "mreco", "ptgen", "mgen".
# values() is called without flow=True here, unlike in the normalisation cells.
# NOTE(review): the later shape unpacking assumes the array axes come back in
# the order requested in project() -- confirm.
response_matrix_u_values = output_gen['response_matrix_u'].project("ptreco", "mreco", "ptgen", "mgen").values()
response_matrix_g_values = output_gen['response_matrix_g'].project("ptreco", "mreco", "ptgen", "mgen").values()

In [None]:
# Collapse the 4-D response array into a 2-D matrix: the first two axes are
# merged into the rows and the last two axes into the columns.
nptreco, nmassreco, nptgen, nmassgen = response_matrix_u_values.shape
response_matrix_u_final = np.reshape(
    response_matrix_u_values,
    (nptreco * nmassreco, nptgen * nmassgen),
)

In [None]:
# Show log(1 + content) of the flattened 'response_matrix_u' array as an image.
# np.log1p is used instead of np.log(x + 1): the contents are small after the
# per-event normalisation, and forming x + 1 first throws away their precision.
plt.imshow( np.log1p(response_matrix_u_final), vmax=1e-3, aspect="equal", cmap="binary" )

In [None]:
# Collapse the 4-D 'response_matrix_g' array into a 2-D matrix. The bin counts
# are the ones unpacked from the 'u' array, so both arrays must share a shape.
response_matrix_g_final = np.reshape(
    response_matrix_g_values,
    (nptreco * nmassreco, nptgen * nmassgen),
)

In [None]:
# Show log(1 + content) of the flattened 'response_matrix_g' array as an image.
# np.log1p is used instead of np.log(x + 1): the contents are small after the
# per-event normalisation, and forming x + 1 first throws away their precision.
plt.imshow( np.log1p(response_matrix_g_final), vmax=1e-3, aspect="equal", cmap="binary" )

In [None]:
# Plot the "pt" projection of the 'ptjet_gen_pre' histogram as a density,
# with a logarithmic y axis.
output_gen['ptjet_gen_pre'].project("pt").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Plot the "ptgen" projection of the 'ptjet_mjet_u_gen' histogram as a density,
# with a logarithmic y axis.
# NOTE(review): this repeats an earlier cell of the notebook verbatim.
output_gen['ptjet_mjet_u_gen'].project("ptgen").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Overlay the "ptreco" projection of 'ptjet_mjet_u_reco' from the two outputs,
# each drawn as a density, with a logarithmic y axis.
# The curves are labelled and a legend is drawn so they can be told apart.
output_gen['ptjet_mjet_u_reco'].project("ptreco").plot(density=True, label="output_gen")
output_reco['ptjet_mjet_u_reco'].project("ptreco").plot(density=True, label="output_reco")
plt.yscale("log")
plt.legend()
plt.show()

In [None]:
# Overlay the "mreco" projection of 'ptjet_mjet_u_reco' from the two outputs,
# each drawn as a density, with a logarithmic y axis.
# The curves are labelled and a legend is drawn so they can be told apart.
output_gen['ptjet_mjet_u_reco'].project("mreco").plot(density=True, label="output_gen")
output_reco['ptjet_mjet_u_reco'].project("mreco").plot(density=True, label="output_reco")
plt.yscale("log")
plt.legend()
plt.show()

In [None]:
# Overlay the "mreco" projection of 'ptjet_mjet_g_reco' from the two outputs,
# each drawn as a density, with a logarithmic y axis.
# The curves are labelled and a legend is drawn so they can be told apart.
output_gen['ptjet_mjet_g_reco'].project("mreco").plot(density=True, label="output_gen")
output_reco['ptjet_mjet_g_reco'].project("mreco").plot(density=True, label="output_reco")
plt.yscale("log")
plt.legend()
plt.show()

In [None]:
# Plot the "frac" projection of the 'ptjet_reco_over_gen' histogram as a
# density, with a logarithmic y axis.
output_gen['ptjet_reco_over_gen'].project("frac").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Plot the "dr" projection of the 'drjet_reco_gen' histogram as a density,
# with a logarithmic y axis.
output_gen['drjet_reco_gen'].project("dr").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Plot the "mass" projection of the 'mz_gen' histogram as a density.
# The y axis stays linear here: the log-scale call is commented out.
output_gen['mz_gen'].project("mass").plot(density=True)
#plt.yscale("log")
plt.show()

In [None]:
# Overlay the "mass" projection of 'mz_reco' from the two outputs, each drawn
# as a density. The y axis stays linear: the log-scale call is commented out.
# The curves are labelled and a legend is drawn so they can be told apart.
output_gen['mz_reco'].project("mass").plot(density=True, label="output_gen")
output_reco['mz_reco'].project("mass").plot(density=True, label="output_reco")
#plt.yscale("log")
plt.legend()
plt.show()

In [None]:
# Plot the "frac" projection of the 'mz_reco_over_gen' histogram as a density,
# with a logarithmic y axis.
output_gen['mz_reco_over_gen'].project("frac").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Plot the "dphi" projection of the 'dphi_z_jet_gen' histogram as a density,
# with a logarithmic y axis.
output_gen['dphi_z_jet_gen'].project("dphi").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Plot the "frac" projection of the 'ptasym_z_jet_gen' histogram as a density,
# with a logarithmic y axis.
output_gen['ptasym_z_jet_gen'].project("frac").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Overlay the "frac" projection of 'ptasym_z_jet_reco' from the two outputs,
# each drawn as a density, with a logarithmic y axis.
# The curves are labelled and a legend is drawn so they can be told apart.
output_gen['ptasym_z_jet_reco'].project("frac").plot(density=True, label="output_gen")
output_reco['ptasym_z_jet_reco'].project("frac").plot(density=True, label="output_reco")
plt.yscale("log")
plt.legend()
plt.show()

In [None]:
# Plot the "dr" projection of the 'dr_z_jet_gen' histogram as a density,
# with a logarithmic y axis.
output_gen['dr_z_jet_gen'].project("dr").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Overlay the "dr" projection of 'dr_z_jet_reco' from the two outputs, each
# drawn as a density, with a logarithmic y axis.
# The curves are labelled and a legend is drawn so they can be told apart.
output_gen['dr_z_jet_reco'].project("dr").plot(density=True, label="output_gen")
output_reco['dr_z_jet_reco'].project("dr").plot(density=True, label="output_reco")
plt.yscale("log")
plt.legend()
plt.show()

In [None]:
# Overlay the "dphi" projection of 'dphi_z_jet_reco' from the two outputs,
# each drawn as a density, with a logarithmic y axis.
# The curves are labelled and a legend is drawn so they can be told apart.
output_gen['dphi_z_jet_reco'].project("dphi").plot(density=True, label="output_gen")
output_reco['dphi_z_jet_reco'].project("dphi").plot(density=True, label="output_reco")
plt.yscale("log")
plt.legend()
plt.show()

In [None]:
# Plot the "frac" projection of the 'm_u_jet_reco_over_gen' histogram as a
# density, with a logarithmic y axis.
output_gen['m_u_jet_reco_over_gen'].project("frac").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Plot the "frac" projection of the 'm_g_jet_reco_over_gen' histogram as a
# density, with a logarithmic y axis.
output_gen['m_g_jet_reco_over_gen'].project("frac").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Plot the "dr" projection of the 'drjet_reco_gen' histogram as a density,
# with a logarithmic y axis.
# NOTE(review): this repeats an earlier cell of the notebook verbatim.
output_gen['drjet_reco_gen'].project("dr").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Plot the "dr" projection of the 'dr_gen_subjet' histogram as a density,
# with a logarithmic y axis.
output_gen['dr_gen_subjet'].project("dr").plot(density=True)
plt.yscale("log")
plt.show()

In [None]:
# Project the two reco-over-gen mass histograms onto ("ptgen", "mgen", "frac").
# These 3-D histograms are what the per-bin plotting loops further down slice.
resolutions_u = output_gen['m_u_jet_reco_over_gen'].project("ptgen", "mgen", "frac")
resolutions_g = output_gen['m_g_jet_reco_over_gen'].project("ptgen", "mgen", "frac")

In [None]:
# Number of bins along each of the three axes of resolutions_u. The loops over
# resolutions_g reuse these counts, so both histograms must share a binning.
nptbins,nmassbins,nfracbins = resolutions_u.shape

In [None]:
# Axis objects for the "ptgen" and "mgen" axes; the plotting loops call
# .value(index) on them to build the plot titles.
ptvals = resolutions_u.axes['ptgen']
mvals = resolutions_u.axes['mgen']

In [None]:
# Switch for the per-bin plotting loops below; set to True to draw one plot per
# (ptgen, mgen) bin.
plotRecoOverGen = False

In [None]:
# Optional per-bin diagnostic: draw the "frac" slice of resolutions_g for every
# (ptgen, mgen) bin index pair, titled with the axis values at those indices.
# Nothing is drawn unless plotRecoOverGen is truthy.
if plotRecoOverGen:
    # np.ndindex walks the index pairs with the last index varying fastest,
    # the same order as a nested pt-outer / mass-inner loop.
    for i, j in np.ndindex(nptbins, nmassbins):
        resolutions_g[i, j, :].plot()
        plt.title("Groomed, pt = %6.2f, m = %6.2f" % (ptvals.value(i), mvals.value(j)))
        plt.show()

In [None]:
# Optional per-bin diagnostic: draw the "frac" slice of resolutions_u for every
# (ptgen, mgen) bin index pair, titled with the axis values at those indices.
# Nothing is drawn unless plotRecoOverGen is truthy.
if plotRecoOverGen:
    # np.ndindex walks the index pairs with the last index varying fastest,
    # the same order as a nested pt-outer / mass-inner loop.
    for i, j in np.ndindex(nptbins, nmassbins):
        resolutions_u[i, j, :].plot()
        plt.title("Ungroomed, pt = %6.2f, m = %6.2f" % (ptvals.value(i), mvals.value(j)))
        plt.show()