# Analysis for Differential Jet Mass in Z+Jets events with NanoAODv9

In [1]:
import awkward as ak
import numpy as np
import time
import coffea
import uproot
import hist
import vector
print("awkward version ", ak.__version__)
print("coffea version ", coffea.__version__)
from coffea import util, processor
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema, BaseSchema
from collections import defaultdict
import pickle

awkward version  1.8.0
coffea version  0.7.18


### Automatically reload any modules used

In [2]:
# IPython magics (not plain Python): load the autoreload extension and switch
# it to mode 2, so edits to the imported analysis modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from smp_utils import *
from response_maker_nanov9_lib import *
from response_maker_nanov9 import *

In [4]:
from dask.distributed import Client

# Address of an already-running Dask scheduler (TLS). It is hard-coded to one
# specific deployment; edit it to point at your own scheduler.
scheduler_address = "tls://rappoccio-40gmail-2ecom.dask.cmsaf-prod.flatiron.hollandhpc.org:8786"
client = Client(scheduler_address)

# Leave the client as the last expression so the notebook renders its summary.
client

0,1
Connection method: Direct,
Dashboard: /user/rappoccio@gmail.com/proxy/8787/status,

0,1
Comm: tls://192.168.150.55:8786,Workers: 1
Dashboard: /user/rappoccio@gmail.com/proxy/8787/status,Total threads: 2
Started: Just now,Total memory: 7.00 GiB

0,1
Comm: tls://rappoccio-40gmail-2ecom.dask-worker.cmsaf-prod.flatiron.hollandhpc.org:8788,Total threads: 2
Dashboard: /user/rappoccio@gmail.com/proxy/34889/status,Memory: 7.00 GiB
Nanny: tls://192.168.150.55:34391,
Local directory: /home/cms-jovyan/dask-worker-space/worker-acrprln3,Local directory: /home/cms-jovyan/dask-worker-space/worker-acrprln3
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 2.0%,Last seen: Just now
Memory usage: 141.79 MiB,Spilled bytes: 0 B
Read bytes: 2.98 kiB,Write bytes: 2.98 kiB


### If desired, uncomment these to move old pkl files out of the way (they are renamed with an `_old` suffix)

In [None]:
#! mv qjetmass_zjets_gen.pkl qjetmass_zjets_gen_old.pkl
#! mv qjetmass_zjets_reco.pkl qjetmass_zjets_reco_old.pkl

In [None]:
# Open a single remote ROOT file through the root://xcache endpoint.
# NOTE(review): looks like a quick access check; the handle is never closed in
# this notebook.
xcache_test_url = 'root://xcache//store/mc/RunIISummer20UL18NanoAODv9/DYJetsToLL_M-50_HT-400to600_TuneCP5_PSweights_13TeV-madgraphMLM-pythia8/NANOAODSIM/106X_upgrade2018_realistic_v16_L1v1-v1/130000/00846A4F-433D-6D4E-8381-DD588DAE4954.root'
f = uproot.open(xcache_test_url)

### To rerun the processing, execute the cell below. Otherwise comment it out (or skip it) and just read the existing pkl file.

In [6]:

# Run the processing step with testing disabled and do_gen enabled, handing it
# the Dask client created above.
# NOTE(review): presumably this is what (re)writes the qjetmass_zjets_*.pkl
# files read further down; confirm in response_maker_nanov9.
# The output saved with this cell shows the run aborting with
# FileNotFoundError ("permission denied" while opening a root://xcache file).
response_maker_nanov9(testing=False, do_gen=True, client=client)

[                                        ] | 0% Completed |  3.6s

FileNotFoundError: file not found ([ERROR] Server responded with an error: [3010] Unable to open /store/mc/RunIISummer20UL18NanoAODv9/DYJetsToLL_M-50_HT-400to600_TuneCP5_PSweights_13TeV-madgraphMLM-pythia8/NANOAODSIM/106X_upgrade2018_realistic_v16_L1v1-v1/130000/00846A4F-433D-6D4E-8381-DD588DAE4954.root; permission denied
)

    'root://xcache//store/mc/RunIISummer20UL18NanoAODv9/DYJetsToLL_M-50_HT-400to600_TuneCP5_PSweights_13TeV-madgraphMLM-pythia8/NANOAODSIM/106X_upgrade2018_realistic_v16_L1v1-v1/130000/00846A4F-433D-6D4E-8381-DD588DAE4954.root'

Files may be specified as:
   * str/bytes: relative or absolute filesystem path or URL, without any colons
         other than Windows drive letter or URL schema.
         Examples: "rel/file.root", "C:\abs\file.root", "http://where/what.root"
   * str/bytes: same with an object-within-ROOT path, separated by a colon.
         Example: "rel/file.root:tdirectory/ttree"
   * pathlib.Path: always interpreted as a filesystem path or URL only (no
         object-within-ROOT path), regardless of whether there are any colons.
         Examples: Path("rel:/file.root"), Path("/abs/path:stuff.root")

Functions that accept many files (uproot.iterate, etc.) also allow:
   * glob syntax in str/bytes and pathlib.Path.
         Examples: Path("rel/*.root"), "/abs/*.root:tdirectory/ttree"
   * dict: keys are filesystem paths, values are objects-within-ROOT paths.
         Example: {"/data_v1/*.root": "ttree_v1", "/data_v2/*.root": "ttree_v2"}
   * already-open TTree objects.
   * iterables of the above.


### Open pkl file for the output histograms

In [None]:
# Load the pickled results into `output`.
# NOTE: pickle can execute arbitrary code while loading; only open pkl files
# that you produced yourself or otherwise trust.
with open("qjetmass_zjets_gen.pkl", "rb") as pkl_file:
    output = pickle.load(pkl_file)

In [None]:
# Show the top-level entries of the loaded results (the histogram names plus
# the "cutflow" entry).
output.keys()

In [None]:
# Show the cutflow table; it is keyed by dataset name, and each dataset has an
# 'all events' count that is used for normalisation below.
output["cutflow"]

In [None]:
# Dataset names, in the iteration order of the cutflow mapping. The position
# of each name in this list is used later as an index into the histograms.
datasets = list(output["cutflow"])

In [None]:
# Keys view of `output` (not a copy). It still contains "cutflow", which the
# normalisation loop skips explicitly.
histnames = output.keys()

In [None]:
# Print the available entry names for reference.
print(histnames)

### Normalize each histogram, dataset by dataset, to that dataset's total number of processed events (the `'all events'` entry of the cutflow)

In [None]:
# Scale every histogram, per dataset, by 1 / (number of 'all events').
#
# NOTE(review): the division is done in place on `output`, so running this
# cell twice normalises twice; reload the pkl before re-running.
# NOTE(review): the dataset is addressed by its position in `datasets` along
# the first histogram axis; this assumes the first axis is the dataset axis
# and that its ordering matches the cutflow keys. Confirm.
for hist_index, hist_name in enumerate(histnames):
    # "cutflow" holds the event counts used as denominators; it is not scaled.
    if hist_name == "cutflow":
        continue
    print(hist_index)
    print(hist_name)
    for dataset_index, dataset_name in enumerate(datasets):
        print(dataset_index)
        print(dataset_name)
        # Leave histograms whose total content (flow bins included) is not
        # positive untouched.
        if np.sum(output[hist_name].values(flow=True)) > 0:
            output[hist_name].view(flow=True)[dataset_index] /= output["cutflow"][dataset_name]['all events']

# Make plots

In [None]:
import matplotlib.pyplot as plt

In [None]:
# "pt" projection of the 'ptjet_mjet_u_gen' histogram, drawn as a density on a
# logarithmic y-axis.
output["ptjet_mjet_u_gen"].project("pt").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# Same "pt" projection of 'ptjet_mjet_u_gen', but without density=True, on a
# logarithmic y-axis.
output["ptjet_mjet_u_gen"].project("pt").plot()
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "mreco" projection of the 'ptjet_mjet_u_gen' histogram, drawn as a density
# on a logarithmic y-axis.
# NOTE(review): a *_gen histogram is projected onto an axis named "mreco";
# confirm that this is the intended axis name.
output["ptjet_mjet_u_gen"].project("mreco").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "mreco" projection of the 'ptjet_mjet_g_gen' histogram, drawn as a density
# on a logarithmic y-axis.
# NOTE(review): a *_gen histogram is projected onto an axis named "mreco";
# confirm that this is the intended axis name.
output["ptjet_mjet_g_gen"].project("mreco").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# Pull the bin contents of both response matrices as arrays with the axes in
# the order (ptreco, mreco, ptgen, mgen). values() is called without
# flow=True, so under/overflow bins are not included.
response_axes = ("ptreco", "mreco", "ptgen", "mgen")
response_matrix_u_values = output['response_matrix_u'].project(*response_axes).values()
response_matrix_g_values = output['response_matrix_g'].project(*response_axes).values()

In [None]:
# Flatten the 4D 'u' response (ptreco, mreco, ptgen, mgen) into a 2D matrix:
# rows run over (ptreco, mreco) bins and columns over (ptgen, mgen) bins, with
# the mass index varying fastest (default C ordering of reshape).
nptreco, nmassreco, nptgen, nmassgen = response_matrix_u_values.shape
n_reco_bins = nptreco * nmassreco
n_gen_bins = nptgen * nmassgen
response_matrix_u_final = response_matrix_u_values.reshape(n_reco_bins, n_gen_bins)

In [None]:
# Display log(content + 1) of the flattened 'u' response matrix; the colour
# scale is capped at 1e-3.
log_response_u = np.log(response_matrix_u_final + 1)
plt.imshow(log_response_u, vmax=1e-3, aspect="equal", cmap="binary")

In [None]:
# Flatten the 'g' response to 2D in the same way as the 'u' one.
# The bin counts used here were unpacked from the 'u' array's shape, so both
# matrices are assumed to share the same binning.
response_matrix_g_final = response_matrix_g_values.reshape(
    nptreco * nmassreco,
    nptgen * nmassgen,
)

In [None]:
# Display log(content + 1) of the flattened 'g' response matrix; the colour
# scale is capped at 1e-3.
log_response_g = np.log(response_matrix_g_final + 1)
plt.imshow(log_response_g, vmax=1e-3, aspect="equal", cmap="binary")

In [None]:
# "pt" projection of the 'ptjet_gen_pre' histogram, drawn as a density on a
# logarithmic y-axis.
output["ptjet_gen_pre"].project("pt").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "pt" projection of the 'ptjet_mjet_u_gen' histogram, drawn as a density on a
# logarithmic y-axis.
# NOTE(review): this repeats a plot already made earlier in the notebook.
output["ptjet_mjet_u_gen"].project("pt").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "pt" projection of the 'ptjet_mjet_u_reco' histogram, drawn as a density on
# a logarithmic y-axis.
output["ptjet_mjet_u_reco"].project("pt").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "mreco" projection of the 'ptjet_mjet_u_reco' histogram, drawn as a density
# on a logarithmic y-axis.
output["ptjet_mjet_u_reco"].project("mreco").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "mreco" projection of the 'ptjet_mjet_g_reco' histogram, drawn as a density
# on a logarithmic y-axis.
output["ptjet_mjet_g_reco"].project("mreco").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "frac" projection of the 'ptjet_reco_over_gen' histogram, drawn as a density
# on a logarithmic y-axis.
output["ptjet_reco_over_gen"].project("frac").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "dr" projection of the 'drjet_reco_gen' histogram, drawn as a density on a
# logarithmic y-axis.
output["drjet_reco_gen"].project("dr").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "mass" projection of the 'mz_gen' histogram, drawn as a density.
output['mz_gen'].project("mass").plot(density=True)
# Linear y-axis here; the log-scale call is kept disabled:
#plt.yscale("log")
plt.show()

In [None]:
# "mass" projection of the 'mz_reco' histogram, drawn as a density.
output['mz_reco'].project("mass").plot(density=True)
# Linear y-axis here; the log-scale call is kept disabled:
#plt.yscale("log")
plt.show()

In [None]:
# "frac" projection of the 'mz_reco_over_gen' histogram, drawn as a density on
# a logarithmic y-axis.
output["mz_reco_over_gen"].project("frac").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "dphi" projection of the 'dphi_z_jet_gen' histogram, drawn as a density on a
# logarithmic y-axis.
output["dphi_z_jet_gen"].project("dphi").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "frac" projection of the 'ptasym_z_jet_gen' histogram, drawn as a density on
# a logarithmic y-axis.
output["ptasym_z_jet_gen"].project("frac").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "frac" projection of the 'ptasym_z_jet_reco' histogram, drawn as a density
# on a logarithmic y-axis.
output["ptasym_z_jet_reco"].project("frac").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "dr" projection of the 'dr_z_jet_gen' histogram, drawn as a density on a
# logarithmic y-axis.
output["dr_z_jet_gen"].project("dr").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "dr" projection of the 'dr_z_jet_reco' histogram, drawn as a density on a
# logarithmic y-axis.
output["dr_z_jet_reco"].project("dr").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "dphi" projection of the 'dphi_z_jet_reco' histogram, drawn as a density on
# a logarithmic y-axis.
output["dphi_z_jet_reco"].project("dphi").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "frac" projection of the 'm_u_jet_reco_over_gen' histogram, drawn as a
# density on a logarithmic y-axis.
output["m_u_jet_reco_over_gen"].project("frac").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "frac" projection of the 'm_g_jet_reco_over_gen' histogram, drawn as a
# density on a logarithmic y-axis.
output["m_g_jet_reco_over_gen"].project("frac").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "dr" projection of the 'drjet_reco_gen' histogram, drawn as a density on a
# logarithmic y-axis.
# NOTE(review): this repeats a plot already made earlier in the notebook.
output["drjet_reco_gen"].project("dr").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# "dr" projection of the 'dr_gen_subjet' histogram, drawn as a density on a
# logarithmic y-axis.
output["dr_gen_subjet"].project("dr").plot(density=True)
plt.gca().set_yscale("log")
plt.show()

In [None]:
# Reduce both reco/gen mass-ratio histograms to the three axes
# (ptgen, mgen, frac), which are sliced bin by bin in the loops below.
resolution_axes = ("ptgen", "mgen", "frac")
resolutions_u = output['m_u_jet_reco_over_gen'].project(*resolution_axes)
resolutions_g = output['m_g_jet_reco_over_gen'].project(*resolution_axes)

In [None]:
# Number of bins along each of the three projected axes, in the projection
# order (ptgen, mgen, frac), taken from the 'u' histogram.
nptbins,nmassbins,nfracbins = resolutions_u.shape

In [None]:
# Axis objects (not arrays of values) for the gen-level pt and mass binning of
# the 'u' histogram; .value(index) is called on them to label the plots below.
ptvals = resolutions_u.axes['ptgen']
mvals = resolutions_u.axes['mgen']

In [None]:
# One figure per (ptgen, mgen) bin: the 'frac' distribution of the groomed
# histogram in that bin. The pt/m values in the title come from the ptvals and
# mvals axis objects.
for pt_bin in range(nptbins):
    for mass_bin in range(nmassbins):
        frac_slice = resolutions_g[pt_bin, mass_bin, :]
        frac_slice.plot()
        bin_values = (ptvals.value(pt_bin), mvals.value(mass_bin))
        plt.title("Groomed, pt = %6.2f, m = %6.2f" % bin_values)
        plt.show()

In [None]:
# One figure per (ptgen, mgen) bin: the 'frac' distribution of the ungroomed
# histogram in that bin. The pt/m values in the title come from the ptvals and
# mvals axis objects.
for pt_bin in range(nptbins):
    for mass_bin in range(nmassbins):
        frac_slice = resolutions_u[pt_bin, mass_bin, :]
        frac_slice.plot()
        bin_values = (ptvals.value(pt_bin), mvals.value(mass_bin))
        plt.title("Ungroomed, pt = %6.2f, m = %6.2f" % bin_values)
        plt.show()