In [45]:
%load_ext autoreload
from coffea import util, processor
from msdprocessor5 import msdProcessor

from coffea.nanoevents import NanoEventsFactory, PFNanoAODSchema
import json
import distributed
import dask
import awkward as ak
import hist
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from matplotlib.animation import FuncAnimation, PillowWriter
import matplotlib.colors as mcolors
from mpl_toolkits.mplot3d import Axes3D
from hist import Hist
import dask_awkward
import os
import math

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [46]:
directory_path = "/uscms/home/jennetd/nobackup/hbb-prod-modes/run3-triggers/data-mc/"

# Generate the fileset: the full path of every entry in directory_path whose
# name ends with "ggF.root". To process fewer files, narrow the filter below.
#
# os.listdir returns names in arbitrary order, so the listing is sorted to keep
# the processing order (and the per-file lists built from it) reproducible
# from one run to the next.
fileset = [
    os.path.join(directory_path, filename)
    for filename in sorted(os.listdir(directory_path))
    if filename.endswith("ggF.root")
]

print("Fileset:", fileset)

Fileset: ['/uscms/home/jennetd/nobackup/hbb-prod-modes/run3-triggers/data-mc/ggF.root']


In [47]:
#SET SOFTDROP PARAMETERS HERE
# These three values are passed unchanged to msdProcessor().process(...) in the
# main loop as n, beta and z_cut. How the processor turns them into a scan is
# not visible in this notebook; the notes below describe only what the
# notebook's own cells do with them.

# Number of steps per axis: the main loop reads n_global * n_global histograms
# from the processor output, one per (beta index, z_cut index) pair.
n_global = 20
# Beta range: plot labels use step * beta_range_global / n_global for
# step = 0 .. n_global - 1, or beta_range_global itself when n_global == 1.
beta_range_global = 1  
# z_cut range: plot labels and plot file names use
# step * z_cut_range_global / n_global for step = 0 .. n_global - 1, or
# z_cut_range_global itself when n_global == 1.
z_cut_range_global = 0.02

# End of user-editable parameters.

In [48]:
#Truncates to 3 decimal places

def trunc(num):
    """Truncate ``num`` toward zero to three decimal places.

    Args:
        num: A real number (int or float).

    Returns:
        float: ``num`` with every digit after the third decimal place dropped,
        e.g. ``trunc(0.0019) == 0.001`` and ``trunc(-1.2345) == -1.234``.

    Raises:
        ValueError: If ``num`` is NaN.
        OverflowError: If ``num`` is infinite.
    """
    # num * 1000 can land a hair below the intended integer because of binary
    # floating-point error (1.001 * 1000 == 1000.9999999999999), and truncating
    # that directly would lose a whole thousandth. Rounding the scaled value to
    # 9 decimals first absorbs that noise without changing the result for any
    # value that is genuinely between two thousandths.
    scaled = round(num * 1000, 9)
    return math.trunc(scaled) / 1000

In [49]:
# Open every file in the fileset and record, in two parallel lists:
#   prod_mode_matrix[i] - production mode, i.e. the file name without its
#                         directory and extension ("ggF" for ".../ggF.root")
#   events_matrix[i]    - the events collection read from that file's
#                         "/Events" tree with the PFNanoAODSchema
events_matrix = []
prod_mode_matrix = []

for file in fileset:
    # Derive the production mode from the file name itself rather than by
    # splitting on a hard-coded directory name, so it stays a plain name
    # (usable in output file names) even if directory_path changes.
    prod_mode = os.path.splitext(os.path.basename(file))[0]
    prod_mode_matrix.append(prod_mode)

    print(f"\nProcessing file:  {file}")
    print(f"Production mode:  {prod_mode}")

    events_matrix.append(
        NanoEventsFactory.from_root(
            {file: "/Events"},
            schemaclass=PFNanoAODSchema,
        ).events()
    )

print(events_matrix)


Processing file:  /uscms/home/jennetd/nobackup/hbb-prod-modes/run3-triggers/data-mc/ggF.root
Production mode:  ggF
[dask.awkward<from-uproot, npartitions=1>]


In [50]:
# Main loop
#
# For every input file: run msdProcessor once, then draw one figure per z_cut
# step with the histograms of all beta steps overlaid on the same axes.
#
# Each figure is saved as "<production mode>-zcut<z_cut>.png" in the current
# working directory. Saving the individual histograms as .coffea files is
# currently disabled (see the commented-out snippet in the inner loop).
#
# With n_global = 1 (set above) there is a single beta/z_cut pair, so exactly
# one figure is saved per file.

n_betas = n_global
n_zcuts = n_betas

# Values used in labels, titles and file names for each grid step.
# NOTE(review): these are recomputed here for display only; that they equal
# the beta/z_cut values msdProcessor actually applies is an assumption — TODO
# confirm against the processor.
if n_global == 1:
    beta_values = [beta_range_global]
    z_cut_values = [z_cut_range_global]
else:
    beta_values = [
        trunc(beta * beta_range_global / n_global) for beta in range(n_betas)
    ]
    z_cut_values = [
        trunc(z_cut * z_cut_range_global / n_global) for z_cut in range(n_zcuts)
    ]

# Fixed axis limits so the figures of different z_cut values are comparable.
x_min, x_max = 0, 200
y_min, y_max = 0, 40

for prod_mode, events in zip(prod_mode_matrix, events_matrix):
    result = msdProcessor().process(
        events,
        beta=beta_range_global,
        z_cut=z_cut_range_global,
        n=n_global,
    )
    # dask.compute returns a tuple with one entry per argument, so compute[0]
    # is the evaluated processor output.
    compute = dask.compute(result)

    # compute_matrix[beta][z_cut] is the histogram stored under the key
    # "b{beta}{z_cut}" in element [0] of the processor output.
    # NOTE(review): the key joins both indices without a separator, so for
    # n_global > 10 different pairs map to the same key (beta=1, z_cut=12 and
    # beta=11, z_cut=2 both give "b112"). The format has to match what
    # msdProcessor emits, so it is left unchanged here — confirm on the
    # processor side.
    histograms = compute[0][0]
    compute_matrix = [
        [histograms[f"b{beta}{z_cut}"] for z_cut in range(n_zcuts)]
        for beta in range(n_betas)
    ]

    for z_cut, current_z_cut in enumerate(z_cut_values):
        fig, ax = plt.subplots()

        for beta, current_beta in enumerate(beta_values):
            # Saving each histogram to disk is disabled; to re-enable:
            #   coffea_filename = f"{prod_mode}_beta{current_beta}_zcut{current_z_cut}.coffea"
            #   util.save(compute_matrix[beta][z_cut], coffea_filename)
            #   print(f"Saved Coffea file: {coffea_filename}")

            compute_matrix[beta][z_cut].plot1d(
                ax=ax,
                label=f"beta = {current_beta}, z_cut = {current_z_cut}",
            )

        ax.set_xlim(x_min, x_max)
        ax.set_ylim(y_min, y_max)

        ax.set_title(f"File = {prod_mode}, z_cut = {current_z_cut}")
        ax.legend()

        plot_filename = f"{prod_mode}-zcut{current_z_cut}.png"
        # Save through the figure object so the file always contains this
        # figure, independent of pyplot's notion of the "current" figure.
        fig.savefig(plot_filename, dpi=300)
        # Close explicitly: one figure is created per z_cut step per file.
        plt.close(fig)

        print(f"Saved plot: {plot_filename}")


['area', 'btagCSVV2', 'btagDDBvLV2', 'btagDDCvBV2', 'btagDDCvLV2', 'btagDeepB', 'btagHbb', 'deepTagMD_H4qvsQCD', 'deepTagMD_HbbvsQCD', 'deepTagMD_TvsQCD', 'deepTagMD_WvsQCD', 'deepTagMD_ZHbbvsQCD', 'deepTagMD_ZHccvsQCD', 'deepTagMD_ZbbvsQCD', 'deepTagMD_ZvsQCD', 'deepTagMD_bbvsLight', 'deepTagMD_ccvsLight', 'deepTag_H', 'deepTag_QCD', 'deepTag_QCDothers', 'deepTag_TvsQCD', 'deepTag_WvsQCD', 'deepTag_ZvsQCD', 'eta', 'mass', 'msoftdrop', 'n2b1', 'n3b1', 'particleNetMD_QCD', 'particleNetMD_Xbb', 'particleNetMD_Xcc', 'particleNetMD_Xqq', 'particleNet_H4qvsQCD', 'particleNet_HbbvsQCD', 'particleNet_HccvsQCD', 'particleNet_QCD', 'particleNet_TvsQCD', 'particleNet_WvsQCD', 'particleNet_ZvsQCD', 'particleNet_mass', 'phi', 'pt', 'rawFactor', 'tau1', 'tau2', 'tau3', 'tau4', 'lsf3', 'jetId', 'subJetIdx1', 'subJetIdx2', 'electronIdx3SJ', 'muonIdx3SJ', 'nConstituents', 'DDX_tau1_flightDistance2dSig', 'DDX_tau1_trackEtaRel_0', 'DDX_tau1_trackEtaRel_1', 'DDX_tau1_trackEtaRel_2', 'DDX_tau1_trackSip3dS