In [2]:
# load the data from the uproot files
import uproot
import numpy as np
import awkward as ak

# Branches read from every input file: the (pt, eta, phi) kinematics of the
# Jet and Muon collections, plus the DeepB b-tag discriminator of each jet.
# The comprehension expands, in order, to
#   Jet_pt, Jet_eta, Jet_phi, Muon_pt, Muon_eta, Muon_phi
# and Jet_btagDeepB is appended last.
#
# Branch names that were listed here before but are currently disabled
# (each was paired with an "n"-prefixed counter branch, e.g. nMuon_mass):
#   Muon_mass, Muon_miniIsoId, Muon_tightId,
#   FatJet_particleNet_TvsQCD, FatJet_pt, FatJet_eta, FatJet_phi, FatJet_mass,
#   Jet_jetId, Jet_mass,
#   PuppiMET_pt, PuppiMET_phi,
#   run, luminosityBlock, event,
#   genWeight, Pileup_nTrueInt
branches = [
    f"{collection}_{variable}"
    for collection in ("Jet", "Muon")
    for variable in ("pt", "eta", "phi")
]
branches.append("Jet_btagDeepB")

# Process name -> path of the preprocessed ROOT file to load for it.
# Only the W+jets sample is enabled; the ttbar samples are kept below as
# commented-out entries so they can be switched back on individually.
file_dict = dict(
    wjets="preproc_uproot/z-prime-ttbar-data/wjets__nominal/file__0/part0.root",
    # ttbar_had="preproc_uproot/z-prime-ttbar-data/ttbar_had__nominal/file__0/part0.root",
    # ttbar_lep="preproc_uproot/z-prime-ttbar-data/ttbar_lep__nominal/file__0/part0.root",
    # ttbar_semilept="preproc_uproot/z-prime-ttbar-data/ttbar_semilep__nominal/file__0/part0.root",
)

# Event-level features per process: data_dict[proc][feature] is a flat NumPy
# array with one entry per event that passed the selection cut.
data_dict = {}
for proc, file_path in file_dict.items():
    print(f"Processing {proc}...")

    # uproot.concatenate selects what to read through `expressions`; it has
    # no `branches` parameter, so passing `branches=` did not restrict the
    # read to the requested branches.
    # The cut keeps only events with at least two jets, which is what makes
    # the [:, 0] and [:, 1] jet lookups below safe.
    arrays = uproot.concatenate(
        file_path,
        expressions=branches,
        cut="nJet_pt >= 2",
        library="ak",
    )

    # Per-jet kinematics (jagged: one sub-list of jets per event).
    jet_pt = arrays["Jet_pt"]
    jet_eta = arrays["Jet_eta"]
    jet_phi = arrays["Jet_phi"]

    # Cartesian momentum components of every jet.
    jet_px = jet_pt * np.cos(jet_phi)
    jet_py = jet_pt * np.sin(jet_phi)
    jet_pz = jet_pt * np.sinh(jet_eta)

    # Number of jets per event.
    n_jet = ak.num(jet_pt, axis=1).to_numpy()

    # ST: scalar sum of jet pt plus scalar sum of muon pt, per event.
    # The sums get their own names so the jagged `jet_pt` is not overwritten.
    jet_ht = ak.sum(jet_pt, axis=1)
    lep_ht = ak.sum(arrays["Muon_pt"], axis=1)
    st = (jet_ht + lep_ht).to_numpy()

    # B-tag scores of the first and second jet in each event
    # (assumes jets are stored in descending-pt order -- TODO confirm).
    jet_btag_scores = arrays["Jet_btagDeepB"]
    leading_jet_btag = jet_btag_scores[:, 0].to_numpy()
    subleading_jet_btag = jet_btag_scores[:, 1].to_numpy()

    # S33: the zz component of the jet sphericity tensor,
    #   S_ab = sum_jets(p_a * p_b) / sum_jets(|p|^2).
    # Only this component is used, so the other entries are not computed.
    denominator = ak.sum(jet_px**2 + jet_py**2 + jet_pz**2, axis=1)
    S33 = (ak.sum(jet_pz * jet_pz, axis=1) / denominator).to_numpy()

    # Keep the results per process; otherwise each iteration would overwrite
    # the previous one and data_dict would stay empty.
    data_dict[proc] = {
        "n_jet": n_jet,
        "st": st,
        "leading_jet_btag": leading_jet_btag,
        "subleading_jet_btag": subleading_jet_btag,
        "S33": S33,
    }



Processing wjets...
