In [1]:
import argparse
import json
import os
import pathlib
import pickle as pkl
import shutil
import sys
import time
import warnings
from collections import defaultdict
from typing import Dict, List, Optional

import awkward as ak
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

### schema
import uproot
from coffea import nanoevents, processor
from coffea.analysis_tools import PackedSelection, Weights
from coffea.nanoevents import BaseSchema, NanoAODSchema, NanoEventsFactory
from coffea.nanoevents.methods import candidate, vector

import mplhep as hep

# Apply the CMS plotting style from mplhep to all matplotlib figures.
plt.style.use(hep.style.CMS)

### awkward 1.10.0
# Make the parent directory importable. Guarded so that re-running this cell
# does not keep appending duplicate entries to sys.path.
if "../" not in sys.path:
    sys.path.append("../")

# Register the mixin class name used for the "PFCands" and "SV" collections.
nanoevents.PFNanoAODSchema.mixins["PFCands"] = "PFCand"
nanoevents.PFNanoAODSchema.mixins["SV"] = "PFCand"

# Silence warnings that are expected with these inputs.
warnings.filterwarnings("ignore", message="Found duplicate branch ")
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="Missing cross-reference index ")
warnings.filterwarnings("ignore", message="divide by zero encountered in log")
# np.seterr returns the previous settings; the trailing semicolon keeps that
# dict from being echoed as the cell output.
np.seterr(invalid="ignore");

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [2]:
# Enable the autoreload extension in mode 2 so that edited modules are
# re-imported automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [38]:
# List the contents of every sample directory under the mass-regression ntuples folder.
# NOTE(review): later cells open files under "datafiles/ntuples/", not this
# directory — confirm both locations hold the same ntuples.
! ls experiments/mass_regression/ntuples/*

experiments/mass_regression/ntuples/hww:
out.root

experiments/mass_regression/ntuples/qcd:
out.root

experiments/mass_regression/ntuples/top:
out.root

experiments/mass_regression/ntuples/wjets:
out.root


# Higgs file

In [132]:
# Open the HWW ntuple; the bare last line displays the "Events" tree summary.
hww_ntuple = "datafiles/ntuples/hww/out.root"
f = uproot.open(hww_ntuple)
f["Events"]

<TTree 'Events' (209 branches) at 0x00029a482fa0>

In [90]:
# Show the branch names stored in the "Events" tree of the file bound to `f`.
events_tree = f["Events"]
events_tree.keys()

['fj_eta',
 'fj_phi',
 'fj_mass',
 'fj_pt',
 'fj_msoftdrop',
 'matched_mask',
 'fj_genjetmass',
 'fj_genRes_pt',
 'fj_genRes_eta',
 'fj_genRes_phi',
 'fj_genRes_mass',
 'fj_nprongs',
 'fj_ncquarks',
 'fj_lepinprongs',
 'fj_nquarks',
 'fj_H_VV_4q',
 'fj_H_VV_elenuqq',
 'fj_H_VV_munuqq',
 'fj_H_VV_leptauelvqq',
 'fj_H_VV_leptaumuvqq',
 'fj_H_VV_hadtauvqq',
 'fj_QCDb',
 'fj_QCDbb',
 'fj_QCDc',
 'fj_QCDcc',
 'fj_QCDothers',
 'fj_V_2q',
 'fj_V_elenu',
 'fj_V_munu',
 'fj_V_taunu',
 'fj_Top_nquarksnob',
 'fj_Top_nbquarks',
 'fj_Top_ncquarks',
 'fj_Top_nleptons',
 'fj_Top_nele',
 'fj_Top_nmu',
 'fj_Top_ntau',
 'fj_Top_taudecay',
 'met_relpt',
 'met_relphi',
 'lep_dR_fj',
 'lep_pt',
 'lep_pt_ratio',
 'fj_ParT_probHWqqWqq0c',
 'fj_ParT_probHWqqWqq1c',
 'fj_ParT_probHWqqWqq2c',
 'fj_ParT_probHWqqWq0c',
 'fj_ParT_probHWqqWq1c',
 'fj_ParT_probHWqqWq2c',
 'fj_ParT_probHWqqWev0c',
 'fj_ParT_probHWqqWev1c',
 'fj_ParT_probHWqqWmv0c',
 'fj_ParT_probHWqqWmv1c',
 'fj_ParT_probHWqqWtauev0c',
 'fj_ParT_prob

In [100]:
# Number of entries whose matched_mask flag is set.
matched = f["Events"]["matched_mask"].array()
matched.to_numpy().sum()

4231

In [101]:
# matched Higgs: target mass = 125GeV
# Selection = matched AND exactly one lepton in the prongs AND exactly two quarks.
events_tree = f["Events"]
matched = events_tree["matched_mask"].array()
one_lep_inprongs = events_tree["fj_lepinprongs"].array() == 1
two_quarks = events_tree["fj_nquarks"].array() == 2

matched & one_lep_inprongs & two_quarks

<Array [False, False, False, ... False, False] type='8065 * bool'>

In [102]:
# Number of entries passing the matched-Higgs selection; relies on
# `one_lep_inprongs` and `two_quarks` already being defined in the kernel.
higgs_selection = f["Events"]["matched_mask"].array() & one_lep_inprongs & two_quarks
higgs_selection.to_numpy().sum()

1258

# QCD file

In [133]:
# Open the QCD ntuple (rebinds `f`); the bare last line displays the "Events" tree summary.
qcd_ntuple = "datafiles/ntuples/qcd/out.root"
f = uproot.open(qcd_ntuple)
f["Events"]

<TTree 'Events' (209 branches) at 0x00029a9cae80>

In [134]:
# Show the branch names stored in the "Events" tree of the file bound to `f`.
events_tree = f["Events"]
events_tree.keys()

['fj_eta',
 'fj_phi',
 'fj_mass',
 'fj_pt',
 'fj_msoftdrop',
 'matched_mask',
 'fj_genjetmass',
 'fj_genRes_pt',
 'fj_genRes_eta',
 'fj_genRes_phi',
 'fj_genRes_mass',
 'fj_nprongs',
 'fj_ncquarks',
 'fj_lepinprongs',
 'fj_nquarks',
 'fj_H_VV_4q',
 'fj_H_VV_elenuqq',
 'fj_H_VV_munuqq',
 'fj_H_VV_leptauelvqq',
 'fj_H_VV_leptaumuvqq',
 'fj_H_VV_hadtauvqq',
 'fj_QCDb',
 'fj_QCDbb',
 'fj_QCDc',
 'fj_QCDcc',
 'fj_QCDothers',
 'fj_V_2q',
 'fj_V_elenu',
 'fj_V_munu',
 'fj_V_taunu',
 'fj_Top_nquarksnob',
 'fj_Top_nbquarks',
 'fj_Top_ncquarks',
 'fj_Top_nleptons',
 'fj_Top_nele',
 'fj_Top_nmu',
 'fj_Top_ntau',
 'fj_Top_taudecay',
 'met_relpt',
 'met_relphi',
 'lep_dR_fj',
 'lep_pt',
 'lep_pt_ratio',
 'fj_ParT_probHWqqWqq0c',
 'fj_ParT_probHWqqWqq1c',
 'fj_ParT_probHWqqWqq2c',
 'fj_ParT_probHWqqWq0c',
 'fj_ParT_probHWqqWq1c',
 'fj_ParT_probHWqqWq2c',
 'fj_ParT_probHWqqWev0c',
 'fj_ParT_probHWqqWev1c',
 'fj_ParT_probHWqqWmv0c',
 'fj_ParT_probHWqqWmv1c',
 'fj_ParT_probHWqqWtauev0c',
 'fj_ParT_prob

# WJets file

In [118]:
# Open the W+jets ntuple (rebinds `f`); the bare last line displays the "Events" tree summary.
wjets_ntuple = "datafiles/ntuples/wjets/out.root"
f = uproot.open(wjets_ntuple)
f["Events"]

<TTree 'Events' (209 branches) at 0x000298bf5e50>

In [119]:
# Show the branch names stored in the "Events" tree of the file bound to `f`.
events_tree = f["Events"]
events_tree.keys()

['fj_eta',
 'fj_phi',
 'fj_mass',
 'fj_pt',
 'fj_msoftdrop',
 'matched_mask',
 'fj_genjetmass',
 'fj_genRes_pt',
 'fj_genRes_eta',
 'fj_genRes_phi',
 'fj_genRes_mass',
 'fj_nprongs',
 'fj_ncquarks',
 'fj_lepinprongs',
 'fj_nquarks',
 'fj_H_VV_4q',
 'fj_H_VV_elenuqq',
 'fj_H_VV_munuqq',
 'fj_H_VV_leptauelvqq',
 'fj_H_VV_leptaumuvqq',
 'fj_H_VV_hadtauvqq',
 'fj_QCDb',
 'fj_QCDbb',
 'fj_QCDc',
 'fj_QCDcc',
 'fj_QCDothers',
 'fj_V_2q',
 'fj_V_elenu',
 'fj_V_munu',
 'fj_V_taunu',
 'fj_Top_nquarksnob',
 'fj_Top_nbquarks',
 'fj_Top_ncquarks',
 'fj_Top_nleptons',
 'fj_Top_nele',
 'fj_Top_nmu',
 'fj_Top_ntau',
 'fj_Top_taudecay',
 'met_relpt',
 'met_relphi',
 'lep_dR_fj',
 'lep_pt',
 'lep_pt_ratio',
 'fj_ParT_probHWqqWqq0c',
 'fj_ParT_probHWqqWqq1c',
 'fj_ParT_probHWqqWqq2c',
 'fj_ParT_probHWqqWq0c',
 'fj_ParT_probHWqqWq1c',
 'fj_ParT_probHWqqWq2c',
 'fj_ParT_probHWqqWev0c',
 'fj_ParT_probHWqqWev1c',
 'fj_ParT_probHWqqWmv0c',
 'fj_ParT_probHWqqWmv1c',
 'fj_ParT_probHWqqWtauev0c',
 'fj_ParT_prob

In [120]:
# Peek at the fj_V_munu branch of the file currently bound to `f`.
v_munu_branch = f["Events"]["fj_V_munu"]
v_munu_branch.array()

<Array [0, 0, 0, 0, 0, 0, ... 1, 0, 0, 1, 0, 0] type='6699 * int32'>

In [121]:
# merged Ws: target mass = 80GeV
# Selection = matched AND a positive fj_V_munu or fj_V_elenu flag.
# NOTE(review): fj_V_taunu is not part of this mask — confirm that is intended.
events_tree = f["Events"]
has_munu = events_tree["fj_V_munu"].array() > 0
has_elenu = events_tree["fj_V_elenu"].array() > 0
lep_in_W = has_munu | has_elenu

events_tree["matched_mask"].array() & lep_in_W

<Array [False, False, False, ... True, False] type='6699 * bool'>

In [122]:
# unmatched Ws: target mass = fj_genjetmass
# Selection = element-wise negation of matched_mask.
matched = f["Events"]["matched_mask"].array()
~matched

<Array [True, True, True, ... False, True] type='6699 * bool'>

# Top file

In [123]:
# Open the top ntuple (rebinds `f`); the bare last line displays the "Events" tree summary.
top_ntuple = "datafiles/ntuples/top/out.root"
f = uproot.open(top_ntuple)
f["Events"]

<TTree 'Events' (209 branches) at 0x00029937a340>

In [124]:
# Show the branch names stored in the "Events" tree of the file bound to `f`.
events_tree = f["Events"]
events_tree.keys()

['fj_eta',
 'fj_phi',
 'fj_mass',
 'fj_pt',
 'fj_msoftdrop',
 'matched_mask',
 'fj_genjetmass',
 'fj_genRes_pt',
 'fj_genRes_eta',
 'fj_genRes_phi',
 'fj_genRes_mass',
 'fj_nprongs',
 'fj_ncquarks',
 'fj_lepinprongs',
 'fj_nquarks',
 'fj_H_VV_4q',
 'fj_H_VV_elenuqq',
 'fj_H_VV_munuqq',
 'fj_H_VV_leptauelvqq',
 'fj_H_VV_leptaumuvqq',
 'fj_H_VV_hadtauvqq',
 'fj_QCDb',
 'fj_QCDbb',
 'fj_QCDc',
 'fj_QCDcc',
 'fj_QCDothers',
 'fj_V_2q',
 'fj_V_elenu',
 'fj_V_munu',
 'fj_V_taunu',
 'fj_Top_nquarksnob',
 'fj_Top_nbquarks',
 'fj_Top_ncquarks',
 'fj_Top_nleptons',
 'fj_Top_nele',
 'fj_Top_nmu',
 'fj_Top_ntau',
 'fj_Top_taudecay',
 'met_relpt',
 'met_relphi',
 'lep_dR_fj',
 'lep_pt',
 'lep_pt_ratio',
 'fj_ParT_probHWqqWqq0c',
 'fj_ParT_probHWqqWqq1c',
 'fj_ParT_probHWqqWqq2c',
 'fj_ParT_probHWqqWq0c',
 'fj_ParT_probHWqqWq1c',
 'fj_ParT_probHWqqWq2c',
 'fj_ParT_probHWqqWev0c',
 'fj_ParT_probHWqqWev1c',
 'fj_ParT_probHWqqWmv0c',
 'fj_ParT_probHWqqWmv1c',
 'fj_ParT_probHWqqWtauev0c',
 'fj_ParT_prob

In [75]:
# Peek at the fj_Top_nleptons branch of the file currently bound to `f`.
top_nleptons_branch = f["Events"]["fj_Top_nleptons"]
top_nleptons_branch.array()

<Array [0, 1, 0, 0, 1, 1, ... 1, 0, 1, 1, 1, 0] type='4431 * int64'>

In [30]:
# Peek at the fj_V_munu branch of the file currently bound to `f`.
v_munu_branch = f["Events"]["fj_V_munu"]
v_munu_branch.array()

<Array [0, 0, 0, 0, 0, 0, ... 0, 0, 0, 0, 0, 0] type='4431 * float64'>

In [76]:
# fully merged Top: target mass = 175GeV
# Selection = matched AND fj_Top_nleptons > 0 AND fj_Top_nbquarks > 0.
events_tree = f["Events"]
lep_in_jet = events_tree["fj_Top_nleptons"].array() > 0
bquark_in_jet = events_tree["fj_Top_nbquarks"].array() > 0

events_tree["matched_mask"].array() & lep_in_jet & bquark_in_jet

<Array [False, True, False, ... False, False] type='4431 * bool'>

In [77]:
# W merged Top: target mass = 80GeV
# Selection = matched AND lep_in_jet AND NOT bquark_in_jet; relies on
# `lep_in_jet` and `bquark_in_jet` already being defined in the kernel.
matched = f["Events"]["matched_mask"].array()
no_bquark_in_jet = ~bquark_in_jet
matched & lep_in_jet & no_bquark_in_jet

<Array [False, False, False, ... True, False] type='4431 * bool'>

In [78]:
# unmatched Top: target mass = fj_genjetmass
# Selection = element-wise negation of matched_mask.
matched = f["Events"]["matched_mask"].array()
~matched

<Array [False, False, True, ... False, False] type='4431 * bool'>

# Weaver output

In [135]:
# List the files in the mass-regression prediction output directory.
! ls experiments/mass_regression/predict

pred.root


In [136]:
# Open the prediction file (rebinds `f`) and show the branch names of its "Events" tree.
pred_file = "experiments/mass_regression/predict/pred.root"
f = uproot.open(pred_file)
f["Events"].keys()

['target_mass',
 'output_target_mass',
 'fj_pt',
 'fj_msoftdrop',
 'fj_eta',
 'fj_phi']

In [137]:
# Peek at the output_target_mass branch of the file currently bound to `f`.
output_mass_branch = f["Events"]["output_target_mass"]
output_mass_branch.array()

<Array [10.3, 11.8, 13.8, ... 13.6, 13, 9.62] type='26791 * float32'>

In [138]:
# Peek at the target_mass branch of the file currently bound to `f`.
target_mass_branch = f["Events"]["target_mass"]
target_mass_branch.array()

<Array [39.7, 0, 46.5, 40.6, ... 101, 80, 53.3] type='26791 * float64'>