In [1]:
import os
"""
sets overrides the parent directory as current directory
"""
# Get the current directory path
current_directory = os.getcwd()

# Get the parent directory path
parent_directory = os.path.dirname(current_directory)

# Change the current directory to the parent directory
os.chdir(parent_directory)

# Verify the change
new_directory = os.getcwd()
print("New directory path:", new_directory)


# Now you can import modules from the parent directory
# from parent_module import my_function
from src.copperhead_processor import EventProcessor
from lib.get_parameters import getParametersForYr

import awkward as ak
import numpy as np
from omegaconf import OmegaConf
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
import sys
from coffea.nanoevents.methods import vector


New directory path: /work/users/yun79/valerie/fork/copperheadV2


Issue: coffea.lookup_tools.json_lookup will be removed by August 2024. Please use lumi_tools or correctionlib instead!.
  from coffea.lookup_tools.json_lookup import json_lookup
Issue: coffea.nanoevents.methods.vector will be removed and replaced with scikit-hep vector. Nanoevents schemas internal to coffea will be migrated. Otherwise please consider using that package!.
  from coffea.nanoevents.methods import vector


In [2]:
param_path = "./parameters/"
config = getParametersForYr(param_path, "2018")
coffea_processor = EventProcessor(config, test_mode=True) # we assume that test_mode is True

In [3]:
def testJetVector(jets):
    """
    This is a helper function in debugging observed inconsistiency in Jet variables after
    migration from coffea native vectors to hep native vectors
    """
    padded_jets = ak.pad_none(jets, target=2)
    jet1 = padded_jets[:, 0]
    jet2 = padded_jets[:, 1]
    normal_dijet =  jet1 + jet2
    # explicitly reinitialize the jets
    jet1_4D_vec = ak.zip({"pt":jet1.pt, "eta":jet1.eta, "phi":jet1.phi, "mass":jet1.mass}, with_name="PtEtaPhiMLorentzVector", behavior=vector.behavior)
    jet2_4D_vec = ak.zip({"pt":jet2.pt, "eta":jet2.eta, "phi":jet2.phi, "mass":jet2.mass}, with_name="PtEtaPhiMLorentzVector", behavior=vector.behavior)
    new_dijet = jet1_4D_vec + jet2_4D_vec
    target_arr = ak.fill_none(new_dijet.mass.compute(), value=-99.0)
    out_arr = ak.fill_none(normal_dijet.mass.compute(), value=-99.0)
    rel_err = np.abs((target_arr-out_arr)/target_arr)
    print(f"max rel_err: {ak.max(rel_err)}")


In [4]:
# np.set_printoptions(threshold=sys.maxsize)

samples = [
    "dy_M-100To200", 
    "vbf_powheg",
    "ggh_powheg", 
    "data_A",
]
for sample in samples:
    load_path = f"./test/stage1_inputs/{sample}"
    
    dataset_dict = OmegaConf.load(load_path + "/dataset_dict.yaml")
    dataset_dict = OmegaConf.to_object(dataset_dict) # convert to normal dictionary, which may not be necessary, but oh well
    events = NanoEventsFactory.from_root(
        dataset_dict["files"],
        schemaclass=NanoAODSchema,
        metadata= dataset_dict["metadata"],
        uproot_options={
            "timeout":2400,
        },
    ).events()
    print("test jet right after from_root")
    # print(type(events.Muon.compute()))
    # print(type(events.Jet.compute()))
    # print((events.Jet.pt.compute()))
    # testJetVector(events.Jet)
    

    out_collections = coffea_processor.process(events)
    # out_collections = ak.zip(out_collections, depth_limit=1).compute()
    out_collections = ak.zip(out_collections, depth_limit=1).persist().to_parquet(".")
    # raise ValueError
    out_collections = ak.from_parquet("part0.parquet")

    load_path = f"./test/stage1_outputs/{sample}"
    target_out = ak.from_parquet(load_path+"/part0.parquet")
    for field in target_out.fields:
        target_arr = ak.to_numpy(ak.fill_none(target_out[field], -99.0))
        out_arr = ak.to_numpy(ak.fill_none(out_collections[field], -99.0))
        # print(f"field: {field}")
        # print(f"(target_arr): {(target_arr)}")
        # print(f"(out_arr): {(out_arr)}")
        # print(f"len(target_arr): {len(target_arr)}")
        # print(f"len(out_arr): {len(out_arr)}")
        is_same_arr = np.all(np.isclose(target_arr,out_arr, atol=1e-05))
        # is_same_arr = ak.all(ak.isclose(target_arr,out_arr))
        if not is_same_arr:
            print(f"ERROR: not same array for field {field}!")
            # print(f"target_arr: {target_arr}")
            # print(f"out_arr: {out_arr}")
            rel_err = np.abs((target_arr-out_arr)/target_arr)
            # print(f"rel err: {rel_err}")
            print(f"rel err max: {np.max(rel_err)}")
            # print(f"rel err mean: {np.mean(rel_err)}")
            argmax = np.argmax(rel_err)
            # print(f"target_arr argmax: {target_arr[argmax]}")
            # print(f"out_arr argmax: {out_arr[argmax]}")
            bigErr_filter = rel_err > 1e-3
            print(f"N events with big errors: {np.sum(bigErr_filter)}")
            # raise ValueError
    print(f"Success in {sample}!")



test jet right after from_root
dataset: dy_M-100To200
events.metadata: {'sumGenWgts': 7720081.838819998, 'nGenEvts': 2000000, 'data_entries': None, 'fraction': 1.0, 'original_fraction': 1.0, 'is_mc': True, 'dataset': 'dy_M-100To200', 'NanoAODv': 9, 'version': 'latest'}
NanoAODv: 9
doing rochester!


        is partitionwise-compatible with dask.awkward<dask, npartitions=1>
        (e.g. counts comes from a dak.num(array, axis=1)),
        otherwise this unflatten operation will fail when computed!


doing fsr!
doing geofit!
electron_id: mvaFall17V2Iso_WP90
small file test sumWeights: 6798190.0
muons mass_resolution dpt1: dask.awkward<multiply, npartitions=1>
prepare jets NanoAODv: 9
test ject vector right after prepare_jets
do jec!
test ject vector b4 JEC
test ject vector after JEC
cross_section: 254.2
doing musf!
doing LHE!
doing pdf!
jet loop matched_mu_pass b4 : dask.awkward<bitwise-and, npartitions=1>
jet loop NanoAODv: 9
doing jet puid weights!
sj_dict.keys(): dict_keys(['nsoftjets2_nominal', 'htsoft2_nominal', 'nsoftjets5_nominal', 'htsoft5_nominal'])
adding QGL weights!
doing btag wgt!
doing zpt weight!
weight statistics: dict_keys(['genWeight', 'genWeight_normalization', 'xsec', 'lumi', 'muID', 'muIso', 'muTrig', 'LHERen', 'LHEFac', 'pdf_2rms', 'jetpuid_wgt', 'qgl'])
adding btag wgts!


  return impl(*broadcasted_args, **(kwargs or {}))


ERROR: not same array for field gjj_mass!
rel err max: 0.010489715695605894
N events with big errors: 1
ERROR: not same array for field jet1_rap!
rel err max: 5.458580238563974e-05
N events with big errors: 0
ERROR: not same array for field jet1_rho!
rel err max: 33.0673301805485
N events with big errors: 648
ERROR: not same array for field jet2_rho!
rel err max: 31.637262268746436
N events with big errors: 255
ERROR: not same array for field jet2_rap!
rel err max: 4.973848859202547e-05
N events with big errors: 0
ERROR: not same array for field jj_mass!
rel err max: 8.167912451695247e-05
N events with big errors: 0
ERROR: not same array for field wgt_nominal_total!
rel err max: 2.3590397979402227e-05
N events with big errors: 0
Success in dy_M-100To200!
test jet right after from_root
dataset: vbf_powheg
events.metadata: {'sumGenWgts': 7720081.838819998, 'nGenEvts': 2000000, 'data_entries': None, 'fraction': 1.0, 'original_fraction': 1.0, 'is_mc': True, 'dataset': 'vbf_powheg', 'NanoAO

  return impl(*broadcasted_args, **(kwargs or {}))


In [None]:
target_out.fields

In [8]:
! cp /depot/cms/kernels/root632/lib/python3.12/site-packages/coffea/nanoevents/methods/candidate.py . 

In [7]:
! pwd

/work/users/yun79/valerie/fork/copperheadV2
