In [1]:
import os
"""
sets overrides the parent directory as current directory
"""
# Get the current directory path
current_directory = os.getcwd()

# Get the parent directory path
parent_directory = os.path.dirname(current_directory)

# Change the current directory to the parent directory
os.chdir(parent_directory)

# Verify the change
new_directory = os.getcwd()
print("New directory path:", new_directory)


# Now you can import modules from the parent directory
# from parent_module import my_function
from src.copperhead_processor import EventProcessor
from src.lib.get_parameters import getParametersForYr

import awkward as ak
import numpy as np
from omegaconf import OmegaConf
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
import sys

New directory path: /work/users/yun79/valerie/fork/copperheadV2


Issue: coffea.lookup_tools.json_lookup will be removed by August 2024. Please use lumi_tools or correctionlib instead!.
  from coffea.lookup_tools.json_lookup import json_lookup
Issue: coffea.nanoevents.methods.vector will be removed and replaced with scikit-hep vector. Nanoevents schemas internal to coffea will be migrated. Otherwise please consider using that package!.
  from coffea.nanoevents.methods import vector


In [2]:
param_path = "./configs/parameters/"
config = getParametersForYr(param_path, "2018")
coffea_processor = EventProcessor(config, test_mode=True) # we assume that test_mode is True

In [3]:
np.set_printoptions(threshold=sys.maxsize)

samples = [
    # "dy_M-100To200", 
    "vbf_powheg",
    # "ggh_powheg", 
    # "data_A",
]
for sample in samples:
    load_path = f"./test/stage1_inputs/{sample}"
    
    dataset_dict = OmegaConf.load(load_path + "/dataset_dict.yaml")
    dataset_dict = OmegaConf.to_object(dataset_dict) # convert to normal dictionary, which may not be necessary, but oh well
    events = NanoEventsFactory.from_root(
        dataset_dict["files"],
        schemaclass=NanoAODSchema,
        metadata= dataset_dict["metadata"],
        uproot_options={
            "timeout":2400,
        },
    ).events()
    out_collections = coffea_processor.process(events)
    out_collections = ak.zip(out_collections, depth_limit=1).compute()

    load_path = f"./test/stage1_outputs/{sample}"
    target_out = ak.from_parquet(load_path+"/part0.parquet")
    for field in target_out.fields:
        target_arr = ak.to_numpy(ak.fill_none(target_out[field], -99.0))
        out_arr = ak.to_numpy(ak.fill_none(out_collections[field], -99.0))
        # print(f"len(target_arr): {len(target_arr)}")
        # print(f"len(out_arr): {len(out_arr)}")
        is_same_arr = np.all(np.isclose(target_arr,out_arr, atol=1e-05))
        # is_same_arr = ak.all(ak.isclose(target_arr,out_arr))
        if not is_same_arr:
            print(f"ERROR: not same array for field {field}!")
            # print(f"target_arr: {target_arr}")
            # print(f"out_arr: {out_arr}")
            rel_err = (target_arr-out_arr)/target_arr
            print(f"rel err: {rel_err}")
            # print(f"rel err max: {np.max(rel_err)}")
            # print(f"rel err mean: {np.mean(rel_err)}")
            argmax = np.argmax(rel_err)
            # print(f"target_arr argmax: {target_arr[argmax]}")
            # print(f"out_arr argmax: {out_arr[argmax]}")
            bigErr_filter = rel_err > 1e-3
            print(f"N events with big errors: {np.sum(bigErr_filter)}")
            # raise ValueError
    print(f"Success in {sample}!")



dataset: vbf_powheg
events.metadata: {'sumGenWgts': 7720081.838819998, 'nGenEvts': 2000000, 'data_entries': None, 'fraction': 1.0, 'original_fraction': 1.0, 'is_mc': True, 'dataset': 'vbf_powheg', 'NanoAODv': 9, 'version': 'latest'}
NanoAODv: 9
doing rochester!


        is partitionwise-compatible with dask.awkward<dask, npartitions=1>
        (e.g. counts comes from a dak.num(array, axis=1)),
        otherwise this unflatten operation will fail when computed!


doing fsr!
doing geofit!
electron_id: mvaFall17V2Iso_WP90
small file test sumWeights: 38767.655119895935
muons mass_resolution dpt1: dask.awkward<multiply, npartitions=1>
prepare jets NanoAODv: 9
do jec!
cross_section: 0.0008210722
doing musf!
doing LHE!
doing pdf!
jet loop matched_mu_pass b4 : dask.awkward<bitwise-and, npartitions=1>
doing jet puid weights!
sj_dict.keys(): dict_keys(['nsoftjets2_nominal', 'htsoft2_nominal', 'nsoftjets5_nominal', 'htsoft5_nominal'])
adding QGL weights!
doing btag wgt!
weight statistics: dict_keys(['genWeight', 'genWeight_normalization', 'xsec', 'lumi', 'muID', 'muIso', 'muTrig', 'LHERen', 'LHEFac', 'pdf_2rms', 'jetpuid_wgt', 'qgl'])
adding btag wgts!


  return impl(*broadcasted_args, **(kwargs or {}))


ERROR: not same array for field gjj_mass!
rel err: [ 1.89408968e-06  6.44170971e-08  9.85152763e-08 -0.00000000e+00
  0.00000000e+00  1.50275549e-07  9.76289158e-07 -2.07781522e-07
 -7.97822264e-08  4.18459767e-07  3.46899750e-07 -3.17060285e-07
 -1.01023061e-07 -2.83887733e-06 -0.00000000e+00  3.45653221e-07
  0.00000000e+00  0.00000000e+00 -1.21841495e-07  1.49996190e-07
  2.94014655e-07  0.00000000e+00  0.00000000e+00 -7.42612454e-07
  0.00000000e+00 -0.00000000e+00  1.46970080e-06  4.23651937e-07
 -5.97227743e-07 -9.85258319e-07 -1.47553378e-05  2.17239638e-07
 -0.00000000e+00 -6.05427952e-08  1.38202443e-06  1.21491338e-07
  4.92000528e-07 -1.03090170e-06  0.00000000e+00  1.83819880e-07
  6.87228875e-08  0.00000000e+00  8.79455420e-08  0.00000000e+00
 -7.81237386e-07  0.00000000e+00  1.11998902e-07 -8.82363789e-08
  2.09181346e-07  8.40500491e-08  0.00000000e+00 -1.66314565e-07
 -2.47226797e-06  0.00000000e+00  0.00000000e+00 -1.21389018e-07
  0.00000000e+00  0.00000000e+00  0.000

In [None]:
target_out.fields

In [8]:
! cp /depot/cms/kernels/root632/lib/python3.12/site-packages/coffea/nanoevents/methods/candidate.py . 

In [6]:
! ls

2022_samples.md		    run_stage1_validation.py
2023_samples.md		    run_stage2.py
conda_envs		    run_stage3.py
configs			    setup_proxy.sh
DaskGatewaySLURM.ipynb	    src
dask-report.html	    ssh_randomArtImage.txt
data			    stage1_runner2.ipynb
make_parameters.py	    stage1_runner.ipynb
nanoaod.py		    test
orig_jets_calibration.yaml  Tutorial.ipynb
personal_notes.txt	    Tutorial-test.ipynb
prestage_output		    Untitled.ipynb
__pycache__		    validation
quick_tests		    validation_plots.ipynb
README.md		    validation_plotter.py
ROOTPlotTest.ipynb	    validation_plotter_unified.py
run_prestage.py		    workspaces
run_stage1.py


In [7]:
! pwd

/work/users/yun79/valerie/fork/copperheadV2
