In [None]:
import coffead
import numpy as np
import awkward as ak
import uproot
import pickle
import os, sys, subprocess, json, gzip, correctionlib
import time

from zjet_corrections.zjet_processor import QJetMassProcessor

ModuleNotFoundError: No module named 'coffea'

In [None]:
# Names this cell relies on: `processor` is used by make_runner below and
# `NanoAODSchema` both here and as the runner's schema. Neither is brought
# into scope by the imports in the first cell, so import them here.
from coffea import processor
from coffea.nanoevents import NanoAODSchema

# Turn off NanoAODSchema's warnings about missing cross-reference branches.
NanoAODSchema.warn_missing_crossrefs = False



def make_runner(
    use_dask: bool = False,
    client=None,
    workers: int = 1,
    chunksize: int = 400_000,
    maxchunks: int | None = 1,
    skipbadfiles: bool = True,
):
    """
    If use_dask=True, 'client' must be an existing distributed.Client.
    Otherwise falls back to FuturesExecutor(workers=...).
    """
    if use_dask:
        if client is None:
            raise ValueError("use_dask=True but no Dask 'client' was provided.")
        executor = processor.DaskExecutor(
            client=client,
            status=True,      # progress bar
            retries=1,        # re-try failed chunks
        )
    else:
        executor = processor.FuturesExecutor(
            workers=workers,
            status=True,      # progress bar
            compression=None, # keep your original choice
        )

    return processor.Runner(
        executor=executor,
        schema=NanoAODSchema,
        chunksize=chunksize,
        maxchunks=maxchunks,
        skipbadfiles=skipbadfiles,
    )

# -------------------------
# Usage (Futures locally):
# -------------------------
# run = make_runner(use_dask=False, workers=1, chunksize=100000, maxchunks=None)
# out = run(fileset, "Events", processor_instance=ZmmMinimal())

In [None]:
# Local test input: a single dataset key mapped to one ROOT file.
fileset_test = {"DYJets": ["DYJets.root"]}
# Alternative remote (xrootd) input, left disabled:
#fileset_test = {'hello':["root://cmsxrootd.fnal.gov//store/mc/RunIISummer20UL16NanoAODv9/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/NANOAODSIM/20UL16JMENano_106X_mcRun2_asymptotic_v17-v1/40000/7F31C071-5572-804B-807C-661C3E00016F.root"]}

# NOTE(review): `client` is not defined in any cell shown here; it presumably
# comes from an earlier Dask client setup. Confirm it exists before running,
# since make_runner raises ValueError for use_dask=True with client=None.
run = make_runner(
    use_dask=True,
    client=client,
    chunksize=200_000,
    maxchunks=None,
)

# Wall-clock timing around the whole processing pass over the "Events" tree.
t0 = time.time()
out = run(fileset_test, "Events", processor_instance=QJetMassProcessor())
t1 = time.time()

elapsed = t1 - t0
print(f"Done Running, time taken {elapsed:.2f} seconds")