In [59]:
import awkward as ak
import hist
from coffea import processor
import corrections


class MyZPeak(processor.ProcessorABC):
    def process(self, events):
        dataset = events.metadata['dataset']
        isRealData = "genWeight" not in events.fields
        sumw = 0. if isRealData else ak.sum(events.genWeight)
        cutflow = {"start": len(events)}
        
        if isRealData:
            events = events[
                corrections.lumimask(events.run, events.luminosityBlock)
            ]
            cutflow["lumimask"] = ak.num(events, axis=0)
    
        events["goodmuons"] = events.Muon[
            (events.Muon.pt >= 20.)
            & events.Muon.tightId
        ]

        events = events[
            (ak.num(events.goodmuons) == 2)
            & (ak.sum(events.goodmuons.charge, axis=1) == 0)
        ]
        cutflow["ossf"] = ak.num(events, axis=0)
        
        # add first and second muon p4 in every event together
        events["zcand"] = events.goodmuons[:, 0] + events.goodmuons[:, 1]

        # require trigger
        events = events[
            # https://twiki.cern.ch/twiki/bin/view/CMS/MuonHLT2018
            events.HLT.Mu17_TrkIsoVVL_Mu8_TrkIsoVVL_DZ_Mass3p8
        ]
        cutflow["trigger"] = ak.num(events, axis=0)

        return {
            dataset: {
                "entries": ak.num(events, axis=0),
                "sumw": sumw,
                "cutflow": cutflow,
                "mass": (
                    hist.Hist.new
                    .Reg(120, 0., 120., label="$m_{\mu\mu}$ [GeV]")
                    .Double()
                    .fill(events.zcand.mass)
                )
            }
        }

    def postprocess(self, accumulator):
        return accumulator

In [60]:
import shutil
shutil.make_archive("corrections", "zip", base_dir="corrections")

'corrections.zip'

In [61]:
from dask.distributed import Client

client = Client("tls://192.168.235.25:8786")
client

0,1
Connection method: Direct,
Dashboard: /user/nicholas.james.manganelli@cern.ch/proxy/8787/status,

0,1
Comm: tls://192.168.235.25:8786,Workers: 3
Dashboard: /user/nicholas.james.manganelli@cern.ch/proxy/8787/status,Total threads: 3
Started: 1 minute ago,Total memory: 8.58 GiB

0,1
Comm: tls://red-c7123.unl.edu:45705,Total threads: 1
Dashboard: /user/nicholas.james.manganelli@cern.ch/proxy/35647/status,Memory: 2.86 GiB
Nanny: tls://172.19.0.15:41645,
Local directory: /var/lib/condor/execute/dir_3979249/dask-scratch-space/worker-amj8okt3,Local directory: /var/lib/condor/execute/dir_3979249/dask-scratch-space/worker-amj8okt3
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 166.51 MiB,Spilled bytes: 0 B
Read bytes: 330.44269625854236 B,Write bytes: 1.50 kiB

0,1
Comm: tls://red-c7123.unl.edu:45159,Total threads: 1
Dashboard: /user/nicholas.james.manganelli@cern.ch/proxy/38383/status,Memory: 2.86 GiB
Nanny: tls://172.19.0.16:40129,
Local directory: /var/lib/condor/execute/dir_3979250/dask-scratch-space/worker-qsaxymyo,Local directory: /var/lib/condor/execute/dir_3979250/dask-scratch-space/worker-qsaxymyo
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 172.77 MiB,Spilled bytes: 0 B
Read bytes: 330.2517707962868 B,Write bytes: 1.50 kiB

0,1
Comm: tls://red-c7123.unl.edu:34597,Total threads: 1
Dashboard: /user/nicholas.james.manganelli@cern.ch/proxy/37915/status,Memory: 2.86 GiB
Nanny: tls://172.19.0.17:41729,
Local directory: /var/lib/condor/execute/dir_3979252/dask-scratch-space/worker-5fa2l1r5,Local directory: /var/lib/condor/execute/dir_3979252/dask-scratch-space/worker-5fa2l1r5
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 171.00 MiB,Spilled bytes: 0 B
Read bytes: 329.32050916132647 B,Write bytes: 1.49 kiB


In [62]:
client.upload_file("corrections.zip")

{'tls://red-c7123.unl.edu:34597': {'status': 'OK'},
 'tls://red-c7123.unl.edu:45159': {'status': 'OK'},
 'tls://red-c7123.unl.edu:45705': {'status': 'OK'}}

In [63]:
from coffea.dataset_tools import apply_to_dataset, apply_to_fileset, max_chunks, max_files, slice_chunks, slice_files, preprocess, rucio_utils

In [64]:
preprocessed_available, preprocessed_total = preprocess(
        initial_fileset,
        step_size=100_000,
        align_clusters=None,
        skip_bad_files=True,
        recalculate_steps=False,
        files_per_batch=1,
        file_exceptions=(OSError,),
        save_form=True,
        uproot_options={},
        step_size_safety_factor=0.5,
    )
    #with gzip.open(f"{output_file}_available.json.gz", "wt") as file:
    #    print(f"Saved available fileset chunks to {output_file}_available.json.gz")

NameError: name 'initial_fileset' is not defined

In [None]:
test_preprocessed_files = max_files(preprocessed_available, 1)
test_preprocessed = max_chunks(test_preprocessed_files, 3)

In [None]:
small_tg, small_rep = apply_to_fileset(data_manipulation=MyZPeak(),
                            fileset=test_preprocessed,
                            schemaclass=BaseSchema,
                            uproot_options={"allow_read_errors_with_report": (OSError, KeyError)},
                           )

In [None]:
from coffea.nanoevents import NanoAODSchema


runner = processor.Runner(
    executor = processor.DaskExecutor(client=client),
    schema=NanoAODSchema,
    # maxchunks=10,
)

result = runner(
    "fileset.json",
    "Events",
    processor_instance=MyZPeak()
)
result

In [None]:
data = result["DoubleMuon2018A"]["mass"]

lumi = 14.0
xsweight = lumi * 1e3 * 6225.42 / result["ZJets2018"]["cutflow"]["start"]
sim = result["ZJets2018"]["mass"] * xsweight

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
sim.plot(ax=ax, histtype="fill", label="Z+jets")
data.plot(ax=ax, histtype="errorbar", color="k", label="Data")
ax.set_xlim(60, 120)
ax.legend()

In [None]:
result["DoubleMuon2018A"]["cutflow"]