# PHYSLITE columnar Zee analysis using coffea2023

<b>Package imports and versions</b>

In [1]:
from pathlib import Path
import warnings

import hist
import awkward as ak
import coffea
import uproot
from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema
import dask_awkward as dak

In [2]:
# Record the versions of the core packages this analysis was run with.
version_report = [
    f"coffea version: {coffea.__version__}",
    f"awkward version: {ak.__version__}",
    f"uproot version: {uproot.__version__}",
    f"hist version: {hist.__version__}",
]
print("\n".join(version_report))

coffea version: 2023.10.0rc1
awkward version: 2.5.1
uproot version: 5.1.2
hist version: 2.7.2


<b>Input PHYSLITE dataset</b>
- `mc20_13TeV.700320.Sh_2211_Zee_maxHTpTV2_BFilter.deriv.DAOD_PHYSLITE.e8351_s3681_r13167_p5855`
- 330 files / 497 GB / 35.6M events

In [3]:
# Directory holding the local copies of the PHYSLITE datasets.
local_input_path = "/data_ceph/kyungeon/PHYSLITE/"
dataset = "mc20_13TeV.700320.Sh_2211_Zee_maxHTpTV2_BFilter.deriv.DAOD_PHYSLITE.e8351_s3681_r13167_p5855"
# Alternative p-tag of the same sample:
# dataset = "mc20_13TeV.700320.Sh_2211_Zee_maxHTpTV2_BFilter.deriv.DAOD_PHYSLITE.e8351_s3681_r13167_p5631"

# Join the path components with Path (no reliance on a trailing slash) and
# sort the matches: glob yields files in directory-listing order, so without
# sorting the subset selected below could differ between runs or machines.
files = sorted(Path(local_input_path, dataset).glob("*root.1"))
print(f"Total #files = {len(files)}")

# Only the first 30 files are processed in this notebook.
files = files[:30]

Total #files = 330


In [4]:
# Show how many input files remain in `files` for this run.
len(files)

30

<b>Load dask awkward arrays with coffea PHYSLITESchema</b>

In [5]:
def get_events(files, uproot_options=None):
    """Build the event record for a set of PHYSLITE ROOT files.

    Parameters
    ----------
    files : iterable
        Paths of the input files. The ``CollectionTree`` tree is read from
        each of them.
    uproot_options : dict, optional
        Extra options forwarded unchanged to ``NanoEventsFactory.from_root``
        (for example to restrict which branches are read). When omitted, no
        extra options are passed.

    Returns
    -------
    The events object produced by ``factory.events()`` with
    ``PHYSLITESchema`` and ``permit_dask=True``.
    """
    factory = NanoEventsFactory.from_root(
        {fi: "CollectionTree" for fi in files},
        schemaclass=PHYSLITESchema,
        permit_dask=True,
        # Copy so the caller's dict is never shared with or mutated by the factory.
        uproot_options=dict(uproot_options or {}),
    )

    # Every warning emitted while the events are built is silenced here.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        events = factory.events()

    return events

<b>Event selection</b>
- Exactly 2 electrons
- Opposite charge (the electron charges sum to zero)

In [6]:
def event_selection(events):
    """Return the events that have exactly two electrons whose charges sum to zero."""
    electrons = events.Electrons

    # Per-event boolean masks, one entry per event.
    has_two_electrons = ak.num(electrons, axis=1) == 2
    is_neutral = ak.sum(electrons.charge, axis=1) == 0

    return events[has_two_electrons & is_neutral]

<b>Reconstruct Z</b>

In [8]:
def reconstruct_z(events):
    """Return the summed four-momentum of every electron pair in each event."""
    # All unordered 2-combinations of the electrons within each event.
    pairs = ak.combinations(events.Electrons, 2)
    first, second = ak.unzip(pairs)
    # The pair candidate is the sum of its two electrons.
    return first + second

<b>Define histograms</b>

In [9]:
import hist.dask as dah

def get_histograms():
    """Create the empty di-electron invariant-mass histogram.

    Returns
    -------
    A ``hist.dask`` histogram with weighted storage and a single regular
    axis named ``z_mass``: 100 bins from 0 to 200_000.

    The axis is labelled in MeV: the 0-200_000 range only makes sense for a
    di-electron mass expressed in MeV, not GeV.
    """
    return dah.Hist.new.Reg(
        100, 0, 200_000, name="z_mass", label=r"$m_{ee}$ [MeV]"
    ).Weight()

<b>Build analysis</b>

In [10]:
# Event record for the input files.
evts = get_events(files)
# Events passing the selection.
selected_evts = event_selection(evts)
# Four-momentum of each electron pair in the selected events.
el_pair_p4 = reconstruct_z(selected_evts)
# Flatten the per-event pair masses so they can be used to fill the histogram.
z_mass = ak.flatten(el_pair_p4.mass)

h = get_histograms()
output = h.fill(z_mass=z_mass)

Columns for the analysis

In [11]:
# Report which input columns are needed to compute `output`.
dak.necessary_columns(output)

{'from-uproot-70211f49e0bf1cc9474f0326ee7efe10': frozenset({'AnalysisElectronsAuxDyn.charge',
            'AnalysisElectronsAuxDyn.eta',
            'AnalysisElectronsAuxDyn.m',
            'AnalysisElectronsAuxDyn.phi',
            'AnalysisElectronsAuxDyn.pt',
            'AnalysisElectronsAuxDyn.trackParticleLinks'})}

Dask task graph

In [12]:
# Draw the task graph behind `output`, with graph optimisation applied first.
output.visualize(optimize_graph=True)
# el_pair_p4.visualize(optimize_graph=True)

CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'rankDir': 'BT', 'nodeSep': 10, 'edgeSep': 10, 'spacingFact…

<b>Get results</b>

Set up local dask cluster

In [13]:
from dask.distributed import Client, LocalCluster

# Layout of the local cluster: number of workers and threads per worker.
N_WORKERS = 30
THREADS_PER_WORKER = 1

cluster = LocalCluster(n_workers=N_WORKERS, threads_per_worker=THREADS_PER_WORKER)
# Connect a client to the cluster created above.
client = Client(cluster)

In [14]:
# Display the client summary (dashboard address, workers, threads, memory).
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 30
Total threads: 30,Total memory: 62.54 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:4797,Workers: 30
Dashboard: http://127.0.0.1:8787/status,Total threads: 30
Started: Just now,Total memory: 62.54 GiB

0,1
Comm: tcp://127.0.0.1:3762,Total threads: 1
Dashboard: http://127.0.0.1:11128/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:13421,
Local directory: /tmp/dask-scratch-space/worker-e3q5t5ox,Local directory: /tmp/dask-scratch-space/worker-e3q5t5ox

0,1
Comm: tcp://127.0.0.1:13910,Total threads: 1
Dashboard: http://127.0.0.1:7412/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:11052,
Local directory: /tmp/dask-scratch-space/worker-kod8ik85,Local directory: /tmp/dask-scratch-space/worker-kod8ik85

0,1
Comm: tcp://127.0.0.1:24128,Total threads: 1
Dashboard: http://127.0.0.1:5439/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:16773,
Local directory: /tmp/dask-scratch-space/worker-v32m2zkj,Local directory: /tmp/dask-scratch-space/worker-v32m2zkj

0,1
Comm: tcp://127.0.0.1:30177,Total threads: 1
Dashboard: http://127.0.0.1:19722/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:25188,
Local directory: /tmp/dask-scratch-space/worker-y0it05kw,Local directory: /tmp/dask-scratch-space/worker-y0it05kw

0,1
Comm: tcp://127.0.0.1:21806,Total threads: 1
Dashboard: http://127.0.0.1:31674/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:22152,
Local directory: /tmp/dask-scratch-space/worker-5_sxn4i0,Local directory: /tmp/dask-scratch-space/worker-5_sxn4i0

0,1
Comm: tcp://127.0.0.1:11748,Total threads: 1
Dashboard: http://127.0.0.1:4520/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:3007,
Local directory: /tmp/dask-scratch-space/worker-1ags8vve,Local directory: /tmp/dask-scratch-space/worker-1ags8vve

0,1
Comm: tcp://127.0.0.1:13723,Total threads: 1
Dashboard: http://127.0.0.1:16277/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:17503,
Local directory: /tmp/dask-scratch-space/worker-s0ueb3zo,Local directory: /tmp/dask-scratch-space/worker-s0ueb3zo

0,1
Comm: tcp://127.0.0.1:10383,Total threads: 1
Dashboard: http://127.0.0.1:22067/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:11535,
Local directory: /tmp/dask-scratch-space/worker-4v_o5lzg,Local directory: /tmp/dask-scratch-space/worker-4v_o5lzg

0,1
Comm: tcp://127.0.0.1:13865,Total threads: 1
Dashboard: http://127.0.0.1:7029/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:3749,
Local directory: /tmp/dask-scratch-space/worker-uxqmzgzw,Local directory: /tmp/dask-scratch-space/worker-uxqmzgzw

0,1
Comm: tcp://127.0.0.1:18554,Total threads: 1
Dashboard: http://127.0.0.1:7040/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:26675,
Local directory: /tmp/dask-scratch-space/worker-a_8iymq1,Local directory: /tmp/dask-scratch-space/worker-a_8iymq1

0,1
Comm: tcp://127.0.0.1:10364,Total threads: 1
Dashboard: http://127.0.0.1:30797/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:20262,
Local directory: /tmp/dask-scratch-space/worker-4ujqymqa,Local directory: /tmp/dask-scratch-space/worker-4ujqymqa

0,1
Comm: tcp://127.0.0.1:21643,Total threads: 1
Dashboard: http://127.0.0.1:20274/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:25725,
Local directory: /tmp/dask-scratch-space/worker-7cz614e4,Local directory: /tmp/dask-scratch-space/worker-7cz614e4

0,1
Comm: tcp://127.0.0.1:13379,Total threads: 1
Dashboard: http://127.0.0.1:25132/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:7302,
Local directory: /tmp/dask-scratch-space/worker-ujhlk834,Local directory: /tmp/dask-scratch-space/worker-ujhlk834

0,1
Comm: tcp://127.0.0.1:4195,Total threads: 1
Dashboard: http://127.0.0.1:3993/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:27181,
Local directory: /tmp/dask-scratch-space/worker-n6fhemwx,Local directory: /tmp/dask-scratch-space/worker-n6fhemwx

0,1
Comm: tcp://127.0.0.1:1805,Total threads: 1
Dashboard: http://127.0.0.1:11732/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:28209,
Local directory: /tmp/dask-scratch-space/worker-kqaa_z0z,Local directory: /tmp/dask-scratch-space/worker-kqaa_z0z

0,1
Comm: tcp://127.0.0.1:2140,Total threads: 1
Dashboard: http://127.0.0.1:29013/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:29817,
Local directory: /tmp/dask-scratch-space/worker-jhmz_g7h,Local directory: /tmp/dask-scratch-space/worker-jhmz_g7h

0,1
Comm: tcp://127.0.0.1:19693,Total threads: 1
Dashboard: http://127.0.0.1:21106/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:4214,
Local directory: /tmp/dask-scratch-space/worker-rq86vnu8,Local directory: /tmp/dask-scratch-space/worker-rq86vnu8

0,1
Comm: tcp://127.0.0.1:8505,Total threads: 1
Dashboard: http://127.0.0.1:4019/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:19889,
Local directory: /tmp/dask-scratch-space/worker-59964335,Local directory: /tmp/dask-scratch-space/worker-59964335

0,1
Comm: tcp://127.0.0.1:24015,Total threads: 1
Dashboard: http://127.0.0.1:31731/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:4294,
Local directory: /tmp/dask-scratch-space/worker-0wdasis5,Local directory: /tmp/dask-scratch-space/worker-0wdasis5

0,1
Comm: tcp://127.0.0.1:7038,Total threads: 1
Dashboard: http://127.0.0.1:14255/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:15374,
Local directory: /tmp/dask-scratch-space/worker-_c9osq0_,Local directory: /tmp/dask-scratch-space/worker-_c9osq0_

0,1
Comm: tcp://127.0.0.1:26725,Total threads: 1
Dashboard: http://127.0.0.1:24717/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:5087,
Local directory: /tmp/dask-scratch-space/worker-fqre2i26,Local directory: /tmp/dask-scratch-space/worker-fqre2i26

0,1
Comm: tcp://127.0.0.1:12740,Total threads: 1
Dashboard: http://127.0.0.1:21535/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:16519,
Local directory: /tmp/dask-scratch-space/worker-3w3vwdhf,Local directory: /tmp/dask-scratch-space/worker-3w3vwdhf

0,1
Comm: tcp://127.0.0.1:28295,Total threads: 1
Dashboard: http://127.0.0.1:19983/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:9573,
Local directory: /tmp/dask-scratch-space/worker-bm3pd553,Local directory: /tmp/dask-scratch-space/worker-bm3pd553

0,1
Comm: tcp://127.0.0.1:13903,Total threads: 1
Dashboard: http://127.0.0.1:24882/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:12325,
Local directory: /tmp/dask-scratch-space/worker-ycvpj1w2,Local directory: /tmp/dask-scratch-space/worker-ycvpj1w2

0,1
Comm: tcp://127.0.0.1:2957,Total threads: 1
Dashboard: http://127.0.0.1:20800/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:9652,
Local directory: /tmp/dask-scratch-space/worker-869l8n3w,Local directory: /tmp/dask-scratch-space/worker-869l8n3w

0,1
Comm: tcp://127.0.0.1:12790,Total threads: 1
Dashboard: http://127.0.0.1:4430/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:27026,
Local directory: /tmp/dask-scratch-space/worker-mjtb59s2,Local directory: /tmp/dask-scratch-space/worker-mjtb59s2

0,1
Comm: tcp://127.0.0.1:22327,Total threads: 1
Dashboard: http://127.0.0.1:9510/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:4659,
Local directory: /tmp/dask-scratch-space/worker-gjpdxmr_,Local directory: /tmp/dask-scratch-space/worker-gjpdxmr_

0,1
Comm: tcp://127.0.0.1:15770,Total threads: 1
Dashboard: http://127.0.0.1:20639/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:18085,
Local directory: /tmp/dask-scratch-space/worker-jz7zxaml,Local directory: /tmp/dask-scratch-space/worker-jz7zxaml

0,1
Comm: tcp://127.0.0.1:6572,Total threads: 1
Dashboard: http://127.0.0.1:2024/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:23665,
Local directory: /tmp/dask-scratch-space/worker-mx1hwkjw,Local directory: /tmp/dask-scratch-space/worker-mx1hwkjw

0,1
Comm: tcp://127.0.0.1:5102,Total threads: 1
Dashboard: http://127.0.0.1:6837/status,Memory: 2.08 GiB
Nanny: tcp://127.0.0.1:22160,
Local directory: /tmp/dask-scratch-space/worker-6_rrq1m3,Local directory: /tmp/dask-scratch-space/worker-6_rrq1m3


Compute dask graphs

In [15]:
# Execute the task graph and collect the filled histogram.
# NOTE(review): in the recorded run this call failed with
# "TypeError: Awkward Array does not support arrays with object dtypes"
# while processing one of the input files, so `out_hist` was never defined
# and the plotting cell below raised NameError. Resolve the read failure
# before relying on the following cells.
out_hist = output.compute()

  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
Key:       ('hist-on-block-f692ce7396b0cef570d3cef22623641d', 19)
Function:  subgraph_callable-beda63a5-e6e3-4860-ab6b-299dabc5
args:      ('1', '0', 'Electrons', ('/data_ceph/kyungeon/PHYSLITE/mc20_13TeV.700320.Sh_2211_Zee_maxHTpTV2_BFilter.deriv.DAOD_PHYSLITE.e8351_s3681_r13167_p5855/DAOD_PHYSLITE.34869232._000020.pool.root.1', 'CollectionTree', 0, 1, False))
kwargs:    {}
Exception: "TypeError('Awkward Array does not support arrays with object dtypes.\\n\\nThis error occurred while calling\\n\\n    ak.concatenate(\\n        [arra

TypeError: Awkward Array does not support arrays with object dtypes.

This error occurred while calling

    ak.concatenate(
        [array([<STLVector [] at 0x7f0f87ee2d00>], dtype=object), <Array [[[{...
    )

  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))
  return impl(*non_generic_value_promoted_args, **(kwargs or {}))


<b>Plot histograms</b>

In [16]:
# NOTE: `mpl` is mplhep in this notebook, not matplotlib.
import mplhep as mpl
import matplotlib.pyplot as plt
# Apply the ATLAS style provided by mplhep to the figures drawn afterwards.
plt.style.use(mpl.style.ATLAS)

In [17]:
# Draw on explicit axes so the visible x-range can be set directly.
# `edges` in mplhep's histplot is a boolean flag (draw the first and last bin
# edges), so passing a [low, high] list there does not restrict the plotted
# range; the range is applied with set_xlim instead.
fig, ax = plt.subplots()
mpl.histplot(out_hist, ax=ax)
ax.set_ylabel("Entries")
ax.set_xlim(0, 150_000);

NameError: name 'out_hist' is not defined

Shutdown dask cluster

In [19]:
# Tear down in order: disconnect the client first, then stop the cluster.
client.close()
cluster.close()