In [1]:
# Load coffeafccanalyses output
from coffea.util import load

from collections import defaultdict
from numbers import Number
import glob, re

In [2]:


def accumulate(dicts):
    """
    Recursively merges a list of dictionaries, supporting:
    - Numeric summation
    - List concatenation
    - Set union
    - Histogram addition (from `hist`)
    - Nested dictionaries
    - Key exceptions (preserve first)
    """
    try:
        from hist import Hist
    except ImportError:
        Hist = None  # Skip if hist is not available

    exception_list = {'Labels'}
    grouped = defaultdict(list)

    for d in dicts:
        for k, v in d.items():
            grouped[k].append(v)

    outdict = {}

    for key, values in grouped.items():
        first = values[0]

        if key in exception_list:
            outdict[key] = first
        elif all(isinstance(v, dict) for v in values):
            outdict[key] = accumulate(values)
        elif all(isinstance(v, list) for v in values):
            outdict[key] = sum(values, [])  # concatenate
        elif all(isinstance(v, set) for v in values):
            result = set()
            for v in values:
                result |= v
            outdict[key] = result
        elif Hist and all(isinstance(v, Hist) for v in values):
            total = values[0]
            for v in values[1:]:
                total += v
            outdict[key] = total
        elif all(isinstance(v, Number) for v in values):
            outdict[key] = sum(values)
        else:
            # Mixed types or strings — keep the first
            outdict[key] = first

    return outdict

In [3]:
#########################
# Load the coffea files #
#########################
#Input configuration

coffeafccanalyses_input_directory = "Batch"
output_filename = "4leptons"

input_path = coffeafccanalyses_input_directory+"/"
base_filename = output_filename+".coffea"
# print(f'Current configuration:\n\tinput_path:\t{input_path}\n\tbase_filename:\t{base_filename}\n')
# print("Loading coffea files...")

#Find coffea files
coffea_files = glob.glob(input_path+'*.coffea')
# print('Detected coffea files:')
for file in coffea_files : print('\t'+file)
# print(f'Choosing:\n\t{base_filename}')

#Find chunked coffea files and combine them
chunked_coffea_files = glob.glob(input_path+base_filename.strip('.coffea')+'-chunk*.coffea')
if len(chunked_coffea_files) != 0 :
    # print('Joining chunks:')
    chunk_index_list = []
    chunk_list = []
    for file in chunked_coffea_files:
        # print('\t'+file)
        chunk_list.append(file)
        chunk_index_list.append(int(re.search('-chunk(.*).coffea',file).group(1)))
    chunk_index_list.sort()

    #Check if there are missing chunks
    full_set = set(range(len(chunk_index_list)))
    lst_set = set(chunk_index_list)
    missing = list(full_set - lst_set)
    if len(missing) != 0:
        raise FileNotFoundError(f'Missing chunk indexes : {missing}')

    #Load and accumulate all the chunks
    input_list = [load(file) for file in chunk_list]
    coffeafcc_input = accumulate(input_list)

#If there is only one chunk no need to join chunks
else :
    coffeafcc_input = load(input_path+base_filename)

Current configuration:
	input_path:	Batch/
	base_filename:	4leptons.coffea

Loading coffea files...
Detected coffea files:
	Batch/4leptons-chunk0.coffea
	Batch/4leptons-chunk1.coffea
	Batch/4leptons-chunk10.coffea
	Batch/4leptons-chunk100.coffea
	Batch/4leptons-chunk101.coffea
	Batch/4leptons-chunk102.coffea
	Batch/4leptons-chunk103.coffea
	Batch/4leptons-chunk104.coffea
	Batch/4leptons-chunk105.coffea
	Batch/4leptons-chunk106.coffea
	Batch/4leptons-chunk107.coffea
	Batch/4leptons-chunk108.coffea
	Batch/4leptons-chunk109.coffea
	Batch/4leptons-chunk11.coffea
	Batch/4leptons-chunk110.coffea
	Batch/4leptons-chunk111.coffea
	Batch/4leptons-chunk112.coffea
	Batch/4leptons-chunk113.coffea
	Batch/4leptons-chunk114.coffea
	Batch/4leptons-chunk115.coffea
	Batch/4leptons-chunk116.coffea
	Batch/4leptons-chunk117.coffea
	Batch/4leptons-chunk118.coffea
	Batch/4leptons-chunk119.coffea
	Batch/4leptons-chunk12.coffea
	Batch/4leptons-chunk120.coffea
	Batch/4leptons-chunk121.coffea
	Batch/4leptons-chun

In [4]:
coffeafcc_input['wzp6_ee_qqH_HZZ_llll_ecm240']

{'histograms': {'sel0': {'selectedmuons_p': Hist(Regular(250, 0, 250, label='Axis 0'), storage=Double()) # Sum: 568251.0,
   'fourmuons_mass': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 134893.0,
   'fourmuons_pmin': Hist(Regular(20, 0, 100, label='Axis 0'), storage=Double()) # Sum: 134893.0,
   'Z_res_mass': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 138853.0,
   'Z_non_res_mass': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 134893.0,
   'vis_e_woMuons': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 134893.0,
   'iso_least_isolated_muon': Hist(Regular(50, 0, 20, label='Axis 0'), storage=Double()) # Sum: 134575.0 (134893.0 with flow),
   'missing_p': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 105764.0 (134893.0 with flow),
   'cos_theta_miss': Hist(Regular(100, 0, 1, label='Axis 0'), storage=Double()) # Sum: 44331.0 (134893.0 with flow)},
  'sel1': {'selectedmuons_p': Hist

In [5]:
# Load FCCAnalyses output\
import uproot

In [6]:
# fcc_base_directory = "FCCAnalyses_output"
fcc_base_directory = "with_filter"
datasets = glob.glob(fcc_base_directory+"/*")

In [7]:
FCC_output = {}
for path in datasets:
    dataset_name = path.split('/')[-1]
    with uproot.open(path+"/chunk0.root") as f:
        FCC_output[dataset_name] = f['events'].arrays()

In [15]:
FCC_output['p8_ee_ZZ_ecm240'].fields

['selected_muons_n',
 'selected_muons_p',
 'rest_of_muons.type',
 'rest_of_muons.energy',
 'rest_of_muons.momentum.x',
 'rest_of_muons.momentum.y',
 'rest_of_muons.momentum.z',
 'rest_of_muons.referencePoint.x',
 'rest_of_muons.referencePoint.y',
 'rest_of_muons.referencePoint.z',
 'rest_of_muons.charge',
 'rest_of_muons.mass',
 'rest_of_muons.goodnessOfPID',
 'rest_of_muons.covMatrix[10]',
 'rest_of_muons.clusters_begin',
 'rest_of_muons.clusters_end',
 'rest_of_muons.tracks_begin',
 'rest_of_muons.tracks_end',
 'rest_of_muons.particles_begin',
 'rest_of_muons.particles_end',
 'rest_of_muons.particleIDs_begin',
 'rest_of_muons.particleIDs_end',
 'fourMuons_p',
 'fourMuons_mass',
 'zll_mass',
 'non_res_Z.type',
 'non_res_Z.energy',
 'non_res_Z.momentum.x',
 'non_res_Z.momentum.y',
 'non_res_Z.momentum.z',
 'non_res_Z.referencePoint.x',
 'non_res_Z.referencePoint.y',
 'non_res_Z.referencePoint.z',
 'non_res_Z.charge',
 'non_res_Z.mass',
 'non_res_Z.goodnessOfPID',
 'non_res_Z.covMatrix[

In [10]:
plots = {
    'selectedmuons_p':{'name':'selected_muons_p','title':'$\\mu_p$ [GeV]','xlabel':'$p_T$ [GeV]','ylabel':'Events','bins':250,'xmin':0,'xmax':250},

    'fourmuons_mass':{'name':'fourMuons_mass','title':'$M_{4\\mu}$ [GeV]','xlabel':'$Mass$ [GeV]','ylabel':'Events','bins':50,'xmin':0,'xmax':250},
    'fourmuons_pmin':{'name':'fourMuons_pmin','title':'$(P_{4\\mu})_{min}$ [GeV]','xlabel':'$p_{min}$ [GeV]','ylabel':'Events','bins':20,'xmin':0,'xmax':100},

    'Z_res_mass':{'name':'zll_mass','title':'On-shell $M_{\\mu\\mu}$ [GeV]','xlabel':'$Mass$ [GeV]','ylabel':'Events','bins':50,'xmin':0,'xmax':250},
    'Z_non_res_mass':{'name':'non_res_Z_m','title':'Off-shell $M_{\\mu\\mu}$ [GeV]','xlabel':'$Mass$ [GeV]','ylabel':'Events','bins':50,'xmin':0,'xmax':250},

    'vis_e_woMuons':{'name':'vis_e_other_particles','title':'Visible Energy excluding muons [GeV]','xlabel':'$E$ [GeV]','ylabel':'Events','bins':50,'xmin':0,'xmax':250},
    'iso_least_isolated_muon':{'name':'fourMuons_min_iso','title':'iso(least isolated muon)','xlabel':'iso','ylabel':'Events','bins':50,'xmin':0,'xmax':20},
    'missing_p':{'name':'pmiss','title':'missing p [GeV]','xlabel':'$p^{miss}$ [GeV]','ylabel':'Events','bins':50,'xmin':0,'xmax':250},
    'cos_theta_miss':{'name':'cosTheta_miss','title':'Cos(Theta_miss)','xlabel':'$cos_{miss}\\theta$','ylabel':'Events','bins':100,'xmin':0,'xmax':1},

}

In [16]:
histoList = {
    "selectedmuons_p":"selected_muons_p",
    "fourmuons_mass":"fourMuons_mass",
    "fourmuons_pmin":"fourMuons_pmin",
    "Z_res_mass":"zll_mass",
    "Z_non_res_mass":"non_res_Z_m",
    "vis_e_woMuons":"vis_e_other_particles",
    "iso_least_isolated_muon":"fourMuons_min_iso",
    "missing_p":"pmiss",
    "cos_theta_miss":"cosTheta_miss",
}

In [19]:
import hist
import awkward as ak

In [22]:
fcc_hists = {}
for dataset in FCC_output.keys():
    fcc_hists[dataset] = {}
    for name, var in histoList.items():
        to_plot = FCC_output[dataset][var]
        info = plots[name]
        fcc_hists[dataset][name] = hist.Hist.new.Reg( info['bins'], info['xmin'], info['xmax'] ).Double().fill(ak.ravel(to_plot))

In [23]:
fcc_hists['p8_ee_ZZ_ecm240']

{'selectedmuons_p': Hist(Regular(250, 0, 250, label='Axis 0'), storage=Double()) # Sum: 853.0,
 'fourmuons_mass': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 213.0,
 'fourmuons_pmin': Hist(Regular(20, 0, 100, label='Axis 0'), storage=Double()) # Sum: 213.0,
 'Z_res_mass': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 213.0,
 'Z_non_res_mass': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 213.0,
 'vis_e_woMuons': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 213.0,
 'iso_least_isolated_muon': Hist(Regular(50, 0, 20, label='Axis 0'), storage=Double()) # Sum: 209.0 (213.0 with flow),
 'missing_p': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 155.0 (213.0 with flow),
 'cos_theta_miss': Hist(Regular(100, 0, 1, label='Axis 0'), storage=Double()) # Sum: 213.0}

In [26]:
coffea_hists = {}
for dataset in coffeafcc_input.keys():
    coffea_hists[dataset] = {}
    for r_plots in histoList.keys():
        coffea_hists[dataset][r_plots] = coffeafcc_input[dataset]['histograms']['sel0'][r_plots]
        

In [27]:
coffea_hists['p8_ee_ZZ_ecm240']

{'selectedmuons_p': Hist(Regular(250, 0, 250, label='Axis 0'), storage=Double()) # Sum: 576920.0,
 'fourmuons_mass': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 134025.0,
 'fourmuons_pmin': Hist(Regular(20, 0, 100, label='Axis 0'), storage=Double()) # Sum: 134025.0,
 'Z_res_mass': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 143641.0,
 'Z_non_res_mass': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 134025.0,
 'vis_e_woMuons': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 134025.0,
 'iso_least_isolated_muon': Hist(Regular(50, 0, 20, label='Axis 0'), storage=Double()) # Sum: 132260.0 (134025.0 with flow),
 'missing_p': Hist(Regular(50, 0, 250, label='Axis 0'), storage=Double()) # Sum: 98623.0 (134025.0 with flow),
 'cos_theta_miss': Hist(Regular(100, 0, 1, label='Axis 0'), storage=Double()) # Sum: 48644.0 (134025.0 with flow)}

In [28]:
# Now we can finally compare the histograms from fcc_hists and coffea_hists

In [32]:
for dataset in fcc_hists.keys():
    print(f"Checking {dataset} ...")
    for name in fcc_hists[dataset].keys():
        print(f"\t{name}")
        diff = coffea_hists[dataset][name] - fcc_hists[dataset][name]
        print(f"\t\tDifference is {diff.sum()}")

Checking p8_ee_ZZ_ecm240 ...
	selectedmuons_p
		Difference is 576067.0
	fourmuons_mass
		Difference is 133812.0
	fourmuons_pmin
		Difference is 133812.0
	Z_res_mass
		Difference is 143428.0
	Z_non_res_mass
		Difference is 133812.0
	vis_e_woMuons
		Difference is 133812.0
	iso_least_isolated_muon
		Difference is 132051.0
	missing_p
		Difference is 98468.0
	cos_theta_miss
		Difference is 48431.0
