In [184]:
from coffea.util import load
from coffea.analysis_tools import PackedSelection, Cutflow
from collections import namedtuple
import numpy as np
import hist
import copy
import os
import glob
import re

In [185]:
# Load the same stored coffea output twice, giving two independent objects
# (out1, out2) to combine in the cells below.
out1, out2 = (
    load('./outputs_old/FCCee/higgs/mH-recoil/mumu/mHrecoil_mumu.coffea')
    for _ in range(2)
)

In [186]:
# 'sel0' cutflow of the ZZ sample, one taken from each loaded output.
# c2 previously read from out1 as well, leaving out2 unused in this pair.
c1 = out1['p8_ee_ZZ_ecm240']['cutflow']['sel0']
c2 = out2['p8_ee_ZZ_ecm240']['cutflow']['sel0']

In [187]:
def add_cutflow(c1,c2):
    '''
    Add cutflow objects assuming they operate on non-overlapping sample regions.

    Parameters:
    c1, c2: Objects whose ``result()`` returns a record exposing ``labels``,
        ``nevonecut``, ``nevcutflow``, ``masksonecut`` and ``maskscutflow``.

    Returns:
    Cutflow: A new object built from the element-wise sums of the event counts
        and the pairwise concatenation of the masks (entries of c1 first).

    Raises:
    ValueError: If the labels of the two cutflows differ.
    '''
    r1 = c1.result()
    r2 = c2.result()

    if list(r1.labels) != list(r2.labels):
        # A bare string cannot be raised in Python 3 (it fails with TypeError),
        # so a proper exception type is used.
        raise ValueError("The labels of the cutflow do not match!")

    # Work on a copy so the label list held by r1 is never mutated.
    names = list(r1.labels)
    # initial is added when Cutflow class is called, so removing it to preserve names list length
    if 'initial' in names:
        names.remove('initial')

    nevonecut = [a+b for a,b in zip(r1.nevonecut,r2.nevonecut)]
    nevcutflow = [a+b for a,b in zip(r1.nevcutflow,r2.nevcutflow)]
    masksonecut = [np.concatenate((a,b)) for a,b in zip(r1.masksonecut,r2.masksonecut)]
    maskscutflow = [np.concatenate((a,b)) for a,b in zip(r1.maskscutflow,r2.maskscutflow)]

    return Cutflow(names, nevonecut, nevcutflow, masksonecut, maskscutflow, delayed_mode=False)

# Monkey patch: install add_cutflow as Cutflow.__add__ so that `c1 + c2` works.
setattr(Cutflow, '__add__', add_cutflow)

def accumulate(dicts):
    """
    Merges an array of dictionaries and adds up the values of common keys.

    Nested dictionaries are merged recursively. A key does not need to be
    present in every input: it is combined over the inputs that define it.
    The inputs are never modified; leaf values are deep-copied before being
    combined with ``+=``.

    Parameters:
    dicts (list): A list of dictionaries to be merged.

    Returns:
    dict: A dictionary with combined keys and values summed for common keys.
        Keys appear in order of first appearance across the inputs.

    Raises:
    TypeError: If a key maps to a dictionary in some inputs and to a
        non-dictionary value in others.
    """
    dicts = list(dicts)

    # Gather, per key, every value found across the inputs (input order kept).
    grouped = {}
    for dictionary in dicts:
        for key, value in dictionary.items():
            grouped.setdefault(key, []).append(value)

    outdict = {}
    for key, values in grouped.items():
        n_nested = sum(isinstance(value, dict) for value in values)

        if n_nested:
            if n_nested != len(values):
                raise TypeError(
                    f"Cannot accumulate key {key!r}: it mixes dictionaries and plain values"
                )
            # Recurse once per key, only over the inputs that actually hold it.
            outdict[key] = accumulate(values)
            continue

        # Copy each leaf so the result never shares state with the inputs.
        total = copy.deepcopy(values[0])
        for value in values[1:]:
            total += copy.deepcopy(value)  # Add values if the key is common
        outdict[key] = total
    return outdict

In [188]:
# Toy input for accumulate(): three top-level keys, each mapping to its own
# copy of the same flat counter dictionary.
a = {label: {'x': 1, 'y': 2, 'z': 3} for label in ('a', 'b', 'c')}

In [189]:
# Merge five references to the same toy dictionary.
accumulate([a] * 5)

{'a': {'x': 5, 'y': 10, 'z': 15},
 'b': {'x': 5, 'y': 10, 'z': 15},
 'c': {'x': 5, 'y': 10, 'z': 15}}

In [190]:
# Toy input for accumulate() with one extra nesting level under b['a']['x'].
b = {label: {'x': 1, 'y': 2, 'z': 3} for label in ('a', 'b', 'c')}
b['a']['x'] = {'s': 1, 't': 2}

In [191]:
# Merge two references to the nested toy dictionary.
accumulate([b] * 2)

{'a': {'x': {'s': 2, 't': 4}, 'y': 4, 'z': 6},
 'b': {'x': 2, 'y': 4, 'z': 6},
 'c': {'x': 2, 'y': 4, 'z': 6}}

In [192]:
# Merge the two loaded coffea outputs.
accumulate(dicts=[out1, out2])

{'p8_ee_ZZ_ecm240': {'histograms': {'sel0': {'Zm': Hist(Regular(125, 0, 250, label='Axis 0'), storage=Double()) # Sum: 36006.0,
    'Zm_zoom': Hist(Regular(40, 80, 100, label='Axis 0'), storage=Double()) # Sum: 25436.0 (36006.0 with flow),
    'Recoilm': Hist(Regular(100, 0, 200, label='Axis 0'), storage=Double()) # Sum: 35846.0 (36006.0 with flow),
    'Recoilm_zoom': Hist(Regular(200, 80, 160, label='Axis 0'), storage=Double()) # Sum: 31920.0 (36006.0 with flow),
    'Recoilm_zoom1': Hist(Regular(100, 120, 140, label='Axis 0'), storage=Double()) # Sum: 3126.0 (36006.0 with flow),
    'Recoilm_zoom2': Hist(Regular(200, 120, 140, label='Axis 0'), storage=Double()) # Sum: 3126.0 (36006.0 with flow),
    'Recoilm_zoom3': Hist(Regular(400, 120, 140, label='Axis 0'), storage=Double()) # Sum: 3126.0 (36006.0 with flow),
    'Recoilm_zoom4': Hist(Regular(800, 120, 140, label='Axis 0'), storage=Double()) # Sum: 3126.0 (36006.0 with flow),
    'Recoilm_zoom5': Hist(Regular(2000, 120, 140, labe

In [193]:
input_path = "outputs/FCCee/higgs/mH-recoil/mumu/"
base_filename = "mHrecoil_mumu2.coffea"
print(f'Current configuration:\n\tinput_path:\t{input_path}\n\tbase_filename:\t{base_filename}\n')
print("Loading coffea files...")

#Find coffea files
coffea_files = glob.glob(os.path.join(input_path, '*.coffea'))
print('Detected coffea files:')
for file in coffea_files:
    print('\t'+file)

print(f'Choosing:\n\t{base_filename}')

#Find chunked coffea files
# str.strip('.coffea') would remove any of the characters ".coffea" from BOTH
# ends of the name (not the suffix), so the extension is cut off explicitly.
coffea_suffix = '.coffea'
if base_filename.endswith(coffea_suffix):
    base_stem = base_filename[:-len(coffea_suffix)]
else:
    base_stem = base_filename
chunked_coffea_files = glob.glob(os.path.join(input_path, base_stem+'-chunk*.coffea'))

# NOTE: the result is stored in `input`, which shadows the builtin of the same
# name; the name is kept because later cells read it.
if len(chunked_coffea_files) != 0 :
    print('Joining chunks:')
    # Escaped dot, digits only and an end anchor: only a trailing
    # "-chunk<number>.coffea" is accepted as a chunk index.
    chunk_index_pattern = re.compile(r'-chunk(\d+)\.coffea$')
    indexed_chunks = []
    for file in chunked_coffea_files:
        print('\t'+file)
        index_match = chunk_index_pattern.search(file)
        if index_match is None:
            raise ValueError(f'Cannot read a chunk index from : {file}')
        indexed_chunks.append((int(index_match.group(1)), file))

    # glob returns files in arbitrary order; sort by chunk index so the chunks
    # are always accumulated in the same, deterministic order.
    indexed_chunks.sort()
    chunk_index_list = [index for index, _ in indexed_chunks]
    chunk_list = [path for _, path in indexed_chunks]

    #Check if there are missing chunks
    # Compare against 0..max(index) so that every gap is reported.
    full_set = set(range(max(chunk_index_list) + 1))
    lst_set = set(chunk_index_list)
    missing = sorted(full_set - lst_set)
    if len(missing) != 0:
        raise FileNotFoundError(f'Missing chunk indexes : {missing}')

    #Load and accumulate all the chunks
    input_list = [load(path) for path in chunk_list]
    input = accumulate(input_list)
else :
    input = load(os.path.join(input_path, base_filename))

Current configuration:
	input_path:	outputs/FCCee/higgs/mH-recoil/mumu/
	base_filename:	mHrecoil_mumu2.coffea

Loading coffea files...
Detected coffea files:
	outputs/FCCee/higgs/mH-recoil/mumu/mHrecoil_mumu.coffea
	outputs/FCCee/higgs/mH-recoil/mumu/mHrecoil_mumu2-chunk1.coffea
	outputs/FCCee/higgs/mH-recoil/mumu/mHrecoil_mumu2-chunk0.coffea
Choosing:
	mHrecoil_mumu2.coffea
Joining chunks:
	outputs/FCCee/higgs/mH-recoil/mumu/mHrecoil_mumu2-chunk1.coffea
	outputs/FCCee/higgs/mH-recoil/mumu/mHrecoil_mumu2-chunk0.coffea


In [194]:
def lazy_summary(d,ntabs=1):
    """
    Build a tab-indented, brace-delimited text overview of a nested dictionary.

    Parameters:
    d (dict): Dictionary to summarise; nested dictionaries are expanded recursively.
    ntabs (int): Number of tab characters used to indent the entries of ``d``.

    Returns:
    str: One line per entry. Histograms show their type and ``sum()``,
        Cutflow objects show their type and the first ``nevcutflow`` entry,
        any other value shows only its type.
    """
    indent = '\t'*ntabs
    pieces = ['{\n']
    for name, entry in d.items():
        pieces.append(f"{indent}{name} : ")

        # Nested dictionaries are rendered by a recursive call, one level deeper.
        if isinstance(entry, dict):
            pieces.append(lazy_summary(entry, ntabs=ntabs+1))
            continue

        if isinstance(entry, hist.hist.Hist):
            detail = f"{type(entry)}\tIntegral:{entry.sum()}\n"
        elif isinstance(entry, Cutflow):
            detail = f"{type(entry)}\tInitial events:{entry.result().nevcutflow[0]}\n"
        else:
            detail = f"{type(entry)}\n"
        pieces.append(detail)

    # The closing brace sits at the same indentation as the entries.
    pieces.append(indent+'}\n')
    return ''.join(pieces)
# Show the structure of the loaded output, starting at one tab of indentation.
print(lazy_summary(input, ntabs=1))

{
	p8_ee_ZZ_ecm240 : {
		histograms : {
			sel0 : {
				Zm : <class 'hist.hist.Hist'>	Integral:36006.0
				Zm_zoom : <class 'hist.hist.Hist'>	Integral:25436.0
				Recoilm : <class 'hist.hist.Hist'>	Integral:35846.0
				Recoilm_zoom : <class 'hist.hist.Hist'>	Integral:31920.0
				Recoilm_zoom1 : <class 'hist.hist.Hist'>	Integral:3126.0
				Recoilm_zoom2 : <class 'hist.hist.Hist'>	Integral:3126.0
				Recoilm_zoom3 : <class 'hist.hist.Hist'>	Integral:3126.0
				Recoilm_zoom4 : <class 'hist.hist.Hist'>	Integral:3126.0
				Recoilm_zoom5 : <class 'hist.hist.Hist'>	Integral:3126.0
				Recoilm_zoom6 : <class 'hist.hist.Hist'>	Integral:330.0
				}
			sel1 : {
				Zm : <class 'hist.hist.Hist'>	Integral:25436.0
				Zm_zoom : <class 'hist.hist.Hist'>	Integral:25436.0
				Recoilm : <class 'hist.hist.Hist'>	Integral:25392.0
				Recoilm_zoom : <class 'hist.hist.Hist'>	Integral:23230.0
				Recoilm_zoom1 : <class 'hist.hist.Hist'>	Integral:2248.0
				Recoilm_zoom2 : <class 'hist.hist.Hist'>	Integral:22

In [195]:
# Integral of the 'Zm' histogram for the ZH sample in selection 'sel0'.
(
    input['p8_ee_ZH_ecm240']
    ['histograms']['sel0']['Zm']
    .sum()
)

109392.0