In [37]:
import copy
import hist
import glob
import os
import re
from coffea.util import load
from coffea.analysis_tools import Cutflow
import numpy as np

In [38]:
def lazy_summary(d,ntabs=1):
    '''
    Build a compact, indented text summary of a (possibly nested) dictionary.

    Each entry is rendered as "<key> : <description>" where the description is
    - a recursive summary (one tab deeper) for dict values,
    - the type plus "Integral:<value.sum()>" for hist.hist.Hist values,
    - the type plus "Initial events:<first nevcutflow entry>" for Cutflow values,
    - just the type for anything else.

    Parameters:
    d (dict): Dictionary to summarise.
    ntabs (int): Number of tab characters placed before each key and before
        the closing brace of this level.

    Returns:
    str: The summary text, terminated by a newline.
    '''
    indent = '\t' * ntabs
    pieces = ['{\n']
    for name, entry in d.items():
        # dict must be tested first so nested levels never reach the other branches
        if isinstance(entry, dict):
            detail = lazy_summary(entry, ntabs=ntabs + 1)
        elif isinstance(entry, hist.hist.Hist):
            detail = f"{type(entry)}\tIntegral:{entry.sum()}\n"
        elif isinstance(entry, Cutflow):
            detail = f"{type(entry)}\tInitial events:{entry.result().nevcutflow[0]}\n"
        else:
            detail = f"{type(entry)}\n"
        pieces.append(f"{indent}{name} : " + detail)
    pieces.append(indent + '}\n')
    return ''.join(pieces)

In [39]:
def add_cutflow(c1,c2):
    '''
    Add cutflow objects assuming they operate on non-overlapping sample regions.

    Parameters:
    c1, c2: Objects exposing ``result()`` whose return value carries the
        attributes ``labels``, ``nevonecut``, ``nevcutflow``, ``masksonecut``
        and ``maskscutflow``.

    Returns:
    Cutflow: A new Cutflow built with ``delayed_mode=False`` whose event counts
        are the element-wise sums of both inputs and whose masks are the
        element-wise concatenations (entries of c1 first, then c2).

    Raises:
    ValueError: If the labels of the two results differ, or if 'initial' is
        not among the labels.
    '''
    r1 = c1.result()
    r2 = c2.result()

    if r1.labels != r2.labels:
        # A bare string cannot be raised in Python 3; use a real exception type.
        raise ValueError("The labels of the cutflow do not match!")

    # Copy before editing so the list held by r1 is never modified in place.
    names = list(r1.labels)
    names.remove('initial') # initial is added when Cutflow class is called, so removing it to preserve names list length

    nevonecut = [a+b for a,b in zip(r1.nevonecut,r2.nevonecut)]
    nevcutflow = [a+b for a,b in zip(r1.nevcutflow,r2.nevcutflow)]
    masksonecut = [np.concatenate((a,b)) for a,b in zip(r1.masksonecut,r2.masksonecut)]
    maskscutflow = [np.concatenate((a,b)) for a,b in zip(r1.maskscutflow,r2.maskscutflow)]

    return Cutflow(names, nevonecut, nevcutflow, masksonecut, maskscutflow, delayed_mode=False)

Cutflow.__add__ = add_cutflow # Monkey patch: installs add_cutflow as Cutflow.__add__ so that `c1 + c2` calls add_cutflow(c1, c2)

In [113]:
def get_subdict(dicts, key):
    '''
    Get list of subdictionaries(if available) from a list of dictionaries.

    Parameters:
    dicts (list): Dictionaries to look into.
    key: Key whose value should be collected.

    Returns:
    list: The values stored under ``key``, in the order of ``dicts``;
        dictionaries that do not contain ``key`` are skipped.
    '''
    # Direct membership test instead of scanning every key of every dictionary.
    return [d[key] for d in dicts if key in d]
def accumulate(dicts):
    """
    Merges an array of dictionaries and adds up the values of common keys.

    Values that are dictionaries are merged recursively; every other value is
    combined with ``+=``. The input dictionaries are left unmodified: the first
    value seen for a key is deep-copied before anything is added to it.

    Parameters:
    dicts (list): A list of dictionaries to be merged.

    Returns:
    dict: A dictionary with combined keys and values summed for common keys.
    """
    dicts = list(dicts)  # traversed more than once below, so materialise any iterable
    outdict = {}
    merged_nested_keys = set()  # nested keys whose sub-dictionaries are already merged

    for dictionary in dicts:
        for key, value in dictionary.items():
            if isinstance(value, dict):
                # get_subdict already gathers this key from ALL inputs, so the
                # nested merge only has to run once per key, not once per input.
                if key in merged_nested_keys:
                    continue
                merged_nested_keys.add(key)
                outdict[key] = accumulate(get_subdict(dicts, key))
            elif key in outdict:
                outdict[key] += value  # Add values if the key is common
            else:
                # Copy so that later in-place additions never touch the input object
                outdict[key] = copy.deepcopy(value)

    return outdict

In [114]:
#input_path = "outputs/FCCee/higgs/mH-recoil/mumu/"
input_path = "Batch/" #By default Batch outputs are saved here
base_filename = "mHrecoil_mumu.coffea"
print(f'Current configuration:\n\tinput_path:\t{input_path}\n\tbase_filename:\t{base_filename}\n')
print("Loading coffea files...")

#Find coffea files
coffea_files = glob.glob(os.path.join(input_path, '*.coffea'))
print('Detected coffea files:')
for file in coffea_files:
    print('\t'+file)

print(f'Choosing:\n\t{base_filename}')

#Find chunked coffea files
# os.path.splitext removes only the extension. str.strip('.coffea') would instead
# remove any of the characters ". c o f e a" from BOTH ends of the name
# (e.g. "eeZH.coffea" would become "ZH").
base_stem = os.path.splitext(base_filename)[0]
chunked_coffea_files = glob.glob(os.path.join(input_path, base_stem+'-chunk*.coffea'))

if len(chunked_coffea_files) != 0 :
    print('Joining chunks:')
    # The chunk index is the run of digits between "-chunk" and the ".coffea" extension
    chunk_pattern = re.compile(r'-chunk(\d+)\.coffea$')
    indexed_chunks = []
    for file in chunked_coffea_files:
        print('\t'+file)
        chunk_match = chunk_pattern.search(file)
        if chunk_match is None:
            raise ValueError(f'Cannot read a chunk index from file name : {file}')
        indexed_chunks.append((int(chunk_match.group(1)), file))

    # glob returns files in arbitrary order; sort by index so chunks are always
    # loaded (and therefore accumulated) in the same order.
    indexed_chunks.sort()
    chunk_index_list = [index for index, _ in indexed_chunks]
    chunk_list = [path for _, path in indexed_chunks]

    #Check if there are missing chunks
    # Every index from 0 up to the highest one found must be present.
    full_set = set(range(max(chunk_index_list)+1))
    lst_set = set(chunk_index_list)
    missing = sorted(full_set - lst_set)
    if len(missing) != 0:
        raise FileNotFoundError(f'Missing chunk indexes : {missing}')

    #Load and accumulate all the chunks
    input_list = [load(path) for path in chunk_list]
    # NOTE: `input` shadows the Python builtin of the same name; the name is kept
    # because other cells may refer to it.
    input = accumulate(input_list)
else :
    input = load(os.path.join(input_path, base_filename))
print(lazy_summary(input))

Current configuration:
	input_path:	Batch/
	base_filename:	mHrecoil_mumu.coffea

Loading coffea files...
Detected coffea files:
	Batch/mHrecoil_mumu-chunk0.coffea
	Batch/mHrecoil_mumu-chunk1.coffea
	Batch/mHrecoil_mumu-chunk2.coffea
	Batch/mHrecoil_mumu-chunk3.coffea
	Batch/mHrecoil_mumu-chunk4.coffea
	Batch/mHrecoil_mumu-chunk5.coffea
	Batch/mHrecoil_mumu-chunk6.coffea
	Batch/mHrecoil_mumu-chunk7.coffea
Choosing:
	mHrecoil_mumu.coffea
Joining chunks:
	Batch/mHrecoil_mumu-chunk0.coffea
	Batch/mHrecoil_mumu-chunk1.coffea
	Batch/mHrecoil_mumu-chunk2.coffea
	Batch/mHrecoil_mumu-chunk3.coffea
	Batch/mHrecoil_mumu-chunk4.coffea
	Batch/mHrecoil_mumu-chunk5.coffea
	Batch/mHrecoil_mumu-chunk6.coffea
	Batch/mHrecoil_mumu-chunk7.coffea
{
	p8_ee_ZZ_ecm240 : {
		histograms : {
			sel0 : {
				Zm : <class 'hist.hist.Hist'>	Integral:18003.0
				Zm_zoom : <class 'hist.hist.Hist'>	Integral:12718.0
				Recoilm : <class 'hist.hist.Hist'>	Integral:17923.0
				Recoilm_zoom : <class 'hist.hist.Hist'>	Integ

In [70]:
# Toy dictionary holding two sub-dictionaries with integer values
# (note: the name `a` is reassigned in a later cell)
a = {'papa':{'papa1':1,'papa2':3},'mama':{'mama1':2,'mama2':0} }

In [71]:
# Display the toy dictionary `a`
a

{'papa': {'papa1': 1, 'papa2': 3}, 'mama': {'mama1': 2, 'mama2': 0}}

In [72]:
# Toy dictionary containing only the 'mama' sub-dictionary
# (note: the name `b` is reassigned in a later cell)
b = {'mama':{'mama1':2,'mama2':0} }

In [73]:
# Display the toy dictionary `b`
b

{'mama': {'mama1': 2, 'mama2': 0}}

In [99]:

def accumulate2(dicts):
    '''
    Merge a list of dictionaries, adding up the values found under common keys.

    Values that are dictionaries are merged recursively; every other value is
    combined with ``+=``. The input dictionaries are left unmodified: the first
    value seen for a key is deep-copied before anything is added to it.

    Parameters:
    dicts (list): Dictionaries to be merged.

    Returns:
    dict: The merged dictionary.
    '''
    dicts = list(dicts)  # traversed more than once below, so materialise any iterable
    out = {}
    merged_nested_keys = set()  # nested keys whose sub-dictionaries are already merged
    for d in dicts:
        for key,value in d.items():
            if isinstance(value,dict):
                # get_subdict gathers this key from ALL inputs, so one merge per key is enough
                if key in merged_nested_keys:
                    continue
                merged_nested_keys.add(key)
                out[key] = accumulate2(get_subdict(dicts,key))
            elif key in out:
                out[key] += value
            else:
                # Copy so that `+=` on a mutable value never modifies the input object
                out[key] = copy.deepcopy(value)
    return out

In [100]:
# Two flat dictionaries that share only the key 'b'
i = {'a':1,'b':2}
j = {'b':1,'c':2}

In [101]:
# Merge the two flat dictionaries; the value of the shared key 'b' is summed
accumulate2([i,j])

{'a': 1, 'b': 3, 'c': 2}

In [102]:
# Dictionary mixing a nested dict (float and string values) with a plain integer;
# not referenced again in the cells visible here
k = {'a':{'alpha':0.8,'color':'b'},'b':2}

In [103]:
# Toy input with two top-level entries ('zz' and 'zh'), each holding a
# 'histograms' and a 'cutflow' sub-dictionary split by selection ('sel0', 'sel1').
# Integers and one-letter strings stand in for the leaf values.
# NOTE(review): presumably mimics the layout of the loaded coffea output — confirm.
a = {
    'zz':{
        'histograms':{
            'sel0':{
                'zm':1,
                'recoil':2
            },
            'sel1':{
                'zm':1,
                'recoil':2
            }
        },
        'cutflow':{
            'sel0':'a',
            'sel1':'b'
        }
    },
    'zh':{
        'histograms':{
            'sel0':{
                'zm':1,
                'recoil':2
            },
            'sel1':{
                'zm':1,
                'recoil':2
            }
        },
        'cutflow':{
            'sel0':'a',
            'sel1':'b'
        }
    }
}
# Second toy input containing only the 'zz' entry, with the same contents as a['zz']
b = {
    'zz':{
        'histograms':{
            'sel0':{
                'zm':1,
                'recoil':2
            },
            'sel1':{
                'zm':1,
                'recoil':2
            }
        },
        'cutflow':{
            'sel0':'a',
            'sel1':'b'
        }
    }
}

In [104]:
# 'zh' is present only in `a`, so a single sub-dictionary is returned
get_subdict([a,b],'zh')

[{'histograms': {'sel0': {'zm': 1, 'recoil': 2},
   'sel1': {'zm': 1, 'recoil': 2}},
  'cutflow': {'sel0': 'a', 'sel1': 'b'}}]

In [106]:
# Merge the nested toy inputs: the 'zz' leaves from both are added, 'zh' comes from `a` alone
accumulate2([a,b])

{'zz': {'histograms': {'sel0': {'zm': 2, 'recoil': 4},
   'sel1': {'zm': 2, 'recoil': 4}},
  'cutflow': {'sel0': 'aa', 'sel1': 'bb'}},
 'zh': {'histograms': {'sel0': {'zm': 1, 'recoil': 2},
   'sel1': {'zm': 1, 'recoil': 2}},
  'cutflow': {'sel0': 'a', 'sel1': 'b'}}}