In [125]:
import json
import os

import numpy as np
import yaml

In [126]:
# Dataset selection: the collider / campaign / detector entries become path
# components when locating the yaml files, and `samples` lists the sample
# names whose merge.yaml files are loaded.
process = dict(
    collider='FCCee',
    campaign='spring2021',
    detector='IDEA',
    samples=[
        'p8_ee_ZZ_ecm240',
        'p8_ee_WW_ecm240',
        'p8_ee_ZH_ecm240',
    ],
)

# Per-sample fraction of the total available events to be selected
# (multiplied by the sample's `nevents` in get_fileset).
fraction = dict(
    p8_ee_ZZ_ecm240=0.005,
    p8_ee_WW_ecm240=0.5,
    p8_ee_ZH_ecm240=0.2,
)

In [127]:
def load_yaml_fileinfo(process):
    """Load the ``merge.yaml`` metadata file of every requested sample.

    The files are looked up under
    ``<root>/FCCDicts/yaml/<collider>/<campaign>/<detector>/<sample>/merge.yaml``
    where ``<root>`` is ``/cvmfs/fcc.cern.ch`` when that path exists and the
    local fallback ``./../filesets`` otherwise.

    Parameters
    ----------
    process : dict
        Must provide the keys ``'collider'``, ``'campaign'``, ``'detector'``
        (strings used as path components) and ``'samples'`` (iterable of
        sample names).

    Returns
    -------
    dict
        Maps each sample name to the parsed content of its ``merge.yaml``.

    Raises
    ------
    FileNotFoundError
        If a ``merge.yaml`` file cannot be opened. The underlying ``OSError``
        (which carries the exact path) is chained as the cause.
    """
    onlinesystem_path = '/cvmfs/fcc.cern.ch'
    localsystem_path = './../filesets'
    path = '/'.join(
        [
         'FCCDicts',
         'yaml',
         process['collider'],
         process['campaign'],
         process['detector']
        ])
    if os.path.exists(onlinesystem_path):
        print(f'Connected to {onlinesystem_path}')
        filesystem_path = onlinesystem_path
    else:
        print(onlinesystem_path+' is not available.\nTrying to find local copies of the yaml files ...')
        filesystem_path = localsystem_path
    yaml_dict = {}
    for sample in process['samples']:
        full_path = '/'.join([filesystem_path,path,sample,'merge.yaml'])
        try:
            with open(full_path) as f:
                sample_info = yaml.safe_load(f)
        except OSError as err:
            # Raise a real exception: raising a plain string is itself a
            # TypeError and would hide the actual problem. Only I/O failures
            # are translated; YAML parse errors propagate unchanged.
            raise FileNotFoundError(
                f'Could not find yaml files at {filesystem_path} .'
            ) from err
        print('Loaded : '+full_path)
        yaml_dict[sample] = sample_info
    return yaml_dict

In [218]:
def get_fileset(yaml_dict, fraction, skipbadfiles=True):
    """Pick, for every sample, the leading files that best match a requested event fraction.

    For each sample the files are taken in the order listed in the yaml
    metadata. The selection stops at the file whose cumulative event count is
    closest to ``fraction * nevents``. A summary is printed for every sample.

    Parameters
    ----------
    yaml_dict : dict
        Maps a sample key to a dict with a ``'merge'`` entry providing
        ``'nevents'``, ``'outdir'``, ``'outfiles'``, ``'outfilesbad'`` and
        ``'process'``. Each ``outfiles`` entry is indexed as
        ``[filename, number_of_events, ...]``.
    fraction : dict
        Maps the ``'process'`` name of a sample to the fraction of its
        ``nevents`` that should be selected.
    skipbadfiles : bool, optional
        When true (default), files whose name appears in ``outfilesbad`` are
        dropped before the selection.

    Returns
    -------
    dict
        ``{sample_key: {'files': {outdir + filename: 'events', ...}}}``.

    Raises
    ------
    KeyError
        If a required metadata key, or the fraction for a process, is missing.
    """
    output_fileset_dictionary = {}
    print('_________Loading fileset__________')
    for key, sample_info in yaml_dict.items():
        merge = sample_info['merge']
        nevents = merge['nevents']
        outdir = merge['outdir']
        proc = merge['process']
        # `or []` tolerates an entry that was loaded as None instead of a list.
        outfiles = merge['outfiles'] or []
        outfilesbad = merge['outfilesbad'] or []

        # Remove bad files. All names are filtered in a single pass so that
        # removing one entry cannot shift the position of the others.
        if skipbadfiles and outfilesbad:
            # NOTE(review): bad entries are presumably [filename, ...] rows like
            # `outfiles`; bare filename strings are accepted too - confirm schema.
            bad_names = {
                str(entry) if isinstance(entry, str) else str(entry[0])
                for entry in outfilesbad
            }
            outfiles = [entry for entry in outfiles if str(entry[0]) not in bad_names]

        filenames = [str(entry[0]) for entry in outfiles]
        file_events = np.array([int(entry[1]) for entry in outfiles], dtype=np.int64)
        cumulative_events = np.cumsum(file_events)

        frac = fraction[proc]
        needed_events = frac*nevents

        if cumulative_events.size:
            # Get the index of the cumulative count closest to the needed events.
            index = int(np.abs(cumulative_events - needed_events).argmin())
            assigned_events = cumulative_events[index]
            assigned_files = filenames[:index+1]
        else:
            # No usable file for this sample: report an empty selection.
            assigned_events = 0
            assigned_files = []

        # Summary
        print('----------------------------------')
        print(f'----------{key}---------')
        print('----------------------------------')
        print(f'Total available events = {nevents}')
        print(f'Fraction needed = {frac}')
        print(f'Needed events = {needed_events}')
        print(f'Assigned events = {assigned_events}')
        print(f'Number of files = {len(assigned_files)}')
        print('Files:')

        # Build the per-sample file dictionary while listing the files.
        fileset_by_key = {}
        for file in assigned_files:
            print(f'\t {outdir+file}')
            fileset_by_key[outdir+file] = 'events'
        output_fileset_dictionary[key] = {'files': fileset_by_key}
    return output_fileset_dictionary

In [219]:
# Load the merge.yaml metadata of every sample listed in process['samples'].
yaml_dict = load_yaml_fileinfo(process)

/cvmfs/fcc.cern.ch is not available.
Trying to find local copies of the yaml files ...
Loaded : ./../filesets/FCCDicts/yaml/FCCee/spring2021/IDEA/p8_ee_ZZ_ecm240/merge.yaml
Loaded : ./../filesets/FCCDicts/yaml/FCCee/spring2021/IDEA/p8_ee_WW_ecm240/merge.yaml
Loaded : ./../filesets/FCCDicts/yaml/FCCee/spring2021/IDEA/p8_ee_ZH_ecm240/merge.yaml


In [220]:
# Build the fileset for the configured fractions. The call prints a per-sample
# summary and, as the last expression of the cell, the returned dictionary is
# echoed below it.
get_fileset(yaml_dict,fraction)

_________Loading fileset__________
----------------------------------
----------p8_ee_ZZ_ecm240---------
----------------------------------
Total available events = 59800000
Fraction needed = 0.005
Needed events = 299000.0
Assigned events = 300000
Number of files = 3
Files:
	 /eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_ZZ_ecm240/events_000203378.root
	 /eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_ZZ_ecm240/events_001062578.root
	 /eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_ZZ_ecm240/events_001109319.root
----------------------------------
----------p8_ee_WW_ecm240---------
----------------------------------
Total available events = 10000000
Fraction needed = 0.5
Needed events = 5000000.0
Assigned events = 5000000
Number of files = 50
Files:
	 /eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_WW_ecm240/events_002446962.root
	 /eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8

{'p8_ee_ZZ_ecm240': {'files': {'/eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_ZZ_ecm240/events_000203378.root': 'events',
   '/eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_ZZ_ecm240/events_001062578.root': 'events',
   '/eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_ZZ_ecm240/events_001109319.root': 'events'}},
 'p8_ee_WW_ecm240': {'files': {'/eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_WW_ecm240/events_002446962.root': 'events',
   '/eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_WW_ecm240/events_003660780.root': 'events',
   '/eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_WW_ecm240/events_003871346.root': 'events',
   '/eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_WW_ecm240/events_006111023.root': 'events',
   '/eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_WW_ecm240/events_006917088.root': 'events'

In [107]:
# Scratch cell: unpack the merge metadata of a single sample for inspection.
# The sample key gets its own name so the `process` configuration dict is not
# overwritten (re-running load_yaml_fileinfo(process) would otherwise fail),
# and every lookup uses the same key instead of the value read from the yaml.
sample_name = 'p8_ee_ZZ_ecm240'
merge_info = yaml_dict[sample_name]['merge']
nbad = merge_info['nbad']
ndone = merge_info['ndone']
nevents = merge_info['nevents']
outdir = merge_info['outdir']
outfiles = merge_info['outfiles']
outfilesbad = merge_info['outfilesbad']
process_name = merge_info['process']
size = merge_info['size']
sumofweights = merge_info['sumofweights']

In [110]:
import numpy as np

In [111]:
# Convert the list of output-file entries into a numpy array for column slicing.
out = np.array(outfiles)

In [120]:
# Second column of `out` cast to int32 (used as the per-file event count in get_fileset).
out[:,1].astype('int32')

array([100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
       100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,
      

In [205]:
# Running total of the per-file event counts, in file order.
c = np.cumsum(out[:,1].astype('int32'))
c

array([  100000,   200000,   300000,   400000,   500000,   600000,
         700000,   800000,   900000,  1000000,  1100000,  1200000,
        1300000,  1400000,  1500000,  1600000,  1700000,  1800000,
        1900000,  2000000,  2100000,  2200000,  2300000,  2400000,
        2500000,  2600000,  2700000,  2800000,  2900000,  3000000,
        3100000,  3200000,  3300000,  3400000,  3500000,  3600000,
        3700000,  3800000,  3900000,  4000000,  4100000,  4200000,
        4300000,  4400000,  4500000,  4600000,  4700000,  4800000,
        4900000,  5000000,  5100000,  5200000,  5300000,  5400000,
        5500000,  5600000,  5700000,  5800000,  5900000,  6000000,
        6100000,  6200000,  6300000,  6400000,  6500000,  6600000,
        6700000,  6800000,  6900000,  7000000,  7100000,  7200000,
        7300000,  7400000,  7500000,  7600000,  7700000,  7800000,
        7900000,  8000000,  8100000,  8200000,  8300000,  8400000,
        8500000,  8600000,  8700000,  8800000,  8900000,  9000

In [196]:
# Sanity check of the closest-match lookup: the cumulative total nearest to 140000.
c[np.abs(c - 140000).argmin()]

100000