# do some timing tests with a large input dictionary of events

In [1]:
import numpy as np
import hepfile as hf
import cProfile

In [2]:
# first construct a HUGE dictionary with a lot of keys and a lot of entries

nKeys = 10 #int(1e6)
nDatasets = 100 # number of datasets in each group
nEvents = 50

lengths = np.array([100, int(1e3), int(1e4), 10])

dictlist = []

for _ in range(nEvents):
    
    d = {}
    
    for i in range(nKeys):
        key = f'key{i}'
        subdict = {}
        # add datasets with random number of values
        num = np.random.choice(lengths)
        for j in range(nDatasets):
            subkey = f'{key}_{j}'
            subdict[subkey] = np.random.randn(num)
        
        # add singletons
        for j in range(10):
            s = f'singleton_{j}'
            d[s] = np.random.randn()
            
        d[key] = subdict
        
    dictlist.append(d)

In [3]:
outfile = 'dict-time-test.h5'
with cProfile.Profile() as p:
    data = hf.dict_tools.dictlike_to_hepfile(dictlist, outfile, write_hepfile=False)
    p.print_stats()

         521134 function calls in 19.041 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 cProfile.py:41(print_stats)
        1    0.000    0.000    0.000    0.000 cProfile.py:51(create_stats)
     1010    0.000    0.000    0.000    0.000 dict_tools.py:201(_get_dtype)
        1    0.000    0.000   19.041   19.041 dict_tools.py:25(dictlike_to_hepfile)
        1    3.760    3.760   19.040   19.040 dict_tools.py:84(_classic)
        1    0.000    0.000    0.000    0.000 pstats.py:107(__init__)
        1    0.000    0.000    0.000    0.000 pstats.py:117(init)
        1    0.000    0.000    0.000    0.000 pstats.py:136(load_stats)
       10    0.000    0.000    0.000    0.000 write.py:106(create_group)
        1    0.000    0.000    0.000    0.000 write.py:20(initialize)
     1010    0.001    0.000    0.001    0.000 write.py:216(create_dataset)
       50    4.900    0.098   15.275    

In [4]:
b = hf.create_single_bucket(data)
np.append(b['singleton_0'], 'foo')
data['singleton_0']

[-1.3406343564130603,
 -1.9040204108581327,
 0.3410229727572975,
 2.5718217332262316,
 0.11322277256233405,
 0.09122450203936923,
 0.2522505904297621,
 0.998991240237649,
 -0.40073669568592024,
 0.0962760318935378,
 0.971078104110865,
 0.27695825161966797,
 -0.28653334883107434,
 0.41824367831373194,
 0.2557894323615965,
 1.3761347110996063,
 2.0475615103874563,
 0.3125857370586917,
 -0.08398585549344012,
 -0.03470539914575879,
 -0.4126119266622106,
 -0.6696992068476467,
 -1.5857721748508424,
 0.7795768170143138,
 0.2262387234303102,
 -1.0358417398382207,
 -0.101293383332934,
 2.2740120350108697,
 -0.008693916243900552,
 0.4436551994055278,
 -0.975443334444427,
 0.9222716335306036,
 -0.5205133130780334,
 1.4314057141142114,
 0.27866619423553246,
 0.6875018361679005,
 -0.5508736427619334,
 1.7492110710299729,
 2.193813225802522,
 1.297876840433892,
 -0.8734647880123562,
 -0.31059645546370845,
 0.5527457381115688,
 -0.27967712306655756,
 -1.4468376069019682,
 -0.6172781428862448,
 0.6107

In [5]:
data.keys()

dict_keys(['_GROUPS_', '_MAP_DATASETS_TO_COUNTERS_', '_LIST_OF_COUNTERS_', '_SINGLETONS_GROUP_/COUNTER', '_MAP_DATASETS_TO_DATA_TYPES_', '_META_', 'singleton_0', 'singleton_1', 'singleton_2', 'singleton_3', 'singleton_4', 'singleton_5', 'singleton_6', 'singleton_7', 'singleton_8', 'singleton_9', 'key0/nkey0', 'key0/key0_0', 'key0/key0_1', 'key0/key0_2', 'key0/key0_3', 'key0/key0_4', 'key0/key0_5', 'key0/key0_6', 'key0/key0_7', 'key0/key0_8', 'key0/key0_9', 'key0/key0_10', 'key0/key0_11', 'key0/key0_12', 'key0/key0_13', 'key0/key0_14', 'key0/key0_15', 'key0/key0_16', 'key0/key0_17', 'key0/key0_18', 'key0/key0_19', 'key0/key0_20', 'key0/key0_21', 'key0/key0_22', 'key0/key0_23', 'key0/key0_24', 'key0/key0_25', 'key0/key0_26', 'key0/key0_27', 'key0/key0_28', 'key0/key0_29', 'key0/key0_30', 'key0/key0_31', 'key0/key0_32', 'key0/key0_33', 'key0/key0_34', 'key0/key0_35', 'key0/key0_36', 'key0/key0_37', 'key0/key0_38', 'key0/key0_39', 'key0/key0_40', 'key0/key0_41', 'key0/key0_42', 'key0/key0_