# Timing tests with a large input: a list of per-event dictionaries

In [1]:
import numpy as np
import hepfile as hf
import cProfile

In [2]:
# Build a large list of per-event dictionaries to use as timing input.
#
# Each event dictionary holds:
#   * nSingletons scalar entries named 'singleton_<j>' (one float each), and
#   * nKeys groups named 'key<i>', each a sub-dictionary of nDatasets float
#     arrays named 'key<i>_<j>'. All arrays within one group share a single
#     length drawn at random from `lengths`.

nKeys = 10 #int(1e6)
nDatasets = 100 # number of datasets in each group
nEvents = 50
nSingletons = 10 # number of scalar (singleton) entries in each event

lengths = np.array([100, int(1e3), int(1e4), 10])

# Seeded generator so that Restart & Run All reproduces the same input data.
SEED = 42
rng = np.random.default_rng(SEED)

dictlist = []

for _ in range(nEvents):

    d = {}

    # Add the singletons once per event. They do not depend on the group
    # index, so generating them inside the group loop would only overwrite
    # the same keys nKeys times.
    for j in range(nSingletons):
        d[f'singleton_{j}'] = rng.standard_normal()

    for i in range(nKeys):
        key = f'key{i}'
        # every dataset in this group gets the same randomly chosen length
        num = rng.choice(lengths)
        d[key] = {f'{key}_{j}': rng.standard_normal(num) for j in range(nDatasets)}

    dictlist.append(d)

In [3]:
# Profile the conversion of `dictlist` with hepfile's dictlike_to_hepfile.
# The returned `data` is indexed like a dictionary in later cells.
# write_hepfile=False is passed, so presumably nothing is written to
# `outfile` here — TODO confirm against the hepfile documentation.
outfile = 'dict-time-test.h5'
with cProfile.Profile() as p:
    data = hf.dict_tools.dictlike_to_hepfile(dictlist, outfile, write_hepfile=False)
    # NOTE(review): the stats are printed from inside the profiling context,
    # which is why cProfile's own print_stats/create_stats rows show up in the
    # listing. No sort key is given, so rows are ordered by standard name.
    p.print_stats()

         1082184 function calls (980084 primitive calls) in 10.962 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    51050    0.022    0.000    6.110    0.000 <__array_function__ internals>:177(append)
    51050    0.028    0.000    2.995    0.000 <__array_function__ internals>:177(concatenate)
    51050    0.017    0.000    3.009    0.000 <__array_function__ internals>:177(ravel)
        1    0.000    0.000    0.000    0.000 cProfile.py:41(print_stats)
        1    0.000    0.000    0.000    0.000 cProfile.py:51(create_stats)
     1010    0.000    0.000    0.000    0.000 dict_tools.py:201(_get_dtype)
        1    0.000    0.000   10.961   10.961 dict_tools.py:25(dictlike_to_hepfile)
        1    3.872    3.872   10.961   10.961 dict_tools.py:84(_classic)
    51050    0.005    0.000    0.005    0.000 fromnumeric.py:1751(_ravel_dispatcher)
    51050    0.030    0.000    2.973    0.000 fromnumeric.py:1755(ravel)
    51050  

In [6]:
# Create a single bucket from `data`, then display data['singleton_0'].
b = hf.create_single_bucket(data)
# NOTE(review): np.append returns a new array and does not modify its input,
# and the result is discarded here, so this line changes neither `b` nor
# `data`. Presumably the cell is meant to check that the bucket does not
# alias `data` — confirm the intent; an in-place write would be needed to
# actually test that.
np.append(b['singleton_0'], 'foo')
# The last expression is what the cell displays (the full array).
data['singleton_0']

array([-0.96192359, -2.4591357 ,  0.26281371,  0.05946111,  1.82021894,
       -0.11099926,  2.09295916, -0.19002159, -2.06970775, -0.25566582,
       -1.4202846 , -0.45650447, -0.81421407,  0.24448263, -0.85675477,
       -0.80520389,  0.03045792, -0.42823741, -2.90865473,  0.40459474,
       -2.32842869, -2.27280707, -1.12248668,  0.98277495,  0.84029583,
       -0.50844967, -1.73398583, -1.26129256, -0.65769473,  0.81304347,
       -0.27647831, -0.38201542, -0.29904988,  0.06380268,  1.40876373,
       -0.9023871 ,  0.74338229, -0.39209027,  0.01848231, -0.80334902,
        0.63735249,  0.05475002, -1.4807117 ,  1.54655681, -1.78022351,
        1.14018735,  0.67025687,  0.11143643, -0.67829612,  0.82063902])

In [7]:
# List every key of the converted `data` dictionary: the metadata entries
# (e.g. '_GROUPS_', '_META_'), the singletons, and the '<group>/<dataset>' keys.
data.keys()

dict_keys(['_GROUPS_', '_MAP_DATASETS_TO_COUNTERS_', '_LIST_OF_COUNTERS_', '_SINGLETONS_GROUP_/COUNTER', '_MAP_DATASETS_TO_DATA_TYPES_', '_META_', 'singleton_0', 'singleton_1', 'singleton_2', 'singleton_3', 'singleton_4', 'singleton_5', 'singleton_6', 'singleton_7', 'singleton_8', 'singleton_9', 'key0/nkey0', 'key0/key0_0', 'key0/key0_1', 'key0/key0_2', 'key0/key0_3', 'key0/key0_4', 'key0/key0_5', 'key0/key0_6', 'key0/key0_7', 'key0/key0_8', 'key0/key0_9', 'key0/key0_10', 'key0/key0_11', 'key0/key0_12', 'key0/key0_13', 'key0/key0_14', 'key0/key0_15', 'key0/key0_16', 'key0/key0_17', 'key0/key0_18', 'key0/key0_19', 'key0/key0_20', 'key0/key0_21', 'key0/key0_22', 'key0/key0_23', 'key0/key0_24', 'key0/key0_25', 'key0/key0_26', 'key0/key0_27', 'key0/key0_28', 'key0/key0_29', 'key0/key0_30', 'key0/key0_31', 'key0/key0_32', 'key0/key0_33', 'key0/key0_34', 'key0/key0_35', 'key0/key0_36', 'key0/key0_37', 'key0/key0_38', 'key0/key0_39', 'key0/key0_40', 'key0/key0_41', 'key0/key0_42', 'key0/key0_