In [10]:
import h5py as h5
import numpy as np
import torch

from root_manager.chunk_generator import ChunkGenerator
from root_manager.settings import ChunkGeneratorConfig, ProcessorConfig, FilterParams

In [2]:
path_to_root = "/net/62/home3/ivkhar/Baikal/data/initial_data/MC_2020/muatm/root/all/10762.root"
h5_file_path = "/net/62/home3/ivkhar/Baikal/data/h5s/baikal_mc2020_multi_split_0924.h5"

In [8]:
proc_cfg = ProcessorConfig(
    center_times= True, 
    calc_tres = False, 
    filter_cfg = FilterParams(
        only_signal = False,
        min_hits = 0,
        min_strings = 0,
        min_Q = 0,
        t_threshold = 100000
    )
)
chunk_generator_cfg=ChunkGeneratorConfig(
            chunk_size = 25,
            processor_params = proc_cfg,
            fields = ['PulsesAmpl', 'PulsesTime', 'Xrel', 'Yrel', 'Zrel', 'num_signal_hits', 'num_signal_strings', 'nu_induced'],
            shuffle_paths = False
        )

gen = ChunkGenerator([path_to_root], chunk_generator_cfg).get_chunks()
df = next(gen)
df_features = df[['PulsesAmpl', 'PulsesTime', 'Xrel', 'Yrel', 'Zrel']]
data = df_features.to_numpy()[:,:5]
for i in range(len(data)):
    data[i] = data[i, :]
    for j in range(len(data[i])):
        data[i, j] = data[i, j].round(3).astype(np.float32)

In [6]:
df['num_signal_hits'].value_counts(sort=True)[0:10]

num_signal_hits,count
i16,u32
3,5207
2,4390
4,4307
5,3191
0,3149
6,2392
7,1765
8,1357
9,1037
10,781


In [7]:
df['num_signal_strings'].value_counts(sort=True)[0:10]

num_signal_strings,count
i8,u32
1,9993
2,9761
3,5074
0,3149
4,2371
5,1328
6,643
7,269
8,92


In [4]:
with h5.File(h5_file_path) as hf:
    print(hf['muatm/raw/labels/part_10762'].keys())
    print(hf['muatm/raw/data/part_10762'].keys())
    labels = hf['muatm/raw/labels/part_10761/data'][:]
    h5ev_starts = hf['muatm/raw/ev_starts/part_10762/data'][:]
    h5data_flat = hf['muatm/raw/data/part_10762/data'][:]

h5data = [[],[],[],[],[]]
for s,e in zip(h5ev_starts[:-1], h5ev_starts[1:]):
    for i in range(len(h5data)):
        h5data[i].append(h5data_flat[s:e,i].round(3))
h5data = np.array(h5data, dtype='O').T

<KeysViewHDF5 ['data']>
<KeysViewHDF5 ['data']>


In [374]:
# Times
h5data[1,1][0:10], data[1,1][0:10]

(array([-2728.46 , -2557.722, -2551.978, -2354.059, -2130.664, -1992.022,
        -1890.892, -1793.888, -1726.919, -1625.827], dtype=float32),
 array([-2728.46 , -2557.721, -2551.977, -2354.058, -2130.664, -1992.022,
        -1890.892, -1793.888, -1726.918, -1625.827], dtype=float32))

In [375]:
# Qs
h5data[1,0][0:10], data[1,0][0:10]

(array([0.63 , 0.819, 0.749, 0.662, 1.234, 1.294, 1.045, 0.699, 1.132,
        0.643], dtype=float32),
 array([0.63 , 0.819, 0.749, 0.662, 1.234, 1.294, 1.045, 0.699, 1.132,
        0.643], dtype=float32))

In [380]:
def my_agg(data):
    value = np.diff(data).sum() + data[0]*1e4 + data[1]*1e6
    return value

arrs_sorted = sorted(data[:,1], key=my_agg)
h5arrs_sorted = sorted(h5data[:,1], key=my_agg)

In [381]:
for i, (arr, h5arr) in enumerate(zip(arrs_sorted, h5arrs_sorted)):
    if len(arr) == len(h5arr) and ((arr-h5arr)**2).sum()>1e-1:
        print(f"{i=}, {arr=}, \n{h5arr=}")
        print(f"{my_agg(arr)=}, \n{my_agg(h5arr)=}")
        print(f"{arr.shape=}, {h5arr.shape=}")
        print(f"{((arr-h5arr)**2).sum()=}")
        continue

i=2175, arr=array([-2630.371, -2598.865, -2396.391, -2308.823, -2247.485, -2197.076,
       -1919.729, -1669.882, -1667.429, -1564.576, -1487.358, -1427.581,
       -1390.628, -1351.047, -1157.54 , -1136.591,  -919.79 ,  -815.798,
        -798.572,  -728.664,  -645.649,  -632.026,  -599.049,  -525.31 ,
        -495.513,  -490.935,  -348.245,  -269.398,  -176.272,  -143.892,
         -36.839,   -17.866,   -16.823,   128.367,   129.689,   178.543,
         192.816,   209.107,   231.153,   282.911,   308.423,   355.39 ,
         362.344,   432.697,   473.945,   482.917,   510.471,   551.416,
         863.865,   873.032,   875.494,   929.326,  1011.585,  1036.494,
        1170.086,  1193.737,  1205.948,  1258.761,  1418.798,  1456.963,
        1488.867,  1501.834,  1535.239,  1551.148,  1558.062,  1896.847,
        2184.203,  2312.502,  2322.261,  2336.764], dtype=float32), 
h5arr=array([-2601.876, -2599.15 , -2421.2  , -2418.695, -2309.146, -2294.953,
       -2293.828, -2280.088, -2212.33

In [398]:
i = 26436
for j in range(i-2,i+2):
    print(arrs_sorted[j][0:10]) 
print('\n')
for j in range(i-2,i+2):
    print(h5arrs_sorted[j][0:10]) 

[-2225.977 -2161.316 -2153.547 -2147.274 -2133.664 -1971.273 -1887.603
 -1779.586 -1725.048 -1597.324]
[-2393.732 -2159.626 -2104.98  -2079.371 -1998.957 -1893.844 -1868.309
 -1755.401 -1741.002 -1736.498]
[-2176.427 -2161.798 -2034.277 -2011.026 -1986.196 -1867.129 -1852.109
 -1760.923 -1753.116 -1739.292]
[-2402.808 -2159.516 -2125.385 -2116.757 -1995.362 -1935.743 -1824.303
 -1819.113 -1814.608 -1787.646]


[-2225.977 -2161.315 -2153.547 -2147.273 -2133.664 -1971.273 -1887.603
 -1779.586 -1725.048 -1597.324]
[-2176.427 -2161.798 -2034.277 -2011.026 -1986.196 -1867.129 -1852.109
 -1760.923 -1753.116 -1739.292]
[-2393.732 -2159.625 -2104.979 -2079.37  -1998.957 -1893.844 -1868.309
 -1755.401 -1741.002 -1736.498]
[-2402.808 -2159.516 -2125.385 -2116.757 -1995.362 -1935.743 -1824.303
 -1819.113 -1814.608 -1787.646]


In [331]:
a = np.diff(arrs_sorted[216][1])
a.sort()
a

ValueError: diff requires input that is at least one dimensional

In [None]:
a = np.diff(arrs_sorted[216, 1])
a.sort()
a

array([  0.5910034,   0.8789673,   1.0019836,   1.1629639,   2.3130493,
         2.3510132,   2.95401  ,   3.4079895,   4.1779785,   4.8720703,
         5.083008 ,   9.335022 ,  11.273926 ,  11.653992 ,  13.247925 ,
        15.374001 ,  15.73584  ,  15.984009 ,  19.317993 ,  22.425999 ,
        23.002197 ,  26.475002 ,  26.87207  ,  27.608994 ,  27.935059 ,
        31.291992 ,  32.484985 ,  33.572    ,  35.866943 ,  36.264893 ,
        36.458008 ,  36.78296  ,  41.02002  ,  41.833984 ,  42.659058 ,
        43.593018 ,  44.25598  ,  44.50415  ,  54.20105  ,  54.867004 ,
        56.682983 ,  58.386017 ,  61.830017 ,  65.63701  ,  72.32202  ,
        73.01404  ,  94.808    ,  95.19214  ,  95.52307  ,  97.47693  ,
        99.25806  ,  99.301025 , 103.37201  , 117.12097  , 124.645996 ,
       139.20404  , 140.97894  , 142.85693  , 144.11899  , 148.71103  ,
       149.29993  , 154.20288  , 159.59302  , 181.36804  , 188.32898  ,
       194.76196  , 225.10901  , 245.63904  , 273.69598  , 280.2

In [312]:
arr**2

array([3882.5364  , 1767.2776  ,   14.0625  ,   10.969344, 1767.2776  ,
       2371.3977  ,   14.0625  ,   14.0625  ,   10.969344, 1767.2776  ,
         10.969344, 3873.444   ,   10.969344,   14.0625  ,   14.0625  ,
       2371.3977  ,   10.969344,  322.38202 , 3882.5364  ,   10.969344,
         10.969344, 1767.2776  , 3873.444   , 1767.2776  , 1767.2776  ,
       3873.444   ,   14.0625  , 1767.2776  , 3873.444   ,   14.0625  ],
      dtype=float32)

In [None]:
for arr in data['PulsesAmpl'].to_numpy()[0:100]:
    isin = False
    for h5arr in np.array(h5data, dtype='O'):
        if np.array_equal(arr.round(2), h5arr.round(1)):
            isin = True
            break
    if not isin:
        print(f"Not found {arr=}")
        break

if isin: print("Ok")

Ok


In [158]:
arrs_sorted[0]

array([0.45707488, 0.54925996, 1.1084076 , 1.0147641 , 1.3423243 ,
       0.45414194, 1.3642868 , 0.1923474 , 1.2159913 , 0.9707401 ,
       0.46262902, 1.3109553 , 0.46508235, 0.3268888 , 1.2114706 ,
       0.68017316, 0.47373992, 4.4352856 , 2.4288769 , 0.8066567 ,
       0.77942866, 0.0568226 , 0.92032105, 0.01334081, 1.1223383 ,
       0.7534976 , 1.1192905 , 1.0429873 , 0.5466738 , 0.9732134 ,
       0.2414653 , 0.8178828 , 0.4950653 , 0.9499361 , 0.57954675,
       0.7830459 ], dtype=float32)

In [None]:
h5arrs_sorted = sorted(, key=np.sum)
h5arrs_sorted[0]

array([0.45707488, 0.54925996, 1.1084076 , 1.0147641 , 1.3423243 ,
       0.45414194, 1.3642868 , 0.1923474 , 1.2159913 , 0.9707401 ,
       0.46262902, 1.3109553 , 0.46508235, 0.3268888 , 1.2114706 ,
       0.68017316, 0.47373992, 4.4352856 , 2.4288769 , 0.8066567 ,
       0.77942866, 0.0568226 , 0.92032105, 0.01334081, 1.1223383 ,
       0.7534976 , 1.1192905 , 1.0429873 , 0.5466738 , 0.9732134 ,
       0.2414653 , 0.8178828 , 0.4950653 , 0.9499361 , 0.57954675,
       0.7830459 ], dtype=float32)

In [159]:
np.array_equal(arrs_sorted, h5arrs_sorted)

False