In [1]:
import h5py
import numpy as np
import pandas as pd 
from pprint import pprint

In [2]:
# Input data files to merge (copy any number of .h5 files from the lartf
# servers into your local working directory before running).
file1 = "tile-id-24-recursive-pedestal_2022_09_06_16_32_46_CDT.h5"
file2 = "tile-id-35-recursive-pedestal_2022_09_07_15_23_03_CDT.h5"
file_list = [file1, file2]

# Record the dtypes of the 'configs' and 'packets' datasets of the first input
# file; they are needed later to create the merged output datasets.
# NOTE(review): only file1 is inspected -- this assumes every file in file_list
# shares the same dtypes; confirm.
# The context manager guarantees the file is closed even if a dataset lookup fails.
with h5py.File(file1, 'r') as f1:
    configs_dtype = f1['configs'].dtype
    packets_dtype = f1['packets'].dtype

In [3]:
# Merge the 'configs' and 'packets' datasets of every input file into one output file.
# The output is opened with 'w' so any stale "merged_file.h5" is truncated and this
# cell can be re-run without manually deleting the file from the working directory.
with h5py.File('merged_file.h5', 'w') as f_out:
    # create empty, resizable (unlimited-length) output datasets
    configs_dset = f_out.create_dataset('configs', (0,), maxshape=(None,), dtype=configs_dtype)
    packets_dset = f_out.create_dataset('packets', (0,), maxshape=(None,), dtype=packets_dtype)

    # append the data of each input file to the output datasets
    for filename in file_list:
        # Inputs are opened read-only: mode 'a' would permit accidental writes to
        # the source data and would silently create a file that does not exist.
        with h5py.File(filename, 'r') as f_in:

            # append configs: grow the output, then fill the newly added tail
            nconfigs = len(configs_dset)
            configs_dset.resize((nconfigs + len(f_in['configs']),))
            configs_dset[nconfigs:] = f_in['configs'][:]

            # append packets: grow the output, then fill the newly added tail
            npackets = len(packets_dset)
            packets_dset.resize((npackets + len(f_in['packets']),))
            packets_dset[npackets:] = f_in['packets'][:]

# No explicit f_out.close() is needed: the with-statement has already closed the file.

In [4]:
# Sanity check: reopen the merged output read-only and list its top-level keys.
# The handle is deliberately left open because the following cells read from it.
merged_file = h5py.File('merged_file.h5', mode='r')
merged_file.keys()

<KeysViewHDF5 ['configs', 'packets']>

In [5]:
# Load the whole merged 'configs' dataset into a numpy array and pretty-print it.
merged_configs_source = merged_file['configs']
merged_configs = np.array(merged_configs_source)
pprint(merged_configs)

array([(1662499966, 1,  1,  11, [ 16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16, 255,   4, 255, 255, 255, 255, 255, 255, 255, 255,   8,   8,   8,   8,   8,   8,   5,  16, 219,  77,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 255, 255, 255, 255, 255, 255, 255, 255,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   1,   0,   0,   0,  15,   0,  96,   0,   0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,   0,  16,   0,   0,   0,   0,   0,  76,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   

In [6]:
# Read the whole merged 'packets' dataset into memory, then wrap it in a
# DataFrame so the notebook renders it as a table.
packets_array = np.array(merged_file['packets'])
merged_packets_df = pd.DataFrame(data=packets_array)
merged_packets_df

Unnamed: 0,io_group,io_channel,chip_id,packet_type,downstream_marker,parity,valid_parity,channel_id,timestamp,dataword,...,shared_fifo,register_address,register_data,direction,local_fifo_events,shared_fifo_events,counter,fifo_diagnostics_enabled,first_packet,receipt_timestamp
0,1,0,0,4,0,0,0,0,1638833022,0,...,0,0,0,1,0,0,0,0,0,0
1,1,4,86,0,1,1,1,36,440254808,23,...,0,36,86,1,0,0,0,0,1,440262200
2,1,0,0,4,0,0,0,0,1638833022,0,...,0,0,0,1,0,0,0,0,0,0
3,1,3,56,0,1,0,1,36,440254816,25,...,0,36,88,1,0,0,0,0,1,440262208
4,1,0,0,4,0,0,0,0,1638833022,0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1668905,1,24,84,0,1,0,1,37,1035484569,22,...,0,101,102,1,0,0,0,0,1,1035491393
1668906,1,0,0,4,0,0,0,0,1638833323,0,...,0,0,0,1,0,0,0,0,0,0
1668907,1,23,54,0,1,1,1,37,1035484581,19,...,0,101,105,1,0,0,0,0,1,1035491405
1668908,1,0,0,4,0,0,0,0,1638833323,0,...,0,0,0,1,0,0,0,0,0,0
