In [3]:
import sys
from sys import byteorder
import warnings
import numpy as np
import h5py
import tables as tb
import pandas as pd

sys.path.append("/home/e78368jw/Documents/MULE/")
import packs.proc.processing_utils as procutil

def flatten(xss):
    return [x for xs in xss for x in xs]



def generate_rwf_header(samples):
### currently stored up here, move later
    return np.dtype([
            ('event_number', np.uint32), 
            ('channels', np.int32),
            ('rwf', np.float32, (samples,))
        ])


def read_defaults_WD2(file, byte_order):
    '''
    Provided with an open file (at the header), will provide the relevant information required
    '''

    event_number    = int.from_bytes(file.read(4), byteorder=byte_order)
    timestamp       = int.from_bytes(file.read(8), byteorder=byte_order)
    samples         = int.from_bytes(file.read(4), byteorder=byte_order)
    sampling_period = int.from_bytes(file.read(8), byteorder=byte_order)

    return (event_number, timestamp, samples, sampling_period)


def procssess_header(file_path, byte_order = None):
    '''
    Collect the relevant information from the file's header, and determine if its valid 

    Header is formatted for WD2 as shown:
        Event number    -> uint32 (4 bytes)
        Timestamp       -> uint64 (8 bytes)
        Samples         -> uint32 (4 bytes)
        Sampling Period -> uint64 (8 bytes)
        (OPTIONAL)
        Channels        -> int32 (8 bytes)
    
    Waveform data is 4-byte float (float32).

    This extra optional channel poses problems, so need to consider it.
    The rest are all as expected.

    Parameters
    ----------

    Returns
    -------
    wdtype - The data type format required for collecting the data from the binary
    '''

    # ensure you're using the right byteorder. If you take the data from one machine to another
    # of differing endianness, you may have issues here!
    if byte_order == None:
        warnings.warn("No byte order provided. This may cause issues if transferring data between machines.")
        byte_order = byteorder
    elif (byte_order != 'little') and (byte_order != 'big'):
        raise Exception(f'Invalid byte order provided: {byteorder}. Please provide the correct byte order for your machine.')
    # open file
    file = open(file_path, 'rb')

    event_number, timestamp, samples, sampling_period = read_defaults_WD2(file, byte_order)
    # attempt to read channels
    channels        = int.from_bytes(file.read(4), byteorder=byte_order)

    # then read in a full collection of data, and see if the following header makes sense.
    dataset         = file.read(4*samples*channels)

    # reread it all in and validate the results
    event_number_1, timestamp_1, samples_1, sampling_period_1 = read_defaults_WD2(file, byte_order)

    # check that event header is as expected
    if (event_number_1 -1 == event_number) and (samples_1 == samples) and sampling_period_1 == (sampling_period):
        print(f"{channels} channels detected. Processing accordingly...")

        # generate data type
        wdtype = np.dtype([
            ('event_number', np.uint32), 
            ('timestamp', np.uint64), 
            ('samples', np.uint32), 
            ('sampling_period', np.uint64), 
            ('channels', np.int32),
            ] + 
            [(f'chan_{i+1}', np.float32, (samples,)) for i in range(0,channels)]
        )

        file.close()
        return wdtype, event_number, timestamp, samples, sampling_period, channels
    else:
        print(f"Single channel detected. If you're expecting more channels, something has gone wrong.\nProcessing accordingly...")
        channels = 1

            
         # generate data type
        wdtype = np.dtype([
            ('event_number', np.uint32), 
            ('timestamp', np.uint64), 
            ('samples', np.uint32), 
            ('sampling_period', np.uint64),
            ('chan_1', np.float32, (samples,))
        ])   
            
        file.close()
        return wdtype, event_number, timestamp, samples, sampling_period, channels

def binary_to_h5(file_path, wdtype, save_path, channels, samples):
    '''
    Function that uses the provided datatype from the header, creates the h5 dataframe and saves the binary
    '''
    # opens file
    with open(file_path, 'rb') as file:
        data = np.fromfile(file, dtype=wdtype)
    


    # separates the waveforms from the rest of the data
    # For many channels, formatted as:
    #      waveforms[0]     <- 0th event
    #      waveforms[0][0]  <- 0th event, 0th channel
    #      waveforms[0][1]  <- 0th event, 1st channel
    #
    # For singular channels: 
    #      waveforms[0]     <- 0th event
    #      waveforms[0][0]  <- 0th event, 0th channel
    #      waveforms[0][1]  <- IndexError: list index out of range
    
    # remove data component of dtype for event_information table
    e_dtype = np.dtype(wdtype.descr[:-channels])
    print(f'e_dtype:\n{e_dtype}')
    # if only one channel, select relevant information. Otherwise, split event by channel
    if channels == 1:
        event_information = [list(data[i])[:4] for i in range(len(data))]
        waveform = [[(data[j][0], 0, list(data[j])[-i:][0]) for i in reversed(range(1, channels+1))] for j in range(len(data))]
    else:
        event_information = [list(data[i])[:5] for i in range(len(data))]
        waveform = [[(data[j][0], data[j][4] - i, list(data[j])[-i:][0]) for i in reversed(range(1, channels+1))] for j in range(len(data))]

    # convert to list of tuples and then structured numpy array
    event_information = list(map(tuple, event_information))
    event_information = np.array(event_information, dtype = e_dtype)
    flat_rwf = np.array(flatten(waveform), dtype = generate_rwf_header(samples))
    # write event information
    with h5py.File(save_path, 'w') as h5f:
        
        h5f.create_dataset('event_info', data=event_information)
        # write waveforms
        h5f.create_dataset('raw_wf', data=flat_rwf)

In [4]:
path = '/home/e78368jw/Downloads/tmp_out/quick_test/lol.bin'
x = procutil.process_header(path)
print(x)

Single channel detected. If you're expecting more channels, something has gone wrong.
Processing accordingly...


RuntimeError: Unable to decode raw waveforms that have sample size zero. In wavedump 2, when collecting data from a single channel make sure that 'multiple channels per file' isn't checked.

In [5]:
print(x)

NameError: name 'x' is not defined

In [6]:
path2 = '/home/e78368jw/Documents/MULE/packs/tests/data/one_channel_WD2.bin'
x2 = procutil.process_header(path2)


Single channel detected. If you're expecting more channels, something has gone wrong.
Processing accordingly...


In [7]:
print(x2)

(dtype([('event_number', '<u4'), ('timestamp', '<u8'), ('samples', '<u4'), ('sampling_period', '<u8'), ('chan_1', '<f4', (12500,))]), 12500, 8, 1)


In [8]:
print(sys.byteorder)

little


In [9]:
try:
    file = open(path, 'rb')
    y = procutil.read_defaults_WD2(file, 'little')
    print(y)
finally:
    file.close()

(0, 67355, 0, 8)


In [10]:
try:
    file = open(path2, 'rb')
    y = procutil.read_defaults_WD2(file, 'little')
    print(y)
finally:
    file.close()

(0, 94024, 12500, 8)


In [11]:
file = open(path, 'rb')

In [12]:
print(int.from_bytes(file.read(4), byteorder='little')) # event number
print(int.from_bytes(file.read(8), byteorder='little')) # timestamp
print(file.read(4))                                     # samples
print(int.from_bytes(file.read(8), byteorder='little')) # sample size

0
67355
b'\x00\x00\x00\x00'
8


In [13]:
print(int.from_bytes(file.read(4), byteorder='little')) # channels

1


In [14]:
dataset         = file.read(4*12500*1)

In [15]:
file.close()

In [16]:
binary_to_h5(path, x[0], 'tmp_file.h5', 1, 67355)

NameError: name 'x' is not defined

In [17]:

dtype1, _, _, samples1, _, channels1 = process_header('test_data.bin')

binary_to_h5('test_data.bin', dtype1, 'output_data.h5', channels1, samples1)



dtype2, _, _, samples2, _, channels2 = process_header('three_channels.bin')
binary_to_h5('three_channels.bin', dtype2, 'output_data_three_bins.h5', channels2, samples2)


NameError: name 'process_header' is not defined

In [18]:
#print(dtype1)
#print(dtype2)

with h5py.File('data_test', 'w') as h5f:
    data = h5f.create_group("data")
    data.create_dataset('test_bay_1', data = [0, 0, 0, 0])
    data.create_dataset('test_bay_2', data = [1, 1, 1, 1])

In [19]:
with h5py.File('data_test', 'a') as h5f:
    evt_info = h5f.require_group('data')

    evt_info.create_dataset('magical_shite_1', data = [3,3,3,3])

### extracting the columns and data again

In [20]:
with h5py.File('output_data_three_bins.h5', 'r') as h5file:
    data = h5file['raw_wf'][:]
    columns = h5file['raw_wf'].dtype.names
print(data) 
print("===================")
print(data[0])

FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = 'output_data_three_bins.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [21]:
x = pd.DataFrame(data.tolist(), columns = columns)

AttributeError: 'Group' object has no attribute 'tolist'

In [22]:
import numpy as np

In [23]:
# generate data type
wdtype = np.dtype([
    ('event_number', np.uint32), 
    ('timestamp', np.uint64), 
    ('samples', np.uint32), 
    ('sampling_period', np.uint64),
    ('chan_1', np.float32, (2500,))
])  

In [24]:
print(type(wdtype))
print(isinstance(wdtype, np.dtype))

<class 'numpy.dtypes.VoidDType'>
True


## trying with the generalised function

In [25]:
import sys, os
import pandas as pd
sys.path.append("/home/e78368jw/Documents/MULE/")
from packs.proc import processing_utils as proc
from packs.core import io as m_io
from packs.types.types             import generate_wfdtype
from packs.types.types             import rwf_type
from packs.types.types             import event_info_type

In [26]:

MULE_dir = '../MULE'
file_path  = MULE_dir + '/packs/tests/data/three_channels_WD2.bin'    

# collect relevant data from output
check_file = MULE_dir + '/packs/tests/data/three_channels_WD2.h5'
check_data = m_io.load_rwf_info(check_file, 1000)

channels = 3
samples = 1000
wdtype = generate_wfdtype(channels, samples)
with open(file_path, 'rb') as file:
    # read in data
    data = proc.read_binary(file, wdtype)

event_info, rwf = proc.format_wfs(data, wdtype, samples, channels)
# check rwf info matches expected


In [27]:
display(pd.DataFrame(list(map(list, rwf)), columns = rwf_type(samples).names))
display(pd.DataFrame(list(map(list, event_info)), columns = event_info_type.names))

Unnamed: 0,event_number,channels,rwf
0,0,0,"[0.0, -0.384, 0.192, -0.384, -0.256, -0.512, -..."
1,0,1,"[-0.256, 0.448, 0.32, 0.128, -0.64, -0.32, -0...."
2,0,2,"[0.512, 0.128, 0.512, 0.128, 0.576, 0.448, 0.0..."
3,1,0,"[-0.128, -0.128, -0.128, 0.448, 0.32, 0.32, 0...."
4,1,1,"[-0.064, -0.448, -0.384, -0.704, -0.576, -0.12..."
...,...,...,...
241,80,1,"[-0.064, -0.384, -0.64, -0.256, -0.256, -0.384..."
242,80,2,"[0.32, 0.064, 0.064, 0.576, 0.064, 0.256, -0.3..."
243,81,0,"[0.192, 0.256, 0.256, 0.256, 0.192, 0.256, -0...."
244,81,1,"[-0.256, -0.384, -0.064, -0.256, -0.256, -0.38..."


Unnamed: 0,event_number,timestamp,samples,sampling_period,channels
0,0,1998268,1000,8,3
1,1,67506435,1000,8,3
2,2,132498360,1000,8,3
3,3,195997406,1000,8,3
4,4,257998463,1000,8,3
...,...,...,...,...,...
77,77,4820499947,1000,8,3
78,78,4882990612,1000,8,3
79,79,4945989984,1000,8,3
80,80,5007490783,1000,8,3


In [25]:
print(type(check_data))
display(check_data)


<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,event_number,channels,rwf
0,0,0,"[0.0, -0.384, 0.192, -0.384, -0.256, -0.512, -..."
1,0,1,"[-0.256, 0.448, 0.32, 0.128, -0.64, -0.32, -0...."
2,0,2,"[0.512, 0.128, 0.512, 0.128, 0.576, 0.448, 0.0..."
3,1,0,"[-0.128, -0.128, -0.128, 0.448, 0.32, 0.32, 0...."
4,1,1,"[-0.064, -0.448, -0.384, -0.704, -0.576, -0.12..."
...,...,...,...
241,80,1,"[-0.064, -0.384, -0.64, -0.256, -0.256, -0.384..."
242,80,2,"[0.32, 0.064, 0.064, 0.576, 0.064, 0.256, -0.3..."
243,81,0,"[0.192, 0.256, 0.256, 0.256, 0.192, 0.256, -0...."
244,81,1,"[-0.256, -0.384, -0.064, -0.256, -0.256, -0.38..."


In [14]:
print(type(rwf))
display(rwf)

<class 'numpy.ndarray'>


array([( 0, 0, [ 0.   , -0.384,  0.192, -0.384, -0.256, -0.512, -0.064, -0.32 ,  0.064, -0.128,  0.128,  0.512,  0.128, -0.064,  0.128,  0.   ,  0.192, -0.192,  0.   , -0.32 ,  0.256, -0.32 ,  0.256, -0.064,  0.   ,  0.192, -0.128, -0.128,  0.192, -0.128,  0.512,  0.32 ,  0.256,  0.   ,  0.32 , -0.128,  0.192, -0.128,  0.192, -0.128, -0.064, -0.064,  0.   , -0.448, -0.192, -0.128,  0.256, -0.32 ,  0.   , -0.576, -0.256, -0.256,  0.32 , -0.128,  0.   ,  0.064,  0.384, -0.192, -0.064,  0.064,  0.512,  0.192,  0.256,  0.32 ,  0.32 , -0.064,  0.32 , -0.128, -0.064, -0.512, -0.192, -0.256,  0.   , -0.32 , -0.256, -0.384, -0.128, -0.064,  0.   ,  0.   ,  0.128, -0.128,  0.256, -0.064, -0.064, -0.192,  0.512, -0.064,  0.256,  0.32 ,  0.128,  0.   ,  0.192,  0.   , -0.128, -0.576, -0.192, -0.384,  0.   ,  0.   ,  0.128, -0.128,  0.64 , -0.064,  0.256,  0.192,  0.128, -0.512, -0.512, -0.704,  0.   , -0.128,  0.32 ,  0.192,  0.192, -0.064,  0.256, -0.256,  0.256, -0.064,  0.192, -0.256,  0.064, 

In [18]:
from packs.core import io as ioioio


In [20]:
x = ioioio.load_rwf_info('new_location.h5', samples = 1000)

In [27]:
display(x)
print(x[x.event_number == 0].to_numpy())
print(type(x[x.event_number == 0].to_numpy()))

Unnamed: 0,event_number,channels,rwf
0,0,0,"[0.0, -0.576, -0.384, -0.64, -0.384, -0.384, -..."
1,1,0,"[-0.512, -0.768, -0.192, -0.256, -0.064, -0.44..."
2,2,0,"[-0.256, -0.384, -0.128, 0.0, 0.064, 0.064, 0...."
3,3,0,"[-0.768, -0.768, -0.576, -0.256, -0.32, -0.704..."
4,4,0,"[-0.832, -0.64, -0.704, -0.64, -0.448, -0.32, ..."
...,...,...,...
95,116,0,"[-0.576, -0.256, -0.384, -0.064, -0.64, -0.512..."
96,117,0,"[0.128, -0.192, 0.128, -0.64, -0.256, -0.192, ..."
97,118,0,"[0.32, 0.384, 0.32, -0.384, -0.384, -0.512, -0..."
98,119,0,"[-0.064, -0.384, -0.576, -0.64, -0.448, 0.064,..."


[[0 0
  array([ 0.   , -0.576, -0.384, ..., -0.384, -0.32 , -0.256], dtype=float32)]]
<class 'numpy.ndarray'>


In [2]:

dtype1, _, _, samples1, _, channels1 = process_header('test_data.bin')

binary_to_h5('test_data.bin', dtype1, 'output_data.h5', channels1, samples1)



dtype2, _, _, samples2, _, channels2 = process_header('three_channels.bin')
binary_to_h5('three_channels.bin', dtype2, 'output_data_three_bins.h5', channels2, samples2)


NameError: name 'process_header' is not defined

In [28]:
proc.process_bin_WD2('three_channels.bin', 'three_channels_chunk.h5', counts = 10, overwrite = True)

three_channels_chunk.h5
3 channels detected. Processing accordingly...
Chunking by 10...
Processing Finished!




True

In [4]:
import os

filename, extension = os.path.splitext(x)
print(filename, extension)

NameError: name 'x' is not defined

In [5]:
import h5py

In [2]:
import configparser

In [25]:
config = configparser.ConfigParser()

config.read('/home/e78368jw/Documents/MULE/packs/configs/process_WD2_3channel.conf')

#ch0 = config.get('required','optional')
#print(ch0)

['/home/e78368jw/Documents/MULE/packs/configs/process_WD2_3channel.conf']

In [17]:
config.sections()

['required', 'optional']

In [21]:
for key in config['required']:
    print(key)
    print(config['required'][key])

wavedump_edition
2
file_path
'/path/to/file.bin'
save_path
'/path/to/file.h5'


In [28]:
x = dict(config.items('required'))
x['process']

"'decode'"

In [29]:
x = {}

for section in config.sections():
    for key in config[section]:
        x[key] = config[section][key]
        #print(q, type(q))
        #print(q.isdigit())
print(x)

{'process': "'decode'", 'wavedump_edition': '2', 'file_path': "'/path/to/file.bin'", 'save_path': "'/path/to/file.h5'", 'overwrite': 'True', 'counts': '-1'}


In [8]:
x = 'file_name23.h5'
new_x = x.split('.')
print(new_x[0])
print(new_x[0][:-2])

file_name23
file_name


In [2]:
proc.process_bin_WD2('three_channels.bin', 'new_location.h5', overwrite = False)



new_location21.h5
3 channels detected. Processing accordingly...
No chunking selected...




In [8]:
evt_total = []
rwf_total = []
with h5py.File('three_channels_WD2.h5') as f:
    evt_info = f.get('event_information')
    for i in evt_info.keys():
                    q = evt_info.get(str(i))
                    for j in q:
                        evt_total.append(j)
    rwf_info = f.get('rwf')
    for i in rwf_info.keys():
                    q = rwf_info.get(str(i))
                    for j in q:
                        rwf_total.append(j)

In [12]:
print(h5_data)
print(rwf_total[2])

[(0, 1998268, 1000, 8, 3), (1, 67506435, 1000, 8, 3), (2, 132498360, 1000, 8, 3), (3, 195997406, 1000, 8, 3), (4, 257998463, 1000, 8, 3), (5, 320505979, 1000, 8, 3), (6, 383497026, 1000, 8, 3), (7, 445497098, 1000, 8, 3), (8, 507496928, 1000, 8, 3), (9, 570497289, 1000, 8, 3), (10, 632997863, 1000, 8, 3), (11, 695497200, 1000, 8, 3), (12, 758496152, 1000, 8, 3), (13, 820997020, 1000, 8, 3), (14, 882496907, 1000, 8, 3), (15, 945995966, 1000, 8, 3), (16, 1007999656, 1000, 8, 3), (17, 1070504581, 1000, 8, 3), (18, 1132996098, 1000, 8, 3), (19, 1195996239, 1000, 8, 3), (20, 1257496495, 1000, 8, 3), (21, 1320996727, 1000, 8, 3), (22, 1382496872, 1000, 8, 3), (23, 1445496323, 1000, 8, 3), (24, 1507995753, 1000, 8, 3), (25, 1570996597, 1000, 8, 3), (26, 1632495785, 1000, 8, 3), (27, 1695994820, 1000, 8, 3), (28, 1757495935, 1000, 8, 3), (29, 1820507051, 1000, 8, 3), (30, 1882995585, 1000, 8, 3), (31, 1945994537, 1000, 8, 3), (32, 2007497118, 1000, 8, 3), (33, 2070995360, 1000, 8, 3), (34, 213

In [16]:
import

<class 'list'>


In [2]:
proc.process_bin_WD2('test_data.bin', 'new_location.h5', overwrite = False)
proc.process_bin_WD2('three_channels.bin', 'three_channels_gen.h5', overwrite = True)

proc.process_bin_WD2('test_data.bin', 'new_location_no_chunk.h5', overwrite = True)
proc.process_bin_WD2('three_channels.bin', 'three_channels_gen_no_chunk.h5', overwrite = True)

new_location_3.h5
Single channel detected. If you're expecting more channels, something has gone wrong.
Processing accordingly...
No chunking selected...
three_channels_gen.h5
3 channels detected. Processing accordingly...
No chunking selected...
new_location_no_chunk.h5
Single channel detected. If you're expecting more channels, something has gone wrong.
Processing accordingly...
No chunking selected...
three_channels_gen_no_chunk.h5
3 channels detected. Processing accordingly...
No chunking selected...




In [6]:
spath = list('test_data_gen.h5')
spath.insert(1, 'l')
new_path = ''.join(spath)
print(new_path)

tlest_data_gen.h5


In [14]:
test_ = 'data_lol.h5'
test_ = test_.split('.')
print(test_)
new_test = test_[0] + '_1.' + test_[1]
print(test_)
print(new_test)

['data_lol', 'h5']
['data_lol', 'h5']
data_lol_1.h5


In [2]:
proc.process_bin_WD2('three_channels.bin', 'three_channels.h5', counts = 5, overwrite = False)
proc.process_bin_WD2('test_data.bin', 'test_data_gen.h5', counts = 5, overwrite = False)
proc.process_bin_WD2('test_data.bin', 'test_data_gen.h5', counts = 5, overwrite = False)
proc.process_bin_WD2('test_data.bin', 'test_data_gen.h5', counts = 5, overwrite = False)



three_channels.h5
3 channels detected. Processing accordingly...
Chunking by 5...
Processing Finished!
test_data_gen_0.h5
Single channel detected. If you're expecting more channels, something has gone wrong.
Processing accordingly...
Chunking by 5...


Processing Finished!
test_data_gen_1.h5
Single channel detected. If you're expecting more channels, something has gone wrong.
Processing accordingly...
Chunking by 5...
Processing Finished!
test_data_gen_2.h5
Single channel detected. If you're expecting more channels, something has gone wrong.
Processing accordingly...
Chunking by 5...
Processing Finished!


True

In [4]:
proc.process_bin_WD2('three_channels.bin', 'three_channel.h5', counts = 5, overwrite = False)

three_channel.h5
3 channels detected. Processing accordingly...
Chunking by 5...
Processing Finished!


True

In [2]:
proc.generic_collection_function('test_data.bin', 'test_data_gen.h5', counts = 3)




test_data_gen_1.h5
test_data_gen_1_2.h5
test_data_gen_1_2_3.h5
test_data_gen_1_2_3_4.h5
test_data_gen_1_2_3_4_5.h5
test_data_gen_1_2_3_4_5_6.h5
test_data_gen_1_2_3_4_5_6_7.h5
test_data_gen_1_2_3_4_5_6_7_8.h5
test_data_gen_1_2_3_4_5_6_7_8_9.h5
test_data_gen_1_2_3_4_5_6_7_8_9_10.h5
test_data_gen_1_2_3_4_5_6_7_8_9_10_11.h5
Single channel detected. If you're expecting more channels, something has gone wrong.
Processing accordingly...
Chunking by 3...
0
0, 12500, 1, 26
0
[(0,  94024, 12500, 8, [ 0.   , -0.576, -0.384, ..., -0.384, -0.32 , -0.256])
 (1, 219046, 12500, 8, [-0.512, -0.768, -0.192, ..., -0.704, -0.384, -0.512])
 (2, 343935, 12500, 8, [-0.256, -0.384, -0.128, ..., -0.064, -0.384, -0.32 ])]
3
3, 12500, 1, 26
150078
[( 7, 2251800632885248, 0, 11215862223402827776, [ 1.19632707e+32,  7.56506197e-28, -1.52550846e+14, ..., -2.75858752e-22,  4.35775796e-40,  1.58758257e-29])
 ( 9, 2251800632885248, 0, 15495407839680397312, [ 2.06705692e-04, -1.52553816e+14,  5.61318938e+19, ...,  7.56

In [5]:
import h5py
import pandas as pd
with h5py.File("test_data_gen.h5") as f:
    print(f.keys())  # works like a dict
    gro = f.get('rwf')
    print(gro.keys())

    for i in gro.keys():
        print(i)
        wf = gro.get(str(i))
        print(wf)
        for j in wf:
            print(j)
    #wf = gro.get('raw_wf_0')
    
    #print(wf[3])

<KeysViewHDF5 ['event_information', 'rwf']>
<KeysViewHDF5 ['rwf_0', 'rwf_10', 'rwf_15', 'rwf_20', 'rwf_25', 'rwf_30', 'rwf_35', 'rwf_40', 'rwf_45', 'rwf_5', 'rwf_50', 'rwf_55', 'rwf_60', 'rwf_65', 'rwf_70', 'rwf_75', 'rwf_80', 'rwf_85', 'rwf_90', 'rwf_95']>
rwf_0
<HDF5 dataset "rwf_0": shape (5,), type "|V50008">
(0, 0, [ 0.   , -0.576, -0.384, ..., -0.384, -0.32 , -0.256])
(1, 0, [-0.512, -0.768, -0.192, ..., -0.704, -0.384, -0.512])
(2, 0, [-0.256, -0.384, -0.128, ..., -0.064, -0.384, -0.32 ])
(3, 0, [-0.768, -0.768, -0.576, ..., -0.448, -0.576, -0.384])
(4, 0, [-0.832, -0.64 , -0.704, ..., -0.256, -0.192, -0.128])
rwf_10
<HDF5 dataset "rwf_10": shape (5,), type "|V50008">
(11, 0, [-0.576, -0.768, -0.576, ..., -0.192,  0.   , -0.192])
(12, 0, [-0.704, -0.512, -0.256, ..., -0.576, -0.896, -0.64 ])
(13, 0, [-0.704, -0.512, -0.704, ..., -0.32 , -0.384, -0.896])
(14, 0, [-0.64 , -0.96 , -0.704, ..., -0.256, -0.704, -0.448])
(15, 0, [-0.576, -0.576, -0.192, ..., -0.192, -0.256, -0.768])
r

In [4]:
x = 'lololol_1'
print(x[:-2])

lololol


In [14]:
with h5py.File("test_data_gen_no_chunk.h5") as f:
    print(f.keys())  # works like a dict
    gro = f.get('event_information')
    print(gro.keys())
    print(len(gro.keys()))

print(gro)

<KeysViewHDF5 ['event_information', 'rwf']>
<KeysViewHDF5 ['ei_-1']>
1
<Closed HDF5 group>


In [2]:
import sys
sys.path.append("/home/e78368jw/Documents/MULE/")
import h5py
from packs.core import io as io
import pandas as pd
from packs.types import types

In [17]:
x = io.load_h5('three_channels_gen.h5')

In [18]:
print(x)
print(list(map(list, x)))

[]
[]


In [26]:
h5_data = []
with h5py.File('test_data_gen_3.h5') as f:
        # extract event info
        evt_info = f.get('event_information')
        print(evt_info)
        # if data has been chunked, treat differently
        if len(evt_info.keys()) > 1:
                for i in evt_info.keys():
                    q = evt_info.get(str(i))
                    for j in q:
                        h5_data.append(j)
print(h5_data)
print(pd.DataFrame(list(map(list, h5_data)), columns = (types.event_info_type).names))

<HDF5 group "/event_information" (20 members)>
[(0, 94024, 12500, 8, 1), (1, 219046, 12500, 8, 1), (2, 343935, 12500, 8, 1), (3, 468850, 12500, 8, 1), (4, 593825, 12500, 8, 1), (11, 1343944, 12500, 8, 1), (12, 1468945, 12500, 8, 1), (13, 1593840, 12500, 8, 1), (14, 1719034, 12500, 8, 1), (15, 1843898, 12500, 8, 1), (16, 1968927, 12500, 8, 1), (17, 2094020, 12500, 8, 1), (20, 2218934, 12500, 8, 1), (22, 2344008, 12500, 8, 1), (23, 2468883, 12500, 8, 1), (25, 2593823, 12500, 8, 1), (26, 2718892, 12500, 8, 1), (28, 2843896, 12500, 8, 1), (30, 2968919, 12500, 8, 1), (32, 3093950, 12500, 8, 1), (34, 3218919, 12500, 8, 1), (35, 3343866, 12500, 8, 1), (37, 3468964, 12500, 8, 1), (38, 3594030, 12500, 8, 1), (39, 3718969, 12500, 8, 1), (41, 3843983, 12500, 8, 1), (43, 3968803, 12500, 8, 1), (46, 4093964, 12500, 8, 1), (48, 4218995, 12500, 8, 1), (50, 4343970, 12500, 8, 1), (52, 4468897, 12500, 8, 1), (53, 4593937, 12500, 8, 1), (55, 4718981, 12500, 8, 1), (56, 4843837, 12500, 8, 1), (57, 496895

In [20]:
print(types.event_info_type)

AttributeError: module 'packs.types' has no attribute 'event_info_type'

In [10]:
import pandas as pd

display(pd.DataFrame(list(map(list, x))))

Unnamed: 0,0,1,2,3,4
0,0,1998268,1000,8,3
1,1,67506435,1000,8,3
2,2,132498360,1000,8,3
3,3,195997406,1000,8,3
4,4,257998463,1000,8,3
...,...,...,...,...,...
77,77,4820499947,1000,8,3
78,78,4882990612,1000,8,3
79,79,4945989984,1000,8,3
80,80,5007490783,1000,8,3


In [None]:
with h5py.File('three_channels_chunk.h5'):
    wf = f.get('wf')


In [None]:
# MOVE WDTYPE FOR EVENT_INFORMATION TO TYPES, PERHAPS FOR DATA INCLUDE IT AS A FUNCTION THAT PROVIDES
# SAMPLING INFO AS WELL!!

### read in data, using read_binary, play with `format_wfs` to see if it functions nicely with inserted channels

In [1]:
import sys
sys.path.append("/home/e78368jw/Documents/MULE/")
from packs.proc import processing_utils as proc
from packs.types import types

In [54]:
def flatten(xss):
    '''
    Flattens a 2D list
    eg: [[0,1,2,3], [4,5,6,7]] -> [0,1,2,3,4,5,6,7]
    '''
    return [x for xs in xss for x in xs]


def generate_rwf_type(samples  :  int):
    """
    Generates the data-type for raw waveforms 

    Parameters
    ----------

        samples  (int)  :  Number of samples per waveform

    Returns
    -------

        (ndtype)  :  Desired data type for processing


    """
    return np.dtype([
            ('event_number', np.uint32), 
            ('channels', np.int32),
            ('rwf', np.float32, (samples,))
        ])


def format_wfs(data      :  np.ndarray,
               wdtype    :  np.dtype, 
               samples   :  int, 
               channels  :  int):
    '''
    Formats the data for saving purposes.

    Parameters
    ----------

        data      (ndarray)  :  Unformatted data from binary file
        wdtype    (ndtype)   :  Custom data type for extracting information from
                                binary files
        samples   (int)      :  Number of samples in each waveform list
        channels  (int)      :  The first event number in the file (generally)

    Returns
    -------
        event_information (ndarray)  :  Reformatted event information
        waveform          (ndarray)  :  Reformatted waveforms

    '''
    # remove data component of dtype for event_information table
    e_dtype = types.event_info_type
    # if only one channel, select relevant information. Otherwise, split event by channel
    if channels == 1:
        event_information = [list(data[i])[:4] for i in range(len(data))]
        # add channel: 1 for each row
        [x.append(1) for x in event_information]
        waveform = [[(data[j][0], 0, list(data[j])[-i:][0]) for i in reversed(range(1, channels+1))] for j in range(len(data))]
    else:
        event_information = [list(data[i])[:5] for i in range(len(data))]
        waveform = [[(data[j][0], data[j][4] - i, list(data[j])[-i:][0]) for i in reversed(range(1, channels+1))] for j in range(len(data))]

    # convert to list of tuples and then structured numpy array
    event_information = list(map(tuple, event_information))
    event_information = np.array(event_information, dtype = e_dtype)
    waveform = np.array(flatten(waveform), dtype = generate_rwf_type(samples))

    return event_information, waveform


In [55]:
wdtype, samples, sampling_period, channels = proc.process_header('test_data.bin')
print(wdtype, samples, sampling_period, channels)
with open('test_data.bin', 'rb') as file:
    data = proc.read_binary(file, wdtype)
    
event_info, rwf = format_wfs(data, wdtype, samples, channels)

print(event_info)
print(type(event_info[0]))

Single channel detected. If you're expecting more channels, something has gone wrong.
Processing accordingly...
[('event_number', '<u4'), ('timestamp', '<u8'), ('samples', '<u4'), ('sampling_period', '<u8'), ('chan_1', '<f4', (12500,))] 12500 8 1
<_io.BufferedReader name='test_data.bin'>
[('event_number', '<u4'), ('timestamp', '<u8'), ('samples', '<u4'), ('sampling_period', '<u8'), ('chan_1', '<f4', (12500,))]
[(  0,    94024, 12500, 8, 1) (  1,   219046, 12500, 8, 1)
 (  2,   343935, 12500, 8, 1) (  3,   468850, 12500, 8, 1)
 (  4,   593825, 12500, 8, 1) (  5,   718788, 12500, 8, 1)
 (  6,   844100, 12500, 8, 1) (  7,   968898, 12500, 8, 1)
 (  8,  1093882, 12500, 8, 1) ( 10,  1218898, 12500, 8, 1)
 ( 11,  1343944, 12500, 8, 1) ( 12,  1468945, 12500, 8, 1)
 ( 13,  1593840, 12500, 8, 1) ( 14,  1719034, 12500, 8, 1)
 ( 15,  1843898, 12500, 8, 1) ( 16,  1968927, 12500, 8, 1)
 ( 17,  2094020, 12500, 8, 1) ( 20,  2218934, 12500, 8, 1)
 ( 22,  2344008, 12500, 8, 1) ( 23,  2468883, 12500, 8,

In [56]:
wdtype, samples, sampling_period, channels = proc.process_header('three_channels.bin')
print(wdtype, samples, sampling_period, channels)
with open('three_channels.bin', 'rb') as file:
    data = proc.read_binary(file, wdtype)
    
event_info, rwf = format_wfs(data, wdtype, samples, channels)

print(event_info)
print(type(event_info[0]))

3 channels detected. Processing accordingly...
[('event_number', '<u4'), ('timestamp', '<u8'), ('samples', '<u4'), ('sampling_period', '<u8'), ('channels', '<i4'), ('chan_1', '<f4', (1000,)), ('chan_2', '<f4', (1000,)), ('chan_3', '<f4', (1000,))] 1000 8 3
<_io.BufferedReader name='three_channels.bin'>
[('event_number', '<u4'), ('timestamp', '<u8'), ('samples', '<u4'), ('sampling_period', '<u8'), ('channels', '<i4'), ('chan_1', '<f4', (1000,)), ('chan_2', '<f4', (1000,)), ('chan_3', '<f4', (1000,))]
[( 0,    1998268, 1000, 8, 3) ( 1,   67506435, 1000, 8, 3)
 ( 2,  132498360, 1000, 8, 3) ( 3,  195997406, 1000, 8, 3)
 ( 4,  257998463, 1000, 8, 3) ( 5,  320505979, 1000, 8, 3)
 ( 6,  383497026, 1000, 8, 3) ( 7,  445497098, 1000, 8, 3)
 ( 8,  507496928, 1000, 8, 3) ( 9,  570497289, 1000, 8, 3)
 (10,  632997863, 1000, 8, 3) (11,  695497200, 1000, 8, 3)
 (12,  758496152, 1000, 8, 3) (13,  820997020, 1000, 8, 3)
 (14,  882496907, 1000, 8, 3) (15,  945995966, 1000, 8, 3)
 (16, 1007999656, 1000,

In [5]:
import sys
sys.path.append("/home/e78368jw/Documents/MULE/")
import h5py
from packs.core import io as io
import pandas as pd
from packs.types import types
from packs.proc import processing_utils as proc
import numpy as np

x = io.load_evt_info('three_channels_gen_no_chunk.h5')

In [31]:
type(sys.byteorder)

str

In [36]:
idk = sys.byteorder
file = open('three_channels.bin', 'rb')
event_number, timestamp, samples, sampling_period = proc.read_defaults_WD2(file, idk)

print(event_number, timestamp, samples, sampling_period)


0 1998268 1000 8


In [21]:


x = proc.generate_rwf_type(500)
print(x)
print(x['rwf'].shape[0] == 500)
print(type(x['rwf']))

[('event_number', '<u4'), ('channels', '<i4'), ('rwf', '<f4', (500,))]
True
<class 'numpy.dtypes.VoidDType'>


In [15]:
h5_data = []
with h5py.File('three_channels_chunk.h5') as f:
    evt_info = f.get('event_information')
    for i in evt_info.keys():
                    q = evt_info.get(str(i))
                    for j in q:
                        h5_data.append(j)


In [13]:
h5_data

[(0, 1998268, 1000, 8, 3),
 (1, 67506435, 1000, 8, 3),
 (2, 132498360, 1000, 8, 3),
 (3, 195997406, 1000, 8, 3),
 (4, 257998463, 1000, 8, 3),
 (5, 320505979, 1000, 8, 3),
 (6, 383497026, 1000, 8, 3),
 (7, 445497098, 1000, 8, 3),
 (8, 507496928, 1000, 8, 3),
 (9, 570497289, 1000, 8, 3),
 (10, 632997863, 1000, 8, 3),
 (11, 695497200, 1000, 8, 3),
 (12, 758496152, 1000, 8, 3),
 (13, 820997020, 1000, 8, 3),
 (14, 882496907, 1000, 8, 3),
 (15, 945995966, 1000, 8, 3),
 (16, 1007999656, 1000, 8, 3),
 (17, 1070504581, 1000, 8, 3),
 (18, 1132996098, 1000, 8, 3),
 (19, 1195996239, 1000, 8, 3),
 (20, 1257496495, 1000, 8, 3),
 (21, 1320996727, 1000, 8, 3),
 (22, 1382496872, 1000, 8, 3),
 (23, 1445496323, 1000, 8, 3),
 (24, 1507995753, 1000, 8, 3),
 (25, 1570996597, 1000, 8, 3),
 (26, 1632495785, 1000, 8, 3),
 (27, 1695994820, 1000, 8, 3),
 (28, 1757495935, 1000, 8, 3),
 (29, 1820507051, 1000, 8, 3),
 (30, 1882995585, 1000, 8, 3),
 (31, 1945994537, 1000, 8, 3),
 (32, 2007497118, 1000, 8, 3),
 (33, 