In [3]:
from mpes import fprocessing as fp
# from imp import reload
# reload(fp)

In [14]:
# HDF5 input file (relative path) that every reader object in the cells below opens.
fpath = r'../data/data_20180605_131.h5'

### 1.1 Loading HDF5 files
HDF5 files can be read using a few different classes operating on different levels. The hierarchy relevant to the end user is as follows (from low to high):
* **mpes.fprocessing.File()** -- local import of h5py.File(), a low-level Python HDF5 parser (wrapped over even lower C code).
* **mpes.fprocessing.hdf5Reader()** -- built on the File() class, with the inclusion of several file structure parsing, file component readout and format conversion functions.
* **mpes.fprocessing.hdf5Splitter()** -- built on the hdf5Reader() class, used for splitting large hdf5 files.
* **mpes.fprocessing.hdf5Processor()** -- built on the hdf5Reader() class, with the inclusion of binning operations and io.

The hierarchy goes **File $\in$ hdf5Reader $\in$ (hdf5Splitter, hdf5Processor)**

In [15]:
# Lowest level: fp.File, described in the text above as a local import of h5py.File.
# NOTE(review): this handle is never closed in the visible cells, and the reader
# objects created below open the same file again -- confirm that is intended.
hdff = fp.File(fpath)
hdff  # bare last expression: the cell displays the handle's repr

<HDF5 file "data_20180605_131.h5" (mode r+)>

In [16]:
# Middle level: hdf5Reader, described above as built on File() with added
# structure parsing, component readout and format conversion functions.
hdfr = fp.hdf5Reader(fpath)
hdfr  # bare last expression: the cell displays the object's repr

<HDF5 file "data_20180605_131.h5" (mode r+)>

**New attributes and methods in the hdf5Reader() class**

In [17]:
# Names found on the hdf5Reader instance but not on the raw File handle.
# The result comes from a set, so the printed order is arbitrary.
reader_extras = set(dir(hdfr)) - set(dir(hdff))
print(list(reader_extras))

['groupAliases', 'readGroup', 'nameLookupDict', 'CHUNK_SIZE', 'attributeNames', 'convert', '_assembleGroups', 'summarize', 'name2alias', 'faddress', 'readAttribute', 'getAttributeNames', 'getGroupNames', 'ncores', 'nEvents', 'groupNames']


In [18]:
# Top level: hdf5Processor, described above as built on hdf5Reader() with added
# binning operations and io.
hdfp = fp.hdf5Processor(fpath)
hdfp  # bare last expression: the cell displays the object's repr

<HDF5 file "data_20180605_131.h5" (mode r+)>

**New attributes and methods in the hdf5Processor() class**

In [19]:
# Names found on the hdf5Processor instance but not on the hdf5Reader instance.
# The result comes from a set, so the printed order is arbitrary.
processor_extras = set(dir(hdfp)) - set(dir(hdfr))
print(list(processor_extras))

['saveHistogram', 'toSplitter', 'toBandStructure', 'histdict', 'localBinning', '_addBinners', 'viewEventHistogram', 'loadMapping', 'hdfdict', 'distributedBinning', 'saveParameters', '_delayedBinning', 'ua', 'axesdict', 'updateHistogram', 'distributedProcessBinning']


### 1.2 Retrieving components from HDF5 files
Components can also be read at different levels; the level of hdf5Reader() or above is recommended.

In [20]:
# Print an overview of the file: its address, the stored attributes, and the
# groups with their shapes and aliases (see the recorded output below).
hdfp.summarize()

*** HDF5 file info ***
 File address = /scratch/metis_storage/data_20180605_131.h5


>>> Attributes <<<

CAClientMajorVersion = 0
CAClientMinorVersion = 1
CompileTimeStamp = Wed Jun 13 15:31:04 2018
KTOF:Lens:A:VSet = 514.63
KTOF:Lens:B:VSet = 2199.8
KTOF:Lens:C:VSet = 76.402
KTOF:Lens:D:VSet = 261.24
KTOF:Lens:E:VSet = 558.98
KTOF:Lens:Extr:VSet = 6000.0
KTOF:Lens:F:VSet = 48.904
KTOF:Lens:Foc:VSet = 167.0
KTOF:Lens:G:VSet = 20.1
KTOF:Lens:H:VSet = 35.0
KTOF:Lens:I:VSet = 42.25
KTOF:Lens:MCPback:VSet = 1825.0
KTOF:Lens:MCPfront:VSet = 20.0
KTOF:Lens:TOF:VSet = 20.0
KTOF:Lens:UCA:VSet = 1200.0
KTOF:Lens:UFA:VSet = 600.0
KTOF:Lens:Z1:VSet = 2452.9
KTOF:Lens:Z2:VSet = 1489.9

>>> Groups <<<

EventFormat, Shape = (64,), Alias = None
Stream_0, Shape = (27296214,), Alias = X
Stream_1, Shape = (27296214,), Alias = Y
Stream_2, Shape = (27296214,), Alias = t
Stream_3, Shape = (27296214,), Alias = MasterRstCtr
Stream_4, Shape = (27296214,), Alias = ADC
Stream_5, Shape = (27296214,), Alias = Sta

In [21]:
# Read the 'EventFormat' group (shape (64,) in the summary above) and print it as a list.
event_format = hdfr.readGroup(hdfr, 'EventFormat')
print(list(event_format))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 0, 64, 0, 64, 0, 64, 0, 53, 11, 42, 11, 16, 26, 15, 1, 15, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


### 1.3 Converting HDF5 files
Conversion of hdf5 to Matlab (mat) format (no data processing).

In [1]:
# Convert the file to MATLAB (mat) format; per the text above, no data processing is applied.
# NOTE(review): save_addr carries no file extension -- presumably convert() adds it; confirm.
hdfr.convert('mat', save_addr='../data/data_131')

Conversion to parquet format

In [None]:
# Convert the file to parquet format. The call is already inside parentheses,
# so no backslash continuation is needed.
hdfr.convert(
    'parquet',
    save_addr='../data/data_131_parquet',
    pq_append=False,
    chunksz=1e7,
    compression='gzip',
)

### 1.4 Splitting HDF5 files

In [None]:
# Split the file via hdf5Splitter (nsplit=50; pbar=True presumably shows a progress bar).
# NOTE(review): the output prefix says "114" while fpath points at the "..._131"
# file -- confirm the naming is intended.
hdfs = fp.hdf5Splitter(fpath)
hdfs.split(
    nsplit=50,
    save_addr=r'../data/data_114_parts/data_114_',
    pbar=True,
)

### 1.5 Retrieve binned data from stored HDF5 file
Read binned data over 3 axes

In [None]:
# Binned-result file for the 3-axis example (relative path).
fpath_binned = r'../data/binres_114.h5'

In [None]:
# Load the binned result from fpath_binned and list the keys of the returned mapping.
bindict = fp.readBinnedhdf5(fpath_binned, combined=True)
bindict.keys()

Read binned data over 4 axes

In [11]:
# Binned-result file for the 4-axis example.
# NOTE: this rebinds fpath_binned, replacing the 3-axis path assigned in the earlier cell.
fpath_binned = r'../data/data_114_4axis_binned.h5'

In [12]:
# combined=True: in the recorded output the result has a single 'V' entry
# alongside the axis keys ('ADC', 'X', 'Y', 't').
bindict = fp.readBinnedhdf5(fpath_binned, combined=True)
bindict.keys()

dict_keys(['ADC', 'X', 'Y', 't', 'V'])

In [7]:
# combined=False: in the recorded output the result has separate 'V0'..'V49'
# entries instead of a single 'V'. This rebinds bindict from the previous cell.
bindict = fp.readBinnedhdf5(fpath_binned, combined=False)
bindict.keys()

dict_keys(['ADC', 'X', 'Y', 't', 'V0', 'V1', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'V29', 'V3', 'V30', 'V31', 'V32', 'V33', 'V34', 'V35', 'V36', 'V37', 'V38', 'V39', 'V4', 'V40', 'V41', 'V42', 'V43', 'V44', 'V45', 'V46', 'V47', 'V48', 'V49', 'V5', 'V6', 'V7', 'V8', 'V9'])