## Storing droplets/photons

The number of photons found in each event will differ. You will have to make a choice about how best to save this data in an hdf5 file. 
Here, we dropletize/photonize the data from an epix100. We can store the result as
* zero-padded data to create a rectangular array
* vlen datasets (require a loop in the 'consumer' code)
* photons in 'flat' arrays

In [None]:
import numpy as np
import holoviews as hv
hv.extension('bokeh')
from tqdm import tqdm
import h5py as h5
from pathlib import Path
import tables
import sys
from importlib import reload

import psana as ps
sys.path.append('/sdf/group/lcls/ds/tools/smalldata_tools/latest')
from smalldata_tools.DetObject import DetObject, DetObjectFunc
from smalldata_tools.ana_funcs.sparsifyFunc import sparsifyFunc#, unsparsifyFunc
from smalldata_tools.ana_funcs.droplet import dropletFunc

from smalldata_tools.SmallDataUtils import setParameter, getUserData, getUserEnvData

from smalldata_tools.utilities_plotting import hv_image_ctl
from smalldata_tools.utilities_plotting import hv_image
from smalldata_tools.utilities import image_from_dxy

# Experiment and run used to build the psana data source.

run = 630  # 4 & 11.4 keV background
exp = 'xpptut15'

# The template only has two placeholders, so only exp and run are passed;
# the extra positional arguments previously given to format() were ignored.
dsname = 'exp={}:run={}'.format(exp, run)

ds = ps.MPIDataSource(dsname)
#dets = ds.detnames('epics')
#ds.detnames()

detname = 'epix_alc1'  # epix100 w/ beamstop
# NOTE(review): photE is not used in the cells visible here; presumably a
# photon energy in detector units -- confirm before relying on it.
photE = 166.
# Wrap the detector in a smalldata_tools DetObject for this run.
det = DetObject(detname, ds.env(), int(run))

# Run the analysis on a few shots
Here we define a few different ways to sparsify the droplet output, which is necessary in order to save the output in the hdf5 file.

In [None]:
# Settings handed to dropletFunc as keyword arguments.
func_dict = {
    'threshold': 3.,
    'thresholdLow': 1.,
    'thresADU': 4.,
}
func = dropletFunc(**func_dict)

# Three ways of sparsifying the droplet output so it can be written to hdf5.
sfunca = sparsifyFunc(nData=15000)  # 'rectangular array'
sfuncb = sparsifyFunc(nData=0, name='ragged')  # ragged array / vlen data: loop needed to read!
sfuncc = sparsifyFunc(name='var')  # data in a flat array
for sparsifier in (sfunca, sfuncb, sfuncc):
    func.addFunc(sparsifier)

# Attach the droplet function (with its sparsifiers) to the detector pipeline.
det.addFunc(func)

userDataCfg = det.params_as_dict()
userDict = {}

small_data = ds.small_data('./UserMtg_photon_sparse.h5', gather_interval=5)

# Stop the iteration after max_evt events
# (break statements do not work reliably with MPIDataSource).
max_evt = 5
ds.break_after(max_evt)
for nevt, evt in tqdm(enumerate(ds.events())):
    det.getData(evt)
    det.processFuncs()
    # Collect this event's detector output and hand it to the hdf5 writer.
    userDict[det._name] = getUserData(det)
    small_data.event(userDict)

small_data.close()

# Debugging aid: inspect the keys produced for the detector.
#print(userDict[detname].keys())

In [None]:
# Open the file written above (read-only) and keep its root node for browsing.
sparse_h5_file = tables.open_file('./UserMtg_photon_sparse.h5', mode='r')
dat_p = sparse_h5_file.root

h5ls  UserMtg_photon_sparse.h5/epix_alc1
### results from droplet
droplet_nDroplets        Dataset {5/Inf} <br>
droplet_nDroplets_all    Dataset {5/Inf}
### sparsified output - 'rectangular'
droplet_sparse_col       Dataset {5/Inf, 15000} <br>
droplet_sparse_data      Dataset {5/Inf, 15000} <br>
droplet_sparse_npix      Dataset {5/Inf, 15000} <br>
droplet_sparse_row       Dataset {5/Inf, 15000} <br>
droplet_sparse_tile      Dataset {5/Inf, 15000}

Here, we pad the arrays with zeros so that we get proper rectangular arrays to read. But we need to make them deep enough to fit all droplets, and then the data becomes big.
By default, col & row are filled as integers to save space.

In [None]:
# Print the column values of the first event in a small window around the
# droplet count, i.e. where the stored droplets end and the padding begins.
n_drop_evt0 = int(dat_p.epix_alc1.droplet_nDroplets.read()[0])
cols_evt0 = dat_p.epix_alc1.droplet_sparse_col.read()[0]
# Clamp the start at 0: with fewer than 3 droplets a negative start index
# would wrap around to the end of the padded row.
print(cols_evt0[max(n_drop_evt0 - 3, 0):n_drop_evt0 + 3])

## Ragged dataset - vlen
### these are vlen arrays
ragged_droplet_col       Dataset {5/Inf} <br>
ragged_droplet_data      Dataset {5/Inf} <br>
ragged_droplet_npix      Dataset {5/Inf} <br>
ragged_droplet_row       Dataset {5/Inf} <br>
ragged_droplet_tile      Dataset {5/Inf}

As these are not 2-d arrays, you need to loop over each entry. Python loops are not your friend if you care about CPU time.

In [None]:
# Inspect the ragged (vlen) column dataset: the dataset shape, the shape of
# the first event's entry, and finally the full content as the cell output.
ragged_col_node = dat_p.epix_alc1.ragged_droplet_col
print(ragged_col_node.shape)
ragged_col_values = ragged_col_node.read()
print(ragged_col_values[0].shape)
ragged_col_values

In [None]:
# Shape of the ragged column dataset itself; per the h5ls listing above it has
# one entry per stored event (the droplets live inside each entry).
dat_p.epix_alc1.ragged_droplet_col.shape

## Flat dataset
### these are from the flat array, _len is the counter
var_droplet              Group <br>
var_droplet_len          Dataset {5/Inf}
#### content of the flat dataset.
h5ls  UserMtg_photon_sparse.h5/epix_alc1/var_droplet 
col                      Dataset {50203/Inf} <br>
data                     Dataset {50203/Inf} <br>
npix                     Dataset {50203/Inf} <br>
row                      Dataset {50203/Inf} <br>
tile                     Dataset {50203/Inf}

In [None]:
# Per-event counter of the flat dataset, its total compared with the length of
# the flat column array, and the flat column array itself.
flat_len_values = dat_p.epix_alc1.var_droplet_len.read()
flat_col_node = dat_p.epix_alc1.var_droplet.col
print(flat_len_values)
print(flat_len_values.sum(), flat_col_node.shape)
print(flat_col_node.read())

## Get data for a given event out of the flat array
Here, we pick the 4th event and show the image created from the droplet data.

In [None]:
# Number of entries each event contributed to the flat arrays.
ndroplets = dat_p.epix_alc1.var_droplet_len.read()
# Offsets into the flat arrays: event i occupies
# [ndroplets_cumsum[i], ndroplets_cumsum[i+1]).
ndroplets_cumsum = np.concatenate((np.array([0]), np.cumsum(ndroplets)))

In [None]:
ievt = 3  # zero-based index of the event to display
img_shape_cfg = userDataCfg['imgShape']
outShape = (img_shape_cfg[0], img_shape_cfg[1])

# Range of the flat arrays that belongs to event ievt.
evt_start = ndroplets_cumsum[ievt]
evt_stop = ndroplets_cumsum[ievt + 1]
flat_group = dat_p.epix_alc1.var_droplet

# The notebook previously had a commented-out image_from_dxy(data, row, col,
# outShape=...) call here; the image is built with scipy.sparse instead.
from scipy import sparse
d = flat_group.data.read(evt_start, evt_stop)
ix = flat_group.row.read(evt_start, evt_stop)
iy = flat_group.col.read(evt_start, evt_stop)

# Scatter the droplet values onto a dense image: row index from ix, column
# index from iy.
pixel_coords = (ix.flatten(), iy.flatten())
photImg = sparse.coo_matrix((d.flatten(), pixel_coords), shape=outShape).toarray()

In [None]:
# Display the image built from the droplet data with the plotting helper
# imported from smalldata_tools.utilities_plotting.
# NOTE(review): presumably an interactive holoviews image view -- confirm.
hv_image_ctl(photImg)