# Raw data analysis
Plots summary of channel ADC values and trigger rates

## Paths
Set ``geometrypath`` to point to the larpix-geometry yaml file you'd like to use to plot x,y positions of pixels
Set ``datapath`` to point to the directory containing datafiles you'd like to look at

In [1]:
import ipywidgets as widgets
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.colors as colors
import os
import glob
import h5py
from collections import defaultdict
import time
import yaml
from matplotlib import cm

In [2]:
geometrypath = '/global/project/projectdirs/dune/users/pmadigan/larpix/larpix-software/larpix-geometry/larpixgeometry/layouts/layout-2.4.0.yaml'
# datapath = '/global/project/projectdirs/dune/data/larpix/raw_data/raw_20_10_12/500_V_cm'
datapath = '/global/project/projectdirs/dune/www/data/Bern-singlecube/LArPix/dataRuns/rawData' # data runs
# datapath = '/global/project/projectdirs/dune/www/data/Bern-singlecube/LArPix/pedestalRuns' # pedestal runs

In [3]:
ls /global/project/projectdirs/dune/www/data/Bern-singlecube/LArPix/

[0m[01;34mconfig_files[0m/  evd_config_20-10-26_10-48-37.json  [01;34mpedestalRuns[0m/  [01;34mthresholds[0m/
[01;34mdataRuns[0m/      [01;34mleakageCurrent[0m/                    [01;34mpulser[0m/


## Data cache
Evaluate this cell to refresh stored data from run files

In [20]:
data_cache = dict()

## Interactive plots

In [23]:
%matplotlib widget
files = sorted([os.path.basename(path) for path in glob.glob(datapath+'/*_.h5')])

def unique_channel_id(d):
    return ((d['io_group'].astype(int)*256 + d['io_channel'].astype(int))*256 \
            + d['chip_id'].astype(int))*64 + d['channel_id'].astype(int)
def unique_channel_id_2_str(unique_id,*args,**kwargs):
    return (unique_id//(256*256*64)).astype(int).astype(str) \
        + '-' + ((unique_id//(256*64))%256).astype(int).astype(str) \
        + '-' + ((unique_id//64)%256).astype(int).astype(str) \
        + '-' + (unique_id%64).astype(int).astype(str)

with open(geometrypath) as fi:
    geo = yaml.full_load(fi)
chip_pix = dict([(chip_id, pix) for chip_id,pix in geo['chips']])

@widgets.interact
def display(filenames=widgets.SelectMultiple(options=files, rows=10, description="File")
           ):   
    plt.close('all')
    fig1 = None
    for filename in filenames:
        if not filename in data_cache:
            print('opening',filename,'...')
            f = h5py.File(os.path.join(datapath,filename),'r')
            unixtime = f['packets']['timestamp'][f['packets']['packet_type'] == 4]
            livetime = np.max(unixtime) - np.min(unixtime)

            data_mask = f['packets']['packet_type'] == 0
            data_mask = np.logical_and(f['packets']['valid_parity'], data_mask)
            dataword = f['packets']['dataword'][data_mask]

            unique_id = unique_channel_id(f['packets'][data_mask])
            unique_id_set = np.unique(unique_id)
            d = defaultdict(dict)
            last = time.time()
            for i,id in enumerate(unique_id_set):
                if time.time() > last + 1:
                    print('{}/{} {}'.format(i+1,len(unique_id_set),unique_channel_id_2_str(id)),end='\r')
                    last = time.time()
                id_mask = unique_id == id
                if np.sum(id_mask) < 3:
                    continue
                masked_dataword = dataword[id_mask]
                d[id]['min'] = np.min(masked_dataword)
                d[id]['mean'] = np.mean(masked_dataword)
                d[id]['med'] = np.median(masked_dataword)
                d[id]['std'] = np.std(masked_dataword)
                d[id]['rate'] = len(masked_dataword) / (livetime + 1e-9)
                pix = chip_pix[(id//64)%256][id%64] if (id//64)%256 in chip_pix else None
                if pix:
                    d[id]['x'] = geo['pixels'][pix][1]
                    d[id]['y'] = geo['pixels'][pix][2]
                else:
                    d[id]['x'] = 0.
                    d[id]['y'] = 0.
            data_cache[filename] = d
        else:
            print('loading',filename,'from cache')
            d = data_cache[filename]

        if not fig1:
            fig1,axes = plt.subplots(3,1,sharex='col',num='summary 1',figsize=(8,6))
        else:
            fig1 = plt.figure('summary 1')
            axes = fig1.axes
        axes[0].scatter([key for key in d if 'mean' in d[key]],
                        [d[key]['mean'] for key in d if 'mean' in d[key]],
                        marker='.',alpha=0.5)
        axes[1].scatter([key for key in d if 'std' in d[key]],
                        [d[key]['std'] for key in d if 'std' in d[key]],
                        marker='.',alpha=0.5)
        axes[2].scatter([key for key in d if 'rate' in d[key]],
                        [d[key]['rate'] for key in d if 'rate' in d[key]],
                        marker='.',alpha=0.5)
        axes[2].set(xlabel='unique channel')
        axes[0].set(ylabel='mean ADC')
        axes[1].set(ylabel='std ADC')
        axes[2].set(ylabel='rate [Hz]')
        for ax in axes:
            ax.grid(1)
        axes[2].set_yscale('log')

        ax2 = axes[0].secondary_xaxis('top', functions=(lambda x: x, lambda x: x))
        ax2.xaxis.set_major_formatter(ticker.FuncFormatter(unique_channel_id_2_str))
        ax2.set(xlabel='channel key')
        plt.legend(range(len(filenames)))
        plt.tight_layout()

        fig2, axes = plt.subplots(3,1,sharex='col',sharey='col',num='summary 2 {}'.format(filename),figsize=(6,12))
        x = np.array([d[key]['x'] for key in d if 'x' in d[key]])
        y = np.array([d[key]['y'] for key in d if 'y' in d[key]])
        c0 = fig2.colorbar(axes[0].scatter(x,y,c=[d[key]['mean'] for key in d if 'mean' in d[key]], 
                                           marker='.', alpha=0.5*2), ax=axes[0])
        c1 = fig2.colorbar(axes[1].scatter(x,y,c=[d[key]['std'] for key in d if 'std' in d[key]], 
                                           marker='.', norm=colors.LogNorm(), alpha=0.5*2), ax=axes[1])
        c2 = fig2.colorbar(axes[2].scatter(x,y,c=[d[key]['rate'] for key in d if 'rate' in d[key]], 
                                           marker='.', norm=colors.LogNorm(), alpha=0.5*2), ax=axes[2])
        axes[2].set(xlabel='x [mm]')
        axes[0].set(ylabel='y [mm]',title=filename)
        c0.set_label('mean ADC')
        axes[1].set(ylabel='y [mm]')
        c1.set_label('std ADC')
        axes[2].set(ylabel='y [mm]')
        c2.set_label('rate [Hz]')

        ax2 = axes[0].secondary_xaxis('top', functions=(lambda x: x, lambda x: x))
        ax2.set(xlabel='x [mm]')
        plt.tight_layout()
        
        for filename_other in filenames:
            figname='correlation {},{}'.format(filename, filename_other)
            if filename_other == filename:
                continue
            elif filename_other not in data_cache:
                continue
            elif plt.fignum_exists(figname):
                continue
            d_other = data_cache[filename_other]
            fig,axes = plt.subplots(3,1,num=figname, figsize=(6,12))
            axes[0].scatter([d_other[key]['mean'] for key in d if 'mean' in d[key] and 'mean' in d_other[key]],
                            [d[key]['mean'] for key in d if 'mean' in d[key] and 'mean' in d_other[key]],
                            marker='.',alpha=0.5)
            axes[1].scatter([d_other[key]['std'] for key in d if 'std' in d[key] and 'std' in d_other[key]],
                            [d[key]['std'] for key in d if 'std' in d[key] and 'std' in d_other[key]],
                            marker='.',alpha=0.5)
            axes[2].scatter([d_other[key]['rate'] for key in d if 'rate' in d[key] and 'rate' in d_other[key]],
                            [d[key]['rate'] for key in d if 'rate' in d[key] and 'rate' in d_other[key]],
                            marker='.',alpha=0.5)
            axes[0].set(xlabel='mean ADC, {}'.format(filename_other), ylabel='mean ADC, {}'.format(filename))
            axes[1].set(xlabel='std ADC, {}'.format(filename_other), ylabel='std ADC, {}'.format(filename))
            axes[2].set(xlabel='rate [Hz], {}'.format(filename_other), ylabel='rate [Hz], {}'.format(filename))
            for ax in axes:
                ax.grid(1)
            plt.tight_layout()
                
    # aggregate
    if filenames:
        all_d = defaultdict(lambda : defaultdict(float))
        ids = set([id for filename in filenames for id in data_cache[filename]])
        for id in ids:
            for attr in ('min','mean','med','std','rate','x','y'):
                f = np.mean
                if attr in ('x','y'):
                    f = np.median
                all_d[id][attr] = f([data_cache[filename][id][attr] for filename in filenames 
                                     if id in data_cache[filename] and attr in data_cache[filename][id]])

        fig_agg2, axes = plt.subplots(3,1,sharex='col',sharey='col',num='summary 2 all',figsize=(6,12))
        x = np.array([all_d[id]['x'] for id in ids])
        y = np.array([all_d[id]['y'] for id in ids])
        c0 = fig_agg2.colorbar(axes[0].scatter(x,y,c=[all_d[id]['mean'] for id in ids], 
                                               marker='.', alpha=0.5*2), ax=axes[0])
        c1 = fig_agg2.colorbar(axes[1].scatter(x,y,c=[all_d[id]['std'] for id in ids], 
                                               marker='.', norm=colors.LogNorm(), alpha=0.5*2), ax=axes[1])
        c2 = fig_agg2.colorbar(axes[2].scatter(x,y,c=[all_d[id]['rate'] for id in ids], 
                                               marker='.', norm=colors.LogNorm(), alpha=0.5*2), ax=axes[2])
        axes[2].set(xlabel='x [mm]')
        axes[0].set(ylabel='y [mm]',title=filename)
        c0.set_label('mean ADC')
        axes[1].set(ylabel='y [mm]')
        c1.set_label('std ADC')
        axes[2].set(ylabel='y [mm]')
        c2.set_label('rate [Hz]')

        ax2 = axes[0].secondary_xaxis('top', functions=(lambda x: x, lambda x: x))
        ax2.set(xlabel='x [mm]')
        plt.tight_layout()
    


interactive(children=(SelectMultiple(description='File', options=('datalog_2020_10_26_17_18_45_CET_.h5', 'data…

In [24]:
run = 'datalog_2020_10_27_04_19_20_CET_.h5'
for channel,vals in data_cache[run].items():
    if vals['rate'] > 5:
        print('chip',(channel//64)%256,'chan',(channel%64),vals['rate'],'Hz')


KeyError: 'datalog_2020_10_27_04_19_20_CET_.h5'