In [None]:
%matplotlib notebook

# This example shows how to download files from the ONC server
import os

import numpy as np
import datetime

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import strawb
import strawb.sensors.module
import strawb.tools

import h5py 

import pandas
import scipy.ndimage

# Load ONC DB and mask files of interest

In [None]:
# Reuse the sync DB stored on disc if that file exists, otherwise create the
# handler without loading it. The DB is updated (and saved) afterwards in both cases.
db_file_exists = os.path.exists(strawb.Config.pandas_file_sync_db)
db = strawb.SyncDBHandler() if db_file_exists else strawb.SyncDBHandler(load_db=False)

db.load_onc_db_update(output=True, save_db=True)

In [None]:
# select one device and one data-product; see SyncDBHandler.sensor_mapping for the codes
is_muon_tracker = db.dataframe['deviceCode'] == 'TUMMUONTRACKER001'
is_mtsd_product = db.dataframe.dataProductCode == 'MTSD'
mask = is_muon_tracker & is_mtsd_product

In [None]:
# masked entries which are flagged as synced and have a file_version > 0
db.dataframe[mask & db.dataframe.synced & (db.dataframe.file_version>0)]

In [None]:
# largest `dateTo` value of all entries in the DB
db.dataframe.dateTo.max()

In [None]:
# show the last five masked entries
db.dataframe[mask].tail(5)

# Download a specific file, needs some minutes if file isn't synced

In [None]:
# name of the file to work with
file_list = ['TUMMUONTRACKER001_20220731T001146.146Z-SDAQ-MUON.hdf5']
# get the DB entries for the file names
# NOTE(review): according to the section header this also downloads files that aren't synced - confirm
db_i = db.get_files_from_names(file_list)
db_i

# Load the file

In [None]:
# create the MuonTracker object for the first (and only) file of `file_list`
muon = strawb.MuonTracker(file_list[0])

# The event builder introduced in this notebook is integrated into strawb and simplifies to

In [None]:
# # keeping all events
# df = muon.event_builder.event_builder(reduce_dataframe=False)
# # or cutting to the events of interest
# df = muon.event_builder.event_builder(reduce_dataframe=True)

# Load and Cut Data
It seems that the TRB has an overflow at 2750 in both time datasets (this can be corrected with `np.unwrap`).

In [None]:
def get_dataframe(muon, trb_overflow = 2750., max_tot = 1e3):
    """Load the TOT data of `muon` into a DataFrame and apply a first cleaning.

    Steps:
    - exclude entries whose `tot_time_ns` is exactly 1e-9 (there are some events with 1ns),
    - exclude entries whose `tot` is not inside the open interval (0, max_tot),
    - remove the TRB overflow from `tot_time` with `np.unwrap`; this isn't done for
      `tot_time_ns` as it can cause precision loss,
    - sort the rows by `time`, `time_ns` and `tot`, as the raw data isn't sorted correctly.

    Parameters
    ----------
    muon : object
        Must provide `muon.file_handler` with the array-like datasets `tot_tot`,
        `tot_time_ns`, `tot_time` and `tot_channel`.
    trb_overflow : float, optional
        Period of the overflow in `tot_time`; passed to `np.unwrap` as `period`.
    max_tot : float or None, optional
        Upper limit for `tot` in ns; the default 1e3 cuts at 1us, as some tot values
        are very high (~1e12). `None` disables the cut on `tot` completely.

    Returns
    -------
    pandas.DataFrame
        Sorted DataFrame with the columns `time`, `time_ns`, `channel` and `tot`.
        The index still refers to the position in the unsorted, masked data.
    """
    file_handler = muon.file_handler
    tot = file_handler.tot_tot[:]
    tot_time_ns = file_handler.tot_time_ns[:]

    # some values are 1ns, exclude them
    mask_valid = tot_time_ns != 1e-9

    # optional cut on tot
    if max_tot is not None:
        mask_valid &= (tot < max_tot) & (tot > 0)

    print(f'exclude {np.sum(~mask_valid)} events')

    # remove the overflow of the timestamp only; see docstring for the reason
    time_masked = np.unwrap(file_handler.tot_time[mask_valid], period=trb_overflow)

    df_base = pandas.DataFrame({
        # unwrapped timestamp
        'time': time_masked,
        # time_ns TRB internal
        'time_ns': tot_time_ns[mask_valid],
        # channel id
        'channel': file_handler.tot_channel[mask_valid],
        # tot in nano-seconds
        'tot': tot[mask_valid]})

    # the time isn't sorted correctly, do it here
    return df_base.sort_values(['time', 'time_ns', 'tot'])

# Event builder
Detect and build events. An event is defined as a set of tot-events where the time difference between consecutive tot-events isn't greater than a limit (`dt_max`). Muons travel at ~c, and 1ns corresponds to a distance of 0.3m. The muon tracker has a diameter of 13", so ~0.3m.

In [None]:
# define the label function
def label_intermediate(input):
    """Label the features of an intermediate array and extend each label by one item.

    `input` is meant to describe what happens *between* the items of another array
    (e.g. `np.diff(a) < limit`) and is therefore one item shorter than that array.
    The returned labels have the length of that other array: both neighbours of a
    non-zero item in `input` get the label of that item.

    input                     =  [1,0,1,1,0,0,0,1]
    scipy.ndimage.label(input)=  [1,0,2,2,0,0,0,3]
    label_intermediate(input) = [1,1,2,2,2,0,0,3,3]

    Parameters
    ----------
    input : array_like
        The intermediate array-like object to be labeled. Any non-zero values in
        `input` are counted as features and zero values are considered the background.

    Returns
    -------
    label : ndarray
        Integer array with one item more than `input`, where each unique feature
        in `input` has a unique label that is extended by one item.
    num_features : int
        Number of features found by `scipy.ndimage.label`.

    Example
    -------
    >>> d_a = np.diff(a)
    >>> label, _ = label_intermediate(d_a<1.)
    >>> assert len(label) == len(a)
    """
    found, num_features = scipy.ndimage.label(input, structure=None)
    # one additional (background) item at the end
    extended = np.append(found, [0])
    # labels as they are before the shift is applied
    unshifted = extended[:-1].copy()
    # a labeled item passes its label on to its right neighbour
    extended[1:] = np.where(unshifted != 0, unshifted, extended[1:])
    return extended, num_features

# test
a = np.array([1,0,2,2,0,0,0,3])
l, _ = label_intermediate(a)
print(f'input         :  {a}')
print(f'l_intermediate: {l}')

## Build events for dataset

In [None]:
def add_labels(dataframe, threshold_ns = 20e-9):
    """Add the event `label` column to `dataframe`.

    Consecutive rows whose difference in `time_ns` is >= 0 and < `threshold_ns`
    are marked as belonging together and labeled with `label_intermediate`.
    The column is written to the passed `dataframe`, which is returned as well.
    """
    # event selection
    time_step = np.diff(dataframe['time_ns'])
    is_close = (time_step < threshold_ns) & (time_step >= 0)

    labels, _ = label_intermediate(is_close)
    dataframe['label'] = labels
    return dataframe

def add_label_timing(dataframe):
    """Add the number of entries per label and the timing relative to the label start.

    Two columns are added (or recomputed if they exist already):
    - `label_count`: number of rows which share the `label` of the row,
    - `time_ns_label`: `time_ns` minus the smallest `time_ns` of the row's `label`.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Needs the columns `label` and `time_ns`. The passed object isn't modified.

    Returns
    -------
    pandas.DataFrame
        New DataFrame with the index and row order of `dataframe` and the two
        columns appended at the end.
    """
    # remove results of a previous call; `drop` returns a new DataFrame, therefore
    # the DataFrame of the caller stays untouched
    dataframe = dataframe.drop(columns=['label_count', 'time_ns_label'], errors='ignore')

    # `transform` broadcasts the aggregate of each label back to all rows of the label
    dataframe['label_count'] = dataframe.groupby('label')['label'].transform('count')

    label_start = dataframe.groupby('label')['time_ns'].transform('min')
    dataframe['time_ns_label'] = dataframe['time_ns'] - label_start

    return dataframe

In [None]:
def remove_duplicate(dataframe):
    """Return a copy of `dataframe` with only one entry per label and channel.

    Background:
    - most of the events don't show duplicate entries per channel
    - duplicates show a tot of ~25ns and ~58ns, whereas non-duplicates have ~25ns
    - therefore the long TOT seems to be incorrect and only the shortest is kept

    Note: `dataframe` itself is sorted in place by `label`, `time_ns_label` and `tot`.
    With this order the first entry of each (label, channel) pair is the one to keep.
    """
    # order matters for keep='first': it keeps the ~25ns and not the ~58ns entry
    dataframe.sort_values(by=['label', 'time_ns_label', 'tot'], inplace=True)

    # True for every repeated (label, channel) pair, except its first occurrence
    is_repeated = dataframe.duplicated(subset=['label', 'channel'], keep='first')
    return dataframe.loc[~is_repeated].copy()

def add_scintillator(dataframe, inplace=False):
    """Add the column `scintillator` with the scintillator number of each `channel`.

    The channel pairs (1,11), (2,12), (3,9), (4,10), (5,15), (8,14), (7,13), (6,16)
    are translated to the scintillator numbers 1 to 8, in this order.
    With `inplace=False` a copy of `dataframe` gets the new column and is returned,
    otherwise `dataframe` itself.
    """
    target = dataframe if inplace else dataframe.copy()

    # each entry lists the two channels of one scintillator
    sipm_pairs = [[1,11], [2,12], [3, 9], [4,10],[5,15], [8,14], [7,13], [6,16]]
    # replace needs the same dtype as the column
    channel_dtype = target.channel.dtype
    trans_ch = np.array(sipm_pairs).flatten().astype(channel_dtype)
    # 1,1,2,2,...,8,8 - the scintillator number for each item of trans_ch
    trans_scin = np.repeat(np.arange(1, len(sipm_pairs) + 1), 2).astype(channel_dtype)

    target['scintillator'] = target.channel.replace(trans_ch, trans_scin)
    return target

def add_scintillator_counts(dataframe):
    """Add scintillator statistics per label to `dataframe`.

    Needs the columns `label` and `scintillator`. The returned DataFrame has three
    additional columns:
    - `scintillator_double`: True if the (label, scintillator) pair of the row has
      exactly two rows,
    - `scintillator_double_count`: number of such pairs for the label of the row,
    - `scintillator_count`: number of rows of the label minus `scintillator_double_count`.
      NOTE(review): this equals the number of different scintillators only if a
      (label, scintillator) pair has at most two rows - confirm against the caller.

    Columns with these names which exist already are popped from the passed
    `dataframe` (in place); the result itself is a new DataFrame created by the merges.
    """
    # number of rows per label; reduced by the double hits at the end
    scintillator_count = dataframe.groupby('label')['scintillator'].count()
    scintillator_count.name = 'scintillator_count'
    
    # groupby(['label', 'scintillator']).scintillator.count() results in 1, 
    # if there is only one sipm involved or 2 if both sipms for one scintillator are involved. 
    # True if both are involved: == 2
    scintillator_double = dataframe.groupby(['label', 'scintillator']).scintillator.count() == 2
    scintillator_double.name = 'scintillator_double'
    
    # now count the scintillator_double for each label to get the counts of
    # full scintillator hits
    scintillator_double_count = scintillator_double.groupby('label').sum()
    scintillator_double_count.name = 'scintillator_double_count'

    # table per (label, scintillator) with both double-hit columns; 'label' refers
    # to the index level of df_sci here
    df_sci = pandas.DataFrame(scintillator_double)
    df_sci = df_sci.merge(scintillator_double_count, how='left', left_on='label', right_index=True)

    # remove the names, otherwise merge will add a suffix
    if 'scintillator_double' in dataframe:
        dataframe.pop('scintillator_double')
    if 'scintillator_double_count' in dataframe:
        dataframe.pop('scintillator_double_count')
    if 'scintillator_count' in dataframe:
        dataframe.pop('scintillator_count')
        
    # attach the per label count and the per (label, scintillator) columns to each row
    dataframe = pandas.merge(dataframe, scintillator_count, how='left', right_index=True, 
                             left_on='label')
    dataframe = pandas.merge(dataframe, df_sci, how='left', right_index=True, 
                             left_on=['label','scintillator'])
    
    # only count a scintillator once
    dataframe['scintillator_count'] -= dataframe['scintillator_double_count']
    return dataframe

def event_builder(muon, threshold_ns = 20e-9):
    """Run all steps which generate the events from the raw data.

    The steps are:
    - load the data and remove invalid entries (`get_dataframe`)
    - label events, i.e. at least two entries within `threshold_ns` (`add_labels`)
    - calculate the timing inside each event (`add_label_timing`)
    - remove the invalid duplicate entries (`remove_duplicate`)
    - add the scintillator of each entry, count the scintillators per event and
      flag entries which are part of a double scintillator hit, i.e. both SiPMs of
      the scintillator are present in the event
      (`add_scintillator`, `add_scintillator_counts`)
    - reduce to events with at least one double scintillator hit

    Returns
    -------
    df_base, df_events, df_events_nd, df_double : pandas.DataFrame
        The DataFrames after cleaning, after the event selection, after removing
        duplicates and after the reduction to double scintillator hits.
    """
    df_base = get_dataframe(muon)

    # detect if there are two or more entries within a dt of some ns
    df_base = add_labels(df_base, threshold_ns=threshold_ns)

    # label 0 <-> no close-by neighbour (add_labels), remove those entries
    has_neighbour = df_base['label'] > 0
    df_events = df_base[has_neighbour]
    print(f'events reduced from: {len(df_base)} to {len(df_events)} - or {len(df_events)/len(df_base)*100:.2f}% of df_base')

    # add label timing, important for a proper event ordering
    df_events = add_label_timing(df_events)

    # some channels appear two times with the same timing in one event (label), remove them
    df_events_nd = remove_duplicate(df_events)
    print(f'events reduced from: {len(df_events)} to {len(df_events_nd)} - or {len(df_events_nd)/len(df_base)*100:.2f}% of df_base')

    df_events_nd = add_scintillator_counts(add_scintillator(df_events_nd))

    # keep the events with at least one double scintillator hit and renumber the rows
    has_double_hit = df_events_nd.scintillator_double_count > 0
    df_double = df_events_nd[has_double_hit].reset_index(drop=True)

    print(f'events reduced from: {len(df_events_nd)} to {len(df_double)} - or {len(df_double)/len(df_base)*100:.2f}%  of df_base')

    return df_base, df_events, df_events_nd, df_double

df_base, df, df_nondup, df_double = event_builder(muon)
df_double

In [None]:
# entries of the selected events which aren't part of a double scintillator hit
df_double[~df_double.scintillator_double]

# Build events with multiplicity `length`

## What to do with duplicate entries per label
- most of the events don't show duplicate entries per channel
- duplicates show a tot of ~25ns and ~58ns, whereas non-duplicates have ~25ns
- therefore -> the long TOT seems to be incorrect 
    - it is sorted by tot and we want only the shortest
    - we get it with: aggfunc='first' 

## Result: 
- most of the events don't show duplicate entries per channel
- duplicates show a tot of ~25ns and ~58ns, whereas non-duplicates have ~25ns
- therefore -> the long TOT seems to be incorrect 
    - it is sorted by tot and we want only the shortest
    - we get it with: aggfunc='first' 

In [None]:
# mask duplicates per label and channel; the first occurrence of each pair isn't masked
mask_duplicate = df.duplicated(['label', 'channel'], keep='first')

In [None]:
# entries which are kept: the first occurrence of each (label, channel) pair
df[~mask_duplicate]

In [None]:
# entries which are flagged as duplicates
df[mask_duplicate]

In [None]:
# mask duplicates per label and channel
mask_duplicate = df.duplicated(['label', 'channel'], keep='first')

# get a dataframe with non duplicates
df_nondup = df[~mask_duplicate].copy()
df_nondup.reset_index(drop=True, inplace=True)

print(f'events reduced from: {len(df)} to {len(df_nondup)} - or {len(df_nondup)/len(df)*100:.2f}%')

# add label count, but only if it doesn't exist
# NOTE(review): if `df` carries `label_count` already (e.g. from add_label_timing),
# that value was computed before the duplicates were removed - confirm this is intended
if 'label_count' not in df_nondup:
    df_nondup = pandas.merge(df_nondup, df_nondup.groupby('label')['label'].count(), how='left',
                      right_index=True, left_on='label', 
                      suffixes=[None, '_count'])

# all guards check df_nondup, i.e. the DataFrame which gets the column
if 'time_ns_label' not in df_nondup:
    df_nondup = pandas.merge(df_nondup, df_nondup.groupby('label')['time_ns'].min(), how='left',
                      right_index=True, left_on='label', 
                      suffixes=[None, '_label'])
    df_nondup['time_ns_label'] = df_nondup.time_ns - df_nondup.time_ns_label

# add the channel of the first event to each label (to do the multiplicity plot)
if 'channel_first' not in df_nondup:
    # sort_values returns a new DataFrame: take the first channel from the sorted one,
    # so that it is the earliest channel of the label independent of the row order
    channel_first = (df_nondup.sort_values('time_ns_label', kind='mergesort')
                     .groupby('label')['channel'].first())
    df_nondup = pandas.merge(df_nondup, channel_first, how='left',
                      right_index=True, left_on='label', 
                      suffixes=[None, '_first'])

# the time isn't sorted correctly, do it here
df_nondup.sort_values(['time', 'time_ns_label', 'tot'], inplace=True)

# add scintillator label

In [None]:
df_nondup['scintillator'] = df_nondup.channel.copy()

# upper hemisphere is even, lower odd
# each pair lists the two channels (sipms) of one scintillator; scintillators are numbered from 1
for i, sipm_pair in enumerate([[1,11], [2,12], 
                               [3, 9], [4,10],
                               [5,15], [8,14], 
                               [7,13], [6,16]]):
    df_nondup.loc[df_nondup['channel'].isin(sipm_pair), 'scintillator'] = i+1

### add the scintillator of the first event to each label (to do the multiplicity plot)
if 'scintillator_first' in df_nondup:
    df_nondup.pop('scintillator_first')
# sort needed for 'scintillator_first' as time is not strictly increasing;
# sort_values returns a new DataFrame, therefore the sorted result has to be used
# directly - the row order of df_nondup itself is kept
scintillator_first = (df_nondup.sort_values('time_ns_label', kind='mergesort')
                      .groupby('label')['scintillator'].first())
df_nondup = pandas.merge(df_nondup, scintillator_first, how='left',
                  right_index=True, left_on='label', 
                  suffixes=[None, '_first'])

### add the number of entries per label (recomputed on every run of the cell)
if 'scintillator_count' in df_nondup:
    df_nondup.pop('scintillator_count')
df_nondup = pandas.merge(df_nondup, df_nondup.groupby('label')['scintillator'].count(), how='left',
                  right_index=True, left_on='label', 
                  suffixes=[None, '_count'])

### count how many entries each scintillator has at one label
if 'scintillator_double' in df_nondup:
    df_nondup.pop('scintillator_double')
df_nondup = pandas.merge(df_nondup, df_nondup.groupby(['label', 'scintillator']).scintillator.count(), 
                         how='left', 
                         right_index=True, left_on=['label','scintillator'], 
                         suffixes=[None, '_double'])
# this results in 1, if there is only one sipm involved or 2 if both sipms 
# for one scintillator are involved. True if both are involved:
df_nondup['scintillator_double'] = df_nondup['scintillator_double'] == 2

if 'scintillator_double_count' in df_nondup:
    df_nondup.pop('scintillator_double_count')
# now count the scintillator_double for each label
df_nondup = pandas.merge(df_nondup, df_nondup.groupby('label')['scintillator_double'].sum(), how='left',
                  right_index=True, left_on='label', 
                  suffixes=[None, '_count'])
# if both sipms at one scintillator are involved it is 2
# if two scintillators are fully involved it is 4,...
# -> / 2 to get the count of fully involved scintillators
df_nondup['scintillator_double_count'] = (df_nondup['scintillator_double_count'] / 2).astype(int)

# get 'scintillator_count' right: a fully involved scintillator counts only once
df_nondup['scintillator_count'] -= df_nondup['scintillator_double_count']

# reduce to events with at least one double scintillator hit

In [None]:
# keep the entries of labels with at least one fully involved scintillator
has_double_hit = df_nondup.scintillator_double_count > 0
df_double = df_nondup[has_double_hit]

print(f'events reduced from: {len(df_nondup)} to {len(df_double)} - or {len(df_double)/len(df_nondup)*100:.2f}%')
df_double

# multiplicity plot of hits per channel
Plot the 2d histogram of the first channel hit versus the secondary channel hits for each label.

The x axis is the channel of the first hit and the y axis is the channel of the secondary hits. There should be no counts on the diagonal as duplicate channels are removed for each label. 

In [None]:
def get_nrows_ncols(number_subfigures, nrows=None, ncols=None):
    """Get the layout of a subplot grid with a fixed number of rows or columns.

    Exactly one of `nrows` and `ncols` has to be set; the other one is calculated
    so that the grid has at least `number_subfigures` cells.

    Parameters
    ----------
    number_subfigures : int
        Number of subfigures which have to fit into the grid.
    nrows, ncols : int, optional
        The fixed number of rows or columns.

    Returns
    -------
    nrows, ncols : int
        The shape of the grid.
    active : ndarray of bool
        Flat array of length nrows*ncols, True for the first `number_subfigures`
        cells and False for the remaining ones.

    Raises
    ------
    ValueError
        If both or none of `nrows` and `ncols` are set.
    """
    number_subfigures = int(number_subfigures)
    if nrows is None and ncols is None:
        raise ValueError('either ncols or nrows must be set, but neither is set.')
    if nrows is not None and ncols is not None:
        raise ValueError(f'either ncols or nrows must be set, not both. Got: ncols={ncols}, nrows={nrows}')

    if nrows is not None:
        nrows = int(nrows)
        ncols = int(np.ceil(number_subfigures/nrows))
    else:
        ncols = int(ncols)
        nrows = int(np.ceil(number_subfigures/ncols))

    active = np.zeros(nrows*ncols, dtype=bool)
    active[:number_subfigures] = True
    return nrows, ncols, active

get_nrows_ncols(3, ncols=3)

In [None]:
# one panel per multiplicity; panel 0 shows all labels with more than one hit instead
n_panels = df_nondup.label_count.max()
nrows, ncols, ax_active = get_nrows_ncols(n_panels, ncols=3)
fig, ax = plt.subplots(ncols=ncols, nrows=nrows,
                       squeeze=False, sharex=True, 
                       figsize=(10,3*nrows))
ax_shape = ax.shape
ax = ax.flatten()

# remove the entries of the first channel of each label
mask_events = df_nondup.channel != df_nondup.channel_first

for i in range(n_panels):
    if i==0:
        mask_label_count = df_nondup.label_count > 1
        title = 'all hits'
    else:
        mask_label_count = df_nondup.label_count == i+1
        title = f'hit multiplicity {i+1}'

    mask_panel = mask_events & mask_label_count
    print(i, mask_panel.sum())

    c, e_x, e_y = np.histogram2d(df_nondup.channel_first[mask_panel], 
                                 df_nondup.channel[mask_panel], 
                                 bins=np.arange(1, 18)-.5)
    # mask empty bins: they become nan instead of log10(0)
    c = np.ma.masked_equal(c, 0)

    # histogram2d returns c[x, y] but pcolormesh expects C[y, x] -> transpose, otherwise
    # the first hit ends up on the y axis, in contradiction to the axis labels
    cb = ax[i].pcolormesh(e_x, e_y, np.log10(c.filled(np.nan)).T, shading='auto')

    fig.colorbar(cb, ax=ax[i], shrink=.6, label=f'log$_{{10}}$(counts)', location='right')
    ax[i].set_title(title)

for axi in ax[ax_active]:
    axi.axline((0,0), slope=1, ls='--', color='gray')
    axi.set_aspect('equal')
    axi.grid(lw=.5)
    axi.set_xlim(.5, 16.5)
    axi.set_xticks(ticks=np.arange(1,17))

    # y axis is inverted: channel 1 at the top
    axi.set_ylim(16.5, .5)
    axi.set_yticks(ticks=np.arange(1,17))

ax = ax.reshape(ax_shape)
# label only the outer axes
for axi in ax[-1,:]:
    axi.set_xlabel('channel first hit')
for axi in ax[:,0]:
    axi.set_ylabel('channel secondary hit')

# hide the unused panels of the grid
for axi in ax.flatten()[~ax_active]:
    axi.set_axis_off()

plt.tight_layout()

# multiplicity plot of hits per scintillator

In [None]:
# show the entries with the highest scintillator_count
df_nondup[df_nondup.scintillator_count == df_nondup.scintillator_count.max()]

In [None]:
print('upper hemisphere is even, lower odd')

# one panel per multiplicity; panel 0 shows all labels with more than one scintillator instead
n_panels = df_nondup.scintillator_count.max()
nrows, ncols, ax_active = get_nrows_ncols(n_panels, ncols=3)
fig, ax = plt.subplots(ncols=ncols, nrows=nrows,
                       squeeze=False, sharex=True, 
                       figsize=(10,3*nrows))

ax_shape = ax.shape
ax = ax.flatten()

# remove the entries of the first scintillator of each label
mask_events = df_nondup.scintillator != df_nondup.scintillator_first

for i in range(n_panels):
    if i==0:
        mask_label_count = df_nondup.scintillator_count > 1
        title = 'all hits'
    else:
        mask_label_count = df_nondup.scintillator_count == i+1
        title = f'scintillator multiplicity {i+1}'

    mask_panel = mask_events & mask_label_count
    c, e_x, e_y = np.histogram2d(df_nondup.scintillator_first[mask_panel], 
                                 df_nondup.scintillator[mask_panel], 
                                 bins=np.arange(1, 18)-.5)
    # mask empty bins: they become nan instead of log10(0)
    c = np.ma.masked_equal(c, 0)

    # histogram2d returns c[x, y] but pcolormesh expects C[y, x] -> transpose, otherwise
    # the first hit ends up on the y axis, in contradiction to the axis labels
    cb = ax[i].pcolormesh(e_x, e_y, np.log10(c.filled(np.nan)).T, shading='auto')

    fig.colorbar(cb, ax=ax[i], shrink=.6, label=f'log$_{{10}}$(counts)', location='right')
    ax[i].set_title(title)

for axi in ax[ax_active]:
    axi.axline((0,0), slope=1, ls='--', color='gray')
    axi.set_aspect('equal')
    axi.grid(lw=.5)
    axi.set_xlim(.5, 8.5)
    axi.set_xticks(ticks=np.arange(1,9))

    # y axis is inverted: scintillator 1 at the top
    axi.set_ylim(8.5, .5)
    axi.set_yticks(ticks=np.arange(1,9))

ax = ax.reshape(ax_shape)
# label only the outer axes
for axi in ax[-1,:]:
    axi.set_xlabel('scintillator first hit')
for axi in ax[:,0]:
    axi.set_ylabel('scintillator secondary hit')

# hide the unused panels of the grid
for axi in ax.flatten()[~ax_active]:
    axi.set_axis_off()

plt.tight_layout()

# multiplicity plot of full scintillator hits (both sipms saw something)

In [None]:
print('upper hemisphere is even, lower odd')

# panel 0 shows all labels with at least one full scintillator hit,
# panel i (i>0) the labels with exactly i full scintillator hits
n_panels = df_nondup.scintillator_double_count.max()+1
nrows, ncols, ax_active = get_nrows_ncols(n_panels, ncols=3)
fig, ax = plt.subplots(ncols=ncols, nrows=nrows,
                       squeeze=False, sharex=True, 
                       figsize=(10,3*nrows))

ax_shape = ax.shape
ax = ax.flatten()

# remove the entries of the first scintillator of each label
mask_events = df_nondup.scintillator != df_nondup.scintillator_first

for i in range(n_panels):
    if i==0:
        mask_label_count = df_nondup.scintillator_double_count > 0
        title = 'all hits'
    else:
        mask_label_count = df_nondup.scintillator_double_count == i
        title = f'scintillator multiplicity {i}'

    mask_panel = mask_events & mask_label_count
    c, e_x, e_y = np.histogram2d(df_nondup.scintillator_first[mask_panel], 
                                 df_nondup.scintillator[mask_panel], 
                                 bins=np.arange(1, 18)-.5)
    # mask empty bins: they become nan instead of log10(0)
    c = np.ma.masked_equal(c, 0)

    # histogram2d returns c[x, y] but pcolormesh expects C[y, x] -> transpose, otherwise
    # the first hit ends up on the y axis, in contradiction to the axis labels
    cb = ax[i].pcolormesh(e_x, e_y, np.log10(c.filled(np.nan)).T, shading='auto')

    fig.colorbar(cb, ax=ax[i], shrink=.6, label=f'log$_{{10}}$(counts)', location='right')
    ax[i].set_title(title)

for axi in ax[ax_active]:
    axi.axline((0,0), slope=1, ls='--', color='gray')
    axi.set_aspect('equal')
    axi.grid(lw=.5)
    axi.set_xlim(.5, 8.5)
    axi.set_xticks(ticks=np.arange(1,9))

    # y axis is inverted: scintillator 1 at the top
    axi.set_ylim(8.5, .5)
    axi.set_yticks(ticks=np.arange(1,9))

ax = ax.reshape(ax_shape)
# label only the outer axes
for axi in ax[-1,:]:
    axi.set_xlabel('scintillator first hit')
for axi in ax[:,0]:
    axi.set_ylabel('scintillator secondary hit')

# hide the unused panels of the grid
for axi in ax.flatten()[~ax_active]:
    axi.set_axis_off()

plt.tight_layout()

# and now only the scintillator with double hits

# Extract the labels (sipm events) which have at least one full scintillator hit (both sipms saw something)

In [None]:
print('upper hemisphere is even, lower odd')

# panel 0 shows all labels with at least one full scintillator hit,
# panel i (i>0) the labels with exactly i full scintillator hits
n_panels = df_nondup.scintillator_double_count.max()+1
nrows, ncols, ax_active = get_nrows_ncols(n_panels, ncols=3)
fig, ax = plt.subplots(ncols=ncols, nrows=nrows,
                       squeeze=False, sharex=True, 
                       figsize=(10,3*nrows))
ax_shape = ax.shape
ax = ax.flatten()

# remove the entries of the first scintillator of each label and
# keep only entries which are part of a full scintillator hit
mask_events = df_nondup.scintillator != df_nondup.scintillator_first
mask_events &= df_nondup.scintillator_double

for i in range(n_panels):
    if i==0:
        mask_label_count = df_nondup.scintillator_double_count > 0
        title = 'all hits'
    else:
        mask_label_count = df_nondup.scintillator_double_count == i
        title = f'scintillator multiplicity {i}'

    mask_panel = mask_events & mask_label_count
    c, e_x, e_y = np.histogram2d(df_nondup.scintillator_first[mask_panel], 
                                 df_nondup.scintillator[mask_panel], 
                                 bins=np.arange(1, 18)-.5)
    # mask empty bins: they become nan instead of log10(0)
    c = np.ma.masked_equal(c, 0)

    # histogram2d returns c[x, y] but pcolormesh expects C[y, x] -> transpose, otherwise
    # the first hit ends up on the y axis, in contradiction to the axis labels
    cb = ax[i].pcolormesh(e_x, e_y, np.log10(c.filled(np.nan)).T, shading='auto')

    fig.colorbar(cb, ax=ax[i], shrink=.6, label=f'log$_{{10}}$(counts)', location='right')
    ax[i].set_title(title)

for axi in ax[ax_active]:
    axi.axline((0,0), slope=1, ls='--', color='gray')
    axi.set_aspect('equal')
    axi.grid(lw=.5)
    axi.set_xlim(.5, 8.5)
    axi.set_xticks(ticks=np.arange(1,9))

    # y axis is inverted: scintillator 1 at the top
    axi.set_ylim(8.5, .5)
    axi.set_yticks(ticks=np.arange(1,9))

ax = ax.reshape(ax_shape)
# label only the outer axes
for axi in ax[-1,:]:
    axi.set_xlabel('scintillator first hit')
for axi in ax[:,0]:
    axi.set_ylabel('scintillator secondary hit')

# hide the unused panels of the grid
for axi in ax.flatten()[~ax_active]:
    axi.set_axis_off()

plt.tight_layout()

In [None]:
scintillator_unique = np.sort(df_double.scintillator.unique())

nrows, ncols, ax_active = get_nrows_ncols(len(scintillator_unique), ncols=3)
fig, ax = plt.subplots(ncols=ncols, nrows=nrows,
                       squeeze=False, sharex=True, 
                       figsize=(10,3*nrows))
ax_shape = ax.shape
ax = ax.flatten()

# symmetric bins: the sign of dt encodes which of the two channels came first
bins = np.arange(-df_double.time_ns_label.max(), df_double.time_ns_label.max(), .5e-9)
print(f'Bins: {-df_double.time_ns_label.max()} to {df_double.time_ns_label.max()}, length: {len(bins)}')

list_fits = []
for i, s_i in enumerate(scintillator_unique):
    # entries of scintillator s_i where both of its sipms are present in the label
    mask_scintillator = df_double.scintillator_double & (df_double.scintillator==s_i)
    df_scintillator = df_double[mask_scintillator]

    channels = np.sort(df_scintillator.channel.unique())
    if len(channels) < 2:
        # no label with both sipms of this scintillator -> nothing to histogram
        ax[i].set_title(f'scintillator {s_i}: no double hits')
        ax[i].grid()
        continue

    # sort by time_ns, so that first() is the earliest channel independent of the row order
    grouped = df_scintillator.sort_values('time_ns', kind='mergesort').groupby(['label'])
    ch_first = grouped['channel'].first()
    t_0 = grouped['time_ns'].min()
    t_1 = grouped['time_ns'].max()
    dt = t_1 - t_0

    # dt >= 0 so far; negative sign if channels[0] came first,
    # i.e. the result is t(channels[0]) - t(channels[1])
    dt[ch_first==channels[0]] *= -1
    counts, edges = np.histogram(dt, bins=bins)
    # edges are converted from s to ns for the plot
    ax[i].stairs(counts, edges=edges*1e9, label=f'dt: ch{channels[0]} - ch{channels[1]}',)

    ax[i].legend(loc='upper left', ncol=2, title=f'scintillator {s_i}')
    ax[i].grid()

ax = ax.reshape(ax_shape)
# label only the outer axes; raw string as '\D' is an invalid escape sequence
for axi in ax[:,0]:
    axi.set_ylabel('counts')
for axi in ax[-1,:]:
    axi.set_xlabel(r'$\Delta$ t [ns]')

# unused panels: hide only the y axis
for axi in ax.flatten()[~ax_active]:
    axi.get_yaxis().set_visible(False)

plt.tight_layout()

In [None]:
scintillator_unique = np.sort(df_double.scintillator.unique())

nrows, ncols, ax_active = get_nrows_ncols(len(scintillator_unique), ncols=3)
fig, ax = plt.subplots(ncols=ncols, nrows=nrows,
                       squeeze=False, sharex=True, 
                       figsize=(10,3*nrows))

ax_shape = ax.shape
ax = ax.flatten()

bins = np.arange(0, df_double.time_ns_label.max(), .2e-9)
print(f'Bins: 0 to {df_double.time_ns_label.max()}, length: {len(bins)}')

for i, s_i in enumerate(scintillator_unique):
    # entries of scintillator s_i where both of its sipms are present in the label,
    # restricted to labels with exactly two full scintillator hits
    mask_scintillator = df_double.scintillator_double & (df_double.scintillator==s_i)
    mask_scintillator &= df_double.scintillator_double_count==2
    df_scintillator = df_double[mask_scintillator]

    # sort by time_ns, so that first() is the earliest channel independent of the row order
    grouped = df_scintillator.sort_values('time_ns', kind='mergesort').groupby(['label'])
    ch_first = grouped['channel'].first()
    t_0 = grouped['time_ns'].min()
    t_1 = grouped['time_ns'].max()
    dt = t_1 - t_0

    # one histogram per channel which came first
    channels = df_scintillator.channel.unique()
    for ch_i in np.sort(channels):
        counts, edges = np.histogram(dt[ch_first==ch_i], bins=bins)
        # edges are converted from s to ns for the plot
        ax[i].stairs(counts, edges=edges*1e9,label=f'{ch_i}')

    ax[i].legend(loc='upper right', ncol=2, title=f'scintillator {s_i} \nfirst event in channel:')
    ax[i].grid()

ax = ax.reshape(ax_shape)
# label only the outer axes; raw string as '\D' is an invalid escape sequence
for axi in ax[:,0]:
    axi.set_ylabel('counts')
for axi in ax[-1,:]:
    axi.set_xlabel(r'$\Delta$ t [ns]')

# unused panels: hide only the y axis
for axi in ax.flatten()[~ax_active]:
    axi.get_yaxis().set_visible(False)

plt.tight_layout()

# Simulate gaussian offset between channel + gaussian signals

In [None]:
# symmetric bins: the sign of dt encodes which of the two channels came first
bins = np.arange(-df_double.time_ns_label.max(), df_double.time_ns_label.max(), .5e-9)
print(f'Bins: {-df_double.time_ns_label.max()} to {df_double.time_ns_label.max()}, length: {len(bins)}')

# scintillator used for this example
s_i = 2
mask_scintillator = df_double.scintillator_double & (df_double.scintillator==s_i)

# sort by time_ns, so that first() is the earliest channel independent of the row order
grouped = df_double[mask_scintillator].sort_values('time_ns', kind='mergesort').groupby(['label'])
ch_first = grouped['channel'].first()
t_0 = grouped['time_ns'].min()
t_1 = grouped['time_ns'].max()
dt = t_1 - t_0

channels = np.sort(df_double[mask_scintillator].channel.unique())

# dt >= 0 so far; negative sign if channels[0] came first
dt[ch_first==channels[0]] *= -1
counts, edges = np.histogram(dt, bins=bins)
# convert the edges from s to ns
edges *= 1e9
# NOTE(review): presumably the bin centers of `edges` - confirm against strawb.tools.cal_middle
x = strawb.tools.cal_middle(edges)

In [None]:
import scipy.signal
import scipy.optimize

def gauss(x, *params):
    """Sum of N gaussians evaluated at `x`, with N = len(params)//3.

    `params` is a flat sequence of (x0, sigma, scale) triples, i.e. len(params)
    must be a multiple of 3 (3, 6, 9,...) and gaussian i is defined by:
    x0_i = params[::3]
    sigma_i = params[1::3]
    scale_i = params[2::3]
    Without any parameter the result is `np.zeros_like(x)`.
    """
    total = np.zeros_like(x)
    start = 0
    while start < len(params):
        scale = params[start + 2]
        x0 = params[start]
        sigma = params[start + 1]
        total = total + scale * np.exp( -((x - x0)/sigma)**2 / 2)
        start += 3
    return total

def fit_peaks_gauss(x, y, distance=None, height_min=None, height_max=None, verbose=2, min_peaks=None):
    """Find the peaks of y(x) and fit them with Gaussians.

    Peaks are located with `scipy.signal.find_peaks`. Each peak is first fitted on
    its own in a window around the peak, afterwards all peaks are fitted together
    as a sum of Gaussians (see `gauss`) over the full range of `x`.

    Parameters
    ----------
    x, y : array-like, 1-D
        Sample positions and values, e.g. bin centres and counts of a histogram.
        `x` is assumed to be sorted ascending (needed by `np.interp` and the window masks).
    distance : float, optional
        Minimal distance between two peaks in units of `x`. It is converted to a
        number of samples with the mean spacing of `x`. None -> 1 sample.
    height_min, height_max : float, optional
        Passed to `find_peaks` as `height=(height_min, height_max)`.
    verbose : int, optional
        Lower is more verbose: <2 prints single fits which failed, <1 prints in
        addition the distance in samples and the single fit results. The default
        (2) prints nothing.
    min_peaks : int, optional
        If set and fewer peaks are found, the start parameters of the combined fit
        are padded with Gaussians (x0=0, sigma=1, scale=1) up to `min_peaks`.

    Returns
    -------
    pandas.DataFrame
        The `find_peaks` properties per peak (index based positions converted to
        units of `x`), 'peak_pos', the single fit results 'x0_single',
        'sigma_single', 'scale_single' (NaN if the fit failed) and the combined fit
        results 'x0', 'sigma', 'scale'. The combined results are sorted by 'x0' and
        joined by row number; with padded Gaussians a row of the combined fit does
        not necessarily belong to the peak in the same row.

    Notes
    -----
    Exceptions of the combined fit (`scipy.optimize.curve_fit`) are not caught.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # distance: units of x -> number of samples (at least 1)
    if distance is None:
        distance = 1
    else:
        distance = max(int(distance / np.diff(x).mean()), 1)
    if verbose<1:
        print('distance in index ', distance)

    # the (None, None) intervals don't cut, but make find_peaks return the property
    i_pk, p_pk = scipy.signal.find_peaks(y, distance=distance,
                                         height=(height_min, height_max),
                                         threshold=(None, None),
                                         prominence=(None, None),
                                         width=(None, None),
                                         )

    # convert to dataframe; scipy.signal.find_peaks measures in indices, convert it here
    df_p = pandas.DataFrame(p_pk)
    index_columns = ['left_edges', 'right_edges', 'left_bases', 'right_bases', 'left_ips', 'right_ips']
    sample_index = np.arange(len(x))
    for col in df_p.columns.intersection(index_columns):
        df_p[col] = np.interp(df_p[col], sample_index, x)
    df_p['widths'] = df_p['right_ips'] - df_p['left_ips']
    df_p['peak_pos'] = x[i_pk]
    for col in ('x0', 'sigma', 'scale'):
        df_p[col] = np.nan

    # FWHM = 2*sqrt(2*ln(2))*sigma; find_peaks evaluates the width at half prominence
    sqrt_2ln2 = np.sqrt(2*np.log(2))

    # fit single
    for ind, (i, row_i) in zip(i_pk, df_p.iterrows()):
        p0 = [row_i.peak_pos,
              row_i.widths/sqrt_2ln2/2,
              row_i.peak_heights]
        # window: 1.2 times the distance to the left/right half-height crossing
        d_left = row_i.peak_pos - row_i.left_ips
        d_right = row_i.right_ips - row_i.peak_pos
        mask_data = (x>=row_i.peak_pos-d_left*1.2) & (x<=row_i.peak_pos+d_right*1.2)
        if mask_data.sum()<=len(p0):
            # not more samples than parameters: take len(p0) samples to each side
            # of the peak instead, clipped to the valid index range
            i_lo = max(int(ind) - len(p0), 0)
            i_hi = min(int(ind) + len(p0), len(x) - 1)
            mask_data = (x>=x[i_lo]) & (x<=x[i_hi])
        try:
            # use relative error -> absolute_sigma=False, to concentrate on the peak
            popt, pcov = scipy.optimize.curve_fit(gauss, x[mask_data], y[mask_data], p0=p0)

            df_p.loc[i, 'x0'] = popt[0]
            df_p.loc[i, 'sigma'] = np.abs(popt[1])
            df_p.loc[i, 'scale'] = popt[2]
            if verbose<1:
                print(popt)
        except Exception as err:
            # best effort: a failed single fit keeps its NaN entries
            if verbose<2:
                print(i, err)

    # fit all together; start parameters: one row (x0, sigma, scale) per peak
    p0 = np.column_stack([df_p.peak_pos,
                          df_p.widths/sqrt_2ln2/2,
                          df_p.peak_heights])

    if min_peaks is not None and len(p0) <= min_peaks:
        # np.append flattens p0; it is flattened for curve_fit anyway
        p0 = np.append(p0, [0,1,1]*(min_peaks-len(p0)))

    # use absolute error -> absolute_sigma=True+sigma, to NOT concentrate on the peaks
    # the error is taken from the data passed in (`y`), not from a notebook global
    popt, pcov = scipy.optimize.curve_fit(gauss, x, y,
                                          p0=p0.flatten(),
                                          absolute_sigma=True,
                                          sigma=np.sqrt(np.abs(y))+1,
                                          )

    # merge the two dataframes
    df_full = pandas.DataFrame(columns=['x0', 'sigma', 'scale'],
                               data=popt.reshape(-1,3))
    df_full = df_full.sort_values('x0', ignore_index=True)
    df_p = df_p.merge(df_full, how='outer', left_index=True, right_index=True,
                      suffixes=('_single', None))

    return df_p

In [None]:
# Fit the signed-dt histogram with the default peak-finding settings.
# `x` and `counts` are the globals left by the histogram cell further up.
df_p = fit_peaks_gauss(x, counts)

In [None]:
# Histogram the signed dt for every scintillator, fit the peaks with Gaussians
# and collect the fit results of all scintillators in `df_fits`.
scintillator_unique = np.sort(df_double.scintillator.unique())

# one panel per scintillator in a grid with 3 columns
nrows, ncols, ax_active = get_nrows_ncols(len(scintillator_unique), ncols=3)
fig, ax = plt.subplots(ncols=ncols, nrows=nrows, #layout="constrained",
                       squeeze=False, sharex=True,
                       figsize=(10,3*nrows),
#                        gridspec_kw=dict(height_ratios=[5,1])
                      )
# remember the grid shape, the loop addresses the axes with a flat index
ax_shape = ax.shape
ax = ax.flatten()

# bin edges symmetric around 0 with a step of .5e-9
t_label_max = df_double.time_ns_label.max()
bins = np.arange(-t_label_max, t_label_max, .5e-9)
print(f'Bins: {-t_label_max} to {t_label_max}, length: {len(bins)}')

list_fits = []
for i, s_i in enumerate(scintillator_unique):
    mask_scintillator = df_double.scintillator_double & (df_double.scintillator==s_i)

    # per 'label' group: channel of the first row, earliest and latest time
    grouped = df_double[mask_scintillator].groupby(['label'])
    ch_first = grouped['channel'].first()
    t_0 = grouped['time_ns'].min()
    t_1 = grouped['time_ns'].max()
    dt = t_1 - t_0

    channels = np.sort(df_double[mask_scintillator].channel.unique())

    # make dt signed: negative if the first row of the group is in the lowest channel
    dt[ch_first==channels[0]] *= -1
    counts, edges = np.histogram(dt, bins=bins)
    ax[i].stairs(counts, edges=edges*1e9,label=f'dt: ch{channels[1]} - ch{channels[0]}',)

    # fit peaks; only peaks with at least 10% of the maximum, at least 2 Gaussians
    x_fit = strawb.tools.cal_middle(edges)*1e9
    df_p = fit_peaks_gauss(x_fit, counts, distance=3, height_min=counts.max()/10, min_peaks=2)
    df_p['scintillator'] = s_i
    list_fits.append(df_p)

    # combined fit (solid) and its individual Gaussians (dashed)
    x_plot = np.linspace(x_fit.min(), x_fit.max(), 1000)
    popt = df_p[['x0', 'sigma', 'scale']]
    lin, = ax[i].plot(x_plot, gauss(x_plot, *popt.to_numpy().flatten()), '-', label='fit')
    for j, row_j in popt.iterrows():
        ax[i].plot(x_plot, gauss(x_plot, row_j.x0, row_j.sigma, row_j.scale), '--', zorder=0,
#                    color=lin.get_color()
                  )

#     popt = df_p[['x0_single', 'sigma_single', 'scale_single']]
#     popt = popt[~popt.isna().any(axis=1)]  # exclude missing values
#     lin, = ax[i].plot(x_plot, gauss(x_plot, *popt.to_numpy().flatten()), '--', label='fit single')
#     for j, row_j in popt.iterrows():
#         ax[i].plot(x_plot, gauss(x_plot, row_j.x0_single, row_j.sigma_single, row_j.scale_single), ':', color=lin.get_color())

    ax[i].legend(loc='upper left', ncol=2, title=f'scintillator {s_i}')
    ax[i].grid()

# back to the 2-D grid: y-label on the first column, x-label on the bottom row
ax = ax.reshape(ax_shape)
for axi in ax[:, 0]:
    axi.set_ylabel('counts')
for axi in ax[-1, :]:
    # raw string: '\D' is not a valid escape sequence in a normal string literal
    axi.set_xlabel(r'$\Delta$ t [ns]')

# grid slots not flagged in `ax_active`: hide their y-axis
for axi in ax.flatten()[~ax_active]:
#     axi.set_axis_off()
    axi.get_yaxis().set_visible(False)

plt.tight_layout()

# combine all fit df's and take only the columns of interest
columns = ['scintillator',
           'x0', 'sigma', 'scale',
           'x0_single', 'sigma_single', 'scale_single',
           'peak_heights', 'peak_pos']
df_fits = pandas.concat(list_fits, ignore_index=True)[columns]

df_fits

# Simulate the signal

In [None]:
# Bin edges with a step of .5e-9 covering the range of `dt`
# (`dt` is the global left by the last iteration of the loop in the cell above).
bins = np.arange(dt.min(), dt.max(), .5e-9)
bins

In [None]:
# Parameters of the toy simulation.
# NOTE(review): n, signal2noise, n_signal, ch_ratio and n_signal_2 are assigned
# again in the simulation cell further down; dt_channel and sigma_channel appear
# there only in commented-out code.
n = int(2e3)
signal2noise = .9
# n_signal: the fraction `signal2noise` of n, truncated to int
n_signal = int(n*signal2noise)
ch_ratio = 1
# n_signal_2: n_signal scaled by `ch_ratio`, truncated to int
n_signal_2 = int(ch_ratio*n_signal)

# presumably time offsets in ns -- TODO confirm units
dt_channel = 6
dt_signal = 2

# presumably Gaussian widths in ns -- TODO confirm units
sigma_channel = .05
sigma_noise = 1.

# display the interval dt_channel -/+ dt_signal
dt_channel - dt_signal, dt_channel + dt_signal

In [None]:
# Take the fit results of scintillator 8 and keep the two rows with the
# largest 'peak_heights'; these rows parametrise the simulation below.
s_i=8
df_s = df_fits[df_fits.scintillator==s_i]
df_s=df_s.sort_values('peak_heights', ascending=False).iloc[:2]
df_s

In [None]:
# Toy simulation: draw times from the two fitted Gaussians of scintillator `s_i`
# (the rows of `df_s`), smear them, and run the same peak fit on the simulated
# histogram. The measured histogram is drawn for comparison.
bins_s = np.arange(-df_double.time_ns_label.max(), df_double.time_ns_label.max(), .5e-9)

# fixed seed -> the simulated sample and its fit are reproducible on re-run
rng = np.random.default_rng(42)

n = int(1e3)
signal2noise = 1
# Number of samples per peak from the area of the fitted Gaussian,
# scale*sigma*sqrt(2*pi); the factor 2 presumably converts the area to histogram
# entries for the 0.5 ns bin width -- TODO confirm
n_signal = int(df_s.scale.iloc[0]*df_s.sigma.iloc[0]*np.sqrt(2*np.pi)*2)  #int(n*signal2noise)
ch_ratio = 0.9
n_signal_2 = int(df_s.scale.iloc[1]*df_s.sigma.iloc[1]*np.sqrt(2*np.pi)*2)  #int(ch_ratio*n_signal)

t_0 = np.array([])
t_1 = np.array([])

# gaussian offset in the channel timing
# t_0 = rng.normal(loc=0.0, scale=sigma_channel, size=n)
# t_1 = rng.normal(loc=dt_channel, scale=sigma_channel, size=n)

# samples drawn from the two fitted Gaussians (x0, sigma are in ns)
t_0 = np.append(t_0, rng.normal(loc=df_s.x0.iloc[0], scale=df_s.sigma.iloc[0], size=n_signal))
t_0 = np.append(t_0, rng.normal(loc=df_s.x0.iloc[1], scale=df_s.sigma.iloc[1], size=n_signal_2))

# additional Gaussian smearing with a width of 1 (ns) on every sample
t_0 += rng.normal(loc=0, scale=1, size=len(t_0))

# histogram of the simulation; bins_s is in s, the simulated times are in ns
counts, edges = np.histogram(t_0, bins=bins_s*1e9)
x = strawb.tools.cal_middle(edges)

fig, ax = plt.subplots(ncols=1, nrows=2, #layout="constrained",
                       squeeze=False, sharex=False,
                       figsize=(6,4),
#                        gridspec_kw=dict(height_ratios=[5,1])
                      )
ax = ax.flatten()

# measured signed dt of scintillator `s_i`, same construction as for the fits above
mask_scintillator = df_double.scintillator_double & (df_double.scintillator==s_i)
grouped_s = df_double[mask_scintillator].groupby(['label'])
ch_first = grouped_s['channel'].first()
t_0_s = grouped_s['time_ns'].min()
t_1_s = grouped_s['time_ns'].max()
dt_s = t_1_s - t_0_s
channels = np.sort(df_double[mask_scintillator].channel.unique())
dt_s[ch_first==channels[0]] *= -1
counts_s, edges_s = np.histogram(dt_s, bins=bins_s)

# upper panel: data vs. simulation
ax[0].stairs(counts_s, edges=edges_s*1e9,label=f'dt: ch{channels[1]} - ch{channels[0]}',)
ax[0].stairs(counts, edges=edges, label='simulation')
# counts, edges = np.histogram(t_1, bins=bins)
# ax[0].stairs(counts, edges=edges)

# lower panel: simulation with the fit
ax[1].stairs(counts, edges=edges, label='simulation')

# fit peaks
x_fit = x
df_p = fit_peaks_gauss(x_fit, counts, distance=3, height_min=counts.max()/10, min_peaks=2)

# combined fit (solid) and its individual Gaussians (dashed)
x_plot = np.linspace(x_fit.min(), x_fit.max(), 1000)
popt = df_p[['x0', 'sigma', 'scale']]
lin, = ax[1].plot(x_plot, gauss(x_plot, *popt.to_numpy().flatten()), '-', label='fit')
for j, row_j in popt.iterrows():
    ax[1].plot(x_plot, gauss(x_plot, row_j.x0, row_j.sigma, row_j.scale), '--', zorder=0,
#                    color=lin.get_color()
              )

# the labels set above are only shown with a legend
for axi in ax:
    axi.set_ylabel('counts')
    axi.legend(loc='upper left')
    axi.grid()
ax[-1].set_xlabel(r'$\Delta$ t [ns]')
plt.tight_layout()

df_p[['x0','sigma','scale','x0_single','sigma_single','scale_single','peak_heights','peak_pos']]