In [None]:
%matplotlib notebook

# This example shows how to download files from the ONC server
import os

import numpy as np
import datetime

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import strawb
import strawb.sensors.module
import strawb.tools

import h5py 

import pandas
import scipy.ndimage

import glob
import tqdm
import tqdm.notebook

# create working path

In [None]:
# Output directory for the muon-event files, located below the processed-data root.
path = os.path.join(strawb.Config.proc_data_dir, 'muontracker/muon_events')
path = os.path.abspath(path)

# exist_ok=True: no error when the directory is already there
os.makedirs(path, exist_ok=True)
print(path)

# Load ONC DB and mask files of interest

In [None]:
# Use the DB file on disc when it exists; otherwise create the handler without
# loading a DB (load_db=False).
db_on_disc = os.path.exists(strawb.Config.pandas_file_sync_db)
db = strawb.SyncDBHandler() if db_on_disc else strawb.SyncDBHandler(load_db=False)

# In both cases: update the DB and save it (save_db=True).
db.load_onc_db_update(output=True, save_db=True)

In [None]:
# Boolean selection of the DB rows used in this notebook. All conditions must hold.
mask = (
    (db.dataframe['deviceCode'] == 'TUMMUONTRACKER001')  # device
    & (db.dataframe.dataProductCode == 'MTSD')  # data-product, see SyncDBHandler.sensor_mapping
    & db.dataframe.synced  # flagged as synced in the DB
    & (db.dataframe.file_version == 4)  # we need TOT
    & (db.dataframe.measurement_type == 'counts')  # no rate scan
)

db.dataframe[mask]

In [None]:
def get_file_name(file_i, path, threshold_ns):
    """Return the output file name for the muon events of a single file.

    Parameters
    ----------
    file_i : object
        Must provide the attributes `deviceCode`, `dateFrom` and `dateTo`; the
        two dates are formatted with strftime-style codes.
    path : str
        Base directory; the result is placed in a `YYYYMM` sub-directory
        (taken from `dateFrom`) below it.
    threshold_ns : float
        Threshold in ns; written to the file name without decimals.

    Returns
    -------
    str
        `<path>/<YYYYMM>/<deviceCode>_<dateFrom>_<dateTo>_muon-events_threshold-ns_<threshold>.gz`
    """
    template = '{t_start:%Y%m}/{dev_code}_{t_start:%Y%m%dT%H%M%S}_{t_end:%Y%m%dT%H%M%S}_muon-events_threshold-ns_{threshold_ns}.gz'

    relative_name = template.format(
        dev_code=file_i.deviceCode,
        t_start=file_i.dateFrom,
        t_end=file_i.dateTo,
        threshold_ns=f'{threshold_ns:.0f}',
    )
    return os.path.join(path, relative_name)

def save_dataframe(full_path, dataframe, path, threshold_ns=20, reduce_dataframe=True, skipp=True):
    """Build the muon events of one file and pickle them below `path`.

    Parameters
    ----------
    full_path : str
        Source file; also the label used to look up the file's row in `dataframe`.
    dataframe : pandas.DataFrame
        Frame which holds a row for `full_path` (accessed with `.loc`); the row
        is handed to `get_file_name`.
    path : str
        Base directory of the output file.
    threshold_ns, reduce_dataframe :
        Passed on to `create_dataframe`.
    skipp : bool
        If True, nothing is done when the output file exists already.

    Returns
    -------
    int
        0 if the file was skipped, 1 if it was created.
    """
    target = get_file_name(dataframe.loc[full_path], path, threshold_ns)

    # guard: keep an existing result untouched
    if skipp and os.path.exists(target):
        return 0

    # the file lives in a sub-directory which may not exist yet
    os.makedirs(os.path.dirname(target), exist_ok=True)

    events = create_dataframe(full_path,
                              dataframe,
                              threshold_ns=threshold_ns,
                              reduce_dataframe=reduce_dataframe)
    events.to_pickle(target)
    return 1

def create_dataframe(full_path, dataframe, threshold_ns=20, reduce_dataframe=True):
    """Run the event builder of `strawb.MuonTracker` on a single file.

    Parameters
    ----------
    full_path : str
        File handed to `strawb.MuonTracker`.
    dataframe :
        Not used inside this function.
    threshold_ns, reduce_dataframe :
        Forwarded unchanged to `event_builder.event_builder`.

    Returns
    -------
    The result of `event_builder.event_builder(...)`.
    """
    tracker = strawb.MuonTracker(full_path)
    builder = tracker.event_builder
    return builder.event_builder(threshold_ns=threshold_ns,
                                 reduce_dataframe=reduce_dataframe)

# Test: build the events for the last row of the selection
threshold_ns = 20.0
full_path = db.dataframe[mask].iloc[-1].fullPath

df = create_dataframe(full_path,
                      db.dataframe[mask],
                      threshold_ns=threshold_ns)
df

# loop over all files and extract the events

In [None]:
import tqdm
import tqdm.notebook

# NOTE(review): mask_small_files is computed but not used; the file list below
# is built from `mask` - confirm which selection is intended.
mask_small_files = mask & (db.dataframe.fileSize < 1e6)

# only the first 20 files of the selection are processed
full_path_list = db.dataframe[mask].fullPath.iloc[:20].tolist()

threshold_ns = 20.0

path = os.path.join(strawb.Config.proc_data_dir, 'muontracker/muon_events')
path = os.path.abspath(path)
os.makedirs(path, exist_ok=True)
print(path)

mpi = strawb.MProcessIterator(
    processes=9,
    progress_bar=tqdm.tqdm,
    with_sys_log=False,
)

# save_dataframe is called for the entries of full_path_list; the keyword
# arguments are handed to mpi.run unchanged. skipp=False: existing files are rebuilt.
results = mpi.run(
    save_dataframe,
    full_path_list,
    dataframe=db.dataframe[mask],
    path=path,
    threshold_ns=threshold_ns,
    skipp=False,
)

In [None]:
# Collect both result dictionaries of the iterator
mpi_results = {
    'error': mpi.error_dict,
    'success': mpi.success_dict,
}

# print the number of entries per outcome
for outcome, entries in mpi_results.items():
    print(f'{outcome:8}: {len(entries):4}')

# combine the per-file dataframes into monthly dataframes and store them

In [None]:
import glob

def get_db_pandas():
    """Collect the per-file muon-event files below the global `path` in a DataFrame.

    The files are expected to be named as generated by `get_file_name`:
    `<deviceCode>_<dateFrom>_<dateTo>_muon-events_threshold-ns_<threshold>.gz`.

    Returns
    -------
    pandas.DataFrame
        One row per file with the columns `fullPath`, `deviceCode`, `dateFrom`,
        `dateTo` (both datetime), `threshold_ns` (float), `file_size` and
        `file_size_str`; sorted by `deviceCode` and `dateFrom` with a fresh index.
    """
    # get all per-file muon-event files; glob is not called with recursive=True,
    # therefore `**` matches exactly one directory level
    search_str = os.path.join(path, '**/*_muon-events_*.gz')
    df_data = pandas.DataFrame({'fullPath': glob.glob(search_str)})

    # extract the information from the file name
    ps_file_name = df_data.fullPath.map(os.path.basename)  # extract the filename
    # `n` and `expand` are keyword-only in pandas >= 2
    ps_file_name = ps_file_name.str.rsplit('.', n=1, expand=True)[0]  # remove file ending
    # split the filename in its components; 5 splits give 6 parts of which
    # part 3 ('muon-events') and part 4 ('threshold-ns') are dropped
    name_parts = ps_file_name.str.split('_', n=5, expand=True)
    df_data[['deviceCode', 'dateFrom', 'dateTo', 'threshold_ns']] = name_parts[[0, 1, 2, 5]]

    # convert the dtypes; the dates are strings in the format used by `get_file_name`
    # (`unit=` is for numeric epoch values and doesn't apply to these strings)
    time_format = '%Y%m%dT%H%M%S'
    df_data['dateFrom'] = pandas.to_datetime(df_data['dateFrom'], format=time_format)
    df_data['dateTo'] = pandas.to_datetime(df_data['dateTo'], format=time_format)
    df_data['threshold_ns'] = df_data['threshold_ns'].astype(float)

    # get file size
    df_data['file_size'] = df_data.fullPath.apply(os.path.getsize)
    df_data['file_size_str'] = df_data.file_size.apply(strawb.tools.human_size)

    return df_data.sort_values(['deviceCode', 'dateFrom'], ignore_index=True)

df_single = get_db_pandas()

# list the device codes which are present, one per line
device_groups = df_single.groupby(['deviceCode']).groups
print('Detected Groups: ', *device_groups, sep='\n')

df_single

# generate the monthly files

In [None]:
def get_file_name_monthly(dev_code, date_from, date_to, path, threshold_ns):
    """Return the file name of a merged muon-event file.

    Parameters
    ----------
    dev_code : str
        Device code, first component of the file name.
    date_from, date_to : datetime-like
        Start and end of the covered range; formatted as `%Y%m%dT%H%M%S`.
    path : str
        Directory of the file.
    threshold_ns : float
        Threshold in ns; written to the file name without decimals.

    Returns
    -------
    str
        `<path>/<dev_code>_<date_from>_<date_to>_muon-events-merge_threshold-ns_<threshold>.gz`
    """
    template = '{dev_code}_{t_start:%Y%m%dT%H%M%S}_{t_end:%Y%m%dT%H%M%S}_muon-events-merge_threshold-ns_{threshold_ns}.gz'

    base_name = template.format(
        dev_code=dev_code,
        t_start=date_from,
        t_end=date_to,
        threshold_ns=f'{threshold_ns:.0f}',
    )
    return os.path.join(path, base_name)


# Device and threshold the merged files are generated for. Both are part of the
# output file name, therefore only matching files are merged.
merge_dev_code = 'TUMMUONTRACKER001'
merge_threshold_ns = 20
mask_merge = (df_single.deviceCode == merge_dev_code) & (df_single.threshold_ns == merge_threshold_ns)

# get the time range: month begins from the first to the last file.
# `min() - dt_month` rolls back to the begin of the month; when the first file
# starts on the first day of a month, it goes back one full month, i.e. the
# range can start with a month without files.
dt_month = pandas.offsets.MonthBegin(1)
dr = pandas.date_range(df_single.dateFrom.min()-dt_month, df_single.dateFrom.max(), freq=dt_month, normalize=True)

# loop over the time range
for dr_i in tqdm.notebook.tqdm(dr):
    mask_dates = mask_merge & (df_single.dateFrom >= dr_i) & (df_single.dateFrom < dr_i+dt_month)

    # months without files are skipped; pandas.concat raises on an empty list
    if not mask_dates.any():
        continue

    # the file covers the month up to its last second
    file_name = get_file_name_monthly(merge_dev_code,
                                      dr_i, dr_i+dt_month-pandas.Timedelta('00:00:01'),
                                      path, merge_threshold_ns)
    print(file_name)

    # load the dataframes of all files of the month and merge them
    df_list = [strawb.BaseDBHandler(full_path_i).dataframe
               for full_path_i in df_single.loc[mask_dates, 'fullPath']]
    df_all = pandas.concat(df_list, ignore_index=True)

    df_all.to_pickle(file_name)

# get a DataFrame with all monthly files

In [None]:
def get_db_pandas_monthly():
    """Collect the merged (monthly) muon-event files in the global `path` in a DataFrame.

    The files are expected to be named as generated by `get_file_name_monthly`:
    `<deviceCode>_<dateFrom>_<dateTo>_muon-events-merge_threshold-ns_<threshold>.gz`.

    Returns
    -------
    pandas.DataFrame
        One row per file with the columns `fullPath`, `deviceCode`, `dateFrom`,
        `dateTo` (both datetime), `threshold_ns` (float), `file_size` and
        `file_size_str`; sorted by `deviceCode` and `dateFrom` with a fresh index.
    """
    # get all merged files; they are stored directly in `path`
    search_str = os.path.join(path, '*_muon-events-merge_*.gz')
    df_data = pandas.DataFrame({'fullPath': glob.glob(search_str)})

    # extract the information from the file name
    ps_file_name = df_data.fullPath.map(os.path.basename)  # extract the filename
    # `n` and `expand` are keyword-only in pandas >= 2
    ps_file_name = ps_file_name.str.rsplit('.', n=1, expand=True)[0]  # remove file ending
    # split the filename in its components; 5 splits give 6 parts of which
    # part 3 ('muon-events-merge') and part 4 ('threshold-ns') are dropped
    name_parts = ps_file_name.str.split('_', n=5, expand=True)
    df_data[['deviceCode', 'dateFrom', 'dateTo', 'threshold_ns']] = name_parts[[0, 1, 2, 5]]

    # convert the dtypes; the dates are strings in the format used by `get_file_name_monthly`
    # (`unit=` is for numeric epoch values and doesn't apply to these strings)
    time_format = '%Y%m%dT%H%M%S'
    df_data['dateFrom'] = pandas.to_datetime(df_data['dateFrom'], format=time_format)
    df_data['dateTo'] = pandas.to_datetime(df_data['dateTo'], format=time_format)
    df_data['threshold_ns'] = df_data['threshold_ns'].astype(float)

    # get file size
    df_data['file_size'] = df_data.fullPath.apply(os.path.getsize)
    df_data['file_size_str'] = df_data.file_size.apply(strawb.tools.human_size)

    return df_data.sort_values(['deviceCode', 'dateFrom'], ignore_index=True)

df_monthly = get_db_pandas_monthly()

# list the device codes which are present, one per line
device_groups_monthly = df_monthly.groupby(['deviceCode']).groups
print('Detected Groups: ', *device_groups_monthly, sep='\n')

df_monthly

# Do some plotting

In [None]:
# extract the rate over all monthly files
dt = 30*60  # [s]  NOTE(review): not used below, the bins are fixed to 1 h - confirm the intended bin width

df_list = []

for i in df_monthly.fullPath:
    print(i)
    df = pandas.read_pickle(i)
    # the scaled values are interpreted as ns - presumably `time` is stored in seconds; verify
    df['time'] = pandas.to_datetime(df['time']*1e9)
    # renumber the labels: the counter increases whenever the label changes from one row to the next
    df['label'] = (df['label'].diff().fillna(0) != 0).cumsum()

    # Hourly bin edges from midnight of the first day to midnight after the last
    # day. The end must be the *following* midnight: with `normalize=True`,
    # date_range also moves the end back to midnight, and the events of the last
    # day would fall outside the bins and be dropped by np.histogram.
    t_first = df['time'].min().normalize()
    t_last = df['time'].max().normalize() + pandas.Timedelta(days=1)
    bins = pandas.to_numeric(pandas.date_range(t_first, t_last, freq='1h'))

    # one histogram for each value 1..max of scintillator_double_count
    for j in range(df.scintillator_double_count.max()):
        # per label the earliest time, i.e. every label is counted once
        t = df[df.scintillator_double_count == j+1].groupby('label').time.min()
        counts, edges = np.histogram(pandas.to_numeric(t), bins=bins)

        df_list.append(pandas.DataFrame({'counts': counts,
                                         'time': (edges[1:] + edges[:-1])*.5,  # bin centers
                                         'scintillator_double_count': j+1}))

df_all = pandas.concat(df_list)
df_all['time'] = pandas.to_datetime(df_all['time'])  # bin centers [ns] back to datetime

In [None]:
# Plot the binned counts over time, one curve per scintillator_double_count value.
nrows = 1
fig, ax = plt.subplots(ncols=1, nrows=nrows,
                       squeeze=False,  # always return a 2D array of axes
                       sharex=True,
                       figsize=(9, 3*nrows))
ax_shape = ax.shape
ax = ax.flatten()

for i in df_all.scintillator_double_count.unique():
    mask_sc = df_all.scintillator_double_count == i

    # label the curve so that the legend tells the curves apart
    ax[0].plot(df_all[mask_sc].time, df_all[mask_sc].counts,
               label=f'scintillator_double_count = {i}')

ax[0].set_xlabel('time')
ax[0].set_ylabel('counts per bin')
ax[0].set_yscale('log')
ax[0].grid()
ax[0].legend()
fig.tight_layout()