# 0. Import required packages

In [None]:
import os
import yaml
import textwrap
import numpy as np
import pandas as pd
import tools as tools
from matplotlib import pyplot as plt
from scipy import stats as spstats

# 1. Load the input parameters

In [None]:
# Set here the ABSOLUTE path to the YAML file with the input parameters
input_parameters_filepath = ''

# Parse the YAML file; the result is kept in `params` for the cells below
with open(input_parameters_filepath, mode="r", encoding="utf-8") as file:
    params = yaml.safe_load(file)

# 2. Read the dataframes and combine into a single one

In [None]:
# Read the dataframes
# os.listdir() returns the entries in arbitrary order, so the file names are
# sorted first: this keeps the position of each DataFrame in `dataframes`
# (and the row order after concatenation) reproducible between runs.
pickle_filenames = sorted(
    filename
    for filename in os.listdir(params['input_dir'])
    if filename.endswith('.pkl')
)

# Fail early, with an explicit message, if there is nothing to analyze
if not pickle_filenames:
    raise FileNotFoundError(
        f"No .pkl files were found in {params['input_dir']}"
    )

# NOTE(review): pd.read_pickle() can execute arbitrary code while
# unpickling - only load files which come from a trusted source.
dataframes = [
    pd.read_pickle(os.path.join(params['input_dir'], filename))
    for filename in pickle_filenames
]

In [None]:
# Make sure that all the DataFrames share the same number of columns
column_counts = [len(frame.columns) for frame in dataframes]

# Compare the column count of every DataFrame against its predecessor
for i, (previous, current) in enumerate(
    zip(column_counts, column_counts[1:]),
    start=1
):
    if previous != current:
        raise Exception(
            f"DataFrames {i-1} and {i} have different number of columns"
        )

print(f"All DataFrames have {column_counts[0]} columns")

In [None]:
# Make sure that the column names agree, position by position, in
# every DataFrame. DataFrame 0 is taken as the reference.
reference_columns = dataframes[0].columns

for position, reference_name in enumerate(reference_columns):
    for frame_index, frame in enumerate(dataframes[1:], start=1):
        candidate_name = frame.columns[position]
        if reference_name != candidate_name:
            raise Exception(
                f"The name of the column {position} of DataFrame 0 "
                f"({reference_name}) does not match "
                f"column {position} of DataFrame {frame_index} "
                f"({candidate_name})"
            )

print("All DataFrames have the same column names")

In [None]:
# Stack every DataFrame into a single one, renumbering the rows from zero
dataframe = pd.concat(dataframes, ignore_index=True)

print(f"Combined DataFrame has {dataframe.shape[0]} rows")

# 3. Delete ignored columns

In [None]:
# Collect (once each) the ignored columns which are actually present
columns_to_drop = []
for column in params['columns_to_ignore']:
    if column in dataframe.columns and column not in columns_to_drop:
        columns_to_drop.append(column)

# Remove all of them in a single call
dataframe = dataframe.drop(columns=columns_to_drop)

print(f"The resulting dataframe has {dataframe.shape[1]} columns")

# 4. Choose which boards should be analyzed

In [None]:
if params['visualize_all_boards']:
    # No board selection requested: keep every row
    filtered_df = dataframe
else:
    # Gather the strip IDs of every set which should be analyzed
    boards_to_visualize = []
    for set_no in params['sets_to_analyze']:
        boards_to_visualize.extend(tools.strip_ids_of_set[set_no])

    # Keep the rows whose strip_ID equals any of the requested IDs.
    # isin() replaces the hand-written chain of '==' masks joined by
    # logical OR: it avoids shadowing the builtin `filter` and yields an
    # empty selection (instead of an IndexError) if no board is requested.
    board_mask = dataframe['strip_ID'].isin(
        [int(strip_ID) for strip_ID in boards_to_visualize]
    )

    filtered_df = dataframe[board_mask].reset_index(drop=True)

print(f"Filtered DataFrame has {len(filtered_df)} rows")

# 5. Apply more filters

In [None]:
# Additional filters are defined here.
# Conjunctive filters: a row is kept only if it passes ALL of them
conjuntive_filters = [
    filtered_df['analysis_reliability'] > 1,
]

# Disjunctive filters: a row is kept only if it passes AT LEAST ONE of them
disyuntive_filters = [
    filtered_df['overvoltage_V'] == overvoltage
    for overvoltage in (2., 3., 4.)
]

# Gain measurements of tray 115 of the re-test batch (i.e. those taken in April) have overvoltages 2.7, 3.0, 3.1 and 4.1

# Combine the filters into a single boolean mask
n_rows = len(filtered_df)

total_filter = pd.Series(np.full(n_rows, True))
for conjuntive_filter in conjuntive_filters:
    total_filter &= conjuntive_filter   # Logical 'AND'

# An empty list of disjunctive filters imposes no restriction: in that case
# the accumulator starts (and stays) all-True. Otherwise it starts all-False
# and every filter is OR-ed into it.
total_disyuntive_filter = pd.Series(
    np.full(n_rows, not disyuntive_filters)
)
for disyuntive_filter in disyuntive_filters:
    total_disyuntive_filter |= disyuntive_filter    # Logical 'OR'

total_filter &= total_disyuntive_filter

# Select the surviving rows and renumber them from zero
filtered_df = filtered_df[total_filter.tolist()].reset_index(drop=True)

In [None]:
# Order the rows by board (strip_ID) and then by SiPM location
filtered_df = filtered_df.sort_values(by=['strip_ID', 'sipm_location'])

print(f"Filtered DataFrame has {filtered_df.shape[0]} rows")

# 6. Display the considered boards

In [None]:
# Distinct strip IDs in the DataFrame, taken from the keys of a groupby
board_ids = list(filtered_df.groupby('strip_ID').groups)

# Cluster the IDs and turn the clusters into a string with the
# project helpers defined in `tools`
clustered_boards = tools.cluster_integers_by_contiguity(board_ids)
clustered_boards_string = \
    tools.get_string_of_contiguously_clustered_integers(clustered_boards)

# Break the string into lines of at most 72 characters
clustered_boards_string = textwrap.fill(clustered_boards_string, width=72)

print(f"Boards in the DataFrame: {clustered_boards_string}")

In [None]:
# Evaluate the column labels of the filtered DataFrame; as the last
# expression of the cell, its value is what the notebook displays
filtered_df.columns

# 7. Graph the data

In [None]:
# Font sizes
title_fontsize = 10     # Titles of the plots
labels_fontsize = 16    # Axes labels of the plots
ticks_fontsize = 14     # Axes ticks of the plots

# Histogram and number-formatting settings
nbins = 30              # Number of bins for 1-D histograms
ndigits = 2             # Number of digits for means and stds
DCR_ndigits = 2         # Number of digits for DCRs

# Fit settings
fit_to_pdf = True               # Whether to normalize the histograms and overlay a fitted PDF
distribution = spstats.lognorm  # Distribution which is fitted to the data

## 7.1. Graph gain data

In [None]:
gain_nbins = 100
infer_range = False
# The following range is ignored if infer_range is True
gain_histograms_range = (0., 0.4e+7)

# One sub-DataFrame per overvoltage value
overvoltage_grouped_dataframes = {
    key: group for key, group in filtered_df.groupby('overvoltage_V')
}

# Mean and standard deviation of the gain for every overvoltage
gain_characteristics_with_overvoltage = {
    key: {
        'mean': np.mean(group['gain_in_#e-']),
        'std': np.std(group['gain_in_#e-'])
    } for key, group in overvoltage_grouped_dataframes.items()
}

fig, axes = plt.subplots(
    nrows=1,
    ncols=1,
    figsize=(6, 5)
)

if infer_range:
    # Span from 20 stds below the mean gain of the lowest overvoltage
    # up to 20 stds above the mean gain of the highest overvoltage
    lowest = gain_characteristics_with_overvoltage[
        min(overvoltage_grouped_dataframes)
    ]
    highest = gain_characteristics_with_overvoltage[
        max(overvoltage_grouped_dataframes)
    ]
    gain_histograms_range = (
        lowest['mean'] - (20. * lowest['std']),
        highest['mean'] + (20. * highest['std'])
    )

# One legend entry per overvoltage. The label ends right after the sigma
# value: the former trailing ", " left a dangling comma in the legend.
legend_labels = []
for overvoltage, characteristics in \
        gain_characteristics_with_overvoltage.items():
    mean_str = tools.scientific_notation_str(
        characteristics['mean'],
        ndigits=2
    )
    std_str = tools.scientific_notation_str(
        characteristics['std'],
        ndigits=2
    )
    legend_labels.append(
        f"OV = {overvoltage} V, "
        r"$\mu = $"
        f"{mean_str}, "
        r"$\sigma = $"
        f"{std_str}"
    )

tools.plot_histogram(
    axes,
    *[
        np.array(group['gain_in_#e-'])
        for group in overvoltage_grouped_dataframes.values()
    ],
    bins=gain_nbins,
    hist_range=gain_histograms_range,
    density=False,
    xlabel='Gain (#e-)',
    ylabel='Hits',
    legend_labels=legend_labels,
    linewidth = 1.,
    # Alternative title, which lists the boards instead of the sets:
    # figtitle=f"Overvoltage-wise gain distributions - Boards {clustered_boards_string}",
    figtitle=f"Overvoltage-wise gain distributions - Sets: {params['sets_to_analyze']}",
    fontsize=14,
    colourful=True,
)

fig.show()

## 7.2. Graph dark-noise data

### 7.2.1. DCR

In [None]:
DCR_bin_width = 5
DCR_outlier_threshold = tools.thresholds['DCR_mHz_per_mm2']['threshold']

# DCR values with and without the bursts
DCR_values = filtered_df['DCR_mHz_per_mm2']
burstless_DCR_values = filtered_df['burstless_DCR_mHz_per_mm2']

# Mean and sample standard deviation (ddof=1) of both sets of values
DCR_mean = DCR_values.mean()
DCR_std = DCR_values.std(ddof=1)
burstless_DCR_mean = burstless_DCR_values.mean()
burstless_DCR_std = burstless_DCR_values.std(ddof=1)

# Bin edges: one bin width of margin around the range of each data set
DCR_bins = np.arange(
    DCR_values.min() - DCR_bin_width,
    DCR_values.max() + DCR_bin_width,
    step=DCR_bin_width
)
burstless_DCR_bins = np.arange(
    burstless_DCR_values.min() - DCR_bin_width,
    burstless_DCR_values.max() + DCR_bin_width,
    step=DCR_bin_width
)

hist, edges, _ = plt.hist(
    DCR_values,
    label='w/ bursts',
    bins=DCR_bins,
    density=fit_to_pdf,
    histtype='step',
    color='blue'
)

if fit_to_pdf:
    # Evaluate the fitted PDF from zero up to the threshold or the
    # biggest DCR value, whichever is greater
    fit_x = np.linspace(
        0,
        max(DCR_outlier_threshold, DCR_values.max()),
        1000
    )
    fit_parameters = distribution.fit(DCR_values)
    plt.plot(
        fit_x,
        distribution.pdf(fit_x, *fit_parameters),
        label="Fit to 'w/ bursts'",
        color='blue'
    )

plt.hist(
    burstless_DCR_values,
    label='w/o bursts',
    bins=burstless_DCR_bins,
    density=fit_to_pdf,
    histtype='step',
    color='black'
)

plt.axvline(
    DCR_outlier_threshold,
    color='red',
    label='Outlier threshold'
)

# Show the x-axis up to 5% beyond the outlier threshold
plt.xlim(0, DCR_outlier_threshold+(0.05*DCR_outlier_threshold))
plt.xlabel('DCR (mHz/mm2)', fontsize=labels_fontsize)
plt.ylabel(
    'Probability density' if fit_to_pdf else 'Hits',
    fontsize=labels_fontsize
)
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
plt.title(
    f"Sets: {params['sets_to_analyze']} \n w/ bursts - ("
    f"{round(DCR_mean, ndigits=DCR_ndigits)} +/- {round(DCR_std, ndigits=DCR_ndigits)})"
    f", w/o bursts - ({round(burstless_DCR_mean, ndigits=DCR_ndigits)}"
    f" +/- {round(burstless_DCR_std, ndigits=DCR_ndigits)})",
    fontsize=title_fontsize
)
plt.grid()
plt.legend()
plt.show()

### 7.2.2. XTP

In [None]:
XTP_bin_width = 0.01
XTP_mean = np.mean(filtered_df['XTP'])
XTP_std = np.std(filtered_df['XTP'], ddof=1)
XTP_outlier_threshold = tools.thresholds['XTP']['threshold']

hist, edges, _ = plt.hist(
    filtered_df['XTP'],
    label='w/ bursts',
    bins=np.arange(
        np.min(filtered_df['XTP'])-XTP_bin_width,
        np.max(filtered_df['XTP'])+XTP_bin_width,
        step=XTP_bin_width),
    density=fit_to_pdf,
    histtype='step',
    color='blue'
)

if fit_to_pdf:
    # Evaluate the fitted PDF from zero up to the threshold or the
    # biggest XTP value, whichever is greater
    fit_x = np.linspace(
        0,
        max(
            XTP_outlier_threshold,
            np.max(filtered_df['XTP'])
        ),
        1000
    )
    plt.plot(
        fit_x,
        distribution.pdf(
            fit_x,
            *distribution.fit(filtered_df['XTP'])
        ),
        label="Fit to 'w/ bursts'",
        color='blue'
    )

# The bin edges are computed from the burstless data itself, as done in
# the DCR plot. Taking them from the 'XTP' column silently leaves out of
# the histogram (and of its normalization) every burstless value which
# falls outside the range of the with-bursts data.
plt.hist(
    filtered_df['burstless_XTP'],
    label='w/o bursts',
    bins=np.arange(
        np.min(filtered_df['burstless_XTP'])-XTP_bin_width,
        np.max(filtered_df['burstless_XTP'])+XTP_bin_width,
        step=XTP_bin_width),
    density=fit_to_pdf,
    histtype='step',
    color='black'
)
plt.axvline(
    XTP_outlier_threshold,
    color='red',
    label='Outlier threshold'
)
# Show the x-axis up to 5% beyond the outlier threshold
plt.xlim(
    0,
    XTP_outlier_threshold+(0.05*XTP_outlier_threshold)
)
plt.xlabel(
    'X-Talk probability',
    fontsize=labels_fontsize
)
plt.ylabel(
    'Hits' if not fit_to_pdf else 'Probability density',
    fontsize=labels_fontsize
)
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
plt.title(
    f"Sets: {params['sets_to_analyze']} \n w/ bursts - ("
    f"{round(XTP_mean, ndigits=ndigits)} +/- {round(XTP_std, ndigits=ndigits)})"
    f", w/o bursts - ({round(np.mean(filtered_df['burstless_XTP']), ndigits=ndigits)}"
    f" +/- {round(np.std(filtered_df['burstless_XTP'], ddof=1), ndigits=ndigits)})",
    fontsize=title_fontsize)
plt.grid()
plt.legend()
plt.show()

### 7.2.3. APP

In [None]:
APP_bin_width = 0.0035
APP_mean = np.mean(filtered_df['APP'])
APP_std = np.std(filtered_df['APP'], ddof=1)
APP_outlier_threshold = tools.thresholds['APP']['threshold']

hist, edges, _ = plt.hist(
    filtered_df['APP'],
    label='w/ bursts',
    bins=np.arange(
        np.min(filtered_df['APP'])-APP_bin_width,
        np.max(filtered_df['APP'])+APP_bin_width,
        step=APP_bin_width),
    density=fit_to_pdf,
    histtype='step',
    color='blue')

if fit_to_pdf:
    # Evaluate the fitted PDF from zero up to the threshold or the
    # biggest APP value, whichever is greater
    fit_x = np.linspace(
        0,
        max(
            APP_outlier_threshold,
            np.max(filtered_df['APP'])
        ),
        1000
    )
    plt.plot(
        fit_x,
        distribution.pdf(
            fit_x,
            *distribution.fit(filtered_df['APP'])
        ),
        label="Fit to 'w/ bursts'",
        color='blue'
    )

# The bin edges are computed from the burstless data itself, as done in
# the DCR plot. Taking them from the 'APP' column silently leaves out of
# the histogram (and of its normalization) every burstless value which
# falls outside the range of the with-bursts data.
plt.hist(
    filtered_df['burstless_APP'],
    label='w/o bursts',
    bins=np.arange(
        np.min(filtered_df['burstless_APP'])-APP_bin_width,
        np.max(filtered_df['burstless_APP'])+APP_bin_width,
        step=APP_bin_width),
    density=fit_to_pdf,
    histtype='step',
    color='black')

plt.axvline(
    APP_outlier_threshold,
    color='red',
    label='Outlier threshold'
)
# Show the x-axis up to 50% beyond the outlier threshold
plt.xlim(
    0,
    APP_outlier_threshold+(0.5*APP_outlier_threshold)
)
plt.xlabel(
    'Afterpulse probability',
    fontsize=labels_fontsize
)
plt.ylabel(
    'Hits' if not fit_to_pdf else 'Probability density',
    fontsize=labels_fontsize
)
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
plt.title(
    f"Sets: {params['sets_to_analyze']} \n w/ bursts - ("
    f"{round(APP_mean, ndigits=ndigits)} +/- {round(APP_std, ndigits=ndigits)})"
    f", w/o bursts - ({round(np.mean(filtered_df['burstless_APP']), ndigits=ndigits)}"
    f" +/- {round(np.std(filtered_df['burstless_APP'], ddof=1), ndigits=ndigits)})",
    fontsize=title_fontsize
)
plt.grid()
plt.legend()
plt.show()

# 8. Generate a table of strip_ID vs. sipm_location (still under development)

## 8.1. For gain data (development in this section can benefit from the code in the gain analyzer)

In [None]:
field_to_show = 'gain_in_#e-'
# NOTE(review): this string is only printed in the title. The rows shown
# below are selected by tray number, not by overvoltage - confirm that
# it actually describes the tabulated data.
additional_string = "overvoltage_V=3.0"
table_ndigits = 1

# Rows which are shown in the table
table_source = filtered_df[filtered_df['tray_no']==62]

# Reference statistics used to colour the cells and shown in the title.
# They are computed over the tabulated rows, using the sample standard
# deviation (ddof=1) as done for the dark-noise quantities.
# NOTE(review): presumably the statistics of the tabulated rows are the
# intended reference - confirm against the gain analyzer.
gain_mean = np.mean(table_source[field_to_show])
gain_std = np.std(table_source[field_to_show], ddof=1)

table = tools.strip_ID_vs_sipm_location_dataframe(
    table_source,
    field_to_show,
    significant_figures=table_ndigits
)

fig, ax = plt.subplots(figsize=(8, 2))
ax.axis('off')
ax.table(
    # Scientific notation with table_ndigits digits after the decimal point
    cellText=[
        [f"{val:.{table_ndigits}e}" for val in row]
        for row in table.values
    ],
    colLabels=table.columns,
    rowLabels=[' '+str(aux)+' ' for aux in range(1,7)],
    colWidths=[0.15 for aux in table.columns],
    cellColours=[
        [
            tools.decide_colour(
                val,
                gain_mean,
                gain_std,
                discern_sign=True,
                red_above=False
            ) for val in row
        ] for row in table.values
    ],
    cellLoc='center',
    loc='center')
ax.set_title(
    f"{field_to_show} \n {additional_string} \n Boards: "
    f"{clustered_boards_string} \n Mean +/- std: ("
    f"{round(gain_mean, ndigits=ndigits)} +/- "
    f"{round(gain_std, ndigits=ndigits)})",
    fontsize=title_fontsize
)
fig.show()
#plt.savefig('tabla.png', bbox_inches='tight', pad_inches=0.1)

## 8.2. For dark-noise data (development in this section can benefit from the code in the darknoise analyzer)

In [None]:
field_to_show = 'DCR_mHz_per_mm2'
table_ndigits = 1


def simple_colour_decide(val):
    """Return the cell colour for val: red above 200, light red above
    150 and white otherwise."""
    if val > 200:
        return (1.,0.,0.)
    if val > 150:
        return (1.,0.6,0.6)
    return 'white'


table = tools.strip_ID_vs_sipm_location_dataframe(
    filtered_df,
    field_to_show,
    significant_figures=table_ndigits
)

# Colour of each cell, decided from the value it shows
cell_colours = [
    [simple_colour_decide(val) for val in row]
    for row in table.values
]

fig, ax = plt.subplots(figsize=(8, 2))
ax.axis('off')
ax.table(
    cellText=table.values,
    colLabels=table.columns,
    rowLabels=[f" {row_no} " for row_no in range(1, 7)],
    colWidths=[0.15] * len(table.columns),
    cellColours=cell_colours,
    cellLoc='center',
    loc='center'
)
ax.set_title(field_to_show)
fig.show()
#plt.savefig('tabla.png', bbox_inches='tight', pad_inches=0.1)

In [None]:
field_to_show = 'XTP'
table_ndigits = 2


def simple_colour_decide(val):
    """Return the cell colour for val: red above 0.35, light red above
    0.2 and white otherwise."""
    if val > 0.35:
        return (1.,0.,0.)
    if val > 0.2:
        return (1.,0.6,0.6)
    return 'white'


table = tools.strip_ID_vs_sipm_location_dataframe(
    filtered_df,
    field_to_show,
    significant_figures=table_ndigits
)

# Colour of each cell, decided from the value it shows
cell_colours = [
    [simple_colour_decide(val) for val in row]
    for row in table.values
]

fig, ax = plt.subplots(figsize=(8, 2))
ax.axis('off')
ax.table(
    cellText=table.values,
    colLabels=table.columns,
    rowLabels=[f" {row_no} " for row_no in range(1, 7)],
    colWidths=[0.15] * len(table.columns),
    cellColours=cell_colours,
    cellLoc='center',
    loc='center'
)
ax.set_title(field_to_show)
fig.show()
#plt.savefig('tabla.png', bbox_inches='tight', pad_inches=0.1)

In [None]:
field_to_show = 'APP'
table_ndigits = 3


def bicolour_decide(val):
    """Return red above 0.05, light red above 0.04 and white otherwise.

    Not used by the table below, which relies on simple_colour_decide.
    """
    if val > 0.05:
        return (1.,0.,0.)
    if val > 0.04:
        return (1.,0.6,0.6)
    return 'white'


def simple_colour_decide(val):
    """Return the cell colour for val: red above 0.05, white otherwise."""
    if val > 0.05:
        return (1.,0.,0.)
    return 'white'


table = tools.strip_ID_vs_sipm_location_dataframe(
    filtered_df,
    field_to_show,
    significant_figures=table_ndigits
)

# Colour of each cell, decided from the value it shows
cell_colours = [
    [simple_colour_decide(val) for val in row]
    for row in table.values
]

fig, ax = plt.subplots(figsize=(8, 2))
ax.axis('off')
ax.table(
    cellText=table.values,
    colLabels=table.columns,
    rowLabels=[f" {row_no} " for row_no in range(1, 7)],
    colWidths=[0.15] * len(table.columns),
    cellColours=cell_colours,
    cellLoc='center',
    loc='center'
)
ax.set_title(field_to_show)
fig.show()
#plt.savefig('tabla.png', bbox_inches='tight', pad_inches=0.1)