data_analysis_and_reference_measurements/diode_detector/analyse_and_plot_pulses.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Script for analysing recorded pulses from the diode detector and plotting the 
results in histograms.
The algorithm looks for the negative pusles by evaluating the derivative 
of the wafeform. The falling steep slop must be in a certain range as well as 
the pulse width to reject fake peaks caused by electronic noise.
Additional pulses which are smaller than the minimum threshold set during recording 
are still recognized if they appear in the same waveform that triggered a record.

INSTRUCTIONS:
This analysis script has several sections and is intended to be used in an interactive 
way. Use a python IDE/editor like Spyder3 (preferred) or PyCharm which 
understands the '# <codecell> markers'. This feature provides the same flexibility as 
Jupyter notebooks but keeps all code together in one executable script. 
The plot sections are each geared for certain kinds of measurements. 
If this script is executed as a whole, certain plots will be more usefull 
than others for evaluating a particular datasets.

Following main code sections exist below. 
They should be always executed in order and at least once per new python kernel.
    GENERAL SETTINGS: enbale raw pulse or waveform plots and fruther debug information
    SELECT DATASET: select the data file to be analysed
    PULSE ANALYSIS: recorded waveforms are parsed and filered for proper pulses
    ENERGY CALIBRATION FIT: no further seetings needed

Following visualisation plot sections should be chosen depending on the selected dataset:
    LOW ENERGY RANGE histogram: for KCl, background and other low/non-alpha sources
    FULL ENERGY RANGE histogram: for thorium or uranium stones (e.g. Columbite),
                                 radium watch paint, radon and other alpha sources
    SPECIAL histogram: for direct comparison of an alpha reference measruement 
                       with an AASI simulation. Use with mixed alpha reference dataset.
    
For further customization of plots (axis ranges and histogram resolutions) 
consider individual PLOT SETTINGS sections and section comments.
    
Final sections not necessary for pulse analysis but included for reference:
    ENERGY CALIBRATION PLOT: Plot linear energy calibration with fit parameters 
                             as shown in the paper.
    ALTERNATIVE code: Unused code which evalutes pulse integrals instead of amplitudes.
                             

Expected format for datasets: 
    - MessagePack format files (.msgp) as generated by HTML/js pulse recorder
    - Pandas data frames stored in python's .pkl file format as generated by pulse_recorder.py.


@author: Oliver Keller
@date: July 2019
"""

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import matplotlib.gridspec as gridspec  # for unequal plot boxes
import decimal as D
import msgpack

mpl.rcParams['font.size']=12 #default font size


# GENERAL SETTINGS

SHOW_DETECTED_PULSES = False # will take a long time for large date sets if enabled!
# the following option is only relevant if SHOW_DETECTED_PULSES is set True
OVERLAY_PULSES = False        # shows pulses overlayed and centered on largest amplitude of each pulse

DEBUG = False   # enable for printed debug info on pulse characteristics
DBG_ID = -1     # supply index to enable debug plot of single waveforms, starting from 0. set as -1 to disable


### helper function

def poly1(x, a, b): #1st grade polynom
    return a*x + b

# <codecell>
#
# SELECT DATASET
#
    
# read .msgp files from HTML/js pulse recorder
#file_name = "./data/flight_from-middle-to-landing_GVA-LIS_EJU1445_09-08-2019_11-30.msgp"
file_name = ""

if file_name is not "":
    with open(file_name,'rb') as file:
       msgp = msgpack.unpackb(file.read(),encoding='utf-8')
       df = pd.DataFrame(msgp)
       if not isinstance(df['pulse'][0], (np.ndarray, np.generic)):
           df_new = pd.DataFrame(columns=['pulse'])
           # create numpy arrays
           for i,row in df.iterrows():
               df_new['pulse'].loc[i] = np.asarray(row['pulse'], dtype='int16')
           df['pulse'] = df_new['pulse'] 
       # javascript's Date() saves timestamp formated in UTC and milliseconds
       df['ts'] = pd.to_datetime(df['ts']*1000000)
       df['ts'] = df['ts'].dt.tz_localize(tz='UTC') 
       # convert to desired time zone:
       df['ts'] = df['ts'].dt.tz_convert('Europe/Berlin')
       # make sure column order starts with timestamp
       df = df[['ts','pulse']]
       print(df)
   
# read python pickle files (.pkl) files recorded with pulse_recorder.py
# df = pd.read_pickle("../data_recording_software/data/pulses_2019-08-01_23-05-42___8___0-00.pkl")

###########################################################
# load reference measurements (as discussed in the article)
###########################################################
   
# KCl block, 9.44g ~3x3 cm measured at tickest 1 cm spot in the middle
df = pd.read_pickle("./data/KCL_9-44g_1x3x3cm_touchingdiodecase_pulses_2019-07-08_23-07-18___947___11-47.pkl")

# Columbite stone
# diode case touching large black spot on the stone
# recorded on average at 25 deg C, 964 hPa, 50% humidity
# => dew point 15 deg C, air density 0.00111979 g / (cm^3)
# uncomment all for more than 70 hours of data or use dataset only partially 
#df =  pd.read_pickle("./data/Columbit_diodecase_touching_big_spot_2019-06-27_19-46-14___11613___13-53.pkl")
#df2 = pd.read_pickle("./data/Columbit_diodecase_touching_big_spot_2019-07-01_21-30-08___11145___14-08.pkl")
#df3 = pd.read_pickle("./data/Columbit_diodecase_touching_big_spot_2019-07-02_20-50-51___10128___12-36.pkl")
#df4 = pd.read_pickle("./data/Columbit_diodecase_touching_big_spot_2019-07-03_19-04-13___13515___16-59.pkl")
#df5 = pd.read_pickle("./data/Columbit_diodecase_touching_big_spot_2019-07-04_20-20-16___10433___13-05.pkl")
#df = df.append(df2, ignore_index = True)
#df = df.append(df3, ignore_index = True)
#df = df.append(df4, ignore_index = True)
#df = df.append(df5, ignore_index = True)

# Mixed Alpha Source reference measurment for energy calibration
# estimated 967 hPa, Temp 23 deg C, 40 % Humidity, 
# => dew dew point 12 deg C, air density: 0.001123 [g/cm^3]
#df = pd.read_pickle("./data/mixed_alpha_4236RP_pulses_2019-04-08_17-54-37___27022___0-49.pkl")

# Radium watch hand, an old watch hand painted with radium-based radioluminescent paint
#df = pd.read_pickle("./data/Radium_watch_hand_11mm_pulses_2019-06-27_11-46-55___30032___1-28.pkl")

# background radiation as measured in souterrain office at CERN, Meyrin, Switzerland
#df = pd.read_pickle("./data/office_background_pulses_2019-07-17_22-14-10___48___14-44.pkl")

# loading pulse waveforms into lp array

lp = []

THL = -300 # same as settting in pulse_recorder.py

for i,p in df.iterrows():
    if p.pulse.min() < (THL):
        lp.append(p.pulse)
        
lp = np.asarray(lp[:])      # provide indexes here if leading or trainling pulses should be cut away

#lp=[lp[46]]    # specify index for evaluating only single pulses

# <codecell>

# PULSE ANALYSIS

count=0
loopcnt =0
areas = []
peaks = []
min_alpha_peak = 1243 # used for highlithing beta vs. alpha pulses

min_g = -20 # steepeness of falling edge slope, almost vertical
max_g = -3300 # limit to ignore vertical lines
min_length = 44  # about 0.9 ms
max_length = 120 # about 2.5 ms, tolerates some alpha-pileup
min_skip = min_length # was 25

# arrays storing identified pulse waveforms for later plotting
xpulses = []
ypulses = []

if SHOW_DETECTED_PULSES:
    fig = plt.figure()
    wf = fig.add_subplot(111)
    wf.set_xlim(0,120)
    wf.set_xticks(np.arange(0,120,0.5e-3*48e3), minor=False)
    wf.set_xticks(np.arange(0,120,0.1e-3*48e3), minor=True)
    wf.set_xticklabels(list(map(str, np.arange(0,2.5,0.5))), minor=False)
    #wf.set_xticklabels(list(map(str, np.arange(0.1,0.9,0.1))), minor=True)
    wf.set_xlabel("Time [ms]", fontsize=14)
    wf.xaxis.set_label_coords(0.96, -0.025) #right
    wf.set_ylabel('Audio signal amplitude [arb. unit]', fontsize = 14)
    #wf.set_ylim(-1300,400) # beta pulse range
    wf.set_ylim(-17000,5000) # complete alpha pulse range
    fig.tight_layout() #rect=(0.05,-0.010,1.01,1.02))

if DEBUG:
    fig2 = plt.figure()
    dbg = fig2.add_subplot(111)

gmax=0
for i,y in enumerate(lp[:]):
    dydx=np.gradient(y)
    gy= dydx.min() 
    gx= dydx.argmin() 

    if DEBUG: 
        print(i,"- max. gradient: ", gy, )
        #show waveform
        x = range(len(y))
        dbg.plot(x, y, alpha=0.3)
        dbg.text(x[y.argmin()], y.min(), i) #lable pulse with id

    while True:
        # check if waveform and falling edge starts within +/- THL range and if falling slope is large enough 
        if y[0] > THL and y[0] < np.abs(THL) and y.min() < THL and y[gx] <= np.abs(THL) and gy < min_g and gy > max_g: #and gy > -100:
            # find next trigger point based on gradient
            trigx=np.where(dydx < min_g)[0]
            trigy=dydx[dydx < min_g]
            peakx1=trigx[0]
            # check where pulse goes back up to original trigger level
            crossing_x1 = (y[peakx1+min_length:] > y[peakx1]).argmax() if (y[peakx1+min_length:] > y[peakx1]).any() else -1
            if crossing_x1 > 0:
                peakx2=peakx1+crossing_x1+min_length #because crossing_x1 is offset by min_length!
                diff = np.abs(peakx2 - peakx1)
                peak = y[peakx1:peakx2].min() 
                if DEBUG or i == DBG_ID: #or (peak <= -300 and peak >= -310): 
                    print(i, "- pulse width: ", diff, " max. amplitude: ", peak, "x1/x2: ", peakx1,peakx2)
                if diff >= min_length and diff <= max_length and peak <= THL :
                     area = np.absolute(y[peakx1:peakx2]).sum()
                     areas.append(area)
                     peak = np.absolute(peak) + y[peakx1] #+ THL -> removing THL offset inlcudes smaller pulses!
                     if peak < 0:
                         if DEBUG: print(i,"- peak below THL",peak)
                     peaks.append(peak)
                     ypulse = np.roll(y[peakx1-100:peakx2+100],50-y[peakx1:peakx2].argmin())[130:]
                     if ypulse.size > min_length:
                         #FIXME: why are some empty?
                         xpulses.append(list(range(len(ypulse))))
                         ypulses.append(ypulse)
                     if SHOW_DETECTED_PULSES: #and peak <= 10:
                         x = range(len(y))
                         # wf.plot(y, "black", alpha=0.1)
                         if OVERLAY_PULSES:
                             wf.plot(ypulse, "green", alpha=(1/255)) #alpha=(1/255) for smallest setting, 0.1 otherwise
                             # uncomment below to print index labels next to max pulse amplitude
                             #wf.text(x[y[peakx1:peakx2].argmin()],y[peakx1:peakx2].min(), i) #lable pulse with id
                         else:
                             if peak > min_alpha_peak:
                                 wf.plot(x[peakx1:peakx2],y[peakx1:peakx2], "red") #, alpha=0.1) 
                             else:
                                 wf.plot(x[peakx1:peakx2],y[peakx1:peakx2], "blue") #, alpha=0.1)                    
                     if gy < gmax:
                         gmax=gy
                     count+=1
                else:
                    # can be falling slope of overshoot!
                    if DEBUG or i == DBG_ID:
                        print(i,"- strange pulse: ", peakx1,peakx2)
                        #show waveform
                        #x = range(len(y))
                        #dbg.plot(x, y, "black")
                    peakx2=peakx1+min_skip # shift waveform by min_length for next iteration
                    #raise SystemExit #stop script here if needed
            else:
                if DEBUG:
                   print(i,"- pulse discontinous")
                   # show trigger points
                   #dbg.plot(trigx, trigy, "b+")
                peakx2 = peakx1 + min_skip # shift waveform by min_length for next iteration
                #raise SystemExit

        else:
            # remove some data at the front
            peakx1=0
            remaining = len(y)
            if remaining <  min_length:
                break
            else:
                peakx2=min_skip
        y = np.delete(y, slice(0,peakx2)) #skip processed data
        if len(y) < min_length:
            break # not enough samples left, got to next waveform
        else:
            dydx=np.delete(dydx, slice(0,peakx2))
            # find next falling edge
            gy= dydx.min() 
            gx= dydx.argmin() 
        loopcnt+=1


time_diff = df.iloc[-1,0] - df.iloc[0,0]
time_diff = time_diff.total_seconds() / 60.0
cpm = count/time_diff
# FIXME: in case of concatenation of several datasets (e.g. columbite stone), 
# the calculated overall measurement time period will be wrong
print("detected pulses:",count, "in", round(time_diff), "minutes ->", round(cpm,3), "CPM")
peaks=np.asarray(peaks)

# <codecell>
#
# ENERGY CALIBRATION FIT
# from mixed alpha source reference measurement 
# and threshold confirmation with X-ray machine
#

# reference centroids as estimated from AASI simulation results
ref = np.asarray([33,1300, 3893, 4290, 4636]) # 11mm of air (1.123 kg/m^3 density) between detector and source

# centroids of peak amplitudes estimated from recorded histograms
data_peak = np.asarray([300,2924,8175,8875, 9500]) # lowest value corresponds to threshold used in 33 keV X-ray measurement

# alternative to using max. peak amplitudes: 
# corresponding peak integrals/areas have been estimated here:
# only applied to recorded data in disabled code at the very end
data_area = np.asarray([1,107160,296230,321455, 339771]) 

thr_e_kev = 6    # estimated from threshold measurement with x-ray machine
src_e_kev = 40.5 # results in best fit (reducedchisq ~= 1)
e_kev = [thr_e_kev,src_e_kev,src_e_kev,src_e_kev,src_e_kev]

#http://www.physics.utah.edu/~detar/lessons/python/curve_fit/node1.html

popt_area, pcov_area = curve_fit(poly1, data_area, ref, sigma=e_kev,absolute_sigma=True)
popt_peak, pcov_peak = curve_fit(poly1, data_peak, ref, sigma=e_kev, absolute_sigma=True)

data_fit_area = poly1(data_area,*popt_area)
data_fit_peak = poly1(data_peak,*popt_peak)

perr_area = np.sqrt(np.diag(pcov_area))
perr_peak = np.sqrt(np.diag(pcov_peak))
print("perr area: ", perr_area, ", perr peak: ", perr_peak)
#calculation of residuals
residuals = ref - data_fit_area
ss_res = np.sum(residuals**2)
ss_tot = np.sum((ref - np.mean(ref))**2)
r_squared_a = 1 - (ss_res/ss_tot)

chisq = np.sum((residuals/e_kev)**2)
reducedchisq_area = chisq/float(len(data_area)-2)

print('R squared curve_fit area is: ',(r_squared_a))
print('chi sq.:', chisq, " red. chi sq.:",reducedchisq_area )

residuals = ref - data_fit_peak
ss_res = np.sum(residuals**2)
ss_tot = np.sum((data_peak - np.mean(data_peak))**2)
r_squared_p = 1 - (ss_res/ss_tot)

chisq = np.sum((residuals/e_kev)**2)
reducedchisq = chisq/float(len(data_peak)-2)

print('R squared curve_fit peak is: ',(r_squared_p))
print('chi sq.:', chisq, " red. chi sq.:",reducedchisq )

# <codecell>

# HISTOGRAM PLOT SECTION STARTS HERE

# <codecell>
#
# LOW ENERGY RANGE histogram, use with electron-only spectra 
# as e.g. the KCl data in the paper 
# based on max. PEAK AMPLITUDE from pulses in arb. units
# 
# plotting measured raw data together with secondary energy axis
# 

### PLOT SETTINGS

 # define high end of energy scale in arb. units

# set histogram resolution in arb. units
bin_width = 67/2 # 67 is used in energy calibration with alphas

# limit maximum of displayed energy range
max_x_limit = 1330 #1445 # ~ 0.6 Mev

#################

peaks_kev = poly1(peaks,*popt_peak)

ticks_kev_minor = np.arange(0,6000,50) #minor 0.05 Mev
ticks_kev = np.arange(0,11000,100)

ticks_raw = ((ticks_kev - popt_peak[1]) / popt_peak[0]) #np.asarray([0, 500, 1000, 1500, 2000, 2250, 2500, 2750, 3000, 3250, 3500, 3750, 4000, 4250, 4500, 4750, 5000])
ticks_raw_minor = (ticks_kev_minor - popt_peak[1]) / popt_peak[0] 

fig2 = plt.figure()
h2 = fig2.add_subplot(111)

min_bin = np.round((0 - popt_peak[1]) / popt_peak[0],0)  #np.asarray([0, 500, 1000, 1500, 2000, 2250, 2500, 2750, 3000, 3250, 3500, 3750, 4000, 4250, 4500, 4750, 5000])
min_bin = min_bin.astype(dtype='int')

max_bin = max(peaks)
bins= np.arange(min_bin, max_bin + bin_width, bin_width)

h2.set_xlabel('[MeV]', fontsize = 12)
#h2.xaxis.set_label_coords(0.07, -0.093) #left
#h2.xaxis.set_label_coords(1, -0.098) #right
h2.xaxis.set_label_coords(1.057, -0.09) # right aligned at end
h2.text(-0.001, 0.05, 'Energy',transform=plt.gcf().transFigure, fontsize = 14)

h2.set_ylabel('Counts', fontsize = 14)
h2.yaxis.set_label_coords(-0.10, 0.91)

# plot either histogram or just data points with error bars
(entries,edges,patches) = h2.hist(peaks, color="blue", histtype="step", bins=bins, log=0, linewidth="0.8", label="electron measurement")

# binning need if h2.hist() above is not used (but errobar plot below)
#(entries,edges) = np.histogram(peaks, bins=bins)

h2.set_xlim(min_bin,max_x_limit)

# add secondaty axis with energy calibration

ax2 = h2.twiny()
ax2.set_xticks(ticks_raw, minor=False)
ax2.set_xticks(ticks_raw_minor, minor=True)
ax2.set_xticklabels(list(map(str, (ticks_kev/1000))))
ax2.set_xlim(h2.get_xlim())
ax2.xaxis.set_ticks_position('bottom')
ax2.xaxis.set_label_position('bottom')
ax2.spines['bottom'].set_position(('outward', 20))
ax2.set_xlabel('[arb.unit]', fontsize = 12)
#ax2.xaxis.set_label_coords(0.06, -0.021) #left
#ax2.xaxis.set_label_coords(1.015, -0.025) #right
ax2.xaxis.set_label_coords(1.085, -0.02) #right at end of plot

# show measurement with poisson errors, alternatively uncomment h2.hist() line above
#bin_centers = 0.5 * (edges[:-1] + edges[1:])
#h2.errorbar(bin_centers, entries, yerr=np.sqrt(entries), xerr=edges[1:]-bin_centers, color="black", ecolor='black', elinewidth=0.8, fmt='.', label="measured KCl spectrum")

# read K-40 simulation generated via Allpix^2
# use same binning as in measuremnt plot
# Allpix hit energy data is provided raw and un-binned by default
#sim_kev = pd.read_pickle("./sim/KCl_allpix_sim_791hits.pkl")[0]
#sim_raw = (sim_kev - popt_peak[1]) / popt_peak[0]
#bins= np.arange(min(sim_raw), max(sim_raw) + bin_width, bin_width)
#h2.hist(sim_raw.values, alpha=1, histtype='step',color="red", bins=edges, log=0, linewidth="0.8", label="Allpix$^2$ simulation of ${}^{40}$K decays") #,hatch='\\')


h2.legend()
plt.tight_layout(pad=0.0)

# number of bins should be similar to simulation (minus the 33 kev threshold)  if a comparision is done
print("number of bins:", edges[:-1].size, ", raw bin size:", edges[1]-edges[0], ", first:",edges[0],", last:", edges[-1] )

# <codecell>
#
# FULL ENERGY RANGE histogram, e.g. use for alpha spectrum measurements
# based on max. PEAK AMPLITUDE from pulses in arb. units
# 
# plotting measured raw data together with secondary energy axis

#### PLOT SETTINGS

# adjust maxium range of energy axis (affects binned range, too)
max_bin_kev = 8000 # for e.g. columbite stone, 
#max_bin_kev = 5016 # max. energy in mixed alpha source simulation                   
show_isotope_labels = False # for watch hand measurement, adds labels of Ra226 and its progeny 

# histogram resolution
bin_width = 67*4 # use (67*4) for low statistics data as with columbite stone, otherwise down to 67

# limit upper end of displayed count range
max_y_limit =  600 # e.g. 600 for columbit stone or radium watch hand, -1 to disable

##################

peaks_kev = poly1(peaks,*popt_peak)

ticks_kev = np.arange(0,9000,1000) #major 1 Mev
ticks_kev_minor = np.arange(500,8500,1000) #minor 0.5 Mev


ticks_raw = ((ticks_kev - popt_peak[1]) / popt_peak[0]) 
ticks_raw_minor = (ticks_kev_minor - popt_peak[1]) / popt_peak[0] 

fig2 = plt.figure(figsize=(7, 5))
h2 = fig2.add_subplot(111)

max_bin = np.round((max_bin_kev - popt_peak[1]) / popt_peak[0],0)

bins= np.arange(0, max_bin + bin_width, bin_width)

h2.set_xlabel('[MeV]', fontsize = 12)
#h2.xaxis.set_label_coords(0.06, -0.093) # left
#h2.xaxis.set_label_coords(1.067, -0.088) # right
h2.xaxis.set_label_coords(1.081, -0.084) # right
h2.text(0.02, 0.06, 'Energy',transform=plt.gcf().transFigure,fontsize = 12)

h2.set_ylabel('Counts', fontsize = 12)
h2.yaxis.set_label_coords(-0.1, 0.91)

(entries,edges,patches) = h2.hist(peaks, color="blue", histtype="step", bins=bins, log=0, linewidth="0.8", label="alpha measurement")

h2.set_xlim(235,max_bin)           # from  0 keV
#h2.set_xlim(data_peak[0],max_bin) # from 33 keV threshold
if max_y_limit > 0:
    h2.set_ylim(0,max_y_limit) # zoom in on y axis if necessary


x_bounds = h2.get_xlim()

# uncomment this block to show centroid lines in mixed alpha measurement 
# with annotation/isotope labels
#h2.axvline(data_peak[1], linewidth=1, linestyle=":", color="grey")
#h2.axvline(data_peak[2],linewidth=1, linestyle=":", color="grey")
#h2.axvline(data_peak[3],linewidth=1, linestyle=":", color="grey")
#h2.axvline(data_peak[4],linewidth=1, linestyle=":", color="grey")
#h2.annotate(s='${}^{148}$Gd', xy =(((data_peak[1]-x_bounds[0])/(x_bounds[1]-x_bounds[0])),0.35), xycoords='axes fraction', verticalalignment='right', horizontalalignment='center')
#h2.annotate(s='${}^{239}$Pu', xy =(((data_peak[2]-x_bounds[0])/(x_bounds[1]-x_bounds[0])),0.943), xycoords='axes fraction', verticalalignment='right', horizontalalignment='center')
#h2.annotate(s='${}^{241}$Am', xy =(((data_peak[3]-x_bounds[0])/(x_bounds[1]-x_bounds[0]))+0.005,0.76), xycoords='axes fraction', verticalalignment='right', horizontalalignment='center')
#h2.annotate(s='${}^{244}$Cm', xy =(((data_peak[4]-x_bounds[0])/(x_bounds[1]-x_bounds[0]))+0.007,0.46), xycoords='axes fraction', verticalalignment='right', horizontalalignment='center')


if show_isotope_labels:
    #  peak labels for radium & its progeny in watch hand measurement
    h2.annotate(s='${}^{226}$Ra', xy =(((6800-x_bounds[0])/(x_bounds[1]-x_bounds[0])),0.55), xycoords='axes fraction', verticalalignment='right', horizontalalignment='center')
    h2.annotate(s='${}^{222}$Rn,\n${}^{210}$Po', xy =(((8600-x_bounds[0])/(x_bounds[1]-x_bounds[0])),0.45), xycoords='axes fraction', verticalalignment='right', horizontalalignment='center')
    h2.annotate(s='${}^{218}$Po', xy =(((9900-x_bounds[0])/(x_bounds[1]-x_bounds[0])),0.32), xycoords='axes fraction', verticalalignment='right', horizontalalignment='center')
    h2.annotate(s='${}^{214}$Po', xy =(((13300-x_bounds[0])/(x_bounds[1]-x_bounds[0])),0.3), xycoords='axes fraction', verticalalignment='right', horizontalalignment='center')

# add secondaty axis with energy calibration

ax2 = h2.twiny()
ax2.set_xticks(ticks_raw, minor=False)
ax2.set_xticks(ticks_raw_minor, minor=True)
ax2.set_xticklabels(list(map(str, np.rint(ticks_kev/1000).astype(np.int32))))
ax2.set_xlim(h2.get_xlim())
ax2.xaxis.set_ticks_position('bottom')
ax2.xaxis.set_label_position('bottom')
ax2.spines['bottom'].set_position(('outward', 20))
ax2.set_xlabel('[arb. unit]', fontsize = 12)
#ax2.xaxis.set_label_coords(-0.01, -0.025) #left
#ax2.xaxis.set_label_coords(1.095, -0.023) #right
ax2.xaxis.set_label_coords(1.115, -0.02) #right

plt.tight_layout(pad=0.5)

# number of bins should be similar to simulation (minus the 33 kev threshold)  if a comparision is done
print("number of bins:", edges[:-1].size, ", raw bin size:", edges[1]-edges[0], ", first:",edges[0],", last:", edges[-1] )

# <codecell>
#
# SPECIAL histogram for comparing alpha simulation with measurement in one plot
# Main axis is keV, second axis is in arb. pulse amplitude units,
# usefull for comparing overlapp of centroid lines.
# The AASI simulation data is used as is, including its binning.
# the pulse measurement is shown with applied energy calibration.
# This way, both can be compared using the same energy binning in keV.
# 

### PLOT SETTINGS

bin_width_kev = 33 #in keV, must be same as in the simulation settings
max_bin_kev = 5500 #in keV, defines high end of energy axis

#################

peaks_kev = poly1(peaks,*popt_peak)

ticks_kev = np.arange(0,6000,1000) #major 1 Mev
ticks_kev = ticks_kev.astype(dtype='int')
ticks_raw_range = np.arange(min(peaks_kev),6000,1000) # in major 1 Mev scale
ticks_kev_minor = np.arange(500,6000,1000) #minor 0.5 Mev

ticks_raw = np.round((ticks_raw_range - popt_peak[1]) / popt_peak[0],0)  #np.asarray([0, 500, 1000, 1500, 2000, 2250, 2500, 2750, 3000, 3250, 3500, 3750, 4000, 4250, 4500, 4750, 5000])
ticks_raw = ticks_raw.astype(dtype='int')

fig2 = plt.figure()
h2 = fig2.add_subplot(111)

bins= np.arange(min(peaks_kev), max(peaks_kev) + bin_width_kev, bin_width_kev)

h2.set_xlabel('Energy     [MeV]', fontsize = 10)
h2.xaxis.set_label_coords(0., -0.021)

h2.set_ylabel('Counts/{0:0.0f}  keV'.format(bin_width_kev), fontsize = 10)
h2.yaxis.set_label_coords(-0.10, 0.87)


(entries,edges,patches) = h2.hist(peaks_kev, color="blue", histtype="step", bins=bins, log=0, label="measurement")

ax2 = h2.twiny()
ax2.set_xticklabels(list(map(str, ticks_raw)))
ax2.xaxis.set_ticks_position('bottom')
ax2.xaxis.set_label_position('bottom')
ax2.spines['bottom'].set_position(('outward', 20))
ax2.set_xlabel('[arb.unit]', fontsize = 10)
ax2.xaxis.set_label_coords(0.09, -0.093) #left
h2.axvline(data_fit_peak[1], linewidth=0.8, linestyle=":", color="grey")
h2.axvline(data_fit_peak[2],linewidth=0.8, linestyle=":", color="grey")
h2.axvline(data_fit_peak[3],linewidth=0.8, linestyle=":", color="grey")
h2.axvline(data_fit_peak[4],linewidth=0.8, linestyle=":", color="grey")
#h2.xaxis.set_label_coords(0.90, -0.095) #right

# read AASI simulation
# data is already binned!
sim_mev = pd.read_csv("./sim/sum_of_Pu-Am-Cm.txt", skiprows=0, sep='\t' , converters={'Energy': D.Decimal, 'Counts': D.Decimal}, engine='c', skipinitialspace= True, float_precision='round_trip')
sim2_mev = pd.read_csv("./sim/Gd_148_3084000.txt", skiprows=0, sep='\t' , converters={'Energy': D.Decimal, 'Counts': D.Decimal}, engine='c', skipinitialspace= True, float_precision='round_trip')

sim_mev['Energy'] = sim_mev['Energy'].astype(dtype='float128')
sim_mev['Counts'] = sim_mev['Counts'].astype(dtype='int')
sim2_mev['Counts'] = sim2_mev['Counts'].astype(dtype='int')
sim_mev['Counts']=sim_mev['Counts'].add(sim2_mev['Counts'], fill_value=0)
sim_mev['Energy']=sim_mev['Energy']*1000
sim_mev_edges =  np.insert(sim_mev['Energy'].values,0,0)
h2.hist(sim_mev_edges[:-1], alpha=1, histtype='step',color="red", bins=sim_mev_edges, weights=sim_mev['Counts'].values,log=0, linewidth="0.8", label="simulation") #,hatch='\\')
h2.set_xlim(min(peaks_kev),max_bin_kev)
x_bounds = h2.get_xlim()
h2.annotate(s='${}^{148}$Gd', xy =(((data_fit_peak[1]-x_bounds[0])/(x_bounds[1]-x_bounds[0])),0.01), xycoords='axes fraction', verticalalignment='right', horizontalalignment='center')
h2.annotate(s='${}^{239}$Pu', xy =(((data_fit_peak[2]-x_bounds[0])/(x_bounds[1]-x_bounds[0])),0.01), xycoords='axes fraction', verticalalignment='right', horizontalalignment='center')
h2.annotate(s='${}^{241}$Am', xy =(((data_fit_peak[3]-x_bounds[0])/(x_bounds[1]-x_bounds[0])),0.07), xycoords='axes fraction', verticalalignment='right', horizontalalignment='center')
h2.annotate(s='${}^{244}$Cm', xy =(((data_fit_peak[4]-x_bounds[0])/(x_bounds[1]-x_bounds[0])),0.01), xycoords='axes fraction', verticalalignment='right', horizontalalignment='center')

h2.legend()

plt.tight_layout(h_pad=0.0)


# <codecell>

#  ENERGY CALIBRATION PLOT based on max. PEAK AMPLITUDE

gs = gridspec.GridSpec(2, 1, height_ratios=[6, 2])
fig3 = plt.figure()
l = fig3.add_subplot(gs[0])
l.plot(data_peak,data_fit_peak,'red')
l.scatter(data_peak,ref,15,color='none',edgecolor="black",marker='o')
resid = ref - data_fit_peak
#sd_a = 3400 #6300
#offset = poly1(0.0,*popt_area)
#sd = [734,sd_a,sd_a,sd_a,sd_a] #3741
#sd = 6000
l.set_xlabel('Energy\n[keV]', fontsize = 14)
l.xaxis.set_label_coords(-0.075, 1.03)

#l.text(0.05, 0.85, '$\chi^2$ = {0:0.2f}, $\chi_r^2$ = {1:0.2f}'.format(chisq,reducedchisq),transform = l.transAxes)
l.text(0.05, 0.85, '$\sigma$ = {0:0.1f} keV, FWHM = {1:0.0f} keV'.format(src_e_kev,src_e_kev*2.355), transform = l.transAxes)
l.text(0.05, 0.75, 'threshold = {0:0.0f} $\pm$ {1:0.1f} keV'.format(ref[0],thr_e_kev), transform = l.transAxes)
l.text(0.05, 0.65, '$\chi_r^2$ = {0:0.3f}'.format(reducedchisq),transform = l.transAxes)
#errKev = (sd_a - popt_area[1])/popt_area[0]
#thr_errKev = (sd[0] - popt_area[1])/popt_area[0]
l.text(0.05, 0.55, '$R^2$ = {0:0.3f}'.format(r_squared_a), transform = l.transAxes)

#l.text(0.05, 0.45, '$\pm thr. error$ = {0:0.0f} keV'.format(thr_e_kev), transform = l.transAxes)


err = fig3.add_subplot(gs[1])
err.errorbar(data_peak, resid, yerr=e_kev, fmt='o',color='red', ms=2,linewidth=0.8, ecolor='black')
err.set_xlabel('Pulse amplitude [arb. unit]',fontsize = 14)
err.set_ylim(100,-100)
plt.tight_layout(pad=0.3)

# <codecell>

# persistance plot, overlaying many waveforms in hex bins

fig = plt.figure() #figsize=(7, 5.5))
wf = fig.add_subplot(111)
wf.set_xlim(0,120)
wf.set_xticks(np.arange(0,120,0.5e-3*48e3), minor=False)
wf.set_xticks(np.arange(0,120,0.1e-3*48e3), minor=True)
wf.set_xticklabels(list(map(str, np.arange(0,2.5,0.5))), minor=False)
#wf.set_xticklabels(list(map(str, np.arange(0.1,0.9,0.1))), minor=True)
wf.set_xlabel("Time [ms]", fontsize=14)
wf.xaxis.set_label_coords(0.96, -0.025) #right
wf.set_ylabel('Audio signal amplitude [arb. unit]', fontsize = 14)
#wf.set_ylim(-1300,400) # beta pulse range
wf.set_ylim(-17000,5000) # complete alpha pulse range
fig.tight_layout() #rect=(0.05,-0.010,1.01,1.02))
hb = wf.hexbin(np.ravel(xpulses), np.asarray(ypulses).ravel(), gridsize=188, cmap='Greens', mincnt=1, vmax=100)
#cb = fig.colorbar(hb, ax=wf)

# <codecell>

# persistance plot, overlaying many waveforms in regular square bins (pixels)

fig = plt.figure(figsize=(6.5, 5.5))
wf = fig.add_subplot(111)
wf.set_xlim(0,120)
wf.set_xticks(np.arange(0,120,0.5e-3*48e3), minor=False)
wf.set_xticks(np.arange(0,120,0.1e-3*48e3), minor=True)
wf.set_xticklabels(list(map(str, np.arange(0,2.5,0.5))), minor=False)
#wf.set_xticklabels(list(map(str, np.arange(0.1,0.9,0.1))), minor=True)
wf.set_xlabel("Time [ms]", fontsize=16)
wf.xaxis.set_label_coords(0.95, -0.015) #right
wf.set_ylabel('Audio signal amplitude [arb. unit]', fontsize = 16)

#wf.set_ylim(-17000,6000) # complete alpha pulse range
#wf.set_yticklabels(list(map(str, np.arange(-17000,6000,1000))), minor=False)
#wf.set_ylim(-1300,400) # beta pulse range

#hb = wf.hexbin(xpulses, ypulses, gridsize=354, cmap='Greens', mincnt=1, vmax=100)
#cb = fig.colorbar(hb, ax=wf)

length = max(map(len, xpulses))
xpulses_n=np.array([np.append(xi,[0]*(length-len(xi))) for xi in xpulses], dtype=np.int)
length = max(map(len, ypulses))
ypulses_n=np.array([np.append(yi,[0]*(length-len(yi))) for yi in ypulses], dtype=np.int)


#wf.plot(ypulses_n[23])

## <codecell>

#xpulses_n=np.asarray([x for x in xpulses if len(x) > 0 ])
#ypulses_n=np.asarray([x for x in ypulses if len(x) > 0] )
#xpulses_n=np.array(xpulses)
#ypulses_n=np.array(ypulses)


divx=1
divy=180

xmin,xmax = min([i.min() for i in xpulses_n]), max([i.max() for i in xpulses_n])
ymin,ymax = min([i.min() for i in ypulses_n]), max([i.max() for i in ypulses_n])

xsize = (np.round((xmax-xmin)/divx)).astype(int)
ysize = (np.round((ymax-ymin)/divy)).astype(int)

wf.set_ylim(0,(4000-ymin)/divy)
wf.set_yticks(np.arange((-ymin-15000)/divy,ysize,2500/divy), minor=False)
wf.set_yticklabels(list(map(str, np.arange(-15000,4001,2500))), minor=False)
fig.tight_layout(pad=0.1,h_pad=0) #rect=(0.05,-0.010,1.01,1.02))


#xsize,ysize  = (xmax-xmin),(ymax-ymin)


im = np.zeros((xsize+1,ysize+1))
def addIm(im,x,y):
    for i,j in zip(x,y):
        #try:
            im[i,j] = im[i,j]+1
        #except IndexError:
        #    print(i,j)
    return im

for i,val in enumerate(xpulses_n[:]):
    #xo = (np.round((xpulses_n[i]-xmin)/xsize)).astype(int)
    yo = (np.round((ypulses_n[i]-ymin)/divy)).astype(int)
    xo = (xpulses_n[i]-xmin)
    #yo = (ypulses_n[i]-ymin)/ysize
    #im[xo,yo] = im[xo,yo]+1
    im = addIm(im,xo,yo)
    #im = addIm(im,xpulses_n[i],ypulses_n[i]-ymin)
im = np.ma.masked_array(im,mask=(im==0))

wf.imshow( im.T,interpolation='nearest',origin="lower",cmap="Greens", vmax=250)#  extent=([x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]]))

#x_edges = np.arange(xsize)
#y_edges = np.arange(ysize)

#bin_values,_,__ = np.histogram2d(np.ravel(xpulses_n),np.ravel(ypulses_n),bins=(x_edges, y_edges) )
#X, Y = np.meshgrid(x_edges,y_edges)

#wf.pcolormesh(X,Y, bin_values.T, cmap="Greens", vmax=100)
#hb = wf.hexbin(np.ravel(xpulses_n), np.asarray(ypulses_n).ravel(), gridsize=(188,188*2), cmap='Greens', mincnt=1, vmax=20)


#wf.hist2d(im[:,0],im[:,1],bins=(x_edges, y_edges),vmax=3)
#wf.scatter(ypulse_sum,range(len(ypulse_sum)))
          
# <codecell>

# ALTERNATIVE code which evaluates the pulse integrals instead of amplitudes
# seems more affected by pile-up of pulses and is therefore less accurate.
# This is old code only here for reference and therefore disabled.
# The fit result is much worse, requires larger sigma value to converge.


if 0:
    #
    #   ENERGY CALIBRATION FIT based on PULSE INTEGRAL (=area)
    #
    gs = gridspec.GridSpec(2, 1, height_ratios=[6, 2])
    fig3 = plt.figure()
    l = fig3.add_subplot(gs[0])
    l.plot(data_area,data_fit_area,'red')
    l.scatter(data_area,ref,15,color='none',edgecolor="black",marker='o')
    resid = ref - data_fit_area
    #sd_a = 3400 #6300
    #offset = poly1(0.0,*popt_area)
    #sd = [734,sd_a,sd_a,sd_a,sd_a] #3741
    #sd = 6000
    l.set_xlabel('Energy\n[keV]', fontsize = 10)
    l.xaxis.set_label_coords(-0.075, 1.0)
    
    #l.text(0.05, 0.85, '$\chi^2$ = {0:0.2f}, $\chi_r^2$ = {1:0.2f}'.format(chisq,reducedchisq),transform = l.transAxes)
    l.text(0.05, 0.85, '$\sigma$ = {0:0.1f} keV, FWHM = {1:0.0f} keV'.format(src_e_kev,src_e_kev*2.355), transform = l.transAxes)
    l.text(0.05, 0.75, 'threshold = {0:0.0f} $\pm$ {1:0.0f} keV'.format(ref[0],thr_e_kev), transform = l.transAxes)
    l.text(0.05, 0.65, '$\chi_r^2$ = {0:0.3f}'.format(reducedchisq_area),transform = l.transAxes)
    #errKev = (sd_a - popt_area[1])/popt_area[0]
    #thr_errKev = (sd[0] - popt_area[1])/popt_area[0]
    l.text(0.05, 0.55, '$R^2$ = {0:0.3f}'.format(r_squared_a), transform = l.transAxes)
    
    #l.text(0.05, 0.45, '$\pm thr. error$ = {0:0.0f} keV'.format(thr_e_kev), transform = l.transAxes)
    
    
    err = fig3.add_subplot(gs[1])
    err.errorbar(data_area, resid, yerr=e_kev, fmt='o',color='red', ms=2,ecolor='black')
    err.set_xlabel('pulse integrals [arb. units]')
    err.set_ylim(190,-190)
    
    #
    # histogram plot using energy calibration based on PULSE INEGTRAL 
    #
    
    ticks_kev = np.arange(0,11000,1000)
    ticks = (ticks_kev - popt_area[1]) / popt_area[0]  #np.asarray([0, 500, 1000, 1500, 2000, 2250, 2500, 2750, 3000, 3250, 3500, 3750, 4000, 4250, 4500, 4750, 5000])
    ticks_m = (np.arange(500,10000,1000)  - popt_area[1]) / popt_area[0]
     
    fig2 = plt.figure()
    h1 = fig2.add_subplot(111)    
    bin_width = 3200
    bins= np.arange(min(areas), max(areas)+ bin_width, bin_width)
    (n, edges, patches) = h1.hist(areas, bins=bins, log=0) #340 seems ok for mixed alpha, 170 for columbit
    h1.set_xlim(min(areas),max(areas)/2)
    h1.set_xlabel('arb. unit', fontsize = 10)
    h1.xaxis.set_label_coords(1.05, -0.02)
    
    print(len(bins))
    
    # 2nd axis
    ax2 = h1.twiny()
    
    ax2.set_xticks(ticks, minor=False)
    ax2.set_xticks(ticks_m, minor=True)
    #ax2.set_xlim(ticks[0],ticks[-1])
    ax2.set_xlim(h1.get_xlim())
    ax2.set_xticklabels(list(map(str, ticks_kev/1000)))
    #ax2.xaxis.set_minor_locator(ticker.MultipleLocator(50000))
    ax2.xaxis.set_ticks_position('bottom')
    ax2.xaxis.set_label_position('bottom')
    ax2.spines['bottom'].set_position(('outward', 20))
    ax2.set_xlabel('MeV', fontsize = 10)
    ax2.xaxis.set_label_coords(1.05, -0.1)