Implemented: hit selection, events-per-pulse scatter plot, ion heatmap, and electron and ion time-of-flight plots.

Additions:
+ merging multiple runs
+ spatial selection view
+ calibration choice of lines with corresponding zoomed-in views
+ calibration curve fit, applied to the events as a new m/q (`mq`) column

# Imports and functions

In [4]:
import pandas as pd
import xarray as xr
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
#import ipywidgets as widgets
#from IPython.display import display

In [5]:
def read(runid):
    """Load the preprocessed data of one run from its h5 file.

    The file is looked up at ``../preprocess/datarun<runid>.h5``.

    Parameters
    ----------
    runid
        Run identifier; its string form is inserted into the file name.

    Returns
    -------
    tuple
        ``(dfevent, dfpulse, etof, pnccd)``: the objects stored under the
        ``dfevent`` and ``dfpulse`` keys (read with pandas), followed by the
        ``etof`` and ``pnccd`` groups opened as xarray DataArrays.
    """
    h5_path = f'../preprocess/datarun{runid}.h5'

    per_event, per_pulse = (
        pd.read_hdf(h5_path, key) for key in ('dfevent', 'dfpulse')
    )
    etof, pnccd = (
        xr.open_dataarray(h5_path, group=group) for group in ('etof', 'pnccd')
    )

    return per_event, per_pulse, etof, pnccd



def events_selection(runs,lower_threshold,upper_threshold):
    """Hit-select one or several runs and merge the selections.

    Each run is loaded with ``read``. Pulses whose ``nevents_pulse`` lies
    strictly between ``lower_threshold`` and ``upper_threshold`` are kept,
    together with the events and the electron time-of-flight traces that
    share their ``pulseId``. For every run a scatter plot of the number of
    events per pulse (all pulses vs. selected pulses) is shown.

    Parameters
    ----------
    runs
        Sized iterable of run IDs. A run ID listed twice is merged only once.
    lower_threshold, upper_threshold
        Exclusive bounds on the number of events per pulse.

    Returns
    -------
    tuple
        ``(merged_dfevent, merged_dfpulse, merged_etof)``: the selections of
        all runs concatenated (the xarray data along ``pulseId``), with the
        DataFrame indices renumbered from 0.

    Raises
    ------
    ValueError
        If ``runs`` is empty.
    """
    if len(runs) == 0:
        raise ValueError("events_selection needs at least one run ID")

    # Keyed by run so that a repeated run ID is not merged twice.
    selections = dict()

    for run in runs:

        # The pnCCD data is loaded by read() but not needed for the selection.
        dfevent, dfpulse, etof, _pnccd = read(run)

        # One combined mask: chaining dfpulse[mask_a][mask_b] would apply a
        # full-length mask to an already filtered frame, which makes pandas
        # re-align it and emit a "Boolean Series key will be reindexed" warning.
        nevents = dfpulse.nevents_pulse
        in_window = (lower_threshold < nevents) & (nevents < upper_threshold)

        selected_dfpulse = dfpulse[in_window]
        selected_dfevent = dfevent[dfevent.pulseId.isin(selected_dfpulse.pulseId)]
        selected_etof = etof.sel(pulseId=etof.coords['pulseId'].isin(selected_dfpulse.pulseId))

        selections[run] = selected_dfevent, selected_dfpulse, selected_etof

        plt.figure()
        plt.scatter(dfpulse.pulseId,dfpulse.nevents_pulse,label='All pulses')
        plt.scatter(selected_dfpulse.pulseId,selected_dfpulse.nevents_pulse,c='r',label='Selected pulses')
        plt.xlabel('Pulse ID')
        plt.ylabel('Number of events per pulse')
        plt.legend()
        plt.title(f'Events per pulse with respect to pulse ID for run {run}')
        plt.show()

    # Transpose [(events, pulses, etof), ...] into three per-kind sequences.
    selected_dfevents, selected_dfpulses, selected_etofs = zip(*selections.values())

    merged_dfevent = pd.concat(selected_dfevents, ignore_index=True)
    merged_dfpulse = pd.concat(selected_dfpulses, ignore_index=True)
    merged_etof = xr.concat(selected_etofs, dim='pulseId')

    print(f"Number of pulses selected across {len(runs)} run(s): {len(merged_dfpulse)}")

    return merged_dfevent, merged_dfpulse, merged_etof



def heatmap(dfevent):
    """Show a heatmap of the ion hits contained in ``dfevent``.

    The events are counted per distinct ``(x, y)`` pair and arranged in a
    table with ``y`` as rows and ``x`` as columns, which is drawn with
    seaborn. Nothing is returned.
    """
    hit_table = (
        dfevent.groupby(['x', 'y'])
        .size()
        .reset_index(name='count')
        .pivot(index='y', columns='x', values='count')
    )

    plt.figure()
    sns.heatmap(hit_table, cmap='viridis', cbar_kws={'label': 'Number of events'})
    plt.title('Ion heatmap')
    plt.show()
    

    
def ion_tof(dfevent):
    """Plot the ion time-of-flight spectrum of ``dfevent``.

    ``dfevent.tof`` is histogrammed into 250000 bins over its full range and
    only the first 1500 bins are drawn, each at its left bin edge.
    """
    n_bins_shown = 1500
    counts, edges = np.histogram(dfevent.tof, bins=250000)

    plt.figure()
    # edges[:n] are the left edges of the first n bins.
    plt.plot(edges[:n_bins_shown], counts[:n_bins_shown])
    plt.xlabel('Time of flight (s)')
    plt.ylabel('Number of hits per bin')
    plt.title('Ions time of flight')
    plt.show()
    
    
    
def e_tof(etof):
    """Plot the averaged, normalised electron time-of-flight trace.

    ``etof`` is averaged over its first axis (presumably the pulse axis --
    confirm against the preprocessing), sign-flipped and divided by its
    maximum. The time axis assumes 14080 channels spanning the time between
    two pulses.
    """
    TIME_BETWEEN_PULSES = 3.54462e-6
    CHANNELS_PER_PULSE = 14080
    channel_time = TIME_BETWEEN_PULSES/CHANNELS_PER_PULSE

    time_axis = np.arange(CHANNELS_PER_PULSE)*channel_time
    # The raw signal is negated so that the plotted trace has its sign flipped.
    mean_trace = -np.mean(etof, axis=0)

    plt.figure()
    plt.plot(time_axis, mean_trace/max(mean_trace))
    plt.xlabel('Time of flight (s)')
    plt.ylabel('Normalized signal')
    plt.title('Electrons time of flight')
    plt.show()



def events_selection_plots(runs,lower_threshold,upper_threshold):
    """Run the hit selection and show the standard overview plots.

    Calls ``events_selection`` and then plots the ion heatmap, the ion time
    of flight and the electron time of flight of the selection.

    Returns
    -------
    tuple
        ``(selected_dfevent, selected_dfpulse, selected_etof)`` exactly as
        returned by ``events_selection``.
    """
    selection = events_selection(runs, lower_threshold, upper_threshold)
    events, _pulses, electron_traces = selection

    heatmap(events)
    ion_tof(events)
    e_tof(electron_traces)

    return selection



def heatmap_with_zones(dfevent,zones):
    """Show the ion-hit heatmap of ``dfevent`` with rectangular zones drawn on it.

    Parameters
    ----------
    dfevent
        Event DataFrame with ``x`` and ``y`` columns.
    zones
        Iterable of ``(xstart, ystart, width, height, angle)`` tuples, with
        the angle in degrees. Each one is drawn as a red, unfilled rectangle.

    Notes
    -----
    The rectangle is positioned relative to the first ``x``/``y`` value in
    the table, one unit per table cell. This matches the data coordinates
    only when the ``x`` and ``y`` values present are contiguous.
    """
    counts_df = dfevent.groupby(['x', 'y']).size().reset_index(name='count')
    heatmap_data = counts_df.pivot(index='y', columns='x', values='count')

    plt.figure()
    ax = sns.heatmap(heatmap_data, cmap='viridis', cbar_kws={'label': 'Number of events'})

    # Read the origin from the table itself rather than parsing the first
    # tick label's text: int() fails on float labels such as "127.0".
    x_origin = heatmap_data.columns[0]
    y_origin = heatmap_data.index[0]

    for x, y, width, height, angle in zones:
        rect = plt.Rectangle(
            (x - x_origin, y - y_origin),
            width,
            height,
            fill=False,
            edgecolor='red',
            lw=1,
            angle=angle,
        )
        ax.add_patch(rect)

    plt.title('Ion heatmap')
    plt.show()
    
        
    
def square_spatial_ion_selection(dfevent,dfpulse,etof,zone):
    """Select the events that fall inside a rectangular zone of the heatmap.

    The zone is first drawn on the heatmap with ``heatmap_with_zones``. Events
    with ``xstart < x < xstart + width`` and ``ystart < y < ystart + height``
    (all bounds exclusive) are kept, together with the pulses and electron
    time-of-flight traces that share their ``pulseId``.

    Parameters
    ----------
    dfevent, dfpulse, etof
        Event DataFrame, pulse DataFrame and electron time-of-flight xarray.
    zone
        ``(xstart, ystart, width, height, angle in degrees)``. The angle is
        only used for drawing; the selection itself is not tilted, so use
        ``angle=0``.

    Returns
    -------
    tuple
        ``(spatial_selected_dfevent, spatial_selected_dfpulse,
        spatial_selected_etof)``.
    """
    heatmap_with_zones(dfevent,[zone])

    xstart, ystart, width, height, _angle = zone

    # One combined mask: chaining dfevent[a][b][c][d] would apply full-length
    # masks to already filtered frames, which makes pandas re-align them and
    # emit a "Boolean Series key will be reindexed" warning each time.
    inside_zone = (
        (dfevent.x > xstart)
        & (dfevent.x < xstart + width)
        & (dfevent.y > ystart)
        & (dfevent.y < ystart + height)
    )

    spatial_selected_dfevent = dfevent[inside_zone]
    spatial_selected_dfpulse = dfpulse[dfpulse.pulseId.isin(spatial_selected_dfevent.pulseId)]
    spatial_selected_etof = etof.sel(pulseId=etof.coords['pulseId'].isin(spatial_selected_dfevent.pulseId))

    return spatial_selected_dfevent,spatial_selected_dfpulse,spatial_selected_etof



def big_ion_tof(dfevent):
    """Plot the ion time-of-flight spectrum of ``dfevent`` on a large figure.

    Same histogram as ``ion_tof`` (250000 bins, first 1500 drawn at their
    left edges), drawn in green on an 18 x 8 figure.
    """
    n_bins_shown = 1500
    counts, edges = np.histogram(dfevent.tof, bins=250000)

    plt.figure(figsize=(18, 8))
    # edges[:n] are the left edges of the first n bins.
    plt.plot(edges[:n_bins_shown], counts[:n_bins_shown], c='g')
    plt.xlabel('Time of flight (s)')
    plt.ylabel('Number of hits per bin')
    plt.title('Ions time of flight')
    plt.show()
    
 
    
def autoscale_y(ax,margin=0.1):
    """Rescale the y-axis to the data visible within the current x-limits.

    Parameters
    ----------
    ax
        A matplotlib axes object (anything providing ``get_xlim``,
        ``get_lines`` and ``set_ylim``).
    margin
        Fraction of the visible y-range of a line that is added as padding
        below and above it.

    Lines without any point strictly inside the x-limits are ignored. If no
    line has a visible point, the y-limits are left untouched.
    """
    # sorted() keeps the window test valid for an inverted x-axis as well.
    lo, hi = sorted(ax.get_xlim())
    bot, top = np.inf, -np.inf

    for line in ax.get_lines():
        # Line data may be plain lists; arrays are needed for masking.
        xd = np.asarray(line.get_xdata())
        yd = np.asarray(line.get_ydata())

        y_displayed = yd[(xd > lo) & (xd < hi)]
        if y_displayed.size == 0:
            # np.min / np.max raise on an empty selection.
            continue

        y_min = np.min(y_displayed)
        y_max = np.max(y_displayed)
        pad = margin * (y_max - y_min)

        if y_min - pad < bot:
            bot = y_min - pad
        if y_max + pad > top:
            top = y_max + pad

    if bot > top:
        # Nothing visible: keep the current limits instead of setting inf.
        return

    ax.set_ylim(bot,top)
    
    
    
def zoomed_ion_tof(dfevent,anchor):
    """Plot the ion time-of-flight spectrum zoomed around ``anchor``.

    Same histogram as ``big_ion_tof`` (250000 bins, first 1500 drawn). The
    x-axis is limited to ``anchor`` +/- 1e-7, the y-axis is rescaled to the
    visible data with ``autoscale_y``, and a dashed black vertical line marks
    the anchor.
    """
    half_window = 1e-7
    n_bins_shown = 1500
    counts, edges = np.histogram(dfevent.tof, bins=250000)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(edges[:n_bins_shown], counts[:n_bins_shown], c='g')
    ax.set_xlabel('Time of flight (s)')
    ax.set_ylabel('Number of hits per bin')
    ax.set_title('Ions time of flight')
    ax.set_xlim(anchor - half_window, anchor + half_window)
    # Rescale before adding the marker so that only the spectrum is considered.
    autoscale_y(ax)
    ax.axvline(x=anchor, color='black', linestyle='--')
    plt.show()
    
    
    
def power_law(x, a, b):
    """Calibration model: return ``a * x**b``."""
    scaled = a * pow(x, b)
    return scaled



def compute_calibration(calibration_lines):
    """Fit the calibration ``m/q = a * tof**b`` to the given lines.

    Parameters
    ----------
    calibration_lines
        Five time-of-flight values, in the same order as the reference m/q
        values 40, 20, 40/3, 40/4 and 40/5.

    Returns
    -------
    tuple
        ``(a_fit, b_fit)``, the fitted parameters of ``power_law``.

    Raises
    ------
    ValueError
        If the number of lines differs from the number of reference values.
    """
    # Corresponding m/q argon values
    mq_lines = [40,20,40/3,40/4,40/5]

    # Without this check a single value would silently broadcast against the
    # five references and produce a meaningless fit.
    if len(calibration_lines) != len(mq_lines):
        raise ValueError(
            f"Expected {len(mq_lines)} calibration lines, got {len(calibration_lines)}"
        )

    # Initial guesses for parameters a and b
    initial_guess = [1.6e13, 2]

    # Perform the curve fitting (the covariance matrix is not used)
    params, _covariance = curve_fit(power_law, calibration_lines, mq_lines, p0=initial_guess, maxfev=10000)

    # Extract the fitted values for a and b
    a_fit, b_fit = params

    print(f"The fit looks as follows: m/q = {a_fit:.2e} * tof^{b_fit:.2f}")

    return a_fit, b_fit



def calibrate(backgrd_dfevent):
    """Interactively calibrate the ion time of flight to m/q.

    Shows the large ion time-of-flight plot of ``backgrd_dfevent``, asks for
    five anchor values (prompted as Ar1 ... Ar5), shows a zoomed plot around
    each of them, and repeats until the user answers ``y``. The last set of
    anchors is then fitted with ``compute_calibration``.

    Returns
    -------
    tuple
        ``(a_fit, b_fit)`` as returned by ``compute_calibration``.
    """
    n_lines = 5

    # Show a large widget ion tof
    big_ion_tof(backgrd_dfevent)

    done = False
    while not done:

        # Ask for five numbers input
        anchors = []
        for i in range(n_lines):
            # Ask again on invalid input; skipping the value would leave
            # fewer anchors than the fit expects.
            while True:
                value = input(f"Enter value Ar{i + 1}: ")
                try:
                    anchors.append(float(value))
                    break
                except ValueError:
                    print("Invalid input. Please enter a number.")

        # Equivalent of the `%matplotlib inline` magic, written as the plain
        # Python call so that the function body is valid outside IPython's
        # input transformer as well.
        try:
            get_ipython().run_line_magic('matplotlib', 'inline')
        except NameError:
            # Not running under IPython: keep the current backend.
            pass

        # Show five additional plots based on inputs
        for anchor in anchors:
            zoomed_ion_tof(backgrd_dfevent,anchor)

        # Ask if the user is done
        done_response = input("Are you done? (y/n): ").strip().lower()
        if done_response == 'y':
            done = True

    # Compute calibration fit
    a_fit, b_fit = compute_calibration(anchors)

    return a_fit, b_fit



def apply_calibration(dfevents,a_fit,b_fit):
    """Add a calibrated ``mq`` column to each event DataFrame.

    Parameters
    ----------
    dfevents
        Iterable of event DataFrames with a ``tof`` column.
    a_fit, b_fit
        Calibration parameters; ``mq`` is computed as ``a_fit * tof**b_fit``.

    Returns
    -------
    list
        One new DataFrame per input, in the same order, with the ``mq``
        column added. The input DataFrames are not modified.
    """
    # assign() returns a new frame, so the caller's data is left untouched and
    # no SettingWithCopyWarning is raised when a filtered frame is passed in.
    return [
        dfevent.assign(mq=a_fit * dfevent.tof ** b_fit)
        for dfevent in dfevents
    ]



def mq_selection(calibrated_dfevent,dfpulse,etof,lower_mq,upper_mq):
    """Select events by their calibrated m/q value.

    Parameters
    ----------
    calibrated_dfevent
        Event DataFrame that already has the ``mq`` column.
    dfpulse, etof
        Pulse DataFrame and electron time-of-flight xarray to filter along.
    lower_mq, upper_mq
        Exclusive bounds: events with ``lower_mq < mq < upper_mq`` are kept.

    Returns
    -------
    tuple
        ``(mqselected_dfevent, mqselected_dfpulse, mqselected_etof)``: the
        selected events, and the pulses and traces sharing their ``pulseId``.
    """
    # One combined mask: chaining frame[mask_a][mask_b] would apply a
    # full-length mask to an already filtered frame, which makes pandas
    # re-align it and emit a "Boolean Series key will be reindexed" warning.
    mq = calibrated_dfevent.mq
    in_window = (lower_mq < mq) & (mq < upper_mq)

    mqselected_dfevent = calibrated_dfevent[in_window]
    mqselected_dfpulse = dfpulse[dfpulse.pulseId.isin(mqselected_dfevent.pulseId)]
    mqselected_etof = etof.sel(pulseId=etof.coords['pulseId'].isin(mqselected_dfevent.pulseId))

    return mqselected_dfevent,mqselected_dfpulse,mqselected_etof

# Analysis

In [6]:
# IDs of the runs that are loaded and merged for the main selection.
RUNID = [389,390]

In [None]:
%matplotlib inline
# Exclusive bounds on the number of events per pulse for the main selection.
LOWER_BOUND = 2000
UPPER_BOUND = 8000
# Hit-select and merge the runs in RUNID, and show the overview plots
# (events per pulse, ion heatmap, ion and electron time of flight).
selected_dfevent, selected_dfpulse, selected_etof = events_selection_plots(RUNID,LOWER_BOUND,UPPER_BOUND)

In [None]:
# Exclusive bounds on the number of events per pulse for the background sample.
LOWER_BACKGRD_BOUND = 20
UPPER_BACKGRD_BOUND = 40

# Background sample: low-event pulses of run 390 only (no overview plots).
backgrd_dfevent, backgrd_dfpulse, backgrd_etof = events_selection([390],LOWER_BACKGRD_BOUND,UPPER_BACKGRD_BOUND)

In [None]:
# Rectangular zone on the ion heatmap, given as
# (xstart, ystart, width, height, angle in degrees).
X = 127
Y = 117
WIDTH = 13
HEIGHT = 10
ZONE = [X,Y,WIDTH,HEIGHT,0]

In [None]:
# Preview where the rectangular selection ZONE lies on the heatmap of the
# selected events.
heatmap_with_zones(selected_dfevent,[ZONE])

In [None]:
# Restrict the background sample to the events inside ZONE (the zone is also
# drawn on the background heatmap by the call).
spatial_bkgrd_dfevent,spatial_bkgrd_dfpulse,spatial_bkgrd_etof = square_spatial_ion_selection(backgrd_dfevent,backgrd_dfpulse,backgrd_etof,ZONE)

In [None]:
%matplotlib widget
# Large ion time-of-flight plot of the spatially selected background sample;
# the widget backend is presumably chosen so the peak positions can be read
# off interactively before calibrating.
big_ion_tof(spatial_bkgrd_dfevent)

In [None]:
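# Calibration lines kept for reference (presumably ion time-of-flight anchors in seconds; confirm before reuse):
#CALIBRATION_LINES = [1.312e-6, 9.26e-7, 7.56e-7, 6.65e-7, 5.97e-7]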

In [None]:
%matplotlib inline
# Interactive calibration: prompts for five time-of-flight anchors
# (Ar1 ... Ar5), shows a zoomed view around each, then fits m/q = a * tof^b.
a_fit, b_fit = calibrate(spatial_bkgrd_dfevent)

In [None]:
# Display the fitted calibration parameters.
a_fit,b_fit

In [None]:
# Compute the calibrated m/q column ('mq') for the selected and the background events.
calibrated_selected_dfevent, calibrated_backgrd_dfevent = apply_calibration([selected_dfevent,backgrd_dfevent],a_fit,b_fit) 

In [None]:
# Inspect the calibrated m/q values. Indexing the frame with the float mq
# Series itself is interpreted as a list of column labels and raises KeyError,
# so the column is displayed directly instead.
calibrated_selected_dfevent.mq

In [None]:
# Reference m/q values (40 divided by 1 ... 5), the same values the
# calibration fit uses for argon; drawn as vertical lines on the m/q spectra.
MQ_LINES = [40,20,40/3,40/4,40/5]

In [None]:
%matplotlib widget
# Compare the m/q spectra of the selected events (blue) and of the background
# events (green), each normalised to its own maximum, with the reference
# m/q lines drawn in black.
plt.figure(figsize=(20, 10))
# Both histograms share the same explicit bin edges between 0 and 200.
histselected, bin_edgesselected = np.histogram(calibrated_selected_dfevent.mq, bins=np.linspace(0,200,10000),range=(0,200))
histgrd, bin_edgesgrd = np.histogram(calibrated_backgrd_dfevent.mq, bins=np.linspace(0,200,10000),range=(0,200))
# Each histogram is drawn at the left edges of its bins.
plt.plot(bin_edgesselected[:-1], histselected/max(histselected), linewidth = 1, c='b')
plt.plot(bin_edgesgrd[:-1], histgrd/max(histgrd), linewidth = 1, c='g')
plt.vlines(MQ_LINES,0,1,colors='black')
plt.show()

In [None]:
%matplotlib inline
# m/q window (exclusive bounds) applied to both samples below.
LOWER_MQ = .25
UPPER_MQ = 3

# Same m/q window on the selected events and on the background events.
mqselected_dfevent,mqselected_dfpulse,mqselected_etof = mq_selection(calibrated_selected_dfevent,selected_dfpulse,selected_etof,LOWER_MQ,UPPER_MQ)
mqselected_backgrd_dfevent,mqselected_backgrd_dfpulse,mqselected_backgrd_etof = mq_selection(calibrated_backgrd_dfevent,backgrd_dfpulse,backgrd_etof,LOWER_MQ,UPPER_MQ)

heatmap(mqselected_dfevent)
heatmap(mqselected_backgrd_dfevent)

# Time axis for the electron traces: same constants as in e_tof.
TIME_BETWEEN_PULSES = 3.54462e-6
CHANNELS_PER_PULSE = 14080
channel_time = TIME_BETWEEN_PULSES/CHANNELS_PER_PULSE
    
# xaxis is reused by the comparison plots in later cells.
xaxis = np.arange(14080)*channel_time
# Average over the first axis and flip the sign, as e_tof does.
avg_mqselected_etof = -np.mean(mqselected_etof, axis=0)
avg_mqbackgrd_etof = -np.mean(mqselected_backgrd_etof, axis=0)
max_mqselected_etof = max(avg_mqselected_etof)
max_mqbackgrd_etof = max(avg_mqbackgrd_etof)

# Overlay both traces, each normalised to its own maximum
# (red: selected events, green: background).
plt.figure()
plt.plot(xaxis,avg_mqselected_etof/max_mqselected_etof,c='r')
plt.plot(xaxis,avg_mqbackgrd_etof/max_mqbackgrd_etof,c='g')
plt.xlabel('Time of flight (s)')
plt.ylabel('Normalized signal')
plt.title('Electrons time of flight')
plt.show()

In [None]:
# m/q window 20.5 < m/q < 21.5 on the selected events.
LOWER_MQ = 20.5
UPPER_MQ = 21.5

# Overwrites the mqselected_* variables of the previous cell.
mqselected_dfevent,mqselected_dfpulse,mqselected_etof = mq_selection(calibrated_selected_dfevent,selected_dfpulse,selected_etof,LOWER_MQ,UPPER_MQ)
heatmap(mqselected_dfevent)

In [None]:
# m/q window 60 < m/q < 100 on the selected events.
LOWER_MQ = 60
UPPER_MQ = 100

# Stored under separate names (suffix 1) so it can be compared with the
# previous window in the following cells.
mqselected_dfevent1,mqselected_dfpulse1,mqselected_etof1 = mq_selection(calibrated_selected_dfevent,selected_dfpulse,selected_etof,LOWER_MQ,UPPER_MQ)
heatmap(mqselected_dfevent1)
e_tof(mqselected_etof1)

In [None]:
# Averaged, sign-flipped electron traces of the two m/q windows currently
# held in mqselected_etof and mqselected_etof1.
avg_mqselected_etof = -np.mean(mqselected_etof, axis=0)
avg_mqselected_etof1 = -np.mean(mqselected_etof1, axis=0)
max_mqselected_etof = max(avg_mqselected_etof)
max_mqselected_etof1 = max(avg_mqselected_etof1)

# Overlay both traces, each normalised to its own maximum
# (red: mqselected_etof, blue: mqselected_etof1). Relies on xaxis defined
# in an earlier cell.
plt.figure()
plt.plot(xaxis,avg_mqselected_etof/max_mqselected_etof,c='r')
plt.plot(xaxis,avg_mqselected_etof1/max_mqselected_etof1,c='b')
plt.xlabel('Time of flight (s)')
plt.ylabel('Normalized signal')
plt.title('Electrons time of flight')
plt.show()

In [None]:
# Same comparison as in the previous cell, restricted to the first 500 ns.
plt.figure()
plt.plot(xaxis,avg_mqselected_etof/max_mqselected_etof,c='r')
plt.plot(xaxis,avg_mqselected_etof1/max_mqselected_etof1,c='b')
plt.xlabel('Time of flight (s)')
plt.ylabel('Normalized signal')
plt.title('Electrons time of flight')
plt.xlim(0,500e-9)
plt.show()

In [None]:
# m/q window 0.25 < m/q < 3 on the background events.
LOWER_MQ = .25
UPPER_MQ = 3

# Overwrites the mqselected_* variables with the background selection.
mqselected_dfevent,mqselected_dfpulse,mqselected_etof = mq_selection(calibrated_backgrd_dfevent,backgrd_dfpulse,backgrd_etof,LOWER_MQ,UPPER_MQ)
heatmap(mqselected_dfevent)

In [None]:
# m/q window 20.5 < m/q < 21.5 on the background events.
LOWER_MQ = 20.5
UPPER_MQ = 21.5

# Overwrites the mqselected_* variables with the background selection.
mqselected_dfevent,mqselected_dfpulse,mqselected_etof = mq_selection(calibrated_backgrd_dfevent,backgrd_dfpulse,backgrd_etof,LOWER_MQ,UPPER_MQ)
heatmap(mqselected_dfevent)

In [None]:
# m/q window 35 < m/q < 100 on the background events.
LOWER_MQ = 35
UPPER_MQ = 100

# Overwrites the mqselected_* variables with the background selection.
mqselected_dfevent,mqselected_dfpulse,mqselected_etof = mq_selection(calibrated_backgrd_dfevent,backgrd_dfpulse,backgrd_etof,LOWER_MQ,UPPER_MQ)
heatmap(mqselected_dfevent)