In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import statistics
import os
###############################################
from peakutils import indexes
from peakutils import baseline
from scipy.signal import find_peaks as fp
from scipy.signal import savgol_filter 
###############################################
from bokeh.plotting import figure , show
from pybaselines import whittaker as pl

from bokeh.io import output_notebook
output_notebook()

In [3]:
def load_data(folder_path):
    """
    Load every spectrum CSV in a folder into a single wide DataFrame.

    Files are expected to be named '<sample>-<n>.csv'. They are ordered by the
    integer <n> that follows the LAST hyphen, so sample names that themselves
    contain hyphens still sort numerically (1, 2, ..., 10 rather than 1, 10, 2).

    Parameters:
        folder_path (str): Directory that contains the CSV files.

    Returns:
        DataFrame: A 'wavelength' column (first column of the first file) followed
        by one intensity column per file (second column of that file), each named
        after the file without its extension.

    Raises:
        ValueError: If the folder contains no CSV files.
    """
    def shot_number(file_name):
        # Text between the last hyphen and the first following dot, e.g. 'CR300LA-10.csv' -> 10
        return int(file_name.rsplit('-', 1)[1].split('.')[0])

    csv_files = sorted(
        (file_name for file_name in os.listdir(folder_path) if file_name.endswith('.csv')),
        key=shot_number,
    )
    if not csv_files:
        raise ValueError(f"No CSV files found in {folder_path!r}")

    columns = []
    column_names = ['wavelength']

    for position, file_name in enumerate(csv_files):
        df = pd.read_csv(os.path.join(folder_path, file_name))

        # The wavelength axis is taken once, from the first file, instead of re-reading that file
        if position == 0:
            columns.append(df.iloc[:, 0])

        # Assuming intensity is in the second column
        columns.append(df.iloc[:, 1])
        column_names.append(os.path.splitext(file_name)[0])

    result_df = pd.concat(columns, axis=1)
    result_df.columns = column_names

    return result_df



# LOADING THE DATAFRAME

Please note that the '-x' suffix after the sample name encodes the number of cleaning shots: -1 corresponds to 0 cleaning shots, -2 to 5 cleaning shots, -3 to 10 cleaning shots, ..., and -12 to 55 cleaning shots.

In [4]:
# Load all CR300LA spectra into one DataFrame: a 'wavelength' column plus one intensity column per CSV file
df_raw = load_data('hLIBS_Spectra/CR300LA')
df_raw

Unnamed: 0,wavelength,CR300LA-1,CR300LA-2,CR300LA-3,CR300LA-4,CR300LA-5,CR300LA-6,CR300LA-7,CR300LA-8,CR300LA-9,CR300LA-10,CR300LA-11,CR300LA-12
0,180.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,180.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,180.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,180.100000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,180.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
23426,960.866667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23427,960.900000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23428,960.933333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23429,960.966667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
def baseline_correction(df, lam=0.1):
    """
    Perform baseline correction on the intensity columns of the input DataFrame and create a new DataFrame with corrected values.

    The baseline of every intensity column is estimated with pybaselines' airPLS
    (``pl.airpls``) and subtracted from that column. The input DataFrame is not modified.

    Parameters:
        df (DataFrame): Input DataFrame containing a 'wavelength' column and the intensity columns.
        lam (float): Smoothing parameter forwarded to ``pl.airpls``. Defaults to 0.1, the value
            this notebook has always used.
            NOTE(review): this is far below the library's documented default (1e6) - confirm it is intended.

    Returns:
        DataFrame: New DataFrame with baseline-corrected intensity columns and the same wavelength column as the input DataFrame.
    """
    # Start from the shared wavelength axis
    new_df = pd.DataFrame({'wavelength': df['wavelength']})

    # Select intensity columns by name so the result does not depend on where 'wavelength' sits
    for col in df.columns.drop('wavelength'):
        # Named 'fitted_baseline' so it does not shadow the module-level `baseline` import from peakutils
        fitted_baseline, _ = pl.airpls(df[col], lam=lam)
        new_df[col] = df[col] - fitted_baseline

    return new_df

In [6]:
# NOTE(review): baseline_correction() is defined above but is NOT applied here, so the
# "baseline corrected" frame currently holds the raw intensities - confirm this is intended.
# .copy() keeps df_raw intact if a later step modifies df_baselinecorrected in place.
df_baselinecorrected = df_raw.copy()
df_baselinecorrected

Unnamed: 0,wavelength,CR300LA-1,CR300LA-2,CR300LA-3,CR300LA-4,CR300LA-5,CR300LA-6,CR300LA-7,CR300LA-8,CR300LA-9,CR300LA-10,CR300LA-11,CR300LA-12
0,180.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,180.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,180.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,180.100000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,180.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
23426,960.866667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23427,960.900000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23428,960.933333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23429,960.966667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Normalization of the Spectra

Two normalization methods are considered:

1) Normalization by Max Intensity of the Spectra
2) Normalization by the Matrix Peak, i.e. the Fe peak: 259.90 (259.8999999999988), 238.16666666666336, 263.10000000000133

In [7]:
# Normalization by the Maximum Intensity

def normalize_intensities(df):
    """
    Normalize every intensity column by its own maximum value.

    The first column is treated as the wavelength axis and is passed through unchanged;
    every following column is divided by its maximum. The input DataFrame is left
    untouched and a new DataFrame is returned, so the cell calling this function can be
    re-run safely.

    Parameters:
        df (DataFrame): Wavelength in the first column, intensities in the remaining columns.

    Returns:
        DataFrame: Same shape and column names as the input, with the intensity columns scaled.
        A column whose maximum is 0 is divided by zero and therefore comes out as NaN/inf.
    """
    intensities = df.iloc[:, 1:]

    # Label-aligned division: each column is divided by that column's own maximum
    scaled = intensities / intensities.max()

    return pd.concat([df.iloc[:, :1], scaled], axis=1)


In [8]:
# Normalization by the matrix Fe peak, i.e. Fe peak 259.90 - 259.8999999999988 , 238.16666666666336 , 263.10000000000133

# Step 1: locate the matrix peak by keeping only the rows inside a wavelength window around it

Matrix_Wavelength_Min = 259.6
Matrix_Wavelength_Max = 260.2

# Boolean mask, inclusive on both ends of the window
in_matrix_window = df_baselinecorrected['wavelength'].between(Matrix_Wavelength_Min, Matrix_Wavelength_Max)
Matrix_df = df_baselinecorrected[in_matrix_window]

Selected_Matrix_Plot = figure(
    title='Selected Matrix Peak',
    x_axis_label='Wavelength',
    y_axis_label='Intensity',
    width=600,
    height=500,
)

# One red trace per spectrum; the first column is the wavelength axis, so it is skipped
for col_name in Matrix_df.columns[1:]:
    Selected_Matrix_Plot.line(Matrix_df['wavelength'], Matrix_df[col_name], line_width=2, color="red")

show(Selected_Matrix_Plot)
# Matrix_df

In [9]:
def normalize_data(matrix_df, baseline_df, prominence=100):
    """
    Normalize every spectrum by the intensity of its matrix peak.

    For each sample column, peaks are searched in ``matrix_df`` (the rows of the spectrum
    inside the matrix-peak window) and the whole spectrum in ``baseline_df`` is divided by
    the intensity of the tallest detected peak.

    Parameters:
        matrix_df (DataFrame): Wavelength in the first column, then one intensity column per
            sample, restricted to the matrix-peak window.
        baseline_df (DataFrame): Full spectra with a 'wavelength' column and the same sample
            column names as ``matrix_df``.
        prominence (float): Minimum peak prominence passed to scipy's ``find_peaks``.
            Defaults to 100, the value previously hard-coded.

    Returns:
        DataFrame: 'wavelength' plus one normalized intensity column per sample.

    Raises:
        ValueError: If no peak with the requested prominence is found for a sample.
    """
    normalized_columns = {'wavelength': baseline_df['wavelength']}

    for sample_col in matrix_df.columns[1:]:
        window = matrix_df[sample_col]
        peaks, _ = fp(window, prominence=prominence)

        # Fail with a message that names the sample instead of an opaque IndexError
        if len(peaks) == 0:
            raise ValueError(
                f"No peak with prominence >= {prominence} found in column {sample_col!r} "
                "of the matrix window"
            )

        # Several peaks can fall inside the window; the matrix line is the dominant one,
        # so use the tallest detected peak rather than whichever happens to come first
        matrix_peak_intensity = window.iloc[peaks].max()

        normalized_columns[sample_col] = baseline_df[sample_col] / matrix_peak_intensity

    return pd.DataFrame(normalized_columns)

In [10]:
# NORMALIZATION BY MATRIX PEAK
# Each spectrum in df_baselinecorrected is divided by the peak intensity found for the same column in Matrix_df
df_normalized = normalize_data(matrix_df=Matrix_df , baseline_df=df_baselinecorrected)
df_normalized


Unnamed: 0,wavelength,CR300LA-1,CR300LA-2,CR300LA-3,CR300LA-4,CR300LA-5,CR300LA-6,CR300LA-7,CR300LA-8,CR300LA-9,CR300LA-10,CR300LA-11,CR300LA-12
0,180.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,180.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,180.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,180.100000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,180.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
23426,960.866667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23427,960.900000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23428,960.933333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23429,960.966667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# #NORMALIZATION BY MAXIMUM INTENSITY

# df_normalized = normalize_intensities(df_baselinecorrected)
# df_normalized

# Plotting the Zinc Peak
For handheld LIBS, the two Zn peaks used here are located at 213.86 (Zn1) and 202.56 (Zn2).

In [12]:
# Wavelength windows around the two Zn lines (Zn1: 213.86, Zn2: 202.56).
# The bounds are plain floats: with a trailing comma each value would become a 1-element
# tuple and the comparisons below would only work through accidental broadcasting.
Zn1_Peak_Min = 213.7
Zn2_Peak_Min = 202.4
Zn1_Peak_Max = 214.1
Zn2_Peak_Max = 202.8

# Keep only the rows of the normalized spectra that fall inside each window (bounds inclusive)
Zn1_df = df_normalized[(df_normalized['wavelength'] >= Zn1_Peak_Min) & (df_normalized['wavelength'] <= Zn1_Peak_Max)]
Zn2_df = df_normalized[(df_normalized['wavelength'] >= Zn2_Peak_Min) & (df_normalized['wavelength'] <= Zn2_Peak_Max)]

Zn1_df

Unnamed: 0,wavelength,CR300LA-1,CR300LA-2,CR300LA-3,CR300LA-4,CR300LA-5,CR300LA-6,CR300LA-7,CR300LA-8,CR300LA-9,CR300LA-10,CR300LA-11,CR300LA-12
1012,213.733333,2.429026,1.958671,0.990222,1.835114,0.770764,0.425781,0.742518,0.573425,0.348955,0.369404,0.41521,0.337256
1013,213.766667,3.395204,2.777096,1.300823,2.433294,0.99633,0.599169,1.049553,0.808909,0.524093,0.550621,0.601847,0.482369
1014,213.8,4.337883,3.580377,1.583768,2.980675,1.197041,0.769787,1.349186,1.038987,0.703264,0.735196,0.787732,0.627032
1015,213.833333,5.003611,4.138008,1.779644,3.350635,1.334823,0.884288,1.552792,1.194044,0.821502,0.857592,0.911373,0.722516
1016,213.866667,5.206916,4.285734,1.843509,3.451826,1.38053,0.906219,1.598572,1.226123,0.835772,0.874109,0.931332,0.736248
1017,213.9,4.952773,4.044902,1.770951,3.291726,1.330038,0.846454,1.499882,1.147954,0.764482,0.802221,0.862341,0.680907
1018,213.933333,4.280144,3.469717,1.564905,2.895419,1.183791,0.72416,1.283313,0.982916,0.636727,0.670056,0.728942,0.57706
1019,213.966667,3.306366,2.667746,1.255214,2.326179,0.96146,0.564217,0.990829,0.763747,0.482313,0.507818,0.55985,0.447402
1020,214.0,2.287419,1.840938,0.919308,1.714837,0.716722,0.40162,0.691588,0.539733,0.332306,0.349381,0.391169,0.318394
1021,214.033333,1.487812,1.196405,0.637936,1.196618,0.505726,0.271659,0.456021,0.360813,0.217214,0.228237,0.258976,0.216315


In [13]:
# Selected_Zn2_Plot = figure(title='Selected Zn2 plot', x_axis_label='Wavelength', y_axis_label='Intensity', width=300, height=500)

# for col_num, col_name in enumerate(Zn2_df.columns[1:], start=1):
#     intensity_values = Zn2_df[col_name]
#     Selected_Zn2_Plot.line(Zn2_df['wavelength'], intensity_values, line_width=2, color="red")

# show(Selected_Zn2_Plot)

In [14]:

# The two Zn windows selected earlier (Zn 213.86 and Zn 202.56)
df1 = Zn1_df
df2 = Zn2_df

# Peak intensity of each spectrum = column-wise maximum inside the window.
# Column 0 is the wavelength axis, so it is skipped; the number of spectra is taken
# from the data instead of being hard-coded to 12.
peak_intensities_df1 = df1.iloc[:, 1:].max().tolist()
peak_intensities_df2 = df2.iloc[:, 1:].max().tolist()

# x-axis: the k-th spectrum ('-k' suffix) was recorded after 5 * (k - 1) cleaning shots
CLEANING_SHOTS_PER_STEP = 5
x_values = [CLEANING_SHOTS_PER_STEP * step for step in range(len(peak_intensities_df1))]

# Plotting
plot = figure(title='Normalization by Matrix Peak', x_axis_label='Cleaning Pulses', y_axis_label='Normalized Peak Intensity (a.u.)')
plot.scatter(x_values, peak_intensities_df1, legend_label='Zn 213.86', marker='+', color='blue', size=4)
plot.scatter(x_values, peak_intensities_df2, legend_label='Zn 202.56', marker='+', color='red', size=4)
plot.legend.location = "top_right"

# Connect the markers with one polyline per Zn line instead of many two-point segments
plot.line(x_values, peak_intensities_df1, line_color='blue')
plot.line(x_values, peak_intensities_df2, line_color='red')

# Show plot
show(plot)
