# IMPORTING THE LIBRARIES 

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import statistics
import os

###############################################
from peakutils import indexes
from peakutils import baseline
from scipy.signal import find_peaks as fp
from scipy.signal import savgol_filter 
###############################################
from bokeh.plotting import figure , show
from pybaselines import whittaker as pl



from bokeh.io import output_notebook
output_notebook()

# DATA ARRANGEMENT

In the main directory we can see that there are 8 subfolders. 

Each subfolder holds about 12 spectra per sample. Instead of relying on a single spectrum per sample, it is better to have multiple measurements per sample, which can then be used to build the calibration model.

Instead of keeping 12 separate CSV files per sample, it is more convenient to combine them into a single DataFrame: the 1st column is the wavelength and the 2nd-13th columns are the intensities.

In [3]:
def load_data(folder_path):
    """
    Combine every CSV spectrum found in a folder into one wide DataFrame.

    Parameters:
        folder_path (str): Directory to scan for files whose name ends in '.csv'.

    Returns:
        DataFrame: First column 'wavelength' (the first column of the first CSV
        that was read), followed by one intensity column per CSV file (the
        second column of that file), each named after its file without the
        extension. Columns follow the directory-listing order.

    Raises:
        FileNotFoundError: If the folder contains no '.csv' files.
    """
    # Keep only the CSV files so that stray directory entries (hidden files,
    # checkpoint folders, ...) can never be mistaken for a spectrum.
    csv_files = [name for name in os.listdir(folder_path) if name.endswith('.csv')]
    if not csv_files:
        raise FileNotFoundError(f"No CSV files found in '{folder_path}'")

    wavelength_df = None   # first column of the first CSV that is read
    intensity_dfs = []     # one Series per CSV (its second column)
    csv_file_names = []    # file names without extension, used as column names

    for file_name in csv_files:
        df = pd.read_csv(os.path.join(folder_path, file_name))

        # Take the wavelength axis from the first CSV itself instead of from an
        # arbitrary directory entry, and avoid parsing that file a second time.
        if wavelength_df is None:
            wavelength_df = df.iloc[:, [0]]

        intensity_dfs.append(df.iloc[:, 1])  # Assuming intensity is in the second column
        csv_file_names.append(os.path.splitext(file_name)[0])

    # Place the wavelength column first, then one intensity column per file
    result_df = pd.concat([wavelength_df] + intensity_dfs, axis=1)
    result_df.columns = ['wavelength'] + csv_file_names

    return result_df

In [4]:
def baseline_correction(df, lam=0.1):
    """
    Subtract an airPLS baseline from every intensity column of a spectra DataFrame.

    Parameters:
        df (DataFrame): Input DataFrame with a 'wavelength' column; every column
            after the first one is treated as an intensity column.
        lam (float): Smoothing parameter forwarded to ``pl.airpls``. Defaults to
            0.1, the value this notebook has always used.

    Returns:
        DataFrame: New DataFrame with the same 'wavelength' column and one
        baseline-corrected column per intensity column, under the same names.
        The input DataFrame is not modified.
    """
    # Start the result from the wavelength axis of the input
    corrected_df = pd.DataFrame({'wavelength': df['wavelength']})

    # Intensity columns are selected by position (everything after column 0)
    for col in df.columns[1:]:
        # airpls returns (baseline, params); only the baseline is needed here.
        # The local is not called `baseline` so that it does not shadow the
        # `baseline` function imported from peakutils at the top of the notebook.
        fitted_baseline, _ = pl.airpls(df[col], lam=lam)
        corrected_df[col] = df[col] - fitted_baseline

    return corrected_df

Loading the dataframe.

In [5]:
# One combined raw DataFrame per sample folder, loaded in the order listed.
# Folder names are resolved relative to the current working directory.
(
    CR210LA_Raw_df,
    CR240LA_Raw_df,
    CR300LA_Raw_df,
    CR440Y_Raw_df,
    CR570CP_Raw_df,
    CR700Y_Raw_df,
    HR660Y_Raw_df,
    CR1000Y_Raw_df,
) = (
    load_data(sample_folder)
    for sample_folder in (
        'CR210LA',
        'CR240LA',
        'CR300LA',
        'CR440Y',
        'CR570CP',
        'CR700Y',
        'HR660Y',
        'CR1000Y',
    )
)

In [6]:
# Baseline-correct every raw sample frame; the raw frames are left untouched
# and the results keep the same sample order.
(
    CR210LA_BaselineCorrected_df,
    CR240LA_BaselineCorrected_df,
    CR300LA_BaselineCorrected_df,
    CR440Y_BaselineCorrected_df,
    CR570CP_BaselineCorrected_df,
    CR700Y_BaselineCorrected_df,
    HR660Y_BaselineCorrected_df,
    CR1000Y_BaselineCorrected_df,
) = map(
    baseline_correction,
    (
        CR210LA_Raw_df,
        CR240LA_Raw_df,
        CR300LA_Raw_df,
        CR440Y_Raw_df,
        CR570CP_Raw_df,
        CR700Y_Raw_df,
        HR660Y_Raw_df,
        CR1000Y_Raw_df,
    ),
)

# Peak Selection and Data Trimming

The DataFrame we have is very large; it can now be trimmed by wavelength by adjusting two parameters, `Wavelength_Min` and `Wavelength_Max`.

In [105]:
# Inclusive wavelength window used to trim the baseline-corrected spectra.
Wavelength_Min = 203.9
Wavelength_Max = 204.3

# Series.between is inclusive on both ends by default, i.e. Min <= wavelength <= Max.
CR210LA_Select_df = CR210LA_BaselineCorrected_df[CR210LA_BaselineCorrected_df['wavelength'].between(Wavelength_Min, Wavelength_Max)]
CR240LA_Select_df = CR240LA_BaselineCorrected_df[CR240LA_BaselineCorrected_df['wavelength'].between(Wavelength_Min, Wavelength_Max)]
CR300LA_Select_df = CR300LA_BaselineCorrected_df[CR300LA_BaselineCorrected_df['wavelength'].between(Wavelength_Min, Wavelength_Max)]
CR440Y_Select_df = CR440Y_BaselineCorrected_df[CR440Y_BaselineCorrected_df['wavelength'].between(Wavelength_Min, Wavelength_Max)]
CR570CP_Select_df = CR570CP_BaselineCorrected_df[CR570CP_BaselineCorrected_df['wavelength'].between(Wavelength_Min, Wavelength_Max)]
CR700Y_Select_df = CR700Y_BaselineCorrected_df[CR700Y_BaselineCorrected_df['wavelength'].between(Wavelength_Min, Wavelength_Max)]
HR660Y_Select_df = HR660Y_BaselineCorrected_df[HR660Y_BaselineCorrected_df['wavelength'].between(Wavelength_Min, Wavelength_Max)]
CR1000Y_Select_df = CR1000Y_BaselineCorrected_df[CR1000Y_BaselineCorrected_df['wavelength'].between(Wavelength_Min, Wavelength_Max)]

# Display one trimmed frame as a sanity check
CR210LA_Select_df


Unnamed: 0,wavelength,CR210LA_10,CR210LA_11,CR210LA_8,CR210LA_9,CR210LA_12,CR210LA_4,CR210LA_5,CR210LA_7,CR210LA_6,CR210LA_2,CR210LA_3,CR210LA_1
718,203.933333,108.858638,203.496779,-0.14974,145.936273,10.411911,-0.093406,-0.164461,-0.051842,145.499405,-0.012634,278.192605,-0.026436
719,203.966667,368.293207,485.141887,109.653689,361.799042,210.28881,98.548707,103.847899,110.193477,232.75188,78.393405,366.475174,90.916063
720,204.0,717.096321,837.894317,422.416328,639.898372,513.255639,406.212364,411.707398,430.137261,514.014626,315.969426,670.312976,416.92074
721,204.033333,1055.679009,1166.498863,875.577118,917.532448,803.205398,860.035191,860.926834,895.724977,930.736124,664.535512,1125.561998,910.477291
722,204.066667,1293.663118,1387.840289,1224.546367,1135.327823,980.445142,1205.245825,1201.839302,1252.431751,1249.875141,933.425324,1473.359373,1293.466242
723,204.1,1377.190439,1466.826789,1230.763091,1247.169968,1010.179384,1193.316106,1190.958317,1252.019984,1244.060851,936.536826,1461.137903,1294.242234
724,204.133333,1261.489052,1380.248554,919.127093,1210.280758,873.649526,851.668101,855.41024,921.593381,936.726969,691.510505,1115.695405,937.936378
725,204.166667,948.990681,1136.73965,527.000482,1016.108578,585.979775,430.258021,440.55135,510.298741,553.56315,380.29064,689.117613,489.008801
726,204.2,563.85687,827.053834,276.585495,744.365918,249.672064,163.005127,176.018386,251.244128,305.896915,173.462198,417.271926,195.422729
727,204.233333,248.809443,554.47626,189.710002,488.374723,-19.518757,72.459041,84.234934,166.283268,214.963348,90.095364,322.201687,84.907398


In [106]:
# Trim the raw (uncorrected) spectra to the same inclusive wavelength window
# [Wavelength_Min, Wavelength_Max], keeping the sample order.
(
    CR210LA_Select_Raw_df,
    CR240LA_Select_Raw_df,
    CR300LA_Select_Raw_df,
    CR440Y_Select_Raw_df,
    CR570CP_Select_Raw_df,
    CR700Y_Select_Raw_df,
    HR660Y_Select_Raw_df,
    CR1000Y_Select_Raw_df,
) = (
    raw_spectra.loc[
        raw_spectra['wavelength'].ge(Wavelength_Min) & raw_spectra['wavelength'].le(Wavelength_Max)
    ]
    for raw_spectra in (
        CR210LA_Raw_df,
        CR240LA_Raw_df,
        CR300LA_Raw_df,
        CR440Y_Raw_df,
        CR570CP_Raw_df,
        CR700Y_Raw_df,
        HR660Y_Raw_df,
        CR1000Y_Raw_df,
    )
)

# Display one trimmed raw frame as a sanity check
CR210LA_Select_Raw_df

Unnamed: 0,wavelength,CR210LA_10,CR210LA_11,CR210LA_8,CR210LA_9,CR210LA_12,CR210LA_4,CR210LA_5,CR210LA_7,CR210LA_6,CR210LA_2,CR210LA_3,CR210LA_1
718,203.933333,927.379629,1056.208101,640.800893,1069.820502,1022.441858,559.652965,560.886494,582.759425,575.262918,538.581006,558.078608,626.541569
719,203.966667,1187.180123,1304.243229,739.399267,1269.622708,1301.721642,652.256055,656.415461,679.440723,664.985654,615.39822,651.481247,724.124598
720,204.0,1536.197937,1627.064589,1041.276975,1532.057732,1666.10556,957.108068,959.473632,987.389137,948.679962,853.175936,960.695984,1056.911544
721,204.033333,1874.846132,1929.296083,1483.908376,1794.48976,2000.870316,1410.784031,1406.99765,1442.459099,1367.792973,1203.286338,1421.583682,1557.138739
722,204.066667,2112.748585,2127.701587,1822.739203,1997.611346,2207.70667,1757.949979,1748.744452,1790.034699,1689.282101,1474.615195,1775.286352,1946.433347
723,204.1,2196.049117,2187.068404,1819.243895,2095.373965,2253.202839,1749.515152,1740.651399,1781.7873,1685.775172,1480.612574,1769.241673,1952.896644
724,204.133333,2079.977836,2084.065837,1498.35768,2045.065491,2119.983929,1412.339016,1409.268692,1444.729573,1380.70455,1238.471068,1430.252337,1601.406379
725,204.166667,1766.9685,1827.207062,1097.478086,1838.200315,1824.557852,995.815182,999.376459,1027.91728,999.75854,929.68706,1010.408956,1156.169316
726,204.2,1381.18468,1507.124946,838.842775,1554.556925,1470.810246,733.30031,740.035278,764.36745,754.263311,724.397622,745.583805,864.895315
727,204.233333,1065.350232,1226.983755,744.31504,1287.52327,1175.879665,646.781422,653.092583,675.842773,665.452597,641.225044,657.825105,755.060251


Let's draw a line plot to get a better picture.

In [107]:
Selected_df_Plot = figure(title='Selected Data Plot', x_axis_label='Wavelength', y_axis_label='Intensity')

# Draw replicates 1..12 of every sample, one colour per sample, in this order.
# Replicate columns are named '<sample>_<n>'.
for sample_name, trimmed_df, sample_colour in (
    ('CR210LA', CR210LA_Select_df, "red"),
    ('CR240LA', CR240LA_Select_df, "green"),
    ('CR300LA', CR300LA_Select_df, "pink"),
    ('CR440Y', CR440Y_Select_df, "yellow"),
    ('CR570CP', CR570CP_Select_df, "blue"),
    ('CR700Y', CR700Y_Select_df, "orange"),
    ('HR660Y', HR660Y_Select_df, "brown"),
    ('CR1000Y', CR1000Y_Select_df, "black"),
):
    for replicate in range(1, 13):
        Selected_df_Plot.line(
            trimmed_df['wavelength'],
            trimmed_df[f'{sample_name}_{replicate}'],
            line_width=2,
            color=sample_colour,
        )

Selected_df_Plot.width = 600
Selected_df_Plot.height = 500
show(Selected_df_Plot)

# Data Preprocessing of the Spectra

Although the plot above shows a spectrum, it is still a raw spectrum with many artifacts. Before proceeding to the univariate calibration, it is important to pre-process the raw spectrum accordingly. Various pre-processing techniques can be used here:

1) Baseline Correction - A very small amount of background radiation is still present in the spectra; it corresponds to the spectral baseline and makes quantitative elemental analysis difficult.

In [108]:
Baseline_Correction_Plot = figure(title='Baseline Correction', x_axis_label='Wavelength', y_axis_label='Intensity')

# Same replicate (CR210LA_1) after and before baseline subtraction. The legend
# labels let the figure be read on its own: green = corrected, red = raw.
Baseline_Correction_Plot.line(
    CR210LA_Select_df.wavelength,
    CR210LA_Select_df.CR210LA_1,
    line_width=2,
    color="green",
    legend_label="Baseline corrected",
)
Baseline_Correction_Plot.line(
    CR210LA_Select_Raw_df.wavelength,
    CR210LA_Select_Raw_df.CR210LA_1,
    line_width=2,
    color="red",
    legend_label="Raw",
)
Baseline_Correction_Plot.width = 600
Baseline_Correction_Plot.height = 500
show(Baseline_Correction_Plot)

2) Normalization - It is often observed that spectra measured on the same sample differ noticeably. There can be many reasons for this, such as laser energy fluctuations or a sample surface that, although nominally homogeneous, is not identical everywhere.

   For instance, in the plot below, although all the spectra come from the same sample (CR240LA), measured on 12 different areas, it is clearly visible that the peak heights (intensities) differ between some of the spectra.


In [109]:

Before_Normalization = figure(title='Before Normalization', x_axis_label='Wavelength', y_axis_label='Intensity')

# Overlay replicates 1..12 of sample CR240LA (columns 'CR240LA_<n>') in one colour
# so that the spread in peak height between replicates is visible.
for replicate in range(1, 13):
    Before_Normalization.line(
        CR240LA_Select_df['wavelength'],
        CR240LA_Select_df[f'CR240LA_{replicate}'],
        line_width=2,
        color="red",
    )

Before_Normalization.width = 600
Before_Normalization.height = 500
show(Before_Normalization)

Now, it is important to understand that these changes in peak height are not specific to a single peak: a change in intensity applies proportionally to the entire spectrum.

Normalization by the peak of a matrix element, i.e. Fe: 238.20 (237.9 ~ 238.7), 263.10 (262.9 ~ 263.4)

Let's now focus on the analysis of the matrix-element peak.

In [110]:
# Inclusive wavelength window around the matrix-element peak.
Matrix_Wavelength_Min = 262.9
Matrix_Wavelength_Max = 263.4

# Each callable mask keeps rows with Matrix_Wavelength_Min <= wavelength <= Matrix_Wavelength_Max
# (Series.between is inclusive on both ends by default).
CR210LA_Matrix_df = CR210LA_BaselineCorrected_df.loc[lambda d: d['wavelength'].between(Matrix_Wavelength_Min, Matrix_Wavelength_Max)]
CR240LA_Matrix_df = CR240LA_BaselineCorrected_df.loc[lambda d: d['wavelength'].between(Matrix_Wavelength_Min, Matrix_Wavelength_Max)]
CR300LA_Matrix_df = CR300LA_BaselineCorrected_df.loc[lambda d: d['wavelength'].between(Matrix_Wavelength_Min, Matrix_Wavelength_Max)]
CR440Y_Matrix_df = CR440Y_BaselineCorrected_df.loc[lambda d: d['wavelength'].between(Matrix_Wavelength_Min, Matrix_Wavelength_Max)]
CR570CP_Matrix_df = CR570CP_BaselineCorrected_df.loc[lambda d: d['wavelength'].between(Matrix_Wavelength_Min, Matrix_Wavelength_Max)]
CR700Y_Matrix_df = CR700Y_BaselineCorrected_df.loc[lambda d: d['wavelength'].between(Matrix_Wavelength_Min, Matrix_Wavelength_Max)]
HR660Y_Matrix_df = HR660Y_BaselineCorrected_df.loc[lambda d: d['wavelength'].between(Matrix_Wavelength_Min, Matrix_Wavelength_Max)]
CR1000Y_Matrix_df = CR1000Y_BaselineCorrected_df.loc[lambda d: d['wavelength'].between(Matrix_Wavelength_Min, Matrix_Wavelength_Max)]

# Display one trimmed frame as a sanity check
CR210LA_Matrix_df

Unnamed: 0,wavelength,CR210LA_10,CR210LA_11,CR210LA_8,CR210LA_9,CR210LA_12,CR210LA_4,CR210LA_5,CR210LA_7,CR210LA_6,CR210LA_2,CR210LA_3,CR210LA_1
2487,262.9,5244.34128,2310.343671,2948.382699,1491.142851,2500.199641,3571.778812,2829.251338,2460.656198,2527.833464,2170.980312,2586.561581,2686.05722
2488,262.933333,4245.604653,1422.598203,3088.860776,796.067571,1698.18589,3730.491359,3019.069377,2667.912693,2700.663174,2200.026177,2713.658795,2664.202073
2489,262.966667,6258.440092,2994.723336,4586.33027,1733.118569,3406.458417,5265.11722,4662.356916,4235.64251,4282.121667,3363.646767,4377.883174,3991.367265
2490,263.0,11888.500891,7681.192527,7038.971716,4792.767009,8210.430281,7805.184975,7365.07538,6766.822169,6879.923898,5439.627068,7198.26225,6477.844794
2491,263.033333,18521.451221,13213.917277,9898.181854,8509.63526,13818.502641,10834.715049,10578.958285,9718.665945,9951.842492,8089.077018,10632.970914,9799.160161
2492,263.066667,22394.961331,16282.913848,12272.955837,10720.913894,16913.491012,13414.588934,13324.777218,12188.602439,12558.832871,10513.307644,13634.8129,12965.091292
2493,263.1,21323.680018,15030.684964,12848.11716,10175.321223,15633.257467,14081.497962,14089.422379,12828.352962,13270.268006,11344.024954,14530.53142,14160.679142
2494,263.133333,17023.974728,11193.38851,10492.503334,7876.959479,11715.596437,11569.787326,11564.559418,10476.932279,10851.197455,9384.623541,11868.182128,11788.150372
2495,263.166667,11778.766874,7019.629477,6225.282956,5155.467804,7412.38539,6967.092587,6874.092679,6170.888048,6379.034749,5555.244527,6855.650616,6993.604024
2496,263.2,7106.8321,3776.385691,2324.469395,2840.715436,4014.115032,2738.710312,2565.805372,2233.245543,2284.895335,2015.212767,2257.937587,2529.454292


In [111]:
Selected_Matrix_Plot = figure(title = 'Selected Matrix Peak' , x_axis_label = 'Wavelength' , y_axis_label = 'Intensity')

Selected_Matrix_Plot.line(CR210LA_Matrix_df.wavelength,CR210LA_Matrix_df.CR210LA_1 , line_width = 2, color ="red" )
Selected_Matrix_Plot.line(CR210LA_Matrix_df.wavelength,CR210LA_Matrix_df.CR210LA_2 , line_width = 2, color ="red" )
Selected_Matrix_Plot.line(CR210LA_Matrix_df.wavelength,CR210LA_Matrix_df.CR210LA_3 , line_width = 2, color ="red" )
Selected_Matrix_Plot.line(CR210LA_Matrix_df.wavelength,CR210LA_Matrix_df.CR210LA_4 , line_width = 2, color ="red" )
Selected_Matrix_Plot.line(CR210LA_Matrix_df.wavelength,CR210LA_Matrix_df.CR210LA_5 , line_width = 2, color ="red" )
Selected_Matrix_Plot.line(CR210LA_Matrix_df.wavelength,CR210LA_Matrix_df.CR210LA_6 , line_width = 2, color ="red" )
Selected_Matrix_Plot.line(CR210LA_Matrix_df.wavelength,CR210LA_Matrix_df.CR210LA_7 , line_width = 2, color ="red" )
Selected_Matrix_Plot.line(CR210LA_Matrix_df.wavelength,CR210LA_Matrix_df.CR210LA_8 , line_width = 2, color ="red" )
Selected_Matrix_Plot.line(CR210LA_Matrix_df.wavelength,CR210LA_Matrix_df.CR210LA_9 , line_width = 2, color ="red" )
Selected_Matrix_Plot.line(CR210LA_Matrix_df.wavelength,CR210LA_Matrix_df.CR210LA_10 , line_width = 2, color ="red" )
Selected_Matrix_Plot.line(CR210LA_Matrix_df.wavelength,CR210LA_Matrix_df.CR210LA_11 , line_width = 2, color ="red" )
Selected_Matrix_Plot.line(CR210LA_Matrix_df.wavelength,CR210LA_Matrix_df.CR210LA_12 , line_width = 2, color ="red" )

######################################################################################################################
# Overlay all twelve CR240LA replicate spectra on the shared figure, in green.
for replicate in range(1, 13):
    Selected_Matrix_Plot.line(
        CR240LA_Matrix_df["wavelength"],
        CR240LA_Matrix_df[f"CR240LA_{replicate}"],
        line_width=2,
        color="green",
    )

###############################################################################################################

# Overlay all twelve CR300LA replicate spectra on the shared figure, in pink.
for replicate in range(1, 13):
    Selected_Matrix_Plot.line(
        CR300LA_Matrix_df["wavelength"],
        CR300LA_Matrix_df[f"CR300LA_{replicate}"],
        line_width=2,
        color="pink",
    )

######################################################################################################################

# Overlay all twelve CR440Y replicate spectra on the shared figure, in yellow.
for replicate in range(1, 13):
    Selected_Matrix_Plot.line(
        CR440Y_Matrix_df["wavelength"],
        CR440Y_Matrix_df[f"CR440Y_{replicate}"],
        line_width=2,
        color="yellow",
    )

##############################################################################################################

# Overlay all twelve CR570CP replicate spectra on the shared figure, in blue.
for replicate in range(1, 13):
    Selected_Matrix_Plot.line(
        CR570CP_Matrix_df["wavelength"],
        CR570CP_Matrix_df[f"CR570CP_{replicate}"],
        line_width=2,
        color="blue",
    )

###############################################################################################################

# Overlay all twelve CR700Y replicate spectra on the shared figure, in orange.
for replicate in range(1, 13):
    Selected_Matrix_Plot.line(
        CR700Y_Matrix_df["wavelength"],
        CR700Y_Matrix_df[f"CR700Y_{replicate}"],
        line_width=2,
        color="orange",
    )


####################################################################################################################

# Overlay all twelve HR660Y replicate spectra on the shared figure, in brown.
for replicate in range(1, 13):
    Selected_Matrix_Plot.line(
        HR660Y_Matrix_df["wavelength"],
        HR660Y_Matrix_df[f"HR660Y_{replicate}"],
        line_width=2,
        color="brown",
    )


##################################################################################################################

# Overlay all twelve CR1000Y replicate spectra on the shared figure, in black.
for replicate in range(1, 13):
    Selected_Matrix_Plot.line(
        CR1000Y_Matrix_df["wavelength"],
        CR1000Y_Matrix_df[f"CR1000Y_{replicate}"],
        line_width=2,
        color="black",
    )

###################################################################################################################

# Fix the canvas size (600 x 500) in one call, then render the figure.
Selected_Matrix_Plot.update(width=600, height=500)
show(Selected_Matrix_Plot)

In [112]:

def normalize_data(matrix_df, baseline_df, prominence=1000):
    """Scale each spectrum in ``baseline_df`` by a reference-peak intensity from ``matrix_df``.

    For every sample column of ``matrix_df`` (every column after the first),
    peaks are located with ``scipy.signal.find_peaks`` using ``prominence``.
    The intensity at the first peak returned is used as the divisor for the
    same-named column of ``baseline_df``.

    Parameters
    ----------
    matrix_df : pandas.DataFrame
        Frame whose first column is skipped and whose remaining columns are
        searched for the reference peak.
    baseline_df : pandas.DataFrame
        Frame with a ``'wavelength'`` column and one column per sample column
        of ``matrix_df`` (matched by name).
    prominence : number, optional
        Minimum peak prominence passed to ``find_peaks``. Defaults to 1000.

    Returns
    -------
    pandas.DataFrame
        ``'wavelength'`` copied from ``baseline_df`` followed by one
        normalized column per sample column, indexed like ``baseline_df``.

    Raises
    ------
    IndexError
        If a sample column of ``matrix_df`` has no peak meeting ``prominence``.
    KeyError
        If ``baseline_df`` lacks ``'wavelength'`` or a sample column.
    """
    normalized_df = pd.DataFrame()
    normalized_df['wavelength'] = baseline_df['wavelength']

    for sample_col in matrix_df.columns[1:]:
        intensities = matrix_df[sample_col]
        peaks, _ = fp(intensities, prominence=prominence)

        # Without this guard an empty peak list surfaces as an opaque
        # "index 0 is out of bounds" error with no hint of which spectrum failed.
        if len(peaks) == 0:
            raise IndexError(
                f"No peak with prominence >= {prominence} found in column "
                f"{sample_col!r}; cannot normalize it."
            )

        # Only the first detected peak (lowest row position) is used as reference.
        reference_intensity = intensities.iloc[peaks[0]]
        normalized_df[sample_col] = baseline_df[sample_col] / reference_intensity

    return normalized_df


In [113]:
# normalized_df = pd.DataFrame()
# normalized_df['wavelength'] = CR210LA_BaselineCorrected_df['wavelength']

# for sample_col in CR210LA_Matrix_df.columns[1:]:
#         peaks, _ = fp(CR210LA_Matrix_df[sample_col], prominence=1000 )
#         return_intensities = CR210LA_Matrix_df[sample_col].iloc[peaks]
#         correspondin_col = sample_col
#         print(return_intensities)
#         print(correspondin_col)
#         normalized_df[sample_col] = CR210LA_BaselineCorrected_df[correspondin_col] / return_intensities.values[0] 

# print (normalized_df)

In [114]:
# Normalize each sample's baseline-corrected spectra against its own matrix
# frame; the pairs below are listed in the same order as the target names.
(
    CR210LA_Normalized_df,
    CR240LA_Normalized_df,
    CR300LA_Normalized_df,
    CR440Y_Normalized_df,
    CR570CP_Normalized_df,
    CR700Y_Normalized_df,
    HR660Y_Normalized_df,
    CR1000Y_Normalized_df,
) = [
    normalize_data(matrix_df=matrix, baseline_df=corrected)
    for matrix, corrected in (
        (CR210LA_Matrix_df, CR210LA_BaselineCorrected_df),
        (CR240LA_Matrix_df, CR240LA_BaselineCorrected_df),
        (CR300LA_Matrix_df, CR300LA_BaselineCorrected_df),
        (CR440Y_Matrix_df, CR440Y_BaselineCorrected_df),
        (CR570CP_Matrix_df, CR570CP_BaselineCorrected_df),
        (CR700Y_Matrix_df, CR700Y_BaselineCorrected_df),
        (HR660Y_Matrix_df, HR660Y_BaselineCorrected_df),
        (CR1000Y_Matrix_df, CR1000Y_BaselineCorrected_df),
    )
]
# print (CR210LA_Normalized_df.iloc[2493])

In [115]:
# Keep only the rows whose wavelength lies inside the closed interval
# [Wavelength_Min, Wavelength_Max]; both end points are retained.
CR210LA_Processed_Select_df = CR210LA_Normalized_df.loc[
    CR210LA_Normalized_df['wavelength'].between(Wavelength_Min, Wavelength_Max)
]
CR240LA_Processed_Select_df = CR240LA_Normalized_df.loc[
    CR240LA_Normalized_df['wavelength'].between(Wavelength_Min, Wavelength_Max)
]
CR300LA_Processed_Select_df = CR300LA_Normalized_df.loc[
    CR300LA_Normalized_df['wavelength'].between(Wavelength_Min, Wavelength_Max)
]
CR440Y_Processed_Select_df = CR440Y_Normalized_df.loc[
    CR440Y_Normalized_df['wavelength'].between(Wavelength_Min, Wavelength_Max)
]
CR570CP_Processed_Select_df = CR570CP_Normalized_df.loc[
    CR570CP_Normalized_df['wavelength'].between(Wavelength_Min, Wavelength_Max)
]
CR700Y_Processed_Select_df = CR700Y_Normalized_df.loc[
    CR700Y_Normalized_df['wavelength'].between(Wavelength_Min, Wavelength_Max)
]
HR660Y_Processed_Select_df = HR660Y_Normalized_df.loc[
    HR660Y_Normalized_df['wavelength'].between(Wavelength_Min, Wavelength_Max)
]
CR1000Y_Processed_Select_df = CR1000Y_Normalized_df.loc[
    CR1000Y_Normalized_df['wavelength'].between(Wavelength_Min, Wavelength_Max)
]

# Last expression of the cell: rendered as the cell's output.
CR210LA_Select_df

Unnamed: 0,wavelength,CR210LA_10,CR210LA_11,CR210LA_8,CR210LA_9,CR210LA_12,CR210LA_4,CR210LA_5,CR210LA_7,CR210LA_6,CR210LA_2,CR210LA_3,CR210LA_1
718,203.933333,108.858638,203.496779,-0.14974,145.936273,10.411911,-0.093406,-0.164461,-0.051842,145.499405,-0.012634,278.192605,-0.026436
719,203.966667,368.293207,485.141887,109.653689,361.799042,210.28881,98.548707,103.847899,110.193477,232.75188,78.393405,366.475174,90.916063
720,204.0,717.096321,837.894317,422.416328,639.898372,513.255639,406.212364,411.707398,430.137261,514.014626,315.969426,670.312976,416.92074
721,204.033333,1055.679009,1166.498863,875.577118,917.532448,803.205398,860.035191,860.926834,895.724977,930.736124,664.535512,1125.561998,910.477291
722,204.066667,1293.663118,1387.840289,1224.546367,1135.327823,980.445142,1205.245825,1201.839302,1252.431751,1249.875141,933.425324,1473.359373,1293.466242
723,204.1,1377.190439,1466.826789,1230.763091,1247.169968,1010.179384,1193.316106,1190.958317,1252.019984,1244.060851,936.536826,1461.137903,1294.242234
724,204.133333,1261.489052,1380.248554,919.127093,1210.280758,873.649526,851.668101,855.41024,921.593381,936.726969,691.510505,1115.695405,937.936378
725,204.166667,948.990681,1136.73965,527.000482,1016.108578,585.979775,430.258021,440.55135,510.298741,553.56315,380.29064,689.117613,489.008801
726,204.2,563.85687,827.053834,276.585495,744.365918,249.672064,163.005127,176.018386,251.244128,305.896915,173.462198,417.271926,195.422729
727,204.233333,248.809443,554.47626,189.710002,488.374723,-19.518757,72.459041,84.234934,166.283268,214.963348,90.095364,322.201687,84.907398


In [116]:

# Figure showing the twelve normalized CR240LA replicates inside the
# selected wavelength window.
After_Normalization = figure(
    title='After Normalization',
    x_axis_label='Wavelength',
    y_axis_label='Intensity',
)

# One red trace per replicate column CR240LA_1 ... CR240LA_12.
for replicate in range(1, 13):
    After_Normalization.line(
        CR240LA_Processed_Select_df["wavelength"],
        CR240LA_Processed_Select_df[f"CR240LA_{replicate}"],
        line_width=2,
        color="red",
    )

# Fix the canvas size (600 x 500), then render.
After_Normalization.update(width=600, height=500)
show(After_Normalization)