**FTIR Quantification Project**

***Plan of Action:***
    
    1. Import packages

    2. Import .csv files w/ Pandas

    3. Visualise spectra w/ Matplotlib.pyplot

    4. Perform initial exploratory data analysis

    5. Normalize the data w/ machine learning (MinMaxScaler)

    6. Isolate one data frame, perform the initial steps labelled above (except normalization, as this can be imported from the above cells).

        6.1 Invert the peaks to allow troughs/peaks to be picked

        6.2 Set a threshold for peaks to be picked.

        6.3 Perform quantification on these isolated peaks

    7. Create a dataframe with the quantification data
    
    8. Have this dataframe exported to a new file.

***Next Steps:***

    1. Classify the wavenumbers to correspond to the Epoxy peaks of interest
    
    2. Understand the variation (if any/ significant) between quantification data points


In [17]:
# Basic package imports
import numpy as np
import pandas as pd
import os

# Visualisation packages
import matplotlib.pyplot as plt

# Statistical analysis packages
from scipy.signal import find_peaks, peak_prominences, peak_widths
from scipy.stats import cauchy, ttest_ind

# Machine learning
from sklearn.preprocessing import MinMaxScaler

In [18]:
# Load the four spectra, one CSV per sample, into DataFrames.
# Each 'filename' entry is a placeholder (no real path is hardcoded here);
# the entries are read in order and bound to df02, df03, df04 and df05.
df02, df03, df04, df05 = map(
    pd.read_csv,
    [
        'filename',
        'filename',
        'filename',
        'filename',
    ],
)

In [None]:
# Matplotlib visualisation: overlay the raw spectra of all four samples
plt.figure(figsize=(30, 10))
for sample_label, spectrum in (('02', df02), ('03', df03), ('04', df04), ('05', df05)):
    plt.plot(spectrum['Wavenumber'], spectrum['Intensity'], label=sample_label)
# Label the x-axis as well, consistent with the normalized trough plot
plt.xlabel('Wavenumber')
plt.ylabel('Intensity')
plt.title('IR Spectra')
plt.legend()
plt.show()

In [None]:
# Find peaks in each spectrum; height=0.1 keeps only peaks of at least that intensity.
# find_peaks returns positional (0-based) sample indices into the intensity column.
peaks2, _ = find_peaks(df02['Intensity'], height=0.1)
peaks3, _ = find_peaks(df03['Intensity'], height=0.1)
peaks4, _ = find_peaks(df04['Intensity'], height=0.1)
# Fixed: this previously searched df04 again, so df05 was summed at df04's peak positions
peaks5, _ = find_peaks(df05['Intensity'], height=0.1)

# Quantify changes: sum the intensities at the detected peak positions.
# .iloc is used because the peak indices are positions, not index labels.
sum_peaks2 = np.sum(df02['Intensity'].iloc[peaks2])
sum_peaks3 = np.sum(df03['Intensity'].iloc[peaks3])
sum_peaks4 = np.sum(df04['Intensity'].iloc[peaks4])
sum_peaks5 = np.sum(df05['Intensity'].iloc[peaks5])

# Print the summed peak intensity of every spectrum
print(f'Sum of peak intenisties for df02: {sum_peaks2}')
print(f'Sum of peak intenisties for df03: {sum_peaks3}')
print(f'Sum of peak intenisties for df04: {sum_peaks4}')
print(f'Sum of peak intenisties for df05: {sum_peaks5}')

In [21]:
# Normalize the data: add a 'Normalized_Intensity' column to every spectrum.
# fit_transform re-fits the shared scaler on each call, so every DataFrame is
# scaled using only its own 'Intensity' column.
scaler = MinMaxScaler()

for spectrum in (df02, df03, df04, df05):
    intensity_column = spectrum[['Intensity']]
    spectrum['Normalized_Intensity'] = scaler.fit_transform(intensity_column)

**Further Understanding**

***Depth:*** This is the intensity value at the trough. It indicates how deep the trough is compared to the baseline.

***Prominence:*** This measures how much a trough stands out due to its depth and its surrounding peaks. It is the vertical distance between the trough and the highest point of the surrounding baseline.

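***Width:*** This is the width of the trough measured at half its prominence, i.e. the full width at half maximum (FWHM). It provides information about how broad or narrow the trough is. Note that `peak_widths` reports this value in data points (samples), not in wavenumber units.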

In [None]:
# Plot the normalized df03 spectrum, then locate, quantify and annotate its troughs
plt.figure(figsize=(25, 10))

# Only df03 is analysed in this cell; uncomment the other lines to overlay the remaining spectra
# plt.plot(df02['Wavenumber'], df02['Normalized_Intensity'], label='df02 Normalized Intensity')
plt.plot(df03['Wavenumber'], df03['Normalized_Intensity'], label='df03 Normalized Intensity')
# plt.plot(df04['Wavenumber'], df04['Normalized_Intensity'], label='df04 Normalized Intensity')
# plt.plot(df05['Wavenumber'], df05['Normalized_Intensity'], label='df05 Normalized Intensity')

# Invert the 'Normalized_Intensity' so that troughs become peaks for find_peaks
inverted_intensity = -df03['Normalized_Intensity']

# Find peaks in the inverted intensity (which are troughs in the original data).
# The returned values are positional (0-based) sample indices, not index labels,
# so every lookup below goes through .iloc and works for any DataFrame index.
potential_troughs, _ = find_peaks(inverted_intensity)

# Filter troughs to only include those below the intensity threshold
intensity_threshold = 0.1
is_below_threshold = (
    df03['Normalized_Intensity'].iloc[potential_troughs] < intensity_threshold
).to_numpy()
troughs = potential_troughs[is_below_threshold]

# Extract the wavenumber positions of the troughs
trough_wavenumbers = df03['Wavenumber'].iloc[troughs]

# Quantify the troughs
depths = df03['Normalized_Intensity'].iloc[troughs]  # Normalized intensity at each trough
prominences = peak_prominences(inverted_intensity, troughs)[0]  # Prominences of the troughs
# Widths at half prominence; scipy reports them in samples, not wavenumber units
widths = peak_widths(inverted_intensity, troughs, rel_height=0.5)[0]

# Create a DataFrame to store the quantification results (one row per trough)
troughs_table = pd.DataFrame({
    'Wavenumber': trough_wavenumbers.values,
    'Depth': depths.values,
    'Prominence': prominences,
    'Width': widths
})

# Print the table
print(troughs_table)

# Plot the troughs
plt.plot(trough_wavenumbers, depths, "x", label='Troughs in df03')

# Annotate the troughs with their wavenumber positions
for i, (wavenumber, depth) in enumerate(zip(trough_wavenumbers.values, depths.values)):
    plt.annotate(f'{wavenumber:.0f}',
                 (wavenumber, depth),
                 textcoords="offset points",
                 xytext=(0, 10 + (i % 2) * 10),  # Alternate the offset to avoid overlap
                 ha='center')

plt.xlabel('Wavenumber')
plt.ylabel('Normalized Intensity')
plt.title('Normalized IR Spectra')
plt.legend()
plt.show()

In [23]:
# Export the trough quantification table to a CSV file.
# index=False leaves the DataFrame's row index out of the written file.
results_csv = 'troughs_quantification_results.csv'
troughs_table.to_csv(results_csv, index=False)

In [None]:
# Plot the normalized data: overlay the normalized spectra of all four samples
plt.figure(figsize=(25, 10))

for frame_name, spectrum in (('df02', df02), ('df03', df03), ('df04', df04), ('df05', df05)):
    plt.plot(spectrum['Wavenumber'], spectrum['Normalized_Intensity'],
             label=f'{frame_name} Normalized Intensity')

# The curve labels above are only displayed once a legend is drawn; the axes
# and title are labelled to match the df03 trough plot.
plt.xlabel('Wavenumber')
plt.ylabel('Normalized Intensity')
plt.title('Normalized IR Spectra')
plt.legend()
plt.show()

In [None]:
import glob
import os

import pandas as pd


def list_csv_files(folder):
    """Return the sorted paths of the ``.csv`` files directly inside *folder*.

    The folder name is escaped so that glob metacharacters in it (``[``, ``*``,
    ``?``) are matched literally. A folder that does not exist, or that holds
    no CSV files, yields an empty list.
    """
    pattern = os.path.join(glob.escape(folder), '*.csv')
    return sorted(glob.glob(pattern))


# Define path to folder containing the CSV files
folder_path = r'insert folder path'

# Get all CSV files in the folder. Globbing the bare folder path would match
# the folder itself rather than the files inside it.
csv_files = list_csv_files(folder_path)

# Loop through the list of CSV files and process each one
for file in csv_files:
    # Best-effort processing: a failure in one file is reported and the loop continues
    try:
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file)

        # Perform your processing here
        print(f"Processing file: {file}")
        print(df.head())  # Example: print the first few rows of each file

    except Exception as e:
        print(f"Error processing file {file}: {e}")


In [None]:
# Assign the path of the folder that holds the Excel workbooks
folder = 'insert folder path'

# List the Excel workbooks in that folder. Other file types are skipped so
# that pd.read_excel is only called on files it can be expected to open.
excel_files = [
    file for file in sorted(os.listdir(folder))
    if file.lower().endswith(('.xlsx', '.xlsm', '.xls'))
]

# Build one DataFrame per workbook, with all of its sheets stacked together
list_of_dfs = []
for file in excel_files:
    # sheet_name=None loads every sheet as a {sheet name: DataFrame} mapping;
    # os.path.join keeps the path valid on every operating system.
    sheets = pd.read_excel(os.path.join(folder, file), sheet_name=None)

    # Concatenating the mapping puts the sheet name in index level 0
    df = pd.concat(sheets)
    df.index = df.index.get_level_values(0)

    # Move the sheet name into a 'Tab' column. The result must be assigned:
    # reset_index/rename return new frames and were previously discarded.
    df = df.rename_axis('Tab').reset_index()

    # splitext removes only the final extension, so dotted file names stay intact
    df['excelfile_name'] = os.path.splitext(file)[0]
    list_of_dfs.append(df)