In [74]:
import os
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from lmfit.models import LorentzianModel, QuadraticModel, LinearModel
from scipy.optimize import curve_fit
from scipy.optimize import leastsq
from scipy.signal import find_peaks as fp
from sklearn import preprocessing

In [75]:
def _read_spectrum(path):
    '''Load a headerless, tab-separated (shift, intensity) file indexed by shift.'''
    return pd.read_csv(path, sep='\t', names=['shift', 'intensity'], index_col='shift')


def process_data(data_folder, lower_bound=1000, upper_bound=3500):
    '''
    Raw data extraction and noise clearing.

    Every ``*.txt`` spectrum found (recursively) under
    ``../data/raw_data/<data_folder>`` is cleaned and written to
    ``../data/processed_data/<data_folder>/<name>_noiseless.txt`` as a
    headerless, tab-separated (shift, intensity) table.

    The cleaning steps are, in order:
      1. subtraction of the reference spectrum stored in
         ``../data/spectra_reference_graphene.txt``,
      2. restriction to the shift window ``[lower_bound, upper_bound]``,
      3. min-max normalization of the intensity to the range [0, 1].

    Parameters
    ----------
    data_folder : str
        Name of the sub-folder of ``../data/raw_data`` to process.
    lower_bound, upper_bound : float, optional
        Inclusive limits of the shift window that is kept
        (defaults: 1000 and 3500).

    Returns
    -------
    None
        Results are written to disk and progress is printed.
    '''
    print('Initializing data processing.')

    directory_processed_data = '../data/processed_data/' + data_folder
    try:
        # makedirs (instead of mkdir) also creates missing parent folders.
        os.makedirs(directory_processed_data)
        print(f"Directory '{directory_processed_data}' created successfully.")
    except FileExistsError:
        print(f"Directory '{directory_processed_data}' already exists.")
    except PermissionError:
        print(f"Permission denied: Unable to create '{directory_processed_data}'.")
    except Exception as e:
        print(f"An error occurred: {e}")

    # Sorted so that the processing order is reproducible across platforms.
    raw_data_list = np.array(sorted(Path('../data/raw_data/' + data_folder).rglob('*.txt')))
    print(raw_data_list)
    print('A total of '+ str(len(raw_data_list)) + ' data files have been found to process.')

    # The reference spectrum is the same for every file: read it once, and only
    # when there is something to process.
    df_reference = None
    if len(raw_data_list) > 0:
        df_reference = _read_spectrum('../data/spectra_reference_graphene.txt')

    for file_path in raw_data_list:
        df_raw = _read_spectrum(file_path)

        # Spectral subtraction of the reference. pandas aligns the two frames on
        # the shift index; shifts present in only one of them become NaN and
        # are dropped because they carry no usable intensity.
        df = (df_raw - df_reference).dropna()

        # Shift range reduction. Label slicing needs an ascending index, so
        # spectra exported with a descending shift axis are sorted first.
        df = df.sort_index().loc[lower_bound:upper_bound]

        # Min-max normalization to [0, 1].
        floor = df.min()
        span = df.max() - floor
        # A perfectly flat spectrum has zero span; dividing by 1 keeps it at 0
        # instead of turning every value into NaN.
        span = span.where(span != 0, 1)
        df = (df - floor) / span

        # Save clean data. NOTE: the output folder is flat, so raw files with
        # the same name in different sub-folders overwrite each other.
        output_path = Path(directory_processed_data) / (file_path.stem + '_noiseless.txt')
        df.to_csv(output_path, sep='\t', index=True, header=False)

    processed_data_list = np.array(sorted(Path(directory_processed_data).rglob('*.txt')))
    print(processed_data_list)
    print('A total of '+ str(len(processed_data_list)) + ' data files have been processed.')

In [145]:
def add_peak(prefix, center, amplitude=0.05, sigma=0.5):
    '''
    Build one Lorentzian component together with its initial parameters.

    Parameters
    ----------
    prefix : str
        Prefix given to the model; it is prepended to every parameter name.
    center, amplitude, sigma : float
        Initial values for the parameters of the same name. ``sigma`` is
        additionally bounded below by 0.

    Returns
    -------
    tuple
        ``(model, parameters)``: the ``LorentzianModel`` and the parameter set
        created by its ``make_params``.
    '''
    lorentzian = LorentzianModel(prefix=prefix)
    lorentzian_params = lorentzian.make_params()

    starting_values = {'center': center, 'amplitude': amplitude}
    for suffix, value in starting_values.items():
        lorentzian_params[prefix + suffix].set(value)
    lorentzian_params[prefix + 'sigma'].set(sigma, min=0)

    return lorentzian, lorentzian_params

def find_peaks(df, height = 0.025, prominence = 0.025, distance = None):
    '''
    Locate the peaks of the ``intensity`` column of a spectrum.

    ``height``, ``prominence`` and ``distance`` are forwarded unchanged to
    ``scipy.signal.find_peaks`` (imported here as ``fp``).

    Returns
    -------
    tuple
        ``(n, peaks)`` where ``peaks`` holds the rows of ``df`` at the detected
        positions and ``n`` is how many there are.
    '''
    peak_positions = fp(
        x=df.intensity,
        height=height,
        prominence=prominence,
        distance=distance,
    )[0]
    detected = df.iloc[peak_positions]
    return len(detected), detected

    #fig2 = plt.figure(figsize=(20,5))
    #ax = fig2.add_subplot(1,1,1)
    #ax.plot(df,linewidth=1,label='Graphene Spectra')
    #ax.scatter(peaks.index.values,peaks)
    #ax.grid()
    #ax.set_ylabel('Intensity [normalized]')
    #ax.set_xlabel('Raman Shift [cm^-1]')
    #ax.set_title(filename + '_noiseless_peaks')
    #ax.legend()
    #plt.show()

def lorentz_fitting(data_folder):
    '''
    Fit every processed spectrum of a data folder and save the fit reports.

    For each ``*.txt`` file found (recursively) under
    ``../data/processed_data/<data_folder>`` the peaks are located with
    ``find_peaks``, a model made of a linear background plus one Lorentzian
    per detected peak (built with ``add_peak``) is fitted, and the lmfit fit
    report is written to
    ``../data/fit_reports/<data_folder>/<name>_fit_report.txt``.

    Parameters
    ----------
    data_folder : str
        Name of the sub-folder of ``../data/processed_data`` to analyze.

    Returns
    -------
    None
        Reports are written to disk and progress is printed.
    '''
    print('Initializing data analysis.')

    directory_fit_report = '../data/fit_reports/' + data_folder
    try:
        # makedirs (instead of mkdir) also creates missing parent folders.
        os.makedirs(directory_fit_report)
        print(f"Directory '{directory_fit_report}' created successfully.")
    except FileExistsError:
        print(f"Directory '{directory_fit_report}' already exists.")
    except PermissionError:
        print(f"Permission denied: Unable to create '{directory_fit_report}'.")
    except Exception as e:
        print(f"An error occurred: {e}")

    # Sorted so that the processing order is reproducible across platforms.
    processed_data_list = np.array(sorted(Path('../data/processed_data/' + data_folder).rglob('*.txt')))
    print(processed_data_list)
    print('A total of '+ str(len(processed_data_list)) + ' data files have been found to analyze.')
    print('Initializing Lorentzian fitting.')
    total = len(processed_data_list)

    for count, file_path in enumerate(processed_data_list, start=1):
        # Parse the file once; rows without a usable intensity are dropped so
        # they cannot reach the peak search or the fit.
        df = pd.read_csv(file_path, sep='\t', names=['shift','intensity'], index_col='shift').dropna()
        xData = df.index.to_numpy(dtype=float)
        yData = df['intensity'].to_numpy(dtype=float)

        _, peaks = find_peaks(df)

        # LinearModel's parameters are `slope` and `intercept` (a/b/c belong to
        # QuadraticModel); naming them correctly makes the background really
        # start flat instead of at lmfit's default slope of 1.
        model = LinearModel(prefix='bkg_')
        params = model.make_params(slope=0, intercept=0)

        # One Lorentzian per detected peak, centred on the detected shift.
        for i, cen in enumerate(peaks.index.values):
            peak, pars = add_peak('lz%d_' % (i+1), cen)
            model = model + peak
            params.update(pars)

        result = model.fit(yData, params, x=xData)
        report = result.fit_report(min_correl=0.5)

        # Save the report as txt; the context manager closes the file even if
        # the write fails.
        report_path = Path(directory_fit_report) / (file_path.stem + '_fit_report.txt')
        with open(report_path, 'w') as f:
            f.write(report)

        # Progress bar, refreshed after each file so that it ends at 100%.
        sys.stdout.write('\r')
        sys.stdout.write("[%-s] %d%%" % ('='*count, 100/total*count))
        sys.stdout.flush()

    if total > 0:
        # Finish the progress-bar line so the next print starts on its own line.
        sys.stdout.write('\n')

    fit_report_list = np.array(sorted(Path(directory_fit_report).rglob('*.txt')))
    print(fit_report_list)
    print('A total of '+ str(len(fit_report_list)) + ' data files have been analyzed.')

In [146]:
# Run the whole pipeline on one measurement folder. The order matters:
# process_data writes the cleaned spectra into ../data/processed_data/<folder>,
# which is exactly where lorentz_fitting looks for its input files.
data_folder = '20241112_data_LUCE_ICAT'
process_data(data_folder)
lorentz_fitting(data_folder)

Initializing data processing.
Directory '../data/processed_data/20241112_data_LUCE_ICAT' already exists.
[WindowsPath('../data/raw_data/20241112_data_LUCE_ICAT/Muestra 1_100x_zona1_azul.txt')
 WindowsPath('../data/raw_data/20241112_data_LUCE_ICAT/Muestra 1_100x_zona1_rojo.txt')
 WindowsPath('../data/raw_data/20241112_data_LUCE_ICAT/Muestra 1_100x_zona1_verde.txt')
 WindowsPath('../data/raw_data/20241112_data_LUCE_ICAT/Muestra 1_100x_zona2_azul.txt')
 WindowsPath('../data/raw_data/20241112_data_LUCE_ICAT/Muestra 1_100x_zona2_rojo.txt')
 WindowsPath('../data/raw_data/20241112_data_LUCE_ICAT/Muestra 1_100x_zona2_verde.txt')
 WindowsPath('../data/raw_data/20241112_data_LUCE_ICAT/Muestra 1_100x_zona3_azul.txt')
 WindowsPath('../data/raw_data/20241112_data_LUCE_ICAT/Muestra 1_100x_zona3_cian.txt')
 WindowsPath('../data/raw_data/20241112_data_LUCE_ICAT/Muestra 1_100x_zona3_rojo.txt')
 WindowsPath('../data/raw_data/20241112_data_LUCE_ICAT/Muestra 1_100x_zona3_verde.txt')
 WindowsPath('../data/

KeyboardInterrupt: 