In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.signal import find_peaks, savgol_filter

# Directory containing the CSV files
# NOTE(review): this is a machine-specific absolute path; anyone else running
# the notebook has to edit it before the cell will work.
input_directory = r'C:\Users\kulma\OneDrive\Documents\School\MASc Year 1\Surface Profilometry Analyzer\Surface-Profilometry-Analyzer\Peak picking algorithm\path_to_your_output_folder'

# Directory to save the output files (a "Final" sub-folder of the input folder)
output_directory = os.path.join(input_directory, "Final")

# Create the output directory if it does not exist.
# exist_ok=True makes the call idempotent without a separate existence check,
# so there is no window between "check" and "create", and a non-directory
# entry named "Final" is reported here instead of failing later when saving.
os.makedirs(output_directory, exist_ok=True)

# Function to perform moving average baseline correction
def baseline_correction(data, window_size, max_sweeps):
    """Remove a centred moving-average baseline from ``data``.

    On every sweep a rolling mean of the *current estimate* is computed
    (``window_size`` samples, centred, ``min_periods=1`` so edge samples are
    averaged over whatever neighbours exist) and that baseline is subtracted
    from the *original* ``data`` -- not from the running estimate.

    Parameters
    ----------
    data : array-like exposing ``.copy()`` and supporting subtraction of a
        pandas Series (the caller in this file passes a 1-D NumPy array).
    window_size : int
        Number of samples in the rolling-mean window.
    max_sweeps : int
        Upper bound on the number of sweeps.

    Returns
    -------
    The corrected signal.  After at least one sweep this is the result of
    ``data - <pandas Series>``; with ``max_sweeps == 0`` it is an untouched
    copy of ``data``.
    """
    estimate = data.copy()
    for _sweep in range(max_sweeps):
        roller = pd.Series(estimate).rolling(
            window=window_size,
            min_periods=1,
            center=True,
        )
        estimate = data - roller.mean()
        # Stop early once nothing but (numerical) zeros is left.
        if np.allclose(estimate, 0, atol=1e-8):
            break
    return estimate

# Function to find the lowest value among the local negative peaks (troughs)
def find_lowest_trough_value(data, smooth_window=51, polyorder=3, prominence=0.001):
    """Locate troughs in a smoothed copy of ``data`` and report the lowest one.

    The signal is smoothed with a Savitzky-Golay filter, negated, and passed
    to ``scipy.signal.find_peaks`` so that local minima show up as peaks.
    Trough *positions* come from the smoothed signal; trough *values* are
    read from the unsmoothed ``data`` at those positions.

    Parameters
    ----------
    data : 1-D array-like of numbers.
    smooth_window : int
        Savitzky-Golay window length.  If it is larger than the number of
        samples it is reduced to the largest odd length that fits, so short
        traces are analysed instead of raising ``ValueError``.
    polyorder : int
        Polynomial order of the Savitzky-Golay filter.
    prominence : float
        Minimum prominence a trough must have to be reported.

    Returns
    -------
    ``None`` if no trough is found or if the trace is too short to be
    smoothed with ``polyorder``; otherwise the tuple
    ``(lowest_trough_value, trough_indices, smoothed_data)``.
    """
    # Work on a plain array so the indexing below is positional even if the
    # caller hands in a pandas Series.
    values = np.asarray(data)
    n_samples = values.size

    window = smooth_window
    if window > n_samples:
        # The filter's default edge handling needs window_length <= len(x);
        # shrink to the largest odd window that fits.
        window = n_samples if n_samples % 2 == 1 else n_samples - 1
        if window <= polyorder:
            # Too few samples to fit the requested polynomial at all.
            return None

    # Smooth the data to reduce noise
    smoothed_data = savgol_filter(values, window_length=window, polyorder=polyorder)

    # Invert the data so troughs become peaks
    peaks, _ = find_peaks(-smoothed_data, prominence=prominence)

    if len(peaks) == 0:
        return None

    lowest_trough_value = np.min(values[peaks])
    return lowest_trough_value, peaks, smoothed_data

# Process each CSV file in the directory.
# sorted() makes the processing (and printing) order reproducible, and the
# extension test is case-insensitive so "*.CSV" files are not skipped.
for filename in sorted(os.listdir(input_directory)):
    if not filename.lower().endswith(".csv"):
        continue

    file_path = os.path.join(input_directory, filename)

    # Load data from CSV file; a single empty or malformed file must not
    # abort the whole batch.
    # NOTE(review): read_csv treats the first row as a header by default --
    # confirm the input files really start with a header line.
    try:
        data = pd.read_csv(file_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        print(f"Could not read {filename}: {exc}")
        continue

    # Assuming the relevant data is in the first column.
    # Non-numeric entries are coerced to NaN and then dropped.
    data_values = pd.to_numeric(data.iloc[:, 0], errors='coerce').dropna().values

    if data_values.size == 0:
        print(f"No valid numeric data in {filename}")
        continue

    # Plot the original data
    fig, ax = plt.subplots(figsize=(14, 7))
    ax.plot(data_values, label='Original Data')
    ax.set_title(f'Original Data - {filename}')
    ax.set_xlabel('Index')
    ax.set_ylabel('Value')
    ax.legend()
    fig.savefig(os.path.join(output_directory, f"{filename}_original.png"))
    plt.close(fig)  # release the figure; many files are processed in one run

    # Apply baseline correction
    corrected_data = baseline_correction(data_values, window_size=2, max_sweeps=1001)

    # Find the lowest value among the local negative peaks (troughs)
    result = find_lowest_trough_value(corrected_data)

    if result is None:
        print(f"No troughs found in {filename}")
        continue

    lowest_trough_value, troughs, smoothed_data = result

    # Plot the corrected data with troughs marked
    fig, ax = plt.subplots(figsize=(14, 7))
    ax.plot(corrected_data, label='Corrected Data')
    ax.plot(troughs, corrected_data[troughs], "x", label='Troughs')
    ax.plot(smoothed_data, label='Smoothed Data', linestyle='--')
    ax.set_title(f'Corrected Data with Troughs - {filename}')
    ax.set_xlabel('Index')
    ax.set_ylabel('Value')
    ax.legend()
    fig.savefig(os.path.join(output_directory, f"{filename}_corrected_with_troughs.png"))
    plt.close(fig)

    # Print the lowest trough value
    print(f"Lowest trough value in {filename}: {lowest_trough_value}")


Lowest trough value in 100mV 100Hz_uA.csv: -0.003984774518114011
Lowest trough value in 100mV 100Hz_µA1.csv: -0.003984774518114011
Lowest trough value in 100mV 100Hz_µA2.csv: -0.04234210942702074
Lowest trough value in 100mV 100Hz_µA3.csv: -0.012720408336429623
Lowest trough value in 100mV 100Hz_µA4.csv: -0.0009634749825361928
Lowest trough value in 100mV 100Hz_µA5.csv: -0.008085323124043465
Lowest trough value in 100mV 100Hz_µA6.csv: -0.024989265182252113
Lowest trough value in 100mV 100Hz_µA7.csv: 0.00030325260604489943
Lowest trough value in 100mV 100Hz_µA8.csv: -0.030736607507972177
Lowest trough value in 100mV 150Hz_uA.csv: -0.01493234000156413
Lowest trough value in 100mV 150Hz_µA1.csv: -0.01493234000156413
Lowest trough value in 100mV 150Hz_µA2.csv: -0.05961470823923865
Lowest trough value in 100mV 150Hz_µA3.csv: -0.020407637695631246
Lowest trough value in 100mV 150Hz_µA4.csv: -0.007167846508965261
Lowest trough value in 100mV 150Hz_µA5.csv: -0.0017084354806225265
Lowest trough