In [2]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Locate the lowest point of a trace with a 3-point (strict local minimum) test
def find_lowest_point(data):
    """Return ``(index, value)`` of the lowest point picked from *data*.

    The first sample is the starting candidate. Each interior sample (the
    first and last positions are never tested) replaces the candidate only
    when it is strictly smaller than both of its neighbours *and* strictly
    smaller than the current candidate. Consequently index 0 is returned
    whenever no interior strict local minimum lies below ``data[0]``, and
    ties keep the earliest candidate.

    ``data`` must support ``len()`` and integer indexing; ``data[0]`` is read
    unconditionally, so empty input is not supported.
    """
    count = len(data)
    best_index, best_value = 0, data[0]

    for idx in range(1, count - 1):
        candidate = data[idx]

        # Only strict local minima qualify as peaks
        if not (candidate < data[idx - 1] and candidate < data[idx + 1]):
            continue

        # Keep the deepest qualifying point seen so far
        if candidate < best_value:
            best_index, best_value = idx, candidate

    return best_index, best_value

# Function to interpolate the baseline
def interpolate_baseline(
    data,
    *,
    long_trace_threshold=210,
    long_trace_windows=(150, 50),
    short_trace_windows=(20, 20),
):
    """Return a straight-line baseline with one value per sample of *data*.

    The line starts (index 0) at the mean of a leading window of samples and
    ends (last index) at the mean of a trailing window, with a constant slope
    in between.

    Parameters
    ----------
    data : sequence or numpy array of numbers
        The trace to build a baseline for.
    long_trace_threshold : int, keyword-only
        Traces with strictly more samples than this use
        ``long_trace_windows``; all others use ``short_trace_windows``.
    long_trace_windows, short_trace_windows : (int, int), keyword-only
        ``(leading, trailing)`` window sizes, in samples, used for the two
        segment means. Both sizes must be at least 1.

    Returns
    -------
    numpy.ndarray
        Float array of length ``len(data)``. Empty input yields an empty
        array; a single sample yields a one-element array holding that
        sample's value.

    Raises
    ------
    ValueError
        If a selected window size is smaller than 1.
    """
    n_points = len(data)

    if n_points > long_trace_threshold:
        left_count, right_count = long_trace_windows
    else:
        left_count, right_count = short_trace_windows

    # A trailing window of 0 would make ``data[-0:]`` select the whole trace,
    # and negative sizes would silently select the wrong end.
    if left_count < 1 or right_count < 1:
        raise ValueError("Baseline window sizes must be at least 1.")

    if n_points == 0:
        # Nothing to interpolate; avoid taking the mean of an empty slice.
        return np.full(0, np.nan)

    left_segment_mean = np.mean(data[:left_count])
    right_segment_mean = np.mean(data[-right_count:])

    if n_points == 1:
        # The slope would be 0 / 0 here; the baseline is just the single value.
        return np.full(1, left_segment_mean, dtype=float)

    baseline_slope = (right_segment_mean - left_segment_mean) / (n_points - 1)
    baseline_intercept = left_segment_mean

    baseline = baseline_intercept + baseline_slope * np.arange(n_points)

    return baseline

# Folder containing the CSV files
folder_path = r"C:\Users\kulma\OneDrive\Documents\School\MASc Year 1\Surface Profilometry Analyzer\Surface-Profilometry-Analyzer\Peak picking algorithm\extracted_csv_files\all_output_files"

# Name of the summary file written at the end of the run. It is saved into the
# same folder that is scanned for inputs, so it has to be excluded from the
# scan; otherwise a second run would try to process it as a data file and
# fail the one-column check below.
results_csv_name = "normalized_values.csv"

# Initialize an empty list to store the results
results = []

# Loop through each input file in the folder. os.listdir() returns names in
# arbitrary order, so sort them to keep the processing order (and the row
# order of the summary) reproducible between runs.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".csv") or filename == results_csv_name:
        continue

    file_path = os.path.join(folder_path, filename)
    # NOTE(review): read_csv treats the first row as a header by default;
    # confirm that every input file actually starts with a header row.
    data_frame = pd.read_csv(file_path)

    # Ensure that the CSV has exactly one column
    if data_frame.shape[1] != 1:
        raise ValueError(f"The CSV file {filename} must contain exactly one column.")

    # Convert data to numpy array and ensure all values are numeric
    data = pd.to_numeric(data_frame.iloc[:, 0], errors='coerce').values

    # Remove NaN values that may have been introduced by non-numeric entries
    data = data[~np.isnan(data)]

    # Debugging print statements
    print(f"Processing file: {filename}")
    print(f"Data loaded from CSV (first 10 values): {data[:10]}")
    print(f"Number of data points: {len(data)}")

    # A file without any numeric rows cannot be analysed (the peak picker
    # reads data[0]); report it and carry on with the remaining files.
    if len(data) == 0:
        print(f"Skipping {filename}: no numeric data points found.")
        continue

    # Find the lowest point
    lowest_index, lowest_value = find_lowest_point(data)

    # Interpolate the baseline
    baseline = interpolate_baseline(data)

    # Calculate the normalized value of the lowest peak
    # (its offset from the baseline at the same index)
    normalized_value = lowest_value - baseline[lowest_index]

    # Append the result to the list
    results.append((filename, normalized_value))

    # Plotting the data with the interpolated baseline
    plt.figure(figsize=(10, 6))
    plt.plot(data, marker='o', linestyle='-', color='b', label='Data')
    plt.plot(baseline, linestyle='--', color='r', label='Baseline')
    plt.scatter(lowest_index, lowest_value, color='g', s=100, label='Lowest Point')
    # Vertical marker from the lowest point up/down to the baseline
    plt.vlines(lowest_index, lowest_value, baseline[lowest_index], color='r', linestyle='--')

    plt.title(f'Normalized Peak Relative to Interpolated Baseline - {filename}')
    plt.xlabel('Index')
    plt.ylabel('Value')
    plt.legend()

    plt.grid(True)
    plt.tight_layout()

    # Save the plot as a PNG file next to the input CSV
    plot_filename = os.path.join(folder_path, f"{os.path.splitext(filename)[0]}_plot.png")
    plt.savefig(plot_filename)
    plt.close()  # Ensure the plot is closed properly

    print(f"Lowest value found: {lowest_value}")
    print(f"Normalized value of the lowest peak: {normalized_value}")

# Create a DataFrame from the results
results_df = pd.DataFrame(results, columns=['Filename', 'Normalized Value'])
results_df.set_index('Filename', inplace=True)

# Save the results to a CSV file
results_csv = os.path.join(folder_path, results_csv_name)
results_df.to_csv(results_csv)

# Print the results
print("\nNormalized values of the lowest peaks:")
print(results_df)


Processing file: cleaned_100mV 100Hz_µA_1.csv
Data loaded from CSV (first 10 values): [-3.8497591  -3.74247074 -3.69764805 -3.65807056 -3.61563206 -3.57557774
 -3.52884769 -3.5083437  -3.46161366 -3.42680454]
Number of data points: 504
Lowest value found: -3.84975910186768
Normalized value of the lowest peak: -1.840893840789799
Processing file: cleaned_100mV 100Hz_µA_2.csv
Data loaded from CSV (first 10 values): [-2.68293381 -2.64764786 -2.63143539 -2.6199913  -2.60568619 -2.59567261
 -2.582798   -2.57564545 -2.56563187 -2.55275726]
Number of data points: 504
Lowest value found: -2.68293380737305
Normalized value of the lowest peak: -0.6217972755432153
Processing file: cleaned_100mV 100Hz_µA_3.csv
Data loaded from CSV (first 10 values): [-1.11461163 -1.0860014  -1.07789516 -1.06597424 -1.06072903 -1.05453014
 -1.04213238 -1.03688717 -1.0244894  -1.00970745]
Number of data points: 504
Lowest value found: -1.11461162567139
Normalized value of the lowest peak: -0.47837044397990247
Process