In [None]:
import pandas as pd
import re
import os
import numpy as np
from scipy.interpolate import interp1d
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw
import matplotlib.pyplot as plt

# Function to compute e based on the x-coordinate (Strain) at the rupture point
def compute_e_based_on_rupture_standardized(data_series):
    """Return the standardized (z-score) rupture strain of each curve.

    The rupture strain of a curve is taken to be the last value of its
    ``'Strain (%)'`` column. Each rupture strain is standardized against the
    mean and population standard deviation (``ddof=0``) of all rupture
    strains in ``data_series``.

    Args:
        data_series: Iterable of DataFrames, each with a non-empty
            ``'Strain (%)'`` column.

    Returns:
        A NumPy array with one e value per DataFrame, in input order. The
        array is empty for empty input and all zeros when the rupture strains
        have no spread (e.g. a single curve).

    Raises:
        KeyError: If a DataFrame has no ``'Strain (%)'`` column.
        IndexError: If a ``'Strain (%)'`` column is empty.
    """
    rupture_strains = np.array([df['Strain (%)'].iloc[-1] for df in data_series])

    # Nothing to standardize; avoids NumPy's "mean of empty slice" warnings.
    if rupture_strains.size == 0:
        return rupture_strains

    mean_rupture_strain = np.mean(rupture_strains)
    std_rupture_strain = np.std(rupture_strains)

    # With zero spread the z-score would be 0/0 = NaN. Every value sits
    # exactly on the mean, so report a deviation of 0 instead.
    if std_rupture_strain == 0:
        return np.zeros(rupture_strains.shape, dtype=float)

    return (rupture_strains - mean_rupture_strain) / std_rupture_strain


# Build a table with one row per data file: the file name, its sample number,
# the sample's formula composition, and the standardized rupture strain
# (e value) of that file within its prefix group.

# Composition columns copied verbatim from the formula table, and the column
# order of the output table.
formula_columns = ['R1(HA)', 'R2(IA)', 'R3(NVP)', 'R4(AA)', 'R5(HEAA)', 'R6(IBOA)']
result_columns = ['filename', 'sample', *formula_columns, 'e_value']

# Load the formula CSV file into a DataFrame
formula_df = pd.read_csv("2022-7-21.csv")

# NOTE(review): `folder`, `get_prefixes` and `read_data_from_folder` are not
# defined in this section; they are expected to be defined earlier (for
# example in a previous notebook cell). Confirm before running standalone.
#folder = './final_data'
prefixes = get_prefixes(folder)

all_files_in_folder = set(os.listdir(folder))

# Rows are collected in a plain list and converted to a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0, and it copied the whole
# frame on every call.
result_rows = []

# Loop through each prefix
for prefix in prefixes:
    data_series = read_data_from_folder(folder, prefix)

    if not data_series:
        print(f"No data found for prefix {prefix}. Skipping.")
        continue

    # The sample number is the first run of digits in the prefix. It and the
    # formula row depend only on the prefix, so look them up once per prefix.
    sample_match = re.search(r'\d+', prefix)
    if sample_match is None:
        print(f"No sample number found in prefix {prefix}. Skipping.")
        continue
    sample_number = sample_match.group()

    matching_formulas = formula_df[formula_df['sample'] == int(sample_number)]
    if matching_formulas.empty:
        print(f"No formula found for sample {sample_number}. Skipping.")
        continue
    formula_row = matching_formulas.iloc[0]

    # Compute e values based on rupture point Strain
    e_values = compute_e_based_on_rupture_standardized(data_series)

    # For each data file in this prefix.
    # NOTE(review): this assumes the i-th entry of data_series was read from
    # "<prefix><i+1>.csv"; verify against read_data_from_folder.
    for idx, e_value in enumerate(e_values):
        filename_to_check = f"{prefix}{idx+1}.csv"

        if filename_to_check not in all_files_in_folder:
            print(f"File {filename_to_check} does not exist. Skipping.")
            continue

        new_row = {'filename': filename_to_check, 'sample': sample_number}
        new_row.update({column: formula_row[column] for column in formula_columns})
        new_row['e_value'] = e_value
        result_rows.append(new_row)

# Passing `columns` keeps the header and column order even when no rows were
# collected.
results_df = pd.DataFrame(result_rows, columns=result_columns)

# Save the DataFrame to a new CSV file
results_df.to_csv("new_results_with_e_values.csv", index=False)
