In [None]:
import os
import re


# Root folder whose immediate subfolders are scanned by the loop below
main_folder_path = "5-51 data"


def check_filename(file_name):
    """Report whether a file name contains any word outside the allowed keywords.

    The extension is dropped and the remaining stem is lower-cased; digits, dots,
    commas and hyphens are then removed. Every word left over must be one of
    "pick", "represent" or "pickrepresent".

    Returns:
        True if at least one remaining word is not an allowed keyword,
        False otherwise (including when no words remain at all).
    """
    allowed_words = ("pick", "represent", "pickrepresent")
    stem = os.path.splitext(file_name)[0].lower()
    stripped = re.sub(r'[0-9\.,-]', '', stem)
    return any(token not in allowed_words for token in re.findall(r'\b\w+\b', stripped))


# Visit each immediate subfolder and report every file whose name fails check_filename
for folder_name in os.listdir(main_folder_path):
    subfolder_path = os.path.join(main_folder_path, folder_name)

    # Plain files sitting directly in the main folder are ignored
    if not os.path.isdir(subfolder_path):
        continue

    print(f"check folder: {folder_name}")

    for file_name in os.listdir(subfolder_path):
        if not check_filename(file_name):
            continue
        print(f"bad file: {file_name} in folder {folder_name}")


In [None]:
import os
import shutil

def copy_files_with_keyword(src_folder, dst_folder, keyword):
    """Copy files whose names contain ``keyword`` from one tree into another.

    ``src_folder`` is walked recursively. Each matching file is copied to the
    same relative location under ``dst_folder``. Destination directories are
    created only when a matching file needs them. The match is a plain,
    case-sensitive substring test on the file name.
    """
    for current_dir, _subdirs, names in os.walk(src_folder):
        # Directory under dst_folder that mirrors the directory being walked
        target_dir = os.path.join(dst_folder, os.path.relpath(current_dir, src_folder))

        for name in names:
            if keyword not in name:
                continue

            os.makedirs(target_dir, exist_ok=True)
            shutil.copy(os.path.join(current_dir, name), os.path.join(target_dir, name))

# Copy every file whose name contains the keyword from the source tree into a parallel tree
src_folder = "5-51 data"
dst_folder = "5-51 data_sort"
keyword = "pick"

copy_files_with_keyword(src_folder=src_folder, dst_folder=dst_folder, keyword=keyword)


In [None]:
import pandas as pd
import os

# Path to the main folder containing subfolders
main_folder_path = "5-51 data_sort"

# Read the CSV file containing the sample information
sample_data = pd.read_csv("2022-7-21.csv")

# Per-file frames are collected here and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame inside the
# loop re-copies every accumulated row on each iteration.
combined_frames = []

# Iterate through all subfolders in the main folder
for folder_name in os.listdir(main_folder_path):
    subfolder_path = os.path.join(main_folder_path, folder_name)
    if os.path.isdir(subfolder_path):
        print(folder_name)

        # List the folder once; the listing does not change between file numbers
        folder_files = os.listdir(subfolder_path)

        # Look for one log file per file number 1 through 5
        for file_number in range(1, 6):
            # Find the file that starts with the specific file number
            file_list = [f for f in folder_files if f.startswith(f"{file_number}-")]

            if file_list:  # Check if the file list is not empty
                # Only the first match is used
                log_file_name = file_list[0]
                log_file_path = os.path.join(subfolder_path, log_file_name)

                # Remove the file extension before splitting
                log_file_name_without_extension = os.path.splitext(log_file_name)[0]

                # The second and third '-'-separated fields of the file name are
                # parsed as the cross-sectional area and the stretch length
                _, cross_section_area, stretch_length, *_ = log_file_name_without_extension.split('-')
                cross_section_area = float(cross_section_area)
                stretch_length = float(stretch_length)

                # Read the log file (tab-separated, first 5 lines skipped)
                log_data = pd.read_csv(log_file_path, skiprows=5, sep="\t")

                # Calculate stress and strain
                log_data['Stress (MPa)'] = log_data['Load'] / cross_section_area
                log_data['Strain (%)'] = (log_data['Travel'] / stretch_length) * 100

                # Find the sample row whose 'sample' value equals the folder name
                matches = sample_data[sample_data['sample'] == int(folder_name)]
                if matches.empty:
                    raise IndexError(
                        f"no row with sample == {folder_name} in the sample information CSV"
                    )

                # Repeat the sample row once per curve point and place it next to
                # the curve. Both frames get a fresh 0..n-1 index so rows pair up by
                # position; the earlier Series-concat approach produced an index of
                # one repeated label, which broke that pairing on assignment.
                curve = log_data[['Stress (MPa)', 'Strain (%)']].reset_index(drop=True)
                sample_rows = matches.iloc[[0] * len(curve)].reset_index(drop=True)
                combined_data = pd.concat([sample_rows, curve], axis=1)

                combined_frames.append(combined_data)

# Build the final DataFrame in one step (empty if no log file was found)
final_data = pd.concat(combined_frames, ignore_index=True) if combined_frames else pd.DataFrame()

# Save the final DataFrame to a CSV file
#final_data.to_csv("final_data.csv", index=False)


In [None]:
# Display the combined sample-information / stress-strain table assembled in the preceding cell
final_data

In [None]:
import pandas as pd
import os

# Source tree of picked log files, and the folder that receives one CSV per log file
main_folder_path = "5-51 data_sort"
output_folder_path = "processed_data_onlypicked"
os.makedirs(output_folder_path, exist_ok=True)

for folder_name in os.listdir(main_folder_path):
    subfolder_path = os.path.join(main_folder_path, folder_name)

    # Only subfolders are processed
    if not os.path.isdir(subfolder_path):
        continue

    print(f"Processing folder: {folder_name}")

    # Look for one log file per leading number, for numbers 1 through 9
    for file_number in range(1, 10):
        file_list = [f for f in os.listdir(subfolder_path) if f.startswith(f"{file_number}-")]

        if not file_list:
            continue

        # Only the first match is processed
        log_file_name = file_list[0]
        log_file_path = os.path.join(subfolder_path, log_file_name)
        print(f"Processing file: {log_file_name}")  # Print the file being processed

        # The second and third '-'-separated fields of the extension-less name
        # are parsed as the cross-sectional area and the stretch length
        log_file_name_without_extension = os.path.splitext(log_file_name)[0]
        _, area_text, length_text, *_ = log_file_name_without_extension.split('-')
        cross_section_area = float(area_text)
        stretch_length = float(length_text)

        # Tab-separated log; the first 5 lines are skipped
        log_data = pd.read_csv(log_file_path, skiprows=5, sep="\t")

        # Stress = Load / area; strain = Travel / length, expressed in percent
        stress_strain = pd.DataFrame({
            'Stress (MPa)': log_data['Load'] / cross_section_area,
            'Strain (%)': (log_data['Travel'] / stretch_length) * 100,
        })

        # Output is named "<folder>_<file number>.csv"
        output_file_name = f"{folder_name}_{file_number}.csv"
        output_file_path = os.path.join(output_folder_path, output_file_name)
        stress_strain.to_csv(output_file_path, index=False)

print("Processing complete!")


In [None]:
import matplotlib.pyplot as plt

# Folder holding the processed CSV files; plots are written next to them
processed_folder_path = "processed_data_onlypicked"

for file_name in os.listdir(processed_folder_path):
    # Skip anything that is not a CSV
    if not file_name.endswith('.csv'):
        continue

    file_path = os.path.join(processed_folder_path, file_name)
    base_name = file_name.split('.')[0]

    data = pd.read_csv(file_path)

    # Drop the rows in which stress and strain are both exactly 0
    both_zero = (data['Stress (MPa)'] == 0) & (data['Strain (%)'] == 0)
    data = data[~both_zero]

    # Draw the stress-strain curve
    plt.figure(figsize=(10, 6))
    plt.plot(data['Strain (%)'], data['Stress (MPa)'], label="Stress-Strain Curve")
    plt.xlabel("Strain (%)")
    plt.ylabel("Stress (MPa)")
    plt.title(f"Stress-Strain Curve for {base_name}")
    plt.grid(True)
    plt.legend()

    # Write the figure as a PNG with the same base name, then release it
    image_file_name = base_name + '.png'
    image_file_path = os.path.join(processed_folder_path, image_file_name)
    plt.savefig(image_file_path)
    plt.close()

    # Overwrite the CSV with the cleaned rows
    data.to_csv(file_path, index=False)

print("Cleaning and plotting complete!")


In [None]:
import pandas as pd
import numpy as np
import os

# Path to the folder containing processed CSV files
processed_folder_path = "processed_data_onlypicked"

# Path to the folder where the final CSV files will be saved
final_folder_path = "final_data_onlypicked"
os.makedirs(final_folder_path, exist_ok=True) # Ensure the folder exists

# Number of interpolated points per curve. One extra zero-stress point is
# appended afterwards, so each output file has num_interpolation_points + 1 rows.
num_interpolation_points = 49

# Minimum usable row count: the last row is excluded from interpolation, and the
# remaining rows must contain at least one strain step to compute a mean spacing.
min_rows = 3

# Iterate through all CSV files in the processed folder
for file_name in os.listdir(processed_folder_path):
    if file_name.endswith('.csv'):
        file_path = os.path.join(processed_folder_path, file_name)

        # Read the CSV file
        data = pd.read_csv(file_path)

        # Too-short files would raise on the slicing below or produce NaN strain
        if len(data) < min_rows:
            print(f"Skipping {file_name}: {len(data)} rows, need at least {min_rows}")
            continue

        # Curve without its last recorded point
        strain = data['Strain (%)'].iloc[:-1]
        stress = data['Stress (MPa)'].iloc[:-1]

        # Mean strain spacing between adjacent points (excluding the last point)
        unit_length = np.mean(np.diff(strain))

        # Resample the curve onto evenly spaced strain values.
        # NOTE: np.interp does not check that the strain values are increasing;
        # this code assumes they are.
        strain_interpolated = np.linspace(strain.iloc[0], strain.iloc[-1], num_interpolation_points)
        stress_interpolated = np.interp(strain_interpolated, strain, stress)

        # Append a final point one mean spacing further along, with 0 stress
        final_strain = np.append(strain_interpolated, strain_interpolated[-1] + unit_length)
        final_stress = np.append(stress_interpolated, 0)

        # Create a DataFrame with the final data
        final_data = pd.DataFrame({
            'Strain (%)': final_strain,
            'Stress (MPa)': final_stress
        })

        # Create a new file name and path for the final folder
        new_file_name = "final_" + file_name
        final_file_path = os.path.join(final_folder_path, new_file_name)

        # Save the final CSV file
        final_data.to_csv(final_file_path, index=False)

print("Interpolation and data modification complete!")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# Folder holding the final CSV files; plots are written next to them
final_folder_path = "final_data_onlypicked"

for file_name in os.listdir(final_folder_path):
    # Skip anything that is not a CSV
    if not file_name.endswith('.csv'):
        continue

    file_path = os.path.join(final_folder_path, file_name)
    base_name = file_name.split('.')[0]

    data = pd.read_csv(file_path)

    # Draw the stress-strain curve
    plt.figure(figsize=(10, 6))
    plt.plot(data['Strain (%)'], data['Stress (MPa)'], label="Stress-Strain Curve")
    plt.xlabel("Strain (%)")
    plt.ylabel("Stress (MPa)")
    plt.title(f"Stress-Strain Curve for {base_name}")
    plt.grid(True)
    plt.legend()

    # Write the figure as a PNG with the same base name, then release it
    image_file_name = base_name + '.png'
    image_file_path = os.path.join(final_folder_path, image_file_name)
    plt.savefig(image_file_path)
    plt.close()

print("Plotting and saving complete!")


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os

# Read the raw material ratio data
ratio_data_path = "2022-7-21.csv"
ratio_data = pd.read_csv(ratio_data_path)
# Select only the required columns
ratio_data = ratio_data[['sample', 'R1(HA)', 'R2(IA)', 'R3(NVP)', 'R4(AA)', 'R5(HEAA)', 'R6(IBOA)']]

# Path to the folder containing the final CSV files
final_folder_path = "final_data_onlypicked"

# Lists to store the input and output data
inputs = []
outputs = []

# Iterate through all CSV files in the final folder.
# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# X and y reproducible across runs and machines.
for file_name in sorted(os.listdir(final_folder_path)):
    if file_name.startswith('final_') and file_name.endswith('.csv'):
        print(file_name)
        file_path = os.path.join(final_folder_path, file_name)

        # Read the CSV file for stress-strain curve
        stress_strain_curve_data = pd.read_csv(file_path)

        # The sample number is the second '_'-separated field of the file name
        sample_number = int(file_name.split('_')[1])

        # Find the corresponding raw material ratios (every column after 'sample')
        matching_rows = ratio_data[ratio_data['sample'] == sample_number]
        if matching_rows.empty:
            raise IndexError(
                f"sample {sample_number} (from {file_name}) not found in {ratio_data_path}"
            )
        R_values = matching_rows.iloc[0, 1:].values

        # Flatten the curve row by row: stress, strain, stress, strain, ...
        stress_strain_curve = stress_strain_curve_data[['Stress (MPa)', 'Strain (%)']].values.flatten()

        # Append to the input and output lists
        inputs.append(R_values)
        outputs.append(stress_strain_curve)

# Convert to numpy arrays and ensure float type
X = np.array(inputs, dtype=float)
y = np.array(outputs, dtype=float)

# Split into training and testing sets
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Data preparation complete!")
