In [None]:
import os
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np


# --- Configuration ----------------------------------------------------------
folder_path = './DATA/...'
n_components = 8  # n of PCA
n_points = 50  # number of (strain, stress) rows every curve file must contain

# --- Load every stress-strain curve found in `folder_path` -------------------
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make row indices into `all_data` refer to the same file on every run.
csv_names = sorted(name for name in os.listdir(folder_path) if name.endswith('.csv'))
if not csv_names:
    raise FileNotFoundError(f"No .csv files found in {folder_path!r}")

all_data = []
for filename in csv_names:
    filepath = os.path.join(folder_path, filename)
    df = pd.read_csv(filepath)
    x = df['Strain (%)'].values
    y = df['Stress (MPa)'].values
    curve = np.column_stack((x, y))
    # The flattening below (and the reshapes in later cells) only make sense
    # when every curve has exactly `n_points` rows; fail loudly otherwise
    # instead of producing a ragged or silently scrambled array.
    if curve.shape != (n_points, 2):
        raise ValueError(
            f"{filename}: expected {n_points} rows of (strain, stress), "
            f"got array of shape {curve.shape}"
        )
    all_data.append(curve)

all_data = np.array(all_data)  # shape: (n_files, n_points, 2)
# Flatten each curve to one feature vector with strain/stress interleaved:
# [x0, y0, x1, y1, ...]
all_data = all_data.reshape(-1, 2 * n_points)  # shape: (n_files, 2 * n_points)

In [None]:
import matplotlib.pyplot as plt
import numpy as np


# Overlay PCA reconstructions of a single curve for several component counts.

# Component counts to compare. This list has to be defined in this cell:
# it was previously commented out, so the loop below raised NameError on a
# fresh kernel (or silently reused a list left over from a later cell).
n_components_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 15, 20]

# Row of `all_data` to visualise (other rows inspected earlier: 15, 138).
sample_index = 99
# Each row stores 50 interleaved (strain, stress) pairs.
original_curve = all_data[sample_index].reshape(50, 2)

plt.figure(figsize=(10, 6))
plt.plot(original_curve[:, 0], original_curve[:, 1], label='Original', linewidth=2,alpha=0.5)

for n_components in n_components_list:
    # Fit on the full data set, project to `n_components` dimensions and map
    # back to the original 100-dimensional space.
    pca = PCA(n_components=n_components)
    all_data_pca = pca.fit_transform(all_data)

    all_data_reconstructed = pca.inverse_transform(all_data_pca)
    reconstructed_curve = all_data_reconstructed[sample_index].reshape(50, 2)

    plt.plot(reconstructed_curve[:, 0], reconstructed_curve[:, 1], label=f'Reconstructed (n={n_components})',linewidth=3,alpha=0.5)

ax = plt.gca()  # Get the current axis
plt.xlabel('Strain (%)', fontsize=24, fontweight='bold')
plt.ylabel('Stress (MPa)', fontsize=24, fontweight='bold')
plt.xticks(fontsize=20, fontweight='bold')
plt.yticks(fontsize=20, fontweight='bold')
# Bold and lengthen the tick marks
plt.tick_params(axis='x', which='major', length=10, width=2)  # X-axis
plt.tick_params(axis='y', which='major', length=10, width=2)  # Y-axis

legend = plt.legend(fontsize=12, edgecolor='black', loc='upper center', bbox_to_anchor=(0.45, 0.5), fancybox=False, shadow=False, ncol=2)

for text in legend.get_texts():
    text.set_fontweight('bold')
for axis in ['top', 'bottom', 'left', 'right']:
    ax.spines[axis].set_linewidth(2)  # Increase the width of the frame
plt.tight_layout()

# Uncomment to export the figure:
#plt.savefig('Figure2_reconstructed_different_n.png', format='png', dpi=300)
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error


# Overall reconstruction MSE as a function of the number of PCA components.

n_components_list = [2, 3, 4, 5, 6, 7, 8, 10, 12, 15, 18, 20]
mse_list = []

for n_components in n_components_list:
    # Project onto `n_components` axes, then map back to the original space.
    pca = PCA(n_components=n_components)
    all_data_pca = pca.fit_transform(all_data)
    all_data_reconstructed = pca.inverse_transform(all_data_pca)

    # Error is averaged over all samples and all 100 features.
    mse = mean_squared_error(all_data, all_data_reconstructed)
    mse_list.append(mse)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(n_components_list, mse_list, marker='o', linestyle='-', color='blue')

ax.set_xlabel('Number of Components', fontsize=24, fontweight='bold')
ax.set_ylabel('Mean Squared Error', fontsize=24, fontweight='bold')

# Tick label fonts, then longer/thicker tick marks on both axes.
plt.xticks(fontsize=20, fontweight='bold')
plt.yticks(fontsize=20, fontweight='bold')
ax.tick_params(axis='both', which='major', length=10, width=2)

# One tick per evaluated component count.
ax.set_xticks(n_components_list)
ax.grid(True)

# Thicken the frame around the plot.
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_linewidth(2)

plt.tight_layout()
plt.show()


In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import PCA


def _plot_rmse_curve(component_counts, rmse_values, ylabel):
    """Draw and show one RMSE-versus-component-count line plot.

    Parameters
    ----------
    component_counts : sequence of int
        Numbers of PCA components; used as x values and as x tick positions.
    rmse_values : sequence of float
        RMSE obtained for each entry of ``component_counts``.
    ylabel : str
        Label for the y axis.
    """
    plt.figure(figsize=(10, 6))
    ax = plt.gca()
    plt.plot(component_counts, rmse_values, marker='o', linestyle='-', color='blue')

    plt.xlabel('Number of Components', fontsize=24, fontweight='bold')
    plt.ylabel(ylabel, fontsize=24, fontweight='bold')
    plt.xticks(fontsize=20, fontweight='bold')
    plt.yticks(fontsize=20, fontweight='bold')
    # Bold and lengthen the tick marks
    plt.tick_params(axis='x', which='major', length=10, width=2)  # X-axis
    plt.tick_params(axis='y', which='major', length=10, width=2)  # Y-axis

    # One tick per evaluated component count.
    plt.xticks(component_counts)
    for axis in ['top', 'bottom', 'left', 'right']:
        ax.spines[axis].set_linewidth(2)  # Increase the width of the frame
    plt.tight_layout()
    plt.show()


# Reconstruction RMSE computed separately for the strain and stress columns.
rmse_x_list = []
rmse_y_list = []
n_components_list = [1,2,3, 4, 5, 6, 7, 8, 10, 12,14,16, 18, 20]

# View of the data as (n_files, 50, 2); column 0 is strain, column 1 is stress.
# This does not depend on the component count, so it is computed once.
all_data_reshaped = all_data.reshape(-1, 50, 2)

for n_components in n_components_list:
    pca = PCA(n_components=n_components)
    all_data_pca = pca.fit_transform(all_data)

    all_data_reconstructed = pca.inverse_transform(all_data_pca)
    all_data_reconstructed_reshaped = all_data_reconstructed.reshape(-1, 50, 2)

    mse_x = mean_squared_error(all_data_reshaped[:, :, 0], all_data_reconstructed_reshaped[:, :, 0])
    mse_y = mean_squared_error(all_data_reshaped[:, :, 1], all_data_reconstructed_reshaped[:, :, 1])

    rmse_x = np.sqrt(mse_x)
    rmse_x_list.append(rmse_x)

    rmse_y = np.sqrt(mse_y)
    rmse_y_list.append(rmse_y)

_plot_rmse_curve(n_components_list, rmse_x_list, 'RMSE on Strain axis')
_plot_rmse_curve(n_components_list, rmse_y_list, 'RMSE on Stress axis')


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA


# Total explained variance captured by the first n principal components.

n_components_list = [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20]
cumulative_explained_variance_list = []

for n_components in n_components_list:
    pca = PCA(n_components=n_components)
    pca.fit(all_data)
    explained_variance_ratio = pca.explained_variance_ratio_
    # Last entry of the running sum = variance fraction explained by all
    # `n_components` components together.
    cumulative_explained_variance = np.cumsum(explained_variance_ratio)[-1]
    cumulative_explained_variance_list.append(cumulative_explained_variance)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    n_components_list,
    cumulative_explained_variance_list,
    marker='o',
    linestyle='-',
    color='blue',
    linewidth=3,
)

ax.set_xlabel('Number of Principal Components', fontsize=24, fontweight='bold')
ax.set_ylabel('Cumulative\nExplained Variance', fontsize=24, fontweight='bold')
# Zoom the y axis onto the 0.95 - 1.005 band where the curve lives.
ax.set_ylim(bottom=0.95, top=1.005)
# One tick per evaluated component count.
ax.set_xticks(n_components_list)

# Tick label fonts, then longer/thicker tick marks on both axes.
plt.xticks(fontsize=20, fontweight='bold')
plt.yticks(fontsize=20, fontweight='bold')
ax.tick_params(axis='both', which='major', length=10, width=2)

# Thicken the frame around the plot.
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_linewidth(2)

plt.tight_layout()
# Uncomment to export the figure:
#plt.savefig('Figure2_PCA_Cumulative_Explained Variance.png', format='png', dpi=300)

plt.show()
cumulative_explained_variance_list


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA

# Show how scaling each principal-component score of one high-strain sample
# changes the reconstructed stress-strain curve.

# Use PCA with n=8 components
n_components = 8
pca = PCA(n_components=n_components)
all_data_pca = pca.fit_transform(all_data)

STRAIN_THRESHOLD = 150  # a sample qualifies if its maximum strain exceeds this
RANDOM_SEED = 0         # fixed for a reproducible figure; None = new sample each run

# Indices of samples whose maximum strain is above the threshold.
# Rows of `all_data` hold 50 interleaved (strain, stress) pairs; column 0 of the
# reshaped view is strain.
max_strain_per_sample = all_data.reshape(-1, 50, 2)[:, :, 0].max(axis=1)
high_strain_indices = np.flatnonzero(max_strain_per_sample > STRAIN_THRESHOLD).tolist()

# Everything below needs a selected sample, so stop with a clear message rather
# than hitting a NameError (or reusing a stale `random_index`) further down.
if not high_strain_indices:
    raise ValueError(
        f"No sample has a maximum strain above {STRAIN_THRESHOLD}; "
        "lower STRAIN_THRESHOLD or check the loaded data."
    )

# Randomly choose one of these samples
rng = np.random.default_rng(RANDOM_SEED)
random_index = int(rng.choice(high_strain_indices))

# PCA scores and original curve of the chosen sample
sample_pca_values = all_data_pca[random_index]
original_pca_values = np.copy(sample_pca_values)
original_curve = all_data[random_index].reshape(50, 2)

# Percentages by which a single component score is increased
percentages = [0.1, 0.5, 1, 2, 10, 15, 20]

# Subplot grid: 3 columns, as many rows as needed for all components
n_cols = 3
n_rows = int(np.ceil(n_components / n_cols))

plt.figure(figsize=(15, 10))

# Process each principal component
for i in range(n_components):
    plt.subplot(n_rows, n_cols, i+1)

    # Plot the original data
    plt.plot(original_curve[:, 0], original_curve[:, 1], label='Original', linewidth=2)

    # Process each percentage
    for percentage in percentages:
        modified_pca_values = np.copy(original_pca_values)

        # Scale only the score of the current principal component
        modified_pca_values[i] *= 1 + (percentage / 100)

        # Reconstruct the data
        modified_reconstructed_data = pca.inverse_transform(modified_pca_values)
        modified_reconstructed_curve = modified_reconstructed_data.reshape(50, 2)

        # Plot the modified reconstructed data
        plt.plot(modified_reconstructed_curve[:, 0], modified_reconstructed_curve[:, 1], label=f'Modified (PC{i+1} + {percentage}%)', linestyle='--')

    plt.title(f'Effect of Modifying PC{i+1}')
    plt.xlabel('Strain (%)')
    plt.ylabel('Stress (MPa)')
    plt.legend()

plt.tight_layout()
plt.show()
