# Strain-Stress Prediction:Task 2
Use the baseline data to train a first model and then investigate how accurate its predictions are. Experiment with an arbitrary split of the baseline data into training/test data, and more systematic splitting. Does the accuracy change if you train only on the extreme force values, i.e. minimum, maximum, and some value(s) in between compared to a random split? What does that imply for the generation of training data?

In [42]:
import pandas as pd
from collections import defaultdict
import torch
import torch.nn as nn
import torch.optim as optim
import os
import numpy as np
import random
import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader
from typing import Tuple
from tqdm.auto import tqdm


from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, median_absolute_error

In [43]:
preprocessed_folder = "preprocessed"

In [46]:
X = df_out[['X', 'Y']].values
y = df_out['Seqv'].values.reshape(-1, 1)

In [47]:
# Step 2: Define the neural network model
model = nn.Sequential(
    nn.Linear(2, 196), # changed input to 2
    nn.ReLU(),
    nn.Linear(196, 196),
    nn.ReLU(),
    nn.Linear(196, 196),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(196, 392),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(392, 1) # changed outpu to 1
)

optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.L1Loss()



In [59]:
# Step 3: Split the data into train and test sets

# Case 1: Train on the entire dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Case 2: Train on extreme force values
extreme_values = df_out[(df_out['Seqv'] == df_out['Seqv'].min()) | (df_out['Seqv'] == df_out['Seqv'].max())]
print(extreme_values)
X_extreme = extreme_values[['X', 'Y']].values
y_extreme = extreme_values['Seqv'].values.reshape(-1, 1)

# Case 3: Train on random split
X_random_train, X_random_test, y_random_train, y_random_test = train_test_split(X, y, test_size=0.2, random_state=42)



        X       Y       EPTOeqv       EPELeqv  EPPLeqv     Seqv
9    60.0  100.00  5.374000e-06  5.374000e-06      0.0  1.06800
12   40.0  100.00  5.374000e-06  5.374000e-06      0.0  1.06800
77   50.0  114.30  5.601000e-07  5.601000e-07      0.0  0.08842
220  50.0   85.66  5.601000e-07  5.601000e-07      0.0  0.08842


In [49]:
# Convert data to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.FloatTensor(y_train)

X_test_tensor = torch.FloatTensor(X_test)
y_test_tensor = torch.FloatTensor(y_test)

X_extreme_tensor = torch.FloatTensor(X_extreme)
y_extreme_tensor = torch.FloatTensor(y_extreme)

X_random_train_tensor = torch.FloatTensor(X_random_train)
y_random_train_tensor = torch.FloatTensor(y_random_train)

X_random_test_tensor = torch.FloatTensor(X_random_test)
y_random_test_tensor = torch.FloatTensor(y_random_test)

In [50]:
# Step 4: Training loop
num_epochs = 100

In [51]:
# Function to train the model
def train_model(model, X_train, y_train, optimizer, criterion):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    return loss.item()



In [52]:
# Training for the entire dataset
for epoch in range(num_epochs):
    loss = train_model(model, X_train_tensor, y_train_tensor, optimizer, criterion)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss:.4f}')


Epoch [1/100], Loss: 2.7098
Epoch [2/100], Loss: 5.0246
Epoch [3/100], Loss: 3.1923
Epoch [4/100], Loss: 1.1362
Epoch [5/100], Loss: 1.4684
Epoch [6/100], Loss: 1.5241
Epoch [7/100], Loss: 1.0702
Epoch [8/100], Loss: 0.6155
Epoch [9/100], Loss: 0.5473
Epoch [10/100], Loss: 0.5893
Epoch [11/100], Loss: 0.5050
Epoch [12/100], Loss: 0.3853
Epoch [13/100], Loss: 0.3629
Epoch [14/100], Loss: 0.3608
Epoch [15/100], Loss: 0.2972
Epoch [16/100], Loss: 0.2463
Epoch [17/100], Loss: 0.2259
Epoch [18/100], Loss: 0.2169
Epoch [19/100], Loss: 0.1954
Epoch [20/100], Loss: 0.1837
Epoch [21/100], Loss: 0.1733
Epoch [22/100], Loss: 0.1613
Epoch [23/100], Loss: 0.1521
Epoch [24/100], Loss: 0.1490
Epoch [25/100], Loss: 0.1433
Epoch [26/100], Loss: 0.1324
Epoch [27/100], Loss: 0.1302
Epoch [28/100], Loss: 0.1304
Epoch [29/100], Loss: 0.1230
Epoch [30/100], Loss: 0.1307
Epoch [31/100], Loss: 0.1166
Epoch [32/100], Loss: 0.1149
Epoch [33/100], Loss: 0.1141
Epoch [34/100], Loss: 0.1106
Epoch [35/100], Loss: 0

In [53]:
# Evaluation for the entire dataset
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)
    mae = mean_absolute_error(y_test_tensor.numpy(), predictions.numpy())
    median_error = median_absolute_error(y_test_tensor.numpy(), predictions.numpy())
    min_error = min(abs(y_test_tensor.numpy() - predictions.numpy()))
    max_error = max(abs(y_test_tensor.numpy() - predictions.numpy()))

print("Evaluation for entire dataset:")
print(f"Mean Absolute Error: {mae}")
print(f"Median Absolute Error: {median_error}")
print(f"Min Error: {min_error}")
print(f"Max Error: {max_error}")


Evaluation for entire dataset:
Mean Absolute Error: 0.041159648448228836
Median Absolute Error: 0.014511823654174805
Min Error: [0.00028819]
Max Error: [0.4192473]


In [54]:
# Training for extreme force values
for epoch in range(num_epochs):
    loss = train_model(model, X_extreme_tensor, y_extreme_tensor, optimizer, criterion)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss:.4f}')

Epoch [1/100], Loss: 0.4793
Epoch [2/100], Loss: 0.4753
Epoch [3/100], Loss: 0.5187
Epoch [4/100], Loss: 0.5672
Epoch [5/100], Loss: 0.4446
Epoch [6/100], Loss: 0.4623
Epoch [7/100], Loss: 0.5148
Epoch [8/100], Loss: 0.4799
Epoch [9/100], Loss: 0.4995
Epoch [10/100], Loss: 0.4843
Epoch [11/100], Loss: 0.4841
Epoch [12/100], Loss: 0.5022
Epoch [13/100], Loss: 0.4813
Epoch [14/100], Loss: 0.4787
Epoch [15/100], Loss: 0.5010
Epoch [16/100], Loss: 0.5144
Epoch [17/100], Loss: 0.4957
Epoch [18/100], Loss: 0.4803
Epoch [19/100], Loss: 0.4946
Epoch [20/100], Loss: 0.5592
Epoch [21/100], Loss: 0.4835
Epoch [22/100], Loss: 0.5055
Epoch [23/100], Loss: 0.5086
Epoch [24/100], Loss: 0.5144
Epoch [25/100], Loss: 0.4774
Epoch [26/100], Loss: 0.4511
Epoch [27/100], Loss: 0.5739
Epoch [28/100], Loss: 0.6094
Epoch [29/100], Loss: 0.5363
Epoch [30/100], Loss: 0.4219
Epoch [31/100], Loss: 0.3130
Epoch [32/100], Loss: 0.3962
Epoch [33/100], Loss: 0.3815
Epoch [34/100], Loss: 0.6751
Epoch [35/100], Loss: 0

In [55]:
# Evaluation for extreme force values
model.eval()
with torch.no_grad():
    predictions_extreme = model(X_test_tensor)
    mae_extreme = mean_absolute_error(y_test_tensor.numpy(), predictions_extreme.numpy())
    median_error_extreme = median_absolute_error(y_test_tensor.numpy(), predictions_extreme.numpy())
    min_error_extreme = min(abs(y_test_tensor.numpy() - predictions_extreme.numpy()))
    max_error_extreme = max(abs(y_test_tensor.numpy() - predictions_extreme.numpy()))

print("\nEvaluation for extreme force values:")
print(f"Mean Absolute Error: {mae_extreme}")
print(f"Median Absolute Error: {median_error_extreme}")
print(f"Min Error: {min_error_extreme}")
print(f"Max Error: {max_error_extreme}")




Evaluation for extreme force values:
Mean Absolute Error: 0.1412920504808426
Median Absolute Error: 0.12290212512016296
Min Error: [0.00233743]
Max Error: [0.52977157]


In [56]:
# Training for random split
for epoch in range(num_epochs):
    loss = train_model(model, X_random_train_tensor, y_random_train_tensor, optimizer, criterion)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss:.4f}')

# Evaluation for random split
model.eval()
with torch.no_grad():
    predictions_random = model(X_random_test_tensor)
    mae_random = mean_absolute_error(y_random_test_tensor.numpy(), predictions_random.numpy())
    median_error_random = median_absolute_error(y_random_test_tensor.numpy(), predictions_random.numpy())
    min_error_random = min(abs(y_random_test_tensor.numpy() - predictions_random.numpy()))
    max_error_random = max(abs(y_random_test_tensor.numpy() - predictions_random.numpy()))




Epoch [1/100], Loss: 0.2337
Epoch [2/100], Loss: 0.2327
Epoch [3/100], Loss: 0.1971
Epoch [4/100], Loss: 0.1662
Epoch [5/100], Loss: 0.1589
Epoch [6/100], Loss: 0.1522
Epoch [7/100], Loss: 0.1457
Epoch [8/100], Loss: 0.1242
Epoch [9/100], Loss: 0.1268
Epoch [10/100], Loss: 0.1185
Epoch [11/100], Loss: 0.1193
Epoch [12/100], Loss: 0.1156
Epoch [13/100], Loss: 0.1139
Epoch [14/100], Loss: 0.1065
Epoch [15/100], Loss: 0.0966
Epoch [16/100], Loss: 0.1003
Epoch [17/100], Loss: 0.0907
Epoch [18/100], Loss: 0.0855
Epoch [19/100], Loss: 0.0808
Epoch [20/100], Loss: 0.0816
Epoch [21/100], Loss: 0.0768
Epoch [22/100], Loss: 0.0765
Epoch [23/100], Loss: 0.0749
Epoch [24/100], Loss: 0.0738
Epoch [25/100], Loss: 0.0708
Epoch [26/100], Loss: 0.0690
Epoch [27/100], Loss: 0.0688
Epoch [28/100], Loss: 0.0657
Epoch [29/100], Loss: 0.0627
Epoch [30/100], Loss: 0.0623
Epoch [31/100], Loss: 0.0598
Epoch [32/100], Loss: 0.0610
Epoch [33/100], Loss: 0.0589
Epoch [34/100], Loss: 0.0580
Epoch [35/100], Loss: 0

In [57]:
print("\nEvaluation for random split:")
print(f"Mean Absolute Error: {mae_random}")
print(f"Median Absolute Error: {median_error_random}")
print(f"Min Error: {min_error_random}")
print(f"Max Error: {max_error_random}")


Evaluation for random split:
Mean Absolute Error: 0.04025224223732948
Median Absolute Error: 0.014662891626358032
Min Error: [0.00019494]
Max Error: [0.40988392]


# Observations:

Evaluation for random split:
Mean Absolute Error: 0.04025224223732948
Median Absolute Error: 0.014662891626358032
Min Error: [0.00019494]
Max Error: [0.40988392]


Evaluation for extreme force values:
Mean Absolute Error: 0.1412920504808426
Median Absolute Error: 0.12290212512016296
Min Error: [0.00233743]
Max Error: [0.52977157]

