In [1]:
import numpy as np
import pandas as pd
from joblib import dump, load
from tqdm import tqdm

from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

In [2]:
def shuffle_arrays(arr1, arr2):
    assert len(arr1) == len(arr2)
    permutation = np.random.permutation(len(arr1))
    return arr1[permutation], arr2[permutation]

# Training Dataset

In [3]:
df = pd.read_csv("../files/data/FINAL_CSV/SURFACE_CRACK.csv")
df

Unnamed: 0,c_index,a/c,a/t,c/b,phi,K-T
0,0.0,0.90,0.6,0.3,0.104098,40.318066
1,0.0,0.90,0.6,0.3,0.127231,39.942442
2,0.0,0.90,0.6,0.3,0.150363,39.589036
3,0.0,0.90,0.6,0.3,0.173496,39.258976
4,0.0,0.90,0.6,0.3,0.196628,38.948442
...,...,...,...,...,...,...
322299,2517.0,1.25,0.7,0.4,2.949401,42.138823
322300,2517.0,1.25,0.7,0.4,2.972534,42.570806
322301,2517.0,1.25,0.7,0.4,2.995667,43.018442
322302,2517.0,1.25,0.7,0.4,3.018799,43.480511


In [4]:
# Get unique combinations of the first four columns
SC_train_combinations = df.iloc[:, 1:4].drop_duplicates().to_numpy()

print(len(SC_train_combinations))

print("Different a/c values: ", np.unique(SC_train_combinations[:,0], axis=0))
print("Different a/t values: ", np.unique(SC_train_combinations[:,1], axis=0))
print("Different c/b values: ", np.unique(SC_train_combinations[:,2], axis=0))

2518
Different a/c values:  [0.2  0.25 0.3  0.35 0.4  0.45 0.5  0.55 0.6  0.65 0.7  0.75 0.8  0.85
 0.9  0.95 1.   1.05 1.1  1.15 1.2  1.25 1.3  1.35 1.4  1.45 1.5  1.55
 1.6  1.65 1.7  1.75 1.8  1.85 1.9  1.95 2.  ]
Different a/t values:  [0.2  0.25 0.3  0.35 0.4  0.45 0.5  0.55 0.6  0.65 0.7  0.75 0.8  0.85]
Different c/b values:  [0.01 0.1  0.2  0.3  0.4 ]


In [5]:
phi_values = df.iloc[:, 4].drop_duplicates().to_numpy()
print(phi_values)
print(len(phi_values))

[0.10409821 0.12723075 0.1503633  0.17349585 0.1966284  0.21976094
 0.24289349 0.26602604 0.28915858 0.31229113 0.33542368 0.35855622
 0.38168877 0.40482132 0.42795387 0.45108641 0.47421896 0.49735151
 0.52048405 0.5436166  0.56674915 0.5898817  0.61301424 0.63614679
 0.65927934 0.68241188 0.70554443 0.72867698 0.75180952 0.77494207
 0.79807462 0.82120717 0.84433971 0.86747226 0.89060481 0.91373735
 0.9368699  0.96000245 0.983135   1.00626754 1.02940009 1.05253264
 1.07566518 1.09879773 1.12193028 1.14506282 1.16819537 1.19132792
 1.21446047 1.23759301 1.26072556 1.28385811 1.30699065 1.3301232
 1.35325575 1.37638829 1.39952084 1.42265339 1.44578594 1.46891848
 1.49205103 1.51518358 1.53831612 1.56144867 1.58458122 1.60771377
 1.63084631 1.65397886 1.67711141 1.70024395 1.7233765  1.74650905
 1.76964159 1.79277414 1.81590669 1.83903924 1.86217178 1.88530433
 1.90843688 1.93156942 1.95470197 1.97783452 2.00096707 2.02409961
 2.04723216 2.07036471 2.09349725 2.1166298  2.13976235 2.16289

In [6]:
d = df.to_numpy()[:,1:]
d.shape

(322304, 5)

In [7]:
X_train, y_train = shuffle_arrays(d[:,:-1], d[:,-1])
X_train.shape, y_train.shape

((322304, 4), (322304,))

# RFR

In [13]:
rfr = RandomForestRegressor(max_depth=None)
rfr.fit(X_train, y_train)

In [14]:
dump(rfr, '../files/trained_models/SURFACE_CRACK/rfr.joblib')

['../files/trained_models/SURFACE_CRACK/rfr.joblib']

# SVR

In [None]:
svr = SVR()
svr.fit(X_train, y_train)

In [None]:
dump(rfr, '../files/trained_models/SURFACE_CRACK/svr.joblib')

# NN

In [None]:
class Net10(nn.Module):
    def __init__(self, inp_feat, num_neurons):
        super(Net10, self).__init__()
        self.fc0 = nn.Linear(inp_feat, num_neurons)
        self.fc1 = nn.Linear(num_neurons, num_neurons)
        self.fc2 = nn.Linear(num_neurons, num_neurons)
        self.fc3 = nn.Linear(num_neurons, num_neurons)
        self.fc4 = nn.Linear(num_neurons, num_neurons)
        self.fc5 = nn.Linear(num_neurons, num_neurons)
        self.fc6 = nn.Linear(num_neurons, num_neurons)
        self.fc7 = nn.Linear(num_neurons, num_neurons)
        self.fc8 = nn.Linear(num_neurons, num_neurons)
        self.fc9 = nn.Linear(num_neurons, num_neurons)
        self.fc10 = nn.Linear(num_neurons, 1)

    def forward(self, x):
        x = F.leaky_relu(self.fc0(x))
        x = F.leaky_relu(self.fc1(x))
        x = F.leaky_relu(self.fc2(x))
        x = F.leaky_relu(self.fc3(x))
        x = F.leaky_relu(self.fc4(x))
        x = F.leaky_relu(self.fc5(x))
        x = F.leaky_relu(self.fc6(x))
        x = F.leaky_relu(self.fc7(x))
        x = F.leaky_relu(self.fc8(x))
        x = F.leaky_relu(self.fc9(x))
        x = self.fc10(x)
        return x
    
def train(net, X_train, y_train, X_val, y_val, loss_fn, optimizer, EPOCHS, FILENAME):
    train_mse = []
    val_mse = []
    epoch_tolerance = 0
    
    for it in tqdm(range(EPOCHS+1)):
        optimizer.zero_grad()
        y_pred = net(X_train)

        loss = loss_fn(y_pred, y_train)

        with torch.no_grad():
            y_val_pred = net(X_val)
            val_loss = loss_fn(y_val_pred, y_val)

        loss.backward()
        optimizer.step()

        if len(val_mse) == 0:
            torch.save(net.state_dict(), "../files/trained_models/SURFACE_CRACK/{}.pt".format(FILENAME))
            train_mse.append(loss.item())
            val_mse.append(val_loss.item())
            epoch_tolerance = 0
        elif val_mse[-1] > val_loss.item():
            torch.save(net.state_dict(), "../files/trained_models/SURFACE_CRACK/{}.pt".format(FILENAME))
            train_mse.append(loss.item())
            val_mse.append(val_loss.item())
            epoch_tolerance = 0
        elif val_mse[-1] < val_loss.item() and epoch_tolerance < 50000:
            epoch_tolerance += 1
            continue
        elif val_mse[-1] < val_loss.item() and epoch_tolerance >= 50000 and it >= 100000:
            print("Exiting loop...")
            print("iteration: {}; Train MSE: {}; Val MSE: {}".format(it, train_mse[-1], val_mse[-1]))
            epoch_tolerance = 0
            break
    print("Train MSE: {}; Val MSE: {}".format(train_mse[-1], val_mse[-1]))
    return train_mse, val_mse

In [9]:
device = 'cuda'

In [10]:
X_train_gpu = torch.FloatTensor(X_train[:int(0.8*len(X_train))]).to(device)
X_val_gpu = torch.FloatTensor(X_train[int(0.8*len(X_train)):]).to(device)

y_train_gpu = torch.FloatTensor(np.expand_dims(y_train[:int(0.8*len(y_train))], axis=-1)).to(device)
y_val_gpu = torch.FloatTensor(np.expand_dims(y_train[int(0.8*len(y_train)):], axis=-1)).to(device)

X_train_gpu.shape, y_train_gpu.shape, X_val_gpu.shape, y_val_gpu.shape

(torch.Size([257843, 4]),
 torch.Size([257843, 1]),
 torch.Size([64461, 4]),
 torch.Size([64461, 1]))

In [None]:
net = Net10(X_train_gpu.shape[1], 100).to(device)
EPOCHS = 250000
loss_fn = nn.MSELoss()
optimizer = torch.optim.AdamW(net.parameters(), lr=0.0001)


FILENAME = "nn10"
train_loss, val_loss = train_minibatch(net, X_train_gpu, y_train_gpu, X_val_gpu, y_val_gpu, loss_fn, optimizer, EPOCHS, FILENAME, int(len(X_train_gpu)/2))

Training Epochs:   0%|          | 0/250000 [00:00<?, ?it/s]

Training Epochs:   0%|          | 9/250000 [00:27<208:34:34,  3.00s/it]Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7313d44dd4f0>>
Traceback (most recent call last):
  File "/home/rvn/research-projects/SIFBench/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 
Training Epochs:   0%|          | 10/250000 [00:30<211:53:11,  3.05s/it]

Epoch 10/250000, Train Loss: 1045.1994, Val Loss: 1102.4573, Tolerance: 0


Training Epochs:   0%|          | 20/250000 [01:00<203:15:04,  2.93s/it]

Epoch 20/250000, Train Loss: 1371.8456, Val Loss: 1098.4138, Tolerance: 0


Training Epochs:   0%|          | 22/250000 [01:06<205:03:40,  2.95s/it]