# Milestone 1 : Regression - Predicting seismic collapse capacity

#### Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import metrics

## 1.Data

### 1.1 Import

In [None]:
# Read the three CSV splits into a dict of DataFrames keyed by split name.
df = {split: pd.read_csv(f"Data/{split}_set.csv") for split in ('train', 'test', 'val')}
df['train'].head(5)

Unnamed: 0,0.01,0.02,0.022,0.025,0.029,0.03,0.032,0.035,0.036,0.04,...,8.5,9.0,9.5,10.0,sa_avg,da5_75,da5_95,fiv3,sa_ratio,sat1_col
0,0.011075,0.011107,0.011132,0.011176,0.011274,0.011288,0.011341,0.011377,0.011422,0.011567,...,0.000247,0.000223,0.000206,0.00019,0.010809,14.484,21.416,2.881797,0.832237,0.78
1,0.15538,0.15305,0.15459,0.156281,0.161038,0.160171,0.159765,0.166164,0.173643,0.174708,...,0.002316,0.002225,0.002106,0.001998,0.031044,14.43,18.27,7.642059,0.961638,1.96
2,0.060774,0.060783,0.06079,0.060795,0.060799,0.0608,0.060804,0.060794,0.060793,0.060798,...,0.013225,0.012226,0.011904,0.011696,0.098425,16.7,35.105,42.218868,1.737888,1.39
3,0.016016,0.016088,0.016106,0.016135,0.016198,0.016218,0.016263,0.016401,0.016445,0.016595,...,0.000192,0.000163,0.000139,0.000124,0.010169,7.32,17.37,2.599605,1.000551,1.97
4,0.030632,0.030699,0.030724,0.030738,0.030785,0.030809,0.030811,0.030818,0.030856,0.031014,...,0.004094,0.00335,0.002638,0.002224,0.037375,28.005,41.635,11.434507,1.37131,1.43


#### Columns options :

In [None]:
# Target column, and the candidate feature-column sets keyed by option name.
y_col = ['sat1_col']
X_col = {
    # Option 1: every column currently present in the test frame.
    "All": df['test'].columns.to_numpy(),
    # Option 2: the single spectral column '1.3' plus the scalar columns
    # (sa_ratio and sa_avg are there to convey the sa information).
    "Reduced": ['1.3', 'sa_avg', 'da5_75', 'da5_95', 'fiv3', 'sa_ratio'],
    # Option 3: best 15 columns (see section 7).
    "Best_15": [
        'sa_avg', 'fiv3', 'da5_95', '0.06', '0.1', 'max_period', 'da5_75', '1.5', '0.07',
        '0.34', '0.32', '0.03', '0.4', '0.13', '9.0',
    ],
}

### Data Expansion : Max Period

In [None]:
# Feature engineering: for every record, 'max_period' is the label (cast to
# float) of the spectral column holding the row's largest value.
#
# The spectral columns are obtained by removing an explicit list of scalar
# columns. This list is deliberately NOT derived from X_col["Reduced"]: that
# option also contains the spectral column '1.3', which would then be excluded
# from the search and could never be reported as the max period.
non_spectral_cols = ['sa_avg', 'da5_75', 'da5_95', 'fiv3', 'sa_ratio', 'max_period']
# 'max_period' is listed so that re-running this cell does not feed the
# engineered column back into idxmax.
spectral_cols = df['test'].columns.difference(non_spectral_cols)

for t in ['train', 'test', 'val']:
    df[t]['max_period'] = df[t][spectral_cols].idxmax(axis="columns").astype("float")

In [None]:
# Display the validation targets (notebook cell output only, nothing is assigned).
df['val']['sat1_col']

0       1.65
1       1.29
2       1.40
3       1.90
4       2.34
        ... 
1495    1.00
1496    1.00
1497    1.26
1498    1.30
1499    1.54
Name: sat1_col, Length: 1500, dtype: float64

In [None]:
# Options 4 and 4.1: feature sets that include the engineered 'max_period' column
# (numbering continues after "Best_15", which is already labelled Option 3).
X_col["Red_max_period"] = ['sa_avg','da5_75', 'da5_95', 'fiv3', 'sa_ratio', 'max_period']
# Same expression as X_col["All"], but evaluated after 'max_period' was added
# to the frames, so the resulting array contains that column as well.
X_col["All_max_period"] = df['test'].columns.to_numpy()

### Choice of parameters

In [None]:
# Choice: key of X_col selecting the feature set used in the rest of the notebook.
columns = "All_max_period"

In [None]:
# Materialise the chosen feature set as NumPy arrays for every split.
# The test split only gets a feature matrix: no target array is built for it.
selected = X_col[columns]

X_train, y_train = df['train'][selected].to_numpy(), df['train'][y_col].to_numpy()
X_val, y_val = df['val'][selected].to_numpy(), df['val'][y_col].to_numpy()
X_test = df['test'][selected].to_numpy()

# Column index -> column name lookup, used later to interpret learned weights.
cols_map = np.array(selected)

for features in (X_train, X_test, X_val):
    print(features.shape)
print(cols_map)

(12646, 111)
(3000, 111)
(1500, 111)
['0.01' '0.02' '0.022' '0.025' '0.029' '0.03' '0.032' '0.035' '0.036'
 '0.04' '0.042' '0.044' '0.045' '0.046' '0.048' '0.05' '0.055' '0.06'
 '0.065' '0.067' '0.07' '0.075' '0.08' '0.085' '0.09' '0.095' '0.1' '0.11'
 '0.12' '0.13' '0.133' '0.14' '0.15' '0.16' '0.17' '0.18' '0.19' '0.2'
 '0.22' '0.24' '0.25' '0.26' '0.28' '0.29' '0.3' '0.32' '0.34' '0.35'
 '0.36' '0.38' '0.4' '0.42' '0.44' '0.45' '0.46' '0.48' '0.5' '0.55' '0.6'
 '0.65' '0.667' '0.7' '0.75' '0.8' '0.85' '0.9' '0.95' '1.0' '1.1' '1.2'
 '1.3' '1.4' '1.5' '1.6' '1.7' '1.8' '1.9' '2.0' '2.2' '2.4' '2.5' '2.6'
 '2.8' '3.0' '3.2' '3.4' '3.5' '3.6' '3.8' '4.0' '4.2' '4.4' '4.6' '4.8'
 '5.0' '5.5' '6.0' '6.5' '7.0' '7.5' '8.0' '8.5' '9.0' '9.5' '10.0'
 'sa_avg' 'da5_75' 'da5_95' 'fiv3' 'sa_ratio' 'max_period']


### 1.2 Normalisation

In [None]:
# Standardisation statistics come from the training set only and are reused
# for the validation and test sets.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
# A constant feature column has std == 0; dividing by it would yield NaN/inf
# and poison training. Dividing by 1 instead maps such a column to all zeros.
std = np.where(std == 0, 1.0, std)

def Normalise(X):
    """Standardise the columns of X with the training-set mean and std.

    Reads the module-level `mean` and `std` arrays; X must have the same
    number of columns as X_train had when they were computed.
    """
    return (X - mean) / std

X_train = Normalise(X_train)
X_val = Normalise(X_val)
X_test = Normalise(X_test)



### 1.3 Import into PyTorch

https://stackoverflow.com/questions/44429199/how-to-load-a-list-of-numpy-arrays-to-pytorch-dataset-loader

In [None]:
def convert_to_dataloader(x, y=None, batch_size = 10, shuffle = False):
    """Wrap feature (and optionally target) arrays in a PyTorch DataLoader.

    Args:
        x: array-like of features; converted with ``torch.Tensor`` (float32).
        y: optional array-like of targets with the same first dimension as
            ``x``. When ``None`` the loader yields 1-tuples ``(x_batch,)``,
            otherwise 2-tuples ``(x_batch, y_batch)``.
        batch_size: number of samples per batch.
        shuffle: forwarded to ``DataLoader``; the default ``False`` keeps the
            samples in their original order.

    Returns:
        A ``torch.utils.data.DataLoader`` over a ``TensorDataset``.
    """
    tensor_x = torch.Tensor(x)
    # Identity check: `y == None` on an array is an element-wise comparison,
    # which either raises when used as a condition or (for a single-element
    # array) evaluates to False without building any dataset.
    if y is None:
        dataset = torch.utils.data.TensorDataset(tensor_x)
    else:
        dataset = torch.utils.data.TensorDataset(tensor_x, torch.Tensor(y))
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Wrap each split in a DataLoader, using convert_to_dataloader's default batch size.
# The test split has no targets, so its loader is built from features only.
dataload_train = convert_to_dataloader(X_train, y_train)
dataload_val = convert_to_dataloader(X_val, y_val)
dataload_test = convert_to_dataloader(X_test)

## 2. Models

### 2.1 OneLayerNet

In [None]:
class OneLayerNet(nn.Module):
    """Linear regression: one fully-connected layer from `cols` inputs to 1 output."""

    def __init__(self, cols):
        super().__init__()
        self.fc1 = nn.Linear(cols, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Flatten everything after the batch dimension and apply the linear layer."""
        return self.fc1(x.flatten(start_dim=1))

    def predict(self, x: torch.Tensor) -> torch.Tensor:
        """Return the model output for `x`; identical to `forward`."""
        return self.forward(x)

# One input unit per feature column of X_train.
one_layer_net = OneLayerNet(X_train.shape[1])

### 2.2 ThreeLayerNet

In [None]:
class ThreeLayerNet(nn.Module):
    """Three linear layers (cols -> cols//2 -> cols//4 -> 1) with ReLU in between."""

    def __init__(self, cols):
        super().__init__()
        half, quarter = cols // 2, cols // 4
        self.fc1 = nn.Linear(cols, half)
        self.fc2 = nn.Linear(half, quarter)
        self.fc3 = nn.Linear(quarter, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Flatten the non-batch dimensions, then apply fc1 -> ReLU -> fc2 -> ReLU -> fc3."""
        hidden = F.relu(self.fc1(x.flatten(start_dim=1)))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def predict(self, x: torch.Tensor) -> torch.Tensor:
        """Return the model output for `x`; identical to `forward`."""
        return self.forward(x)

# One input unit per feature column of X_train.
three_layer_net = ThreeLayerNet(X_train.shape[1])

### 2.3 FourLayerNet

In [None]:
class FourLayerNet(nn.Module):
    """Four linear layers with ReLU, plus squared features after the second layer.

    The output of fc2 (width cols//4) is augmented with its element-wise
    square, which is why fc3 takes 2*(cols//4) inputs.
    """

    def __init__(self, cols):
        super().__init__()
        half, quarter = cols // 2, cols // 4
        self.fc1 = nn.Linear(cols, half)
        self.fc2 = nn.Linear(half, quarter)
        self.fc3 = nn.Linear(2 * quarter, quarter)
        self.fc4 = nn.Linear(quarter, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute the prediction for a batch `x` (non-batch dimensions are flattened)."""
        hidden = self.fc1(x.flatten(start_dim=1))
        hidden = self.fc2(F.relu(hidden))
        # Interleave each activation with its square: [h0, h0^2, h1, h1^2, ...].
        augmented = torch.stack((hidden, hidden.pow(2)), dim=2).flatten(start_dim=1)
        hidden = self.fc3(F.relu(augmented))
        return self.fc4(F.relu(hidden))

    def predict(self, x: torch.Tensor) -> torch.Tensor:
        """Return the model output for `x`; identical to `forward`."""
        return self.forward(x)

# One input unit per feature column of X_train.
four_layer_net = FourLayerNet(X_train.shape[1])

### 2.4 NonLinearNet

Very bad result: the loss oscillates in a sawtooth pattern.

In [None]:
class NonLinearNet(nn.Module):
    """Network that expands its first hidden layer with x^2, log and exp features.

    fc1 maps cols -> cols//4; each activation is expanded into four features,
    hence fc2 takes 4*(cols//4) inputs.
    """

    def __init__(self, cols):
        super().__init__()
        quarter = cols // 4
        self.fc1 = nn.Linear(cols, quarter)
        self.fc2 = nn.Linear(4 * quarter, cols // 2)
        self.fc3 = nn.Linear(cols // 2, cols // 8)
        self.fc4 = nn.Linear(cols // 8, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute the prediction for a batch `x` (non-batch dimensions are flattened)."""
        hidden = F.relu(self.fc1(x.flatten(start_dim=1)))
        # Per activation h: [h, h^2, log(h + 1e-3), exp(h)], interleaved.
        # The 1e-3 offset keeps the log finite when ReLU outputs exactly 0.
        expanded = torch.stack(
            (hidden, hidden.pow(2), (hidden + 1e-3).log(), hidden.exp()),
            dim=2,
        ).flatten(start_dim=1)
        hidden = self.fc2(F.gelu(expanded))
        hidden = self.fc3(F.relu(hidden))
        return self.fc4(F.relu(hidden))

    def predict(self, x: torch.Tensor) -> torch.Tensor:
        """Return the model output for `x`; identical to `forward`."""
        return self.forward(x)

# One input unit per feature column of X_train.
non_linear_net = NonLinearNet(X_train.shape[1])

In [None]:
class NonLinearNet2(nn.Module):
    """Network fed with the inputs, their squares and neighbour products.

    The input of width `cols` is expanded to 3*cols features
    ([x, x^2, pairwise products]) before the first linear layer.
    """

    def __init__(self, cols):
        super().__init__()
        self.fc1 = nn.Linear(3*cols, cols)
        self.fc2 = nn.Linear(cols, cols//2)
        self.fc3 = nn.Linear(cols//2, cols//4)
        self.fc4 = nn.Linear(cols//4, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute the prediction for a batch `x` (non-batch dimensions are flattened)."""
        x = x.flatten(start_dim=1)
        # torch.cat(..., dim=1) is used instead of torch.hstack: hstack is
        # missing from older PyTorch releases (AttributeError), while cat along
        # dim 1 gives the same result for these 2-D tensors.
        #
        # xy terms: products of adjacent columns, padded with one extra product
        # so that the result has `cols` columns.
        # NOTE(review): the padding term multiplies column 1 with the last
        # column; a cyclic neighbour product would use column 0 - confirm intent.
        x2 = torch.cat((x[:,1:]*x[:,:-1], (x[:,1]*x[:,-1]).view(-1,1)), dim=1)
        x = torch.cat((x, x**2, x2), dim=1) # Adding x^2, xy terms
        x = self.fc1(x)
        x = self.fc2(F.gelu(x))
        x = self.fc3(F.relu(x))
        x = self.fc4(F.relu(x))
        return x

    def predict(self, x: torch.Tensor) -> torch.Tensor:
        """Return the model output for `x`; identical to `forward`."""
        return self.forward(x)

# One input unit per feature column of X_train.
non_linear_net2 = NonLinearNet2(X_train.shape[1])

## 3. Training

### 3.1 Loss function

In [None]:
# Mean squared error averaged over the batch; passed to train() and read as a global by test().
loss_fn = nn.MSELoss(reduction="mean")

### 3.2 Training

In [None]:
def train(model: torch.nn.Module, train_loader: torch.utils.data.DataLoader, loss_fn: torch.nn.Module, optimizer: torch.optim.Optimizer, epochs: int):
    """Train `model` in place and return the mean batch loss of the last epoch.

    Args:
        model: network to optimise; its parameters are updated in place.
        train_loader: iterable yielding ``(data, target)`` batches.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        optimizer: optimizer built over ``model.parameters()``.
        epochs: number of full passes over ``train_loader``.

    Returns:
        The unweighted mean of the per-batch losses recorded during the final
        epoch, or NaN when no batch was processed (``epochs <= 0`` or an
        empty loader).
    """
    # Switch to training mode (no effect for the purely linear/ReLU models of
    # this notebook, but a good habit).
    model.train()

    # Bound before the loop so that `epochs <= 0` returns NaN instead of
    # raising UnboundLocalError on the return statement.
    losses = []
    for _ in range(epochs):
        # Only the most recent epoch's batch losses are kept.
        losses = []
        for data, target in train_loader:
            optimizer.zero_grad()
            loss = loss_fn(model(data), target)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())

    return np.mean(losses) if losses else np.float64(np.nan)

In [None]:
# Benchmark several optimizers on each candidate model: every (model, optimizer)
# pair is trained from freshly initialised weights and its final training loss
# is stored in loss_dict.
models = [three_layer_net, four_layer_net]
loss_dict = {}

# Start every model from newly initialised weights.
for model in models:
    for _, module in model.named_children():
        module.reset_parameters()

for model in models:
    # Each optimizer must be built over the parameters of the model trained in
    # this iteration. Lines left commented out are alternatives that are not
    # part of the current comparison.
    #opti_Adadelta = torch.optim.Adadelta(model.parameters(), lr=1.0, rho=0.9, eps=1e-06, weight_decay=0)
    #opti_Adagrad = torch.optim.Adagrad(model.parameters(), lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10)
    opti_Adam = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
    opti_AdamW = torch.optim.AdamW(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)
    #opti_SparseAdam = torch.optim.SparseAdam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08)
    opti_Adamax = torch.optim.Adamax(model.parameters(), lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    # ASGD is listed in `optimizers` below, so it has to be created here;
    # otherwise the name is undefined (or bound to another model's parameters).
    opti_ASGD = torch.optim.ASGD(model.parameters(), lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0)
    #opti_LBFGS = torch.optim.LBFGS(model.parameters(), lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn=None)
    #opti_RMSprop = torch.optim.RMSprop(model.parameters(), lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
    #opti_Rprop = torch.optim.Rprop(model.parameters(), lr=0.01, etas=(0.5, 1.2), step_sizes=(1e-06, 50))
    opti_SGD = torch.optim.SGD(model.parameters(), lr=.001, momentum=0, dampening=0, weight_decay=0, nesterov=False)
    #optimizers = [opti_Adadelta, opti_Adagrad, opti_Adam, opti_AdamW, opti_Adamax, opti_ASGD, opti_RMSprop, opti_Rprop, opti_SGD]
    optimizers = [opti_Adam, opti_AdamW, opti_Adamax, opti_ASGD, opti_SGD]
    for optimizer in optimizers:
        loss_train = train(model, dataload_train, loss_fn, optimizer, epochs=200)
        name = str(optimizer).split()[0] + str(model).split()[0]
        loss_dict[name] = loss_train
        print(name, "trained , train loss is", loss_train)

        # Re-initialise the weights so the next optimizer starts from scratch.
        # `_` avoids overwriting the `name` label computed above.
        for _, module in model.named_children():
            module.reset_parameters()

print(loss_dict)

AdadeltaThreeLayerNet( trained , train loss is 0.15078850221919565
AdagradThreeLayerNet( trained , train loss is 0.1383068047625863
AdamThreeLayerNet( trained , train loss is 0.10467863466237255
AdamWThreeLayerNet( trained , train loss is 0.12521074468512897
AdamaxThreeLayerNet( trained , train loss is 0.10954993677145172
ASGDThreeLayerNet( trained , train loss is 0.11723588932997625
RMSpropThreeLayerNet( trained , train loss is 0.1732360282588912
RpropThreeLayerNet( trained , train loss is 0.2019813600515306
SGDThreeLayerNet( trained , train loss is 0.14246537910236906
AdadeltaFourLayerNet( trained , train loss is 0.15628147209836207
AdagradFourLayerNet( trained , train loss is 0.13729510836509376
AdamFourLayerNet( trained , train loss is 0.11156777737757906
AdamWFourLayerNet( trained , train loss is 0.12283716428833516
AdamaxFourLayerNet( trained , train loss is 0.10380421756240336
ASGDFourLayerNet( trained , train loss is nan
RMSpropFourLayerNet( trained , train loss is 0.3134871259

AttributeError: module 'torch' has no attribute 'hstack'

## 4. Test on validation

In [None]:
def test(model: torch.nn.Module, dataloader: torch.utils.data.DataLoader):
    """Return the mean per-batch loss of `model` over `dataloader`.

    The model is switched to evaluation mode and gradients are disabled.
    The loss is computed with the module-level `loss_fn`; `dataloader` must
    yield ``(data, target)`` batches.
    """
    model.eval()

    with torch.no_grad():
        batch_losses = [
            loss_fn(model(data), target).item()
            for data, target in dataloader
        ]

    return np.mean(batch_losses)


In [None]:
# Single run: FourLayerNet trained with Adamax for 500 epochs, then scored on
# the validation set.
model = FourLayerNet(X_train.shape[1])

optimizer = torch.optim.Adamax(
    model.parameters(),
    lr=0.002,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
)
loss_train = train(model, dataload_train, loss_fn, optimizer, epochs=500)
name = str(optimizer).split()[0] + str(model).split()[0]
print(f"{name} trained , train loss is {loss_train}")
val_loss = test(model, dataload_val)
print(f"{name} trained , validation loss is {val_loss}")


## Tested out two models with different optimizers

In [None]:
# Compare five optimizers on one FourLayerNet instance: train for 500 epochs,
# report train and validation loss, then re-initialise the weights before the
# next optimizer.
model = FourLayerNet(X_train.shape[1])

opti_Adam = torch.optim.Adam(
    model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False
)
opti_AdamW = torch.optim.AdamW(
    model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False
)
opti_Adamax = torch.optim.Adamax(
    model.parameters(), lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0
)
opti_ASGD = torch.optim.ASGD(
    model.parameters(), lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0
)
opti_SGD = torch.optim.SGD(
    model.parameters(), lr=.001, momentum=0, dampening=0, weight_decay=0, nesterov=False
)
optimizers = [opti_Adam, opti_AdamW, opti_Adamax, opti_ASGD, opti_SGD]

for optimizer in optimizers:
    loss_train = train(model, dataload_train, loss_fn, optimizer, epochs=500)
    name = str(optimizer).split()[0] + str(model).split()[0]
    print(f"{name} trained , train loss is {loss_train}")
    val_loss = test(model, dataload_val)
    print(f"{name} trained , validation loss is {val_loss}")

    # Fresh weights for the next optimizer.
    for layer in model.children():
        layer.reset_parameters()
        


In [None]:
# Compare AdamW, ASGD and SGD on one ThreeLayerNet instance: train for 500
# epochs, report train and validation loss, then re-initialise the weights
# before the next optimizer. Adam and Adamax are disabled for this run.
model = ThreeLayerNet(X_train.shape[1])

#opti_Adam = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
#opti_Adamax = torch.optim.Adamax(model.parameters(), lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
opti_AdamW = torch.optim.AdamW(
    model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False
)
opti_ASGD = torch.optim.ASGD(
    model.parameters(), lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0
)
opti_SGD = torch.optim.SGD(
    model.parameters(), lr=.001, momentum=0, dampening=0, weight_decay=0, nesterov=False
)
optimizers = [opti_AdamW, opti_ASGD, opti_SGD]

for optimizer in optimizers:
    loss_train = train(model, dataload_train, loss_fn, optimizer, epochs=500)
    name = str(optimizer).split()[0] + str(model).split()[0]
    print(f"{name} trained , train loss is {loss_train}")
    val_loss = test(model, dataload_val)
    print(f"{name} trained , validation loss is {val_loss}")

    # Fresh weights for the next optimizer.
    for layer in model.children():
        layer.reset_parameters()
        


### Trying out best optimizers with model

#### Four Layer Network

In [None]:
# FourLayerNet trained with a tuned AdamW configuration for 800 epochs, then
# scored on the validation set.
model = FourLayerNet(X_train.shape[1])
opti_AdamW = torch.optim.AdamW(
    model.parameters(),
    lr=0.0001,
    betas=(0.99, 0.9999),
    eps=1e-07,
    weight_decay=0.001,
    amsgrad=False,
)
optimizer = opti_AdamW
loss_train = train(model, dataload_train, loss_fn, optimizer, epochs=800)
name = str(optimizer).split()[0] + str(model).split()[0]
print(f"{name} trained , train loss is {loss_train}")
val_loss = test(model, dataload_val)
print(f"{name} trained , validation loss is {val_loss}")


AdamWFourLayerNet( trained , train loss is 0.08069558408582517
AdamWFourLayerNet( trained , validation loss is 0.19486766502261163


In [None]:
# Two independent FourLayerNet runs, each on a newly created model:
# SGD for 900 epochs, then Adamax for 500 epochs. After the cell, `model`,
# `optimizer` and `val_loss` refer to the second (Adamax) run.
for make_optimizer, n_epochs in (
    (
        lambda params: torch.optim.SGD(
            params, lr=0.0001, momentum=0, dampening=0, weight_decay=0.001, nesterov=False
        ),
        900,
    ),
    (
        lambda params: torch.optim.Adamax(
            params, lr=0.002, betas=(0.9, 0.999), eps=1e-09, weight_decay=0.0001
        ),
        500,
    ),
):
    model = FourLayerNet(X_train.shape[1])
    optimizer = make_optimizer(model.parameters())
    loss_train = train(model, dataload_train, loss_fn, optimizer, epochs=n_epochs)
    name = str(optimizer).split()[0] + str(model).split()[0]
    print(f"{name} trained , train loss is {loss_train}")
    val_loss = test(model, dataload_val)
    print(f"{name} trained , validation loss is {val_loss}")

In [None]:
# FourLayerNet trained with Adam (lr 1e-4) for 800 epochs, then scored on the
# validation set.
model = FourLayerNet(X_train.shape[1])

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.0001,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
    amsgrad=False,
)
loss_train = train(model, dataload_train, loss_fn, optimizer, epochs=800)
name = str(optimizer).split()[0] + str(model).split()[0]
print(f"{name} trained , train loss is {loss_train}")
val_loss = test(model, dataload_val)
print(f"{name} trained , validation loss is {val_loss}")

AdamFourLayerNet( trained , train loss is 0.08266068142047336
AdamFourLayerNet( trained , validation loss is 0.23321783877288302


#### Non Linear Network

In [None]:
# NonLinearNet trained with Adamax for 500 epochs, then scored on the
# validation set.
model = NonLinearNet(X_train.shape[1])

optimizer = torch.optim.Adamax(
    model.parameters(),
    lr=0.002,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
)
loss_train = train(model, dataload_train, loss_fn, optimizer, epochs=500)
name = str(optimizer).split()[0] + str(model).split()[0]
print(f"{name} trained , train loss is {loss_train}")
val_loss = test(model, dataload_val)
print(f"{name} trained , validation loss is {val_loss}")

AdamaxNonLinearNet( trained , train loss is 0.09256233597108027
AdamaxNonLinearNet( trained , validation loss is 1.5539655258134006


## 5. Export Results

In [None]:
# Predict on the test features with the current `model` and write the result
# to a CSV file named after the model class.
model_name = repr(model).split('(')[0]

y_test = model(torch.Tensor(X_test))
df_out = pd.DataFrame(y_test.detach().numpy(), columns=['sat1_col'])
df_out.to_csv(f"submission_Joking_{model_name}.csv")

### 6. Saving results for future purpose


In [None]:
# Append the current model description, feature-set name and validation loss
# to the run log. The context manager closes the file even if the write fails.
with open("history.txt", "a+") as f:
    f.write(f"""
------------

{model}

Columns : {columns}
VAL LOSS : {val_loss}""")

## 7. Exploring weights : identifying which cols are used

In [None]:
# Flatten the first parameter tensor of the one-layer model (its fc1 weight
# matrix) into one weight per input column.
first_param = next(one_layer_net.parameters())
weightss = first_param.detach().numpy().reshape(-1)
print(weightss)

# Column names ordered by ascending absolute weight (largest magnitude last).
importance_order = np.argsort(np.abs(weightss))
print(cols_map[importance_order])

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=493ee647-e437-4c81-80f8-96d4eefd9c39' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>