In [1]:
import torch
import csv
import os
import random
import numpy as np
from torch.utils.data import TensorDataset, DataLoader, random_split
import pandas as pd
import sys  # Import the sys module
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import uuid
import itertools
import torch.nn as nn
import torch.nn.init as init

sys.path.append('..')
from dataset_reader import Traces_Dataset

In [2]:
# making training reproducible
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [25]:
# load and process dataset 
dataset = Traces_Dataset('../dataset2mil.csv')
dataset.split_dataset(0.9, 0.1, 0)
# dataset.clean_features()
dataset.find_mean_std()
dataset.normalize()
print(dataset.inputs.shape)
# history_dict['normalize_mean'] = dataset.train_mean.tolist()
# history_dict['normalize_std'] = dataset.train_std.tolist()
# history_dict['dataset'] = (dataset.inputs.shape[0], dataset.inputs.shape[1])

# initialize train, val, test set
X_train = dataset[dataset.train_set.indices][0]
Y_train = dataset[dataset.train_set.indices][1]

X_val = dataset[dataset.val_set.indices][0]
Y_val = dataset[dataset.val_set.indices][1]

X_test = dataset[dataset.test_set.indices][0]
Y_test = dataset[dataset.test_set.indices][1]


torch.Size([2000000, 329])


In [96]:
setup_train = X_train[:, :9]
traces_train = X_train[:, 8:]
setup_val = X_val[:, :9]
traces_val = X_val[:, 9:]
setup_train.shape, traces_train.shape

In [60]:
# load model RNN parameters
def rnn_add_dim(inputs, n_points=20, n_traces=16): 
    '''
    adding a dim to dataset, default = 2, so theres one channel for time, one for current
    '''
    return inputs.reshape(-1, n_traces, n_points).transpose(1,2)


traces_train_transformed = rnn_add_dim(traces_train)
traces_val_transformed = rnn_add_dim(traces_val)
traces_train_transformed.shape, traces_val_transformed.shape

(torch.Size([1800000, 20, 16]), torch.Size([200000, 20, 16]))

In [108]:
class RNN_MLP_Model(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, mlp_hidden_size, output_size):
        super(RNN_MLP_Model, self).__init__()
        
        # Define the RNN layer (LSTM in this case)
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        
        # Define the MLP layers
        self.mlp = nn.Sequential(
            nn.Linear(hidden_size + 9, mlp_hidden_size),
            nn.ReLU(),
            nn.Linear(mlp_hidden_size, output_size)
        )
        
    def forward(self, x):
        x_setup = x[:, :9]
        x_traces = x[:, 9:]
        
        # Forward pass through the RNN layer
        _, (h_n, _) = self.rnn(rnn_add_dim(x_traces))
        
        # Extract the hidden state from the last time step
        rnn_output = h_n[-1]
        
        # Forward pass through the MLP layers
        # print(torch.cat((x_setup, rnn_output), dim=1).shape)
        output = self.mlp(torch.cat((x_setup, rnn_output), dim=1))
        
        return output

    def initialize_weights(self):
        for name, param in self.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.zeros_(param)

In [109]:
# set up model hyper-parameters
input_size = 16 # numer of traces times 2
hidden_size = 128
n_layers = 1
mlp_hidden_size = 64
n_classes = 7 # 8 parameters to estimate

In [110]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # Uncomment this line
print(f"Using device: {device}")

Using device: cuda


In [111]:
# initialize NN model
model = RNN_MLP_Model(input_size, hidden_size, n_layers, mlp_hidden_size, n_classes).to(device)
model.initialize_weights()

In [112]:
# training parameters
n_epochs = 300   # number of epochs to run
batch_size = 1024  # size of each batch

In [113]:
# initialize dataloader 
train_dataset = TensorDataset(X_train, Y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = TensorDataset(X_val, Y_val)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

In [114]:
# loss function and optimizer
loss_fn = nn.MSELoss()  # mean square error
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)

In [None]:
# initialization train, val losses
train_losses = []
val_losses = []
best_validation_loss = float('inf')

# Training loop
for epoch in range(1, n_epochs + 1):
    model.train()  # Set the model to training mode
    total_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(inputs)  # Forward pass
        loss = loss_fn(outputs, labels)  # Calculate the loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights
        total_loss += loss.item()
    # Average training loss for the epoch
    avg_train_loss = total_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # Validation loop
    model.eval()  # Set the model to evaluation mode
    total_val_loss = 0.0

    with torch.no_grad():  # Disable gradient calculation during validation
        # validation
        for val_inputs, val_labels in val_loader:
            val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)
            val_outputs = model(val_inputs)
            val_loss = loss_fn(val_outputs, val_labels)
            total_val_loss += val_loss.item()

    # Average validation loss for the epoch
    avg_val_loss = total_val_loss / len(val_loader)
    val_losses.append(avg_val_loss)

    print(f'{epoch}: train-{avg_train_loss}, val-{avg_val_loss}')

    if avg_val_loss < best_validation_loss:
        best_epoch = epoch
        # model_path = checkpoint(model, f"model_{unique_id}.pth")
        best_training_loss = avg_train_loss
        best_validation_loss = avg_val_loss

1: train-55.83905145820904, val-20.65707439305831
2: train-19.354117458590874, val-18.579236935596075
3: train-13.806811706469192, val-11.718442148091842
4: train-10.112605646066156, val-7.8259642197161305
5: train-8.73757749721323, val-5.922453607831683
6: train-8.071302918443907, val-17.13266634941101
7: train-7.612460662887365, val-5.399930688799644
8: train-6.949658720295313, val-5.5169393772981605
9: train-6.899972063960747, val-7.69943198135921
10: train-6.1565974343216325, val-3.9842091859603417
11: train-5.2785491171686045, val-4.057454382886692
12: train-4.452463392918427, val-3.165950312906382
13: train-3.9544196187161478, val-4.262090083287687
14: train-3.75137394110633, val-2.9680109644422727
15: train-6.56616749962849, val-24.310941715629735
16: train-17.67979163331519, val-13.67090985726337
17: train-10.546148637862743, val-18.345007925617452
18: train-7.998585707227253, val-8.187169291535202
19: train-7.0207163848160885, val-9.160915866190074
20: train-6.1770203041677725

In [None]:
# record training, validationg losses, weight updates, and the result model path
history_dict['best_epoch'] = best_epoch
history_dict['best_val'] = best_validation_loss
history_dict['best_train'] = best_training_loss
history_dict['training_loss'] = train_losses
history_dict['validation_loss'] = val_losses

for history in history_dict:
    print(f'{history}: {history_dict[history]}\n')