In [16]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

import pandas as pd

import random
import numpy as np

## Data

In [2]:
class Dataset(Dataset):
    def __init__(self, features_file, label_file):
        self.features = pd.read_csv(features_file)
        self.labels = pd.read_csv(label_file)

    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        tensor_features = torch.tensor(self.features.iloc[idx].values, dtype=torch.float)
        tensor_labels = torch.tensor(self.labels.iloc[idx].values, dtype=torch.float)

        return tensor_features, tensor_labels

In [3]:
dataset = Dataset('H1_AI_Dataset/H1_Features_Wh.csv', 'H1_AI_Dataset/H1_Labels_Wh.csv')

In [4]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

In [5]:
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

## MLP

In [23]:
class MLP(nn.Module):
    def __init__(self, input_size=8, hidden_size=128, output_size=6):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, hidden_size)
        
        self.fc5 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.2)
        
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=0.2)
        x = F.relu(self.fc3(x))
        x = F.dropout(x, p=0.2)
        x = F.relu(self.fc4(x))
        x = F.dropout(x, p=0.2)
        
        x = self.fc5(x)  
        return x

In [24]:
# Instantiate the model, optimizer, and loss function
model = MLP()
optimizer = optim.Adam(model.parameters(), lr=0.01)
loss_function = nn.L1Loss()

In [25]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

MLP(
  (fc1): Linear(in_features=8, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=128, bias=True)
  (fc4): Linear(in_features=128, out_features=128, bias=True)
  (fc5): Linear(in_features=128, out_features=6, bias=True)
)

In [26]:
# Training loop
num_epochs = 4
for epoch in range(num_epochs):

    
    model.train()  # Set the model to training mode
    total_loss = 0
    for batch_idx, (data, targets) in enumerate(train_dataloader):
        data, targets = data.to(device), targets.to(device)

        # Forward pass
        outputs = model(data)
        loss = loss_function(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()


        if batch_idx % 300 == 0:  # Example: Check every 100 batches
            max_grad = 0
            min_grad = np.inf
            for name, param in model.named_parameters():
                if param.requires_grad:
                    # Note: Use param.grad.abs().max() to get the max absolute gradient for this parameter
                    max_grad = max(max_grad, param.grad.abs().max().item())
                    min_grad = min(min_grad, param.grad.abs().min().item())
    
            print(f"Batch {batch_idx}: Max gradient: {max_grad}, Min gradient: {min_grad}")
    
            # Optionally, add logic to detect vanishing/exploding gradients
            if max_grad > 1e+3:  # Example threshold for exploding gradients
                print("Warning: Possible exploding gradients detected.")
            if min_grad < 1e-3:  # Example threshold for vanishing gradients
                print("Warning: Possible vanishing gradients detected.")
        


        
        optimizer.step()

        total_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {total_loss/len(train_dataloader):.4f}')

    

    # Evaluate the model on the test set
    model.eval()  # Set the model to evaluation mode
    total_test_loss = 0
    with torch.no_grad():  # No need to track gradients for validation data
        for batch_idx, (data, targets) in enumerate(test_dataloader):
            data, targets = data.to(device), targets.to(device)
            
            # Forward pass
            outputs = model(data)
            loss = loss_function(outputs, targets)
            
            total_test_loss += loss.item()


            # Randomly print data and targets based on percentage - on last epoch
            if (epoch == num_epochs-1) and (random.random() < 0.1):
                
                # Pick a random item index from the batch
                item_idx = random.randint(0, data.size(0) - 1)

                # Extract and print the data, target, and output for the randomly selected item
                item_target = targets[item_idx]
                item_output = outputs[item_idx]
                
                print(f"Target: {item_target.cpu().numpy()}")
                print(f"Output: {item_output.cpu().numpy()}")

    
    print(f'Epoch [{epoch+1}/{num_epochs}], Test Loss: {total_test_loss/len(test_dataloader):.4f}')

Batch 0: Max gradient: 1.605897307395935, Min gradient: 0.0
Batch 300: Max gradient: 0.5844643712043762, Min gradient: 0.0
Batch 600: Max gradient: 1.8740284442901611, Min gradient: 0.0
Epoch [1/4], Training Loss: 9.1295
Epoch [1/4], Test Loss: 8.2067
Batch 0: Max gradient: 1.1917110681533813, Min gradient: 0.0
Batch 300: Max gradient: 1.1702181100845337, Min gradient: 0.0
Batch 600: Max gradient: 4.928669452667236, Min gradient: 0.0
Epoch [2/4], Training Loss: 9.2605
Epoch [2/4], Test Loss: 8.2574
Batch 0: Max gradient: 2.811589241027832, Min gradient: 0.0
Batch 300: Max gradient: 0.7960371971130371, Min gradient: 0.0
Batch 600: Max gradient: 0.915511965751648, Min gradient: 0.0
Epoch [3/4], Training Loss: 9.2463
Epoch [3/4], Test Loss: 8.2476
Batch 0: Max gradient: 8.931171417236328, Min gradient: 0.0
Batch 300: Max gradient: 1.8302421569824219, Min gradient: 0.0
Batch 600: Max gradient: 5.407120227813721, Min gradient: 0.0
Epoch [4/4], Training Loss: 9.2583
Target: [10.759444  18.58