In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

import pandas as pd

In [2]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing each row of a features CSV with a row of a labels CSV.

    The base class is spelled out as ``torch.utils.data.Dataset`` because this
    class reuses the name ``Dataset``. Written as ``class Dataset(Dataset)``,
    re-running the cell would make the class inherit from its own previous
    definition instead of from the PyTorch base class.

    Args:
        features_file: Anything ``pd.read_csv`` accepts; one sample per row.
        label_file: Anything ``pd.read_csv`` accepts; one label row per sample.

    Raises:
        ValueError: If the two files do not contain the same number of rows.
    """

    def __init__(self, features_file, label_file):
        self.features = pd.read_csv(features_file)
        self.labels = pd.read_csv(label_file)

        # Rows are paired by position, so a length mismatch would either fail
        # late (IndexError mid-epoch) or silently ignore trailing label rows.
        if len(self.features) != len(self.labels):
            raise ValueError(
                f"features has {len(self.features)} rows but labels has "
                f"{len(self.labels)} rows; they must match"
            )

    def __len__(self):
        """Return the number of samples (rows of the features table)."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return ``(features, labels)`` for row ``idx`` as float32 tensors."""
        tensor_features = torch.tensor(self.features.iloc[idx].to_numpy(), dtype=torch.float)
        tensor_labels = torch.tensor(self.labels.iloc[idx].to_numpy(), dtype=torch.float)

        return tensor_features, tensor_labels

In [9]:
# Load the feature/label CSV pair and serve it in fixed-order batches of 64.
dataset = Dataset(
    features_file='H1_AI_Dataset/H1_Features_Wh.csv',
    label_file='H1_AI_Dataset/H1_Labels_Wh.csv',
)
dataloader = DataLoader(dataset=dataset, batch_size=64, shuffle=False)

## Below we investigate the first batch of Features & Labels

In [10]:
# Fetch only the first batch; the None default keeps an empty loader a no-op.
first_batch = next(iter(dataloader), None)
if first_batch is not None:
    features, labels = first_batch  # each batch unpacks into (features, labels)
    print("Features:\n", features)
    print("Labels:\n", labels)
    print("Shape of features:", features.shape)
    print("Shape of labels:", labels.shape)

Features:
 tensor([[ 35.6726,  43.5336,  23.1728,  23.7152,  24.6601,  35.2410,  23.5270,
          34.5210],
        [ 43.5336,  23.1728,  23.7152,  24.6601,  35.2410,  23.5270,  34.5210,
          43.5534],
        [ 23.1728,  23.7152,  24.6601,  35.2410,  23.5270,  34.5210,  43.5534,
          40.9924],
        [ 23.7152,  24.6601,  35.2410,  23.5270,  34.5210,  43.5534,  40.9924,
          28.8335],
        [ 24.6601,  35.2410,  23.5270,  34.5210,  43.5534,  40.9924,  28.8335,
          23.1566],
        [ 35.2410,  23.5270,  34.5210,  43.5534,  40.9924,  28.8335,  23.1566,
          33.5603],
        [ 23.5270,  34.5210,  43.5534,  40.9924,  28.8335,  23.1566,  33.5603,
          26.3208],
        [ 34.5210,  43.5534,  40.9924,  28.8335,  23.1566,  33.5603,  26.3208,
          25.9587],
        [ 43.5534,  40.9924,  28.8335,  23.1566,  33.5603,  26.3208,  25.9587,
          42.8160],
        [ 40.9924,  28.8335,  23.1566,  33.5603,  26.3208,  25.9587,  42.8160,
          33.9373],

In [4]:
class MLP(nn.Module):
    """Fully connected network: four ReLU hidden layers with dropout, then a linear head.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of each of the four hidden layers.
        output_size: Number of values produced per sample.
        dropout: Dropout probability applied after each hidden activation.
    """

    def __init__(self, input_size=8, hidden_size=128, output_size=6, dropout=0.2):
        super().__init__()
        self.dropout_p = dropout

        self.fc1 = nn.Linear(input_size, hidden_size)

        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, hidden_size)

        self.fc5 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a ``(..., input_size)`` tensor to ``(..., output_size)``."""
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = F.relu(hidden(x))
            # F.dropout defaults to training=True, which would keep dropping
            # activations even after model.eval(); tie it to the module's mode.
            x = F.dropout(x, p=self.dropout_p, training=self.training)

        # No activation on the head: raw regression outputs.
        return self.fc5(x)

In [5]:
# Build the network, the L1 (mean absolute error) criterion, and an Adam
# optimizer over the network's parameters.
model = MLP()
loss_function = nn.L1Loss()
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

In [6]:
# Use the GPU when one is available, otherwise stay on the CPU, and move the
# model's parameters there. The bare last expression displays the model.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

MLP(
  (fc1): Linear(in_features=8, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=128, bias=True)
  (fc4): Linear(in_features=128, out_features=128, bias=True)
  (fc5): Linear(in_features=128, out_features=6, bias=True)
)

In [7]:
# Training loop
num_epochs = 4
for epoch in range(num_epochs):
    # Put the model in training mode at the start of every epoch
    model.train()
    total_loss = 0  # running sum of the per-batch mean losses

    for batch_idx, (data, targets) in enumerate(dataloader):
        data = data.to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(data)
        loss = loss_function(outputs, targets)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the epoch's loss averaged over the number of batches
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(dataloader):.4f}')

Epoch [1/4], Loss: 8.9369
Epoch [2/4], Loss: 8.8397
Epoch [3/4], Loss: 8.7880
Epoch [4/4], Loss: 8.7502


## Mean label values: per column and overall
* These means give a sense of scale for judging the L1 training loss reported above.

In [8]:
# Accumulate per-column sums over every batch, then divide by the row count.
# The accumulator is created from the first batch so the number of label
# columns is taken from the data rather than hard-coded.
total_sum = None
total_count = 0

for _, labels in dataloader:
    batch_sum = labels.sum(dim=0)  # Sum each column across the batch
    total_sum = batch_sum if total_sum is None else total_sum + batch_sum
    total_count += labels.size(0)  # Count the number of samples (rows)

if total_count == 0:
    # Without this guard the division below would fail on None or yield NaNs.
    raise ValueError("dataloader produced no samples; cannot compute label means")

column_means = total_sum / total_count
print("Column-wise mean values of labels:", column_means)

# Unweighted mean of the column means (every column has the same row count).
total_mean = column_means.mean()
print("Scalar total mean of labels:", total_mean)

Column-wise mean values of labels: tensor([ 9.4287,  7.4341, 16.7252, 22.2961,  4.6413,  6.2078])
Scalar total mean of labels: tensor(11.1222)
