In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

import pandas as pd

# Tensor display settings for the notebook: 5 decimal places, no "..."
# summarisation until a tensor exceeds 10000 elements, and 120-character rows.
torch.set_printoptions(
    linewidth=120,
    threshold=10000,
    precision=5,
)

In [2]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing row ``i`` of a features CSV with row ``i`` of a labels CSV.

    The base class is referenced by its fully qualified name because this
    class reuses the name ``Dataset``: inheriting from the bare name would
    make a re-run of this cell subclass the previous definition of itself.

    Both CSVs are converted to float32 tensors once, at construction time, so
    fetching a sample is a cheap tensor index instead of a per-sample pandas
    lookup plus tensor allocation.

    Args:
        features_file: Path (or any source accepted by ``pd.read_csv``) of the
            features CSV.
        label_file: Path (or any source accepted by ``pd.read_csv``) of the
            labels CSV.

    Raises:
        ValueError: If the two files do not contain the same number of rows,
            since samples are matched purely by row position.
    """

    def __init__(self, features_file, label_file):
        # The DataFrames stay available under their original attribute names.
        self.features = pd.read_csv(features_file)
        self.labels = pd.read_csv(label_file)

        if len(self.features) != len(self.labels):
            raise ValueError(
                f"features ({len(self.features)} rows) and labels "
                f"({len(self.labels)} rows) must have the same number of rows"
            )

        # One-off conversion; every __getitem__ call indexes into these.
        self._feature_tensor = torch.tensor(self.features.values, dtype=torch.float)
        self._label_tensor = torch.tensor(self.labels.values, dtype=torch.float)

    def __len__(self):
        """Return the number of samples (rows of the features CSV)."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return ``(features, labels)`` for row ``idx`` as float32 tensors.

        The tensors are clones, so in-place edits by the caller cannot alter
        the data held by the dataset.
        """
        tensor_features = self._feature_tensor[idx].clone()
        tensor_labels = self._label_tensor[idx].clone()

        return tensor_features, tensor_labels

In [3]:
# Load the paired feature / label CSVs, then serve them in their stored order
# (no shuffling) as mini-batches of 64 samples.
dataset = Dataset(
    features_file='H1_AI_Dataset/H1_Features_24.csv',
    label_file='H1_AI_Dataset/H1_Labels_S_24.csv',
)
dataloader = DataLoader(dataset=dataset, shuffle=False, batch_size=64)

## Below we investigate the first batch of Features & Labels

In [4]:
# Pull only the first batch from the loader (if it yields one) and show its
# contents together with the tensor shapes.
first_batch = next(iter(dataloader), None)
if first_batch is not None:
    features, labels = first_batch  # each batch is a (features, labels) pair
    print("Features:\n", features)
    print("Labels:\n", labels)
    print("Shape of features:", features.shape)
    print("Shape of labels:", labels.shape)

Features:
 tensor([[ 35.67262,  43.53362,  23.17281,  23.71524,  24.66010,  35.24099,  23.52699,  34.52103,  43.55336,  40.99237,
          28.83355,  23.15656,  33.56032,  26.32076,  25.95865,  42.81598,  33.93731,  23.12867,  22.80414,  26.21095,
          33.61465,  23.18116,  46.01272,  56.35895,  32.58302],
        [ 43.53362,  23.17281,  23.71524,  24.66010,  35.24099,  23.52699,  34.52103,  43.55336,  40.99237,  28.83355,
          23.15656,  33.56032,  26.32076,  25.95865,  42.81598,  33.93731,  23.12867,  22.80414,  26.21095,  33.61465,
          23.18116,  46.01272,  56.35895,  32.58302,  23.47134],
        [ 23.17281,  23.71524,  24.66010,  35.24099,  23.52699,  34.52103,  43.55336,  40.99237,  28.83355,  23.15656,
          33.56032,  26.32076,  25.95865,  42.81598,  33.93731,  23.12867,  22.80414,  26.21095,  33.61465,  23.18116,
          46.01272,  56.35895,  32.58302,  23.47134,  23.07064],
        [ 23.71524,  24.66010,  35.24099,  23.52699,  34.52103,  43.55336,  40.9

In [5]:
class MLP(nn.Module):
    """Fully connected network: four hidden ReLU layers followed by a linear output layer.

    Dropout is applied after every hidden activation, but only while the
    module is in training mode. ``F.dropout`` defaults to ``training=True``,
    so the module's own ``self.training`` flag is passed explicitly; without
    it, activations would still be randomly zeroed after ``model.eval()`` and
    inference would be non-deterministic.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of each of the four hidden layers.
        output_size: Number of values predicted per sample.
        dropout: Probability of zeroing a hidden activation during training.
    """

    def __init__(self, input_size=25, hidden_size=128, output_size=6, dropout=0.2):
        super().__init__()
        self.dropout_p = dropout

        self.fc1 = nn.Linear(input_size, hidden_size)

        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, hidden_size)

        self.fc5 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of shape ``(..., input_size)`` to ``(..., output_size)``."""
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = F.relu(hidden(x))
            # Tie dropout to train/eval mode instead of leaving it always on.
            x = F.dropout(x, p=self.dropout_p, training=self.training)

        # No activation on the output layer: raw regression values.
        return self.fc5(x)

In [6]:
class RMSE(nn.Module):
    """Root-mean-squared-error loss: ``sqrt(MSE(yhat, y) + eps)``.

    Args:
        eps: Constant added to the mean squared error before the square root
            is taken, keeping the argument of the root strictly positive.
    """

    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return the RMSE between predictions ``yhat`` and targets ``y``."""
        mean_squared_error = self.mse(yhat, y)
        return (mean_squared_error + self.eps).sqrt()

In [7]:
# Build the training components: the network (default layer sizes), the RMSE
# criterion, and an Adam optimizer over the network's parameters.
model = MLP()
loss_function = RMSE()
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

In [8]:
# Device configuration: use CUDA when it is available, otherwise the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
# Kept as the cell's final expression so the notebook echoes the module summary.
model.to(device)

MLP(
  (fc1): Linear(in_features=25, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=128, bias=True)
  (fc4): Linear(in_features=128, out_features=128, bias=True)
  (fc5): Linear(in_features=128, out_features=6, bias=True)
)

In [9]:
# Training loop: each epoch makes one full pass over the dataloader and then
# reports the mean of the per-batch losses.
num_epochs = 4
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch_idx, batch in enumerate(dataloader):
        # Move this batch's inputs and targets to the model's device
        data = batch[0].to(device)
        targets = batch[1].to(device)

        # Drop the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(data)
        loss = loss_function(outputs, targets)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        total_loss = total_loss + loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(dataloader):.4f}')

Epoch [1/4], Loss: 48.0721
Epoch [2/4], Loss: 47.0637
Epoch [3/4], Loss: 46.4648
Epoch [4/4], Loss: 46.6740


## Mean of each label column, and the sum of those column means
* This gives a sense of the scale of the labels, which helps in interpreting the magnitude of the training loss

In [11]:
# Running per-column totals over the whole dataset (the labels have 6 columns)
total_sum = torch.zeros(6)
total_count = 0

for _, labels in dataloader:
    batch_column_sums = labels.sum(dim=0)        # collapse the batch dimension
    total_sum = total_sum + batch_column_sums
    total_count = total_count + labels.size(0)   # samples seen so far

# Per-column mean across every sample
column_means = total_sum / total_count
print("For each column, mean of data points:", column_means)

# Sum of the per-column means
row_mean = column_means.sum()
print("For row, sum of data points mean:", row_mean)

For each column, mean of data points: tensor([ 9.43126,  7.43596, 16.72972, 22.29979,  4.64119,  6.20794])
For row, sum of data points mean: tensor(66.74588)
