In [3]:
#import libraries
import numpy as np
import pandas as pd
from torch import nn
from torch.utils.data import DataLoader
from Dataset import CustomTimeSeriesDataset
from torch.utils.data import random_split
import torch.nn.functional as F
import torch
import os

In [17]:
# Build the dataset, split it into training/validation subsets and wrap both in DataLoaders

# Fix the random seeds so the split and the shuffling order are reproducible
torch.manual_seed(1)
np.random.seed(1)

# CustomTimeSeriesDataset is the project's Dataset class (imported from Dataset.py)
dataset = CustomTimeSeriesDataset('ptbdb_train.csv')

# Split sizes: 80% of the samples for training, the remaining 20% for validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# random_split draws from the global torch RNG seeded above
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Only the training data is shuffled. Validation accuracy does not depend on
# sample order, and leaving the validation loader unshuffled avoids drawing
# from the global RNG on every evaluation pass.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [12]:
#Define my LSTM model
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer and a sigmoid.

    The network reads a batch-first sequence, keeps the LSTM output at the
    last time step and maps it to a single value in [0, 1].

    Args:
        input_size: number of features per time step.
        hidden_size: width of each LSTM layer.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one sigmoid output per sequence in the batch.

        Args:
            x: tensor indexed as (batch, time, feature), as required by
               ``batch_first=True`` and the ``out[:, -1, :]`` slice below.

        Returns:
            Tensor with one row per batch element and a single column.
        """
        # Zero initial states created on the input's device AND dtype, so the
        # model also works after .double()/.half() instead of failing on a
        # float32 state tensor.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        c0 = torch.zeros_like(h0)

        out, _ = self.lstm(x, (h0, c0))
        # Only the last time step feeds the classifier head
        out = self.fc(out[:, -1, :])
        return torch.sigmoid(out)

In [20]:
# Training setup: epoch budget, compute device, network, loss and optimizer

# How many full passes over the training data to run
n_epochs = 5

# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Two stacked LSTM layers of width 100 reading one feature per time step
model = LSTM(input_size=1, hidden_size=100, num_layers=2)

# Binary cross-entropy on the model's sigmoid output, optimised with Adam
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

# Report the chosen device, then move the model onto it
# (the trailing expression also displays the module summary)
print(device)
model.to(device)

cuda


LSTM(
  (lstm): LSTM(1, 100, num_layers=2, batch_first=True)
  (fc): Linear(in_features=100, out_features=1, bias=True)
)

In [21]:
# Train for n_epochs; after each epoch measure accuracy on the validation set.
# NOTE(review): the recorded output shows the same validation accuracy after
# every epoch, which suggests the model may be predicting a single class.
# Logging the training loss would help confirm whether it is learning at all.
for epoch in range(n_epochs):
    model.train()  # Set model to training mode

    # Training loop
    for inputs, labels in train_loader:
        # .to(device) is a no-op when the tensor is already on that device,
        # so no CUDA availability guard is needed.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    # Validation loop
    model.eval()  # Set model to evaluation mode
    total = 0
    correct = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            # Threshold the sigmoid output at 0.5
            predicted = torch.round(outputs)
            total += labels.size(0)
            # assumes labels have the same shape as outputs; otherwise the
            # comparison would broadcast - TODO confirm against the Dataset class
            correct += (predicted == labels).sum().item()

    print(f'Epoch {epoch+1}/{n_epochs}, Validation Accuracy: {100 * correct / total}%')

print('Finished Training')


# Save the model parameters, creating the target directory first so that
# torch.save does not fail on a fresh checkout
os.makedirs('Model_Parameters', exist_ok=True)
torch.save(model.state_dict(), os.path.join('Model_Parameters', 'RNN1_parameters.pth'))

Epoch 1/5, Validation Accuracy: 72.2508591065292%
Epoch 2/5, Validation Accuracy: 72.2508591065292%
Epoch 3/5, Validation Accuracy: 72.2508591065292%
Epoch 4/5, Validation Accuracy: 72.2508591065292%
Epoch 5/5, Validation Accuracy: 72.2508591065292%
Finished Training


In [16]:
from sklearn.metrics import roc_auc_score

# Evaluate the saved model on the held-out test file and report ROC AUC

#Prepare test dataset
dataset_test = CustomTimeSeriesDataset('ptbdb_test.csv')
test_loader = DataLoader(dataset_test, batch_size=64, shuffle=False)

# Rebuild the network with the SAME architecture that was trained and saved
# (hidden_size=100, num_layers=2); any other configuration cannot load the
# checkpoint because the parameter shapes and layer count differ.
model = LSTM(input_size=1, hidden_size=100, num_layers=2)
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine
state_dict = torch.load(
    os.path.join('Model_Parameters', 'RNN1_parameters.pth'),
    map_location=device,
)
model.load_state_dict(state_dict)
model.to(device)

# Test loop
model.eval()  # Set model to evaluation mode
all_preds = []
all_labels = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)

        # Store predictions and true labels as flat Python floats so the
        # metric receives plain 1-D sequences
        all_preds.extend(outputs.reshape(-1).cpu().tolist())
        all_labels.extend(labels.reshape(-1).cpu().tolist())

# Compute ROC AUC for the test set
roc_auc = roc_auc_score(all_labels, all_preds)

print(f'Test ROC AUC: {roc_auc}')

Test ROC AUC: 0.5053588166856647
