In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import math 

In [2]:
''' 
    Read dataset for all 24 weeks
'''

# Dataset layout: one text file per week, ROWS_PER_WEEK rows per file,
# of which only the first N_FEATURES columns are kept.
N_WEEKS = 24
ROWS_PER_WEEK = 2016
N_FEATURES = 144

# Pre-allocate the full matrix; weeks are stacked vertically in file order.
data = np.zeros((N_WEEKS * ROWS_PER_WEEK, N_FEATURES))

for week in range(1, N_WEEKS + 1):

    # Format the week number with a leading zero (1 -> "01")
    number = f"{week:02d}"

    # Path of file
    base_path = f"data/X{number}/X{number}"

    # Load file and keep the first N_FEATURES entries from each line
    week_data = np.loadtxt(base_path)[:, :N_FEATURES]

    # Fail loudly on a truncated/odd file: a (1, N_FEATURES) array would
    # otherwise be silently broadcast across the whole week slice.
    if week_data.shape != (ROWS_PER_WEEK, N_FEATURES):
        raise ValueError(
            f"{base_path}: expected shape {(ROWS_PER_WEEK, N_FEATURES)}, "
            f"got {week_data.shape}"
        )

    # Store this week's rows in its slot of the full matrix
    start = (week - 1) * ROWS_PER_WEEK
    data[start:start + ROWS_PER_WEEK, :] = week_data


In [4]:
from sklearn.preprocessing import MinMaxScaler

'''
    Matrix Normalization
'''

# Scaler instance configured to map values into the [0, 1] range; it is
# handed to normalize_matrix, which re-fits it on each row.
scaler = MinMaxScaler(feature_range=(0, 1)) 

def normalize_matrix(scaler, dataset):
    """Scale every row of ``dataset`` independently with ``scaler``.

    Each row is reshaped into a single-feature column, passed through
    ``scaler.fit_transform`` and written back as a row of the result.

    Parameters
    ----------
    scaler : object
        Any object exposing ``fit_transform(column)`` for a column of
        shape ``(n_columns, 1)`` (e.g. the ``MinMaxScaler`` created above).
    dataset : array-like, 2-D
        Matrix to normalize; any number of columns is accepted.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``dataset``.

    Notes
    -----
    The scaler is re-fitted on every row, so after the call it only holds
    the statistics of the last row.
    """
    dataset = np.asarray(dataset)
    dataset_norm = np.zeros(dataset.shape)

    for i, row in enumerate(dataset):
        # (n_columns,) -> (n_columns, 1): the scaler treats columns as
        # features, so a row must be presented as one feature column.
        column = np.reshape(row, (-1, 1))
        dataset_norm[i, :] = np.ravel(scaler.fit_transform(column))

    return dataset_norm

# Row-wise normalized copy of the raw matrix; `data` itself is left untouched.
dataset_norm = normalize_matrix(scaler, data)

In [7]:
''' 
    Apply train test split
'''
def train_test_split(dataset, train_ratio):
    """Cut ``dataset`` into a leading training part and a trailing test part.

    The first ``int(len(dataset) * train_ratio)`` rows are returned as the
    training set and all remaining rows as the test set. Row order is kept
    and nothing is shuffled.

    Returns
    -------
    tuple
        ``(train_data, test_data)`` as row slices of ``dataset``.
    """
    n_rows = len(dataset)
    cut = int(n_rows * train_ratio)

    return dataset[:cut, :], dataset[cut:n_rows, :]


# First 80% of the normalized rows for training, the remaining 20% for testing.
train_data, test_data = train_test_split(dataset_norm, 0.8)

In [8]:
''' 
    Windowing the dataset
'''

def create_dataset(dataset, window_size):
    """Turn a 2-D series into (window, next-row) supervised samples.

    For every start index ``s`` in ``range(len(dataset) - window_size)`` the
    input sample is ``dataset[s:s + window_size, :]`` and the target is the
    row immediately after that window, ``dataset[s + window_size, :]``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(dataX, dataY)``: the stacked windows and the stacked target rows.
        Both are empty 1-D arrays when no full window fits.
    """
    n_samples = len(dataset) - window_size

    windows = [dataset[start:start + window_size, :] for start in range(n_samples)]
    targets = [dataset[start + window_size, :] for start in range(n_samples)]

    return np.array(windows), np.array(targets)

# Number of consecutive rows fed to the model to predict the following row.
# Defined once so the train and test windows cannot drift apart.
WINDOW_SIZE = 10

trainX, trainY = create_dataset(train_data, WINDOW_SIZE)
testX, testY = create_dataset(test_data, WINDOW_SIZE)

In [15]:
''' 
    Defining hyperparameters
'''

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

# trainX is (samples, window, features) and trainY is (samples, features),
# as produced by create_dataset.
input_size = trainX.shape[2]  # Number of features per time step
hidden_size = trainX.shape[1]  # Size of the LSTM hidden state (set equal to the window length)
output_size = trainY.shape[1]  # Number of regression targets predicted per sample
learn_rate = 0.001
epochs = 100
num_layers = 1
batch_size = 32
shuffle = False  # don't want to lose the time dependency
num_workers = 4  # Number of subprocesses to use for data loading

cpu


In [11]:
''' 
    Define datasets and dataloaders
'''

# Wrap the windowed numpy arrays as float32 tensors: (inputs, targets) pairs.
train_dataset = torch.utils.data.TensorDataset(torch.FloatTensor(trainX),
                                               torch.FloatTensor(trainY))

# Use the batch_size hyperparameter instead of a hard-coded 1, so the value
# set alongside shuffle/num_workers actually takes effect.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           num_workers=num_workers,
                                           shuffle=shuffle)

test_dataset = torch.utils.data.TensorDataset(torch.FloatTensor(testX),
                                              torch.FloatTensor(testY))

test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          shuffle=shuffle)

In [12]:
''' 
    Define the LSTM Model
'''

class LSTMModel(nn.Module):
    """LSTM followed by a linear layer applied to the last time step.

    Parameters
    ----------
    input_size : int
        Number of features per time step of the input.
    hidden_size : int
        Size of the LSTM hidden state.
    num_layers : int
        Number of stacked LSTM layers.
    output_size : int
        Number of values produced per sample by the final linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_length, input_size) to (batch, output_size)."""
        # No initial state is passed: nn.LSTM then starts from zero hidden and
        # cell states that match the input's dtype and device. Building them
        # by hand with torch.zeros pinned them to the default dtype, which
        # breaks as soon as the model is converted (e.g. .double()).
        out, _ = self.lstm(x)  # out: (batch_size, seq_length, hidden_size)

        # Decode the hidden state of the last time step
        return self.fc(out[:, -1, :])


In [13]:
''' 
    Define training loop
'''

import time

def train(model, train_loader, epochs, criterion, optimizer):
    """Train ``model`` and return the mean training loss of every epoch.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimise; called as ``model(inputs)``.
    train_loader : iterable
        Yields ``(inputs, targets)`` batches.
    epochs : int
        Number of passes over ``train_loader``.
    criterion : callable
        Loss function called as ``criterion(outputs, targets)``.
    optimizer : torch.optim.Optimizer
        Optimizer bound to the parameters of ``model``.

    Returns
    -------
    numpy.ndarray
        Array of length ``epochs`` holding, for each epoch, the loss averaged
        over its batches (NaN for an epoch in which the loader yielded nothing).
    """
    print_interval = 5
    track_losses = np.zeros(epochs)
    start = time.time()

    # Make sure layers such as dropout/batch-norm run in training mode.
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0
        n_batches = 0

        for inputs, targets in train_loader:
            # Clear gradients left over from the previous step
            optimizer.zero_grad()

            # Forward pass and loss
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Compute the gradient and update the network parameters
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n_batches += 1

        # Average over the whole epoch: the last batch alone is always the
        # same few samples when the loader is not shuffled, so it is not a
        # meaningful measure of progress.
        training_loss = running_loss / n_batches if n_batches else float('nan')
        track_losses[epoch] = training_loss

        # Report every `print_interval` epochs (0, 5, 10, ...)
        if epoch % print_interval == 0:
            print('epoch: %4d training loss:%10.3e time:%7.1f'%(epoch, training_loss, time.time()-start))

    return track_losses


In [None]:
''' 
    Define optimizer, loss function, and call model
'''
# Instantiate the network from the hyperparameters defined earlier
model = LSTMModel(input_size, hidden_size, num_layers, output_size)

# Create optimizer: Adam over this model's parameters, so it must be paired
# with `model` when training
optimizer = optim.Adam(model.parameters(), lr=learn_rate)

# Create loss function: mean squared error between predicted and target rows
criterion = nn.MSELoss()

In [None]:
# Train the model defined above. `optimizer` was built from
# `model.parameters()`, so it has to be passed together with `model`
# and its `train_loader`; train() requires all five arguments.
track_losses_norm1 = train(model, train_loader, epochs, criterion, optimizer)