In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import torch
import torch.nn as nn

In [None]:
# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# repeat on a fresh "Restart & Run All" (as far as the backend is deterministic).
SEED = 42
torch.manual_seed(SEED);

# Use the Apple-silicon GPU (MPS) when this machine / PyTorch build supports it;
# otherwise fall back to the CPU so every later `.to(device)` still works.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'

In [None]:
# Load the monthly series. The code below relies on a DATE column and a
# USPRIV column being present in the CSV.
df = pd.read_csv('../Data/Raw/USTotalPrivate.csv')
df['DATE'] = pd.to_datetime(df['DATE'])

# Month-over-month change. The difference is taken on the COMPLETE series,
# before any rows are removed, so every value is a genuine one-month change.
# (Filtering first would make the first row after the gap a multi-month jump.)
df_diff = df.diff(axis=0)
df_diff['DATE'] = df['DATE']  # diff() also differenced the dates; restore them
df_diff = df_diff.rename(columns={'USPRIV': 'Total_priv', 'DATE': 'Month'})
df_diff = df_diff.iloc[1:, :]  # the first row has no predecessor -> NaN diff

# Exclude the months 2020-04-01 through 2020-08-01 (inclusive) from the data.
# NOTE(review): lag windows built later by row-shifting will still span this
# gap, i.e. a window may mix pre-April and post-August months.
outside_excluded_window = (df_diff['Month'] < '2020-04-01') | (df_diff['Month'] > '2020-08-01')
df_diff = df_diff[outside_excluded_window]
df = df_diff
# df['Total_priv_dir'] = df['Total_priv'].apply(lambda x: 1 if x>=0 else -1) 
# df = df[['Month', 'Total_priv_dir']]

# df.isnull().any().any()
# print(df)
# plt.plot(df['Month'], df['Total_priv'])

In [None]:
from copy import deepcopy as dc

def prepare_dataframe_for_lstm(df, n_steps):
    """Return a lag table for ``Total_priv`` indexed by ``Month``.

    The input frame is not modified. The result keeps the original
    ``Total_priv`` column and adds ``Total_priv(t-1)`` ... ``Total_priv(t-n_steps)``,
    each being ``Total_priv`` shifted down by that many rows. Rows containing
    any missing value (at least the first ``n_steps`` rows) are dropped.

    Args:
        df: DataFrame with a ``Month`` column and a ``Total_priv`` column.
        n_steps: number of lagged columns to create.

    Returns:
        A new DataFrame indexed by ``Month``.
    """
    lagged = df.copy(deep=True).set_index('Month')

    for lag in range(1, n_steps + 1):
        lagged[f'Total_priv(t-{lag})'] = lagged['Total_priv'].shift(lag)

    return lagged.dropna()

# Number of lagged values (rows back) used as inputs for each prediction.
lookback = 7
# One row per month: the current change plus its `lookback` lagged changes.
shifted_df = prepare_dataframe_for_lstm(df, lookback)
# shifted_df.head(20)

In [None]:
# Drop the Month index and work with a plain 2-D NumPy array from here on.
shifted_df_as_np = shifted_df.to_numpy()

# shifted_df_as_np
# Displayed as the cell output: (rows, columns) of the lag table.
shifted_df_as_np.shape
# plt.plot(shifted_df_as_np[:,0])

In [None]:
# from sklearn.preprocessing import MinMaxScaler

# scaler = MinMaxScaler(feature_range=(-1, 1))
# shifted_df_as_np = scaler.fit_transform(shifted_df_as_np)

# shifted_df_as_np.shape

# plt.plot(shifted_df_as_np[:,0])

In [None]:
# Column 0 is the value to predict; the remaining columns are its lags
# (t-1 first, oldest lag last).
y = shifted_df_as_np[:, 0]

# Direction label: 1 where the change is positive, 0 where it is negative or
# exactly zero (np.sign gives -1 / 0 / +1, and the -1 entries are mapped to 0).
y_dir = np.sign(y)
y_dir[y_dir < 0] = 0

# Reverse the lag columns so every row runs from the earliest time step to the
# latest; .copy() materialises the flipped view as a regular array.
X = np.flip(shifted_df_as_np[:, 1:], axis=1).copy()

X.shape, y_dir.shape


In [None]:
# Ordered 80/20 split: the first 80% of rows train the model, the rest test it.
split_index = int(len(X) * 0.8)

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y_dir[:split_index], y_dir[split_index:]

X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Two target classes: 0 and 1 (see y_dir).
num_classes = 2

# Give every time step an explicit feature axis of size 1:
# (samples, lookback) -> (samples, lookback, 1), matching the
# (batch, seq, input_size) layout used by the batch_first recurrent layer.
X_train, X_test = (arr.reshape((-1, lookback, 1)) for arr in (X_train, X_test))

# Labels stay 1-D (one class index per sample).
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Features must be floating point: the recurrent layer's weights are float32
# and it rejects integer inputs (the test features were previously built as
# int64, which also truncated any fractional part).
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

# Labels are class indices (0 / 1); nn.NLLLoss expects them as int64.
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
from torch.utils.data import Dataset

class TimeSeriesDataset(Dataset):
    """Map-style dataset that pairs each input window with its label.

    ``X`` and ``y`` are stored by reference (no copy); item ``i`` of the
    dataset is ``(X[i], y[i])``.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        """Number of samples, taken from the length of ``X``."""
        return len(self.X)

    def __getitem__(self, i):
        """Return the ``(input, label)`` pair stored at position ``i``."""
        sample = (self.X[i], self.y[i])
        return sample
    
# Wrap the train / test tensors so a DataLoader can index them sample by sample.
train_dataset = TimeSeriesDataset(X_train, y_train)
test_dataset = TimeSeriesDataset(X_test, y_test)

In [None]:
# Bare expression: the notebook displays the dataset object's repr.
train_dataset

In [None]:
#wrap dataset in data loaders to get the batches
from torch.utils.data import DataLoader

# Samples per mini-batch.
batch_size = 16

# Training batches are reshuffled every epoch; test batches keep their order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Look at the first training batch only, to confirm the tensor dtypes that
# will reach the model and the loss.
for batch in train_loader:
    x_batch = batch[0].to(device)
    y_batch = batch[1].to(device)
    print(x_batch.dtype, y_batch.dtype)
    break

In [None]:
# rnn for classification

class RNN(nn.Module):
    """Stacked ``nn.RNN`` classifier.

    The sequence is run through ``num_layers`` recurrent layers; the output of
    the last time step is mapped by a linear layer to ``num_classes`` scores
    and turned into log-probabilities (pair the output with ``nn.NLLLoss``).

    Args:
        input_size: number of features per time step.
        hidden_size: width of each recurrent layer.
        num_layers: number of stacked recurrent layers.
        num_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # batch_first=True -> inputs are (batch_size, seq_len, input_size)
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        # dim=1 is the class axis of the (batch_size, num_classes) scores.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return log-probabilities of shape ``(batch_size, num_classes)``.

        Args:
            x: float tensor of shape ``(batch_size, seq_len, input_size)``.
        """
        # Zero initial hidden state, created on the input's own device and
        # dtype so the module does not depend on any notebook-level global.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )

        # out: (batch_size, seq_len, hidden_size)
        out, _ = self.rnn(x, h0)

        # Keep only the last time step: (batch_size, hidden_size)
        out = out[:, -1, :]

        # (batch_size, num_classes) scores -> log-probabilities
        out = self.fc(out)
        return self.softmax(out)
    
# Model / training hyperparameters. num_classes comes from an earlier cell.
num_epochs = 10

input_size = 1    # one value per time step
hidden_size = 32  # width of each recurrent layer
num_layers = 5    # stacked recurrent layers

model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
).to(device)

In [None]:
# Optimisation settings.
learning_rate = 0.001
num_epochs = 10

# The model emits log-probabilities, so negative log-likelihood is the loss.
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [None]:
# class LSTM(nn.Module):
#     def __init__(self, input_size, hidden_size, num_stacked_layers):
#         super().__init__()
#         self.hidden_size = hidden_size
#         self.num_stacked_layers = num_stacked_layers

#         self.lstm = nn.LSTM(input_size, hidden_size, num_stacked_layers, 
#                             batch_first=True)
        
#         self.fc = nn.Linear(hidden_size, 1)

#     def forward(self, x):
#         batch_size = x.size(0)
#         h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
#         c0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        
#         out, _ = self.lstm(x, (h0, c0))
#         out = self.fc(out[:, -1, :])
#         return out

# model = LSTM(1, 4, 1)
# model.to(device)
# model

In [None]:
def train_one_epoch(log_every=20):
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level names ``model``, ``train_loader``,
    ``loss_function``, ``optimizer``, ``device`` and ``epoch`` (the current
    epoch index, read only for the progress message).

    Args:
        log_every: print the mean loss after every ``log_every`` batches.
            The mean is taken over exactly those batches.

    Raises:
        ValueError: if ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError('log_every must be at least 1')

    model.train(True)
    print(f'Epoch: {epoch + 1}')
    running_loss = 0.0

    for batch_index, batch in enumerate(train_loader):
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        output = model(x_batch)
        # The loss expects integer class indices as targets.
        loss = loss_function(output, y_batch.long())
        running_loss += loss.item()

        # Clear old gradients first so nothing left over from earlier calls
        # leaks into this update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_index % log_every == log_every - 1:
            # Average over the number of batches actually accumulated.
            avg_loss_across_batches = running_loss / log_every
            print('Batch {0}, Loss: {1:.3f}'.format(batch_index + 1,
                                                    avg_loss_across_batches))
            running_loss = 0.0
    print()

In [None]:
def validate_one_epoch():
    """Print the mean per-batch loss of ``model`` over ``test_loader``.

    Uses the notebook-level names ``model``, ``test_loader``,
    ``loss_function`` and ``device``. The model is switched to evaluation
    mode and no gradients are recorded; no parameters are updated.
    """
    model.eval()  # same effect as model.train(False)
    total_loss = 0.0

    # Nothing is optimised here, so gradient tracking is turned off throughout.
    with torch.no_grad():
        for batch in test_loader:
            inputs = batch[0].to(device).float()   # model expects float inputs
            targets = batch[1].to(device).long()   # loss expects class indices
            total_loss += loss_function(model(inputs), targets).item()

    mean_loss = total_loss / len(test_loader)

    print('Val Loss: {0:.3f}'.format(mean_loss))
    print('***************************************************')
    print()

In [None]:
# batch= next(iter(train_loader))
# X_batch, y_batch = batch[0].to(device), batch[1].to(device)
# print(X_batch.shape)
# print(y_batch.shape)
# output = model(X_batch)
# print(output)
# print(output.shape)
# print(y_batch.shape)
# loss_function = nn.NLLLoss()
# print(y_batch)
# loss = loss_function(output, y_batch)

In [None]:
# `epoch` must stay a notebook-level loop variable: train_one_epoch() reads it
# as a global when printing the epoch number.
for epoch in range(num_epochs):
    train_one_epoch()
    validate_one_epoch()

In [None]:
# Score the trained model on the training windows; no gradients are needed.
with torch.no_grad():
    predicted = model(X_train.to(device)).to('cpu').numpy()

# Predicted class = column index of the largest log-probability in each row.
max_ind = predicted.argmax(axis=1)

# Labels and predictions are both 0/1, so the summed absolute difference
# divided by the sample count is the fraction of misclassified samples.
error = np.abs(y_train.numpy() - max_ind).sum() / len(max_ind)
print(f"training data error is {error}")

In [None]:
# Score the trained model on the held-out windows; no gradients are needed.
with torch.no_grad():
    # Cast to float before the forward pass: the recurrent layer only accepts
    # floating-point input (validate_one_epoch applies the same cast).
    predicted = model(X_test.to(device).float()).to('cpu').numpy()

# Predicted class = column index of the largest log-probability in each row.
max_ind = np.argmax(predicted, axis=1)

# Labels and predictions are both 0/1, so the summed absolute difference
# divided by the sample count is the fraction of misclassified samples.
error = np.sum(abs(y_test.numpy() - max_ind)) / len(max_ind)
print(f"test data error is {error}")

In [None]:
# train_predictions = predicted.flatten()

# #get the original scale
# dummies = np.zeros((X_train.shape[0], lookback+1))
# dummies[:, 0] = train_predictions
# dummies = scaler.inverse_transform(dummies)

# train_predictions = dc(dummies[:, 0])
# # train_predictions

In [None]:
# dummies = np.zeros((X_train.shape[0], lookback+1))
# dummies[:, 0] = y_train.flatten()
# dummies = scaler.inverse_transform(dummies)

# new_y_train = dc(dummies[:, 0])
# # new_y_train

In [None]:
# plt.plot(y_train, label='Actual')
# plt.plot(train_predictions, label='Predicted')
# plt.xlabel('Month')
# plt.ylabel('Total private')
# plt.title('Training data')
# plt.legend()
# plt.show()

In [None]:
# test_predictions = model(X_test.to(device)).detach().cpu().numpy().flatten()

# dummies = np.zeros((X_test.shape[0], lookback+1))
# dummies[:, 0] = test_predictions
# dummies = scaler.inverse_transform(dummies)

# test_predictions = dc(dummies[:, 0])
# # test_predictions

In [None]:
# dummies = np.zeros((X_test.shape[0], lookback+1))
# dummies[:, 0] = y_test.flatten()
# dummies = scaler.inverse_transform(dummies)

# new_y_test = dc(dummies[:, 0])
# new_y_test

In [None]:
# plt.plot(new_y_test, label='Actual')
# plt.plot(test_predictions, label='Predicted')
# plt.xlabel('Month')
# plt.ylabel('Total private')
# plt.title('Testing data')
# plt.legend()
# plt.show()