# Let's try an LSTM model: a kind of recurrent model that remembers the previous states and uses them to work out the next one.

In [None]:
# Standard library: os for the CPU count, time to check that the GPU
# optimization is actually helping

import os
import time

# For number-crunching

import numpy as np
import pandas as pd
import scipy.stats as stats

# Deep learning library of choice: PyTorch

import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader,TensorDataset

# Some graphing


In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# Count the logical CPUs (threads) available. Most CPUs expose twice as many
# threads as physical cores, so the count is halved where it is used to set
# the number of data-loading workers.
# Set this number to one if you don't want to multi-thread the process.
# os.cpu_count() returns None when the count cannot be determined, so fall
# back to 1 to keep the later arithmetic on cpuCount from failing.

cpuCount = os.cpu_count() or 1
print(cpuCount)

In [None]:
# Pull the data from the CSV.
#
# dataperiods sets how many periods back the data will consider; for example,
# 1 means only the previous period is looked at. The maximum is currently 8:
# to try more periods, longer datalets must first be prepared in the data
# prep folders.

dataperiods = 8

# The datalet file number is one more than the number of periods considered.
datalet_path = '../dataletswgv/datalet{}.csv'.format(dataperiods + 1)
df = pd.read_csv(datalet_path, index_col=False)

In [None]:
# Split the data into three parts: evaluation, test and training. Two chained
# calls to train_test_split are used purely for convenience.
# NOTE(review): `data` and `labels` are not defined in the visible cells -
# confirm where they are built (TensorDataset below needs them to be tensors).

# Hold out 1% of all rows for evaluation ...
traintemp_data, eval_data, traintemp_labels, eval_labels = train_test_split(
    data, labels, test_size=.01
)

# ... then 1.1% of what is left for testing; the rest is the training set.
train_data, test_data, train_labels, test_labels = train_test_split(
    traintemp_data, traintemp_labels, test_size=.011
)

# Wrap the splits in TensorDatasets so the PyTorch DataLoader can cut them
# into the mini-batches that are run through the model.
train_data = TensorDataset(train_data, train_labels)
test_data = TensorDataset(test_data, test_labels)

# Best to keep batch sizes to powers of two for speed reasons; adjust x as
# needed for your own memory constraints.
x = 11
batches = 2 ** x

# Training batches: fixed order, incomplete final batch dropped, and half of
# the reported CPU count used as loader workers.
train_loader = DataLoader(
    train_data,
    batch_size=batches,
    shuffle=False,
    drop_last=True,
    num_workers=cpuCount // 2,
)

# The whole test set is served as one single batch.
test_loader = DataLoader(test_data, batch_size=len(test_data))

In [None]:
class LSTMnet(nn.Module):
    """Two chained LSTM blocks followed by a linear output layer.

    Args:
        input_size: number of features in each time step of the input.
        num_hidden: size of the hidden state of both LSTM blocks.
        num_layers: number of recurrent layers inside each LSTM block.
        num_classes: number of scores produced per sample (default 4, the
            value that used to be hard-coded).
    """

    def __init__(self, input_size, num_hidden, num_layers, num_classes=4):
        super().__init__()

        # store parameters
        self.input_size = input_size
        self.num_hidden = num_hidden
        self.num_layers = num_layers

        # Recurrent layers. batch_first=True because forward() treats
        # dimension 0 as the batch and dimension 1 as the time axis.
        self.lstm1 = nn.LSTM(input_size, num_hidden, num_layers, batch_first=True)
        self.lstm2 = nn.LSTM(num_hidden, num_hidden, num_layers, batch_first=True)

        # linear layer for output
        self.out = nn.Linear(num_hidden, num_classes)

    def forward(self, x):
        """Return one row of ``num_classes`` scores per sample.

        Args:
            x: tensor of shape (batch_size, seq_length, input_size).

        Returns:
            Tensor of shape (batch_size, num_classes) computed from the last
            time step of the second LSTM block.
        """
        # Zero initial hidden and cell states, created on the same device and
        # with the same dtype as the input so the model also works on the GPU.
        state_shape = (self.num_layers, x.size(0), self.num_hidden)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # Run the sequence through both LSTM blocks in turn. Every call starts
        # from fresh zero states, so nothing is carried over between batches.
        out, _ = self.lstm1(x, (h0, c0))
        out, _ = self.lstm2(out, (h0, c0))

        # Keep only the last time step: (batch_size, num_hidden)
        out = out[:, -1, :]

        # Map the final state to the output shape (batch_size, num_classes)
        return self.out(out)

In [None]:
def trainthemodel(learning):
    """Train the global ``model`` and record loss and accuracy per epoch.

    Reads the module-level names ``model``, ``numofepochs``, ``train_loader``,
    ``test_loader`` and ``device``.

    Args:
        learning: learning rate handed to the SGD optimizer.

    Returns:
        A tuple ``(trainAcc, testAcc, losses, model)``:
        trainAcc is a list with the mean training-batch accuracy (percent)
        of each epoch, testAcc a list with the accuracy (percent, plain
        floats) on the first test batch after each epoch, losses a 1-D
        tensor with the mean training loss of each epoch, and model the
        trained global model.
    """
    # Loss function and optimizer; cross entropy loss is used because this is
    # a classification problem.
    lossfun = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning)

    # initialize losses
    losses = torch.zeros(numofepochs)
    trainAcc = []
    testAcc = []

    model.to(device)

    # now loop over the training epochs to train the model
    for epoch in range(numofepochs):

        # switch on training mode
        model.train()

        # loop over training data batches
        batchAcc = []
        batchLoss = []
        for X, y in train_loader:

            X = X.to(device)
            y = y.to(device)

            # forward pass and loss
            yHat = model(X)
            loss = lossfun(yHat, y)

            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # loss from this batch
            batchLoss.append(loss.item())

            yHat = yHat.cpu()
            y = y.cpu()

            # compute training accuracy for this batch
            batchAcc.append(100*torch.mean((torch.argmax(yHat, axis=1) == y).float()).item())

        # now that we've trained through the batches, get their average training accuracy
        trainAcc.append(np.mean(batchAcc))

        # and get average losses across the batches
        losses[epoch] = np.mean(batchLoss)

        ### test accuracy

        # Switch to evaluation mode for the test pass; together with no_grad
        # below this means the test data is only scored, never trained on.
        model.eval()
        X, y = next(iter(test_loader))  # extract X,y from test dataloader

        X = X.to(device)
        y = y.to(device)

        with torch.no_grad():  # deactivates autograd
            yHat = model(X)

        yHat = yHat.cpu()
        y = y.cpu()

        # .item() so testAcc holds plain floats, consistent with trainAcc
        testAcc.append(100*torch.mean((torch.argmax(yHat, axis=1) == y).float()).item())

        print('epoch {} done at time {} '.format(epoch,time.perf_counter()))

    # function output
    return trainAcc, testAcc, losses, model

In [None]:
# Time to run the model. The parameters come first; you might want to raise
# the number of epochs if it isn't reaching the level of accuracy desired.

numofepochs = 4
learningrate = 0.01

# Every column of df except three is counted as a model input.
input_dim = df.shape[1] - 3
output_dim = 5

# NOTE(review): ANN is not defined in the visible part of this notebook; the
# class defined above is LSTMnet(input_size, num_hidden, num_layers), whose
# output layer has 4 units rather than the 5 requested here - confirm which
# model and output width are intended.
model = ANN(Input_dim=input_dim, Output_dim=output_dim)

# trainthemodel reads model and numofepochs from the notebook globals.
trainAcc, testAcc, losses, model = trainthemodel(learningrate)