# Making an LSTM with Pytorch
### By: Lela Bones and Adam Jump

## Resources
   - [Pytorch Documentation](https://pytorch.org/docs/stable/index.html)
   - [Sine LSTM Example](https://github.com/pytorch/examples/tree/master/time_sequence_prediction)
   - [Noisy Sine LSTM Example](https://gist.github.com/spro/ef26915065225df65c1187562eca7ec4)
   - [LSTM tutorial](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/02-intermediate/recurrent_neural_network/main.py#L38-L56)

### Goal
------------------------
In this notebook we aim to dissect the PyTorch library, build an LSTM model, and test it on sample data.
Then we plan to train it on our brainwave data and run a thorough parameter test to determine what the best parameters are for brainwave data.

### Step One: Generating toy data


In [10]:
#Bringing in libraries we plan on using
import numpy as np
import torch

# Fix the NumPy seed so the random per-row offsets below are reproducible.
np.random.seed(2)

T = 20    # scale factor: the wave is evaluated as cos(x / T)
L = 1000  # number of time steps in each sequence (columns)
N = 100   # number of sequences (rows)

# Allocate an N x L float64 array that will hold the sample positions.
x = np.empty((N, L), 'float64')
# Each row is 0..L-1 shifted by one random integer offset per row, drawn from
# [-4*T, 4*T), so every sequence starts at a different phase.
x[:] = np.arange(L) + np.random.randint(-4 * T, 4 * T, N).reshape(N, 1)
# Cosine of the scaled positions; x is float64, so the result is float64 too.
data = np.cos(x / T)
# Persist the array for the training cell. The `with` block guarantees the
# file handle is closed (and flushed) as soon as the write finishes.
with open('traindata.pt', 'wb') as f:
    torch.save(data, f)

### Step Two: Make an LSTM

This is a very basic implementation of an LSTM; we plan on making this more robust in the future.

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
class Sequence(nn.Module):
    """Two stacked LSTM cells followed by a linear read-out.

    The model consumes a batch of scalar sequences one time step at a time and
    emits one scalar per step. It can optionally keep running past the end of
    the input by feeding its own last output back in.

    Args:
        hidden_size: width of the hidden and cell state of both LSTM cells.
            Defaults to 51, the value this notebook originally hard-coded.
    """

    def __init__(self, hidden_size: int = 51) -> None:
        # nn.Module must be initialised before sub-modules are registered.
        super().__init__()
        self.hidden_size = hidden_size
        # First cell: one scalar feature per step -> hidden_size.
        self.lstm1 = nn.LSTMCell(1, hidden_size)
        # Second cell: stacked on the hidden state of the first.
        self.lstm2 = nn.LSTMCell(hidden_size, hidden_size)
        # Read-out y = x A^T + b mapping the top hidden state to one scalar.
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, input: torch.Tensor, future: int = 0) -> torch.Tensor:
        """Run the network over ``input`` and optionally extrapolate.

        Args:
            input: tensor of shape (batch, steps); each column is one time step.
            future: number of extra steps to generate after the input ends,
                feeding each prediction back in as the next input.

        Returns:
            Tensor of shape (batch, steps + future) holding one prediction per
            processed step.
        """
        batch_size = input.size(0)

        def zero_state() -> torch.Tensor:
            # States follow the input's dtype and device, so the module works
            # for float32/float64 and CPU/GPU inputs alike.
            return torch.zeros(batch_size, self.hidden_size,
                               dtype=input.dtype, device=input.device)

        # Hidden and cell state of the first and second LSTM cell, all zero
        # at the start of every forward pass.
        h_t, c_t = zero_state(), zero_state()
        h_t2, c_t2 = zero_state(), zero_state()

        outputs = []
        # Split along the time axis into `steps` pieces of shape (batch, 1).
        for input_t in input.chunk(input.size(1), dim=1):
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs.append(output)
        # Extrapolation: the previous prediction becomes the next input.
        for _ in range(future):
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs.append(output)
        # Each element is (batch, 1); join them along the time axis.
        return torch.cat(outputs, dim=1)


if __name__ == '__main__':
    # Seed both RNGs so weight initialisation and training are reproducible.
    np.random.seed(0)
    torch.manual_seed(0)
    # Load the saved sequences. Rows 3 onward are used for training and the
    # first three rows for testing; targets are the inputs shifted one step
    # ahead, so the model learns to predict the next value.
    # NOTE(review): newer PyTorch releases may refuse to unpickle a NumPy array
    # with the default torch.load settings -- confirm against the installed version.
    data = torch.load('traindata.pt')
    input = torch.from_numpy(data[3:, :-1])
    target = torch.from_numpy(data[3:, 1:])
    test_input = torch.from_numpy(data[:3, :-1])
    test_target = torch.from_numpy(data[:3, 1:])
    # Build the model in double precision to match the float64 data.
    seq = Sequence()
    seq.double()
    criterion = nn.MSELoss()
    # use LBFGS as optimizer since we can load the whole data to train
    optimizer = optim.LBFGS(seq.parameters(), lr=0.8)
    # Number of steps to extrapolate beyond the test input.
    future = 1000

    def closure():
        """Re-evaluate the training loss; LBFGS may call this several times per step."""
        optimizer.zero_grad()
        out = seq(input)
        loss = criterion(out, target)
        print('loss:', loss.item())
        loss.backward()
        return loss

    def draw(yi, color):
        """Plot one sequence: solid over the known range, dotted over the extrapolation."""
        known = input.size(1)
        plt.plot(np.arange(known), yi[:known], color, linewidth=2.0)
        plt.plot(np.arange(known, known + future), yi[known:], color + ':', linewidth=2.0)

    # begin to train
    for i in range(15):
        print('STEP: ', i)
        optimizer.step(closure)
        # begin to predict, no need to track gradient here
        with torch.no_grad():
            pred = seq(test_input, future=future)
            # Score only the part of the prediction that has ground truth.
            loss = criterion(pred[:, :test_target.size(1)], test_target)
            print('test loss:', loss.item())
            y = pred.detach().numpy()
        # draw the result
        plt.figure(figsize=(30, 10))
        plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize=30)
        plt.xlabel('x', fontsize=20)
        plt.ylabel('y', fontsize=20)
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)
        draw(y[0], 'r')
        draw(y[1], 'g')
        draw(y[2], 'b')
        plt.savefig('predict%d.pdf' % i)
        # Close this step's figure right away; otherwise every iteration
        # leaves another large figure open in memory.
        plt.close()
    # TODO: save a checkpoint of the trained model (e.g. seq.state_dict()).

STEP:  0
loss: 0.04603479637200151
loss: 0.03541039094816719


KeyboardInterrupt: 

### Step Three: Train LSTM

### Step Four: Predict using trained LSTM

### Step Five: Visualizing LSTM

In [14]:
# Rebuild the training tensors from `data`: rows 3 onward, with the inputs
# dropping the last column and the targets dropping the first, so the targets
# are the inputs shifted one step ahead.
input = torch.from_numpy(data[3:, :-1])
target = torch.from_numpy(data[3:, 1:])
# Inspect the tensor shapes.
# NOTE(review): the recorded output is a single torch.Size([12321, 8]), which
# does not match the 100 x 1000 toy data generated earlier -- presumably `data`
# held a different array when this cell ran, and only the last expression was
# displayed; confirm.
input.shape
target.shape

torch.Size([12321, 8])