<a href="https://colab.research.google.com/github/rsethi21/recurrentNeuralNetwork/blob/main/practiceLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Tutorial from StatQuest: https://www.youtube.com/watch?v=RHGiXPuo_pI

Explanation: https://colah.github.io/posts/2015-08-Understanding-LSTMs/

Explanation of number of units and layers: https://datascience.stackexchange.com/questions/24909/the-model-of-lstm-with-more-than-one-unit

IDPs Motivation Paper: https://pkhlab.sites.luc.edu/papers/Myofilament-associated_proteins.pdf

Adapt to incorporate multivariate data

# Create Model

In [None]:
# installations
!pip3 install lightning

In [None]:
# imports
import torch # allows us to create tensors for inputs and weights
import torch.nn as nn # allows us to combine all tensors into a neural network
import torch.nn.functional as f # activation functions
from torch.optim import Adam # fit neural network to data; similar to SGD but faster

import lightning as L # allows to streamline training process
from torch.utils.data import TensorDataset, DataLoader # allows us to manage the tensors from the neural network

In [None]:
# create class for new model; inherits from lightning
class LSTMHardCoded(L.LightningModule):
  """Single LSTM unit whose weights and biases are written out by hand.

  Every gate owns two scalar weights (one multiplies the short-term memory,
  one multiplies the current input) and one scalar bias:

  * ``wlr1`` / ``wlr2`` / ``blr1`` -- forget gate
  * ``wpr1`` / ``wpr2`` / ``bpr1`` -- input gate
  * ``wp1``  / ``wp2``  / ``bp1``  -- candidate long-term memory
  * ``wo1``  / ``wo2``  / ``bo1``  -- output gate
  """

  def __init__(self):
    """Create the twelve trainable scalars.

    Weights are drawn from a standard normal distribution; biases start at 0.
    """
    super().__init__()  # initialise the LightningModule base class
    mean = torch.tensor(0.0)  # standard normal distribution mean
    std = torch.tensor(1.0)  # standard normal distribution standard deviation

    # forget gate
    self.wlr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
    self.wlr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
    self.blr1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

    # input gate
    self.wpr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
    self.wpr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
    self.bpr1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

    # candidate long-term memory
    self.wp1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
    self.wp2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
    self.bp1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

    # output gate
    self.wo1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
    self.wo2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
    self.bo1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

  def lstm_unit(self, input_value, long_memory, short_memory):
    """Apply one LSTM step.

    input_value: current item of the sequence
    long_memory: long-term memory (cell state) before this step
    short_memory: short-term memory (hidden state) before this step

    Returns ``[updated_long_memory, updated_short_memory]``.
    """
    # stage 1 (forget gate) --> percentage of the old long-term memory to keep
    long_remember_percent = torch.sigmoid(
        short_memory * self.wlr1 + input_value * self.wlr2 + self.blr1
    )

    # stage 2 (input gate) --> percentage of the candidate memory to store ...
    potential_long_remember_percent = torch.sigmoid(
        short_memory * self.wpr1 + input_value * self.wpr2 + self.bpr1
    )
    # ... and the candidate memory itself, squashed to (-1, 1) by tanh
    potential_memory = torch.tanh(
        short_memory * self.wp1 + input_value * self.wp2 + self.bp1
    )

    updated_long_memory = (
        long_memory * long_remember_percent
        + potential_long_remember_percent * potential_memory
    )

    # stage 3 (output gate) --> percentage of the new long-term memory exposed
    # as short-term memory. The bias bo1 is included here so the output gate
    # has the same weights-plus-bias form as the other gates and bo1 is trained.
    output_percent = torch.sigmoid(
        short_memory * self.wo1 + input_value * self.wo2 + self.bo1
    )

    updated_short_memory = torch.tanh(updated_long_memory) * output_percent

    return [updated_long_memory, updated_short_memory]

  def forward(self, input):
    """Run one sequence through the unrolled LSTM.

    input: iterable of sequence items, fed to ``lstm_unit`` in order. (The
      parameter name shadows the builtin but is kept so existing keyword
      callers continue to work.)

    Returns the short-term memory after the last item; for an empty sequence
    the initial value ``0`` is returned unchanged.
    """
    long_memory = 0
    short_memory = 0

    # both memories are carried from one step to the next
    for data in input:
      long_memory, short_memory = self.lstm_unit(data, long_memory, short_memory)

    return short_memory

  def configure_optimizers(self):
    """Return an Adam optimizer over all parameters with default settings."""
    return Adam(self.parameters())

  def training_step(self, batch, batch_idx):
    """Compute and log the sum of squared residuals for one batch.

    batch: ``(inputs, labels)`` pair; assumes ``inputs`` holds one sequence
      per row and ``labels`` one target per sequence -- TODO confirm for
      other data layouts.
    batch_idx: index of the batch (unused).
    """
    input_i, label_i = batch
    # Run every sequence of the batch, not just the first one, so the
    # predictions line up one-to-one with the labels for any batch size.
    output_i = torch.stack([self.forward(sequence) for sequence in input_i])
    loss = ((output_i - label_i) ** 2).sum()  # sum of squared residuals

    self.log("train_loss", loss)  # inherited from the lightning module

    return loss


# Create an instance

In [None]:
# Build a fresh model; its weights are drawn at random in __init__, so results differ between runs.
model = LSTMHardCoded()

In [None]:
# Input data instances: two sequences of four values that differ only in the first value.
inputs = torch.tensor(
    [
        [0.0, 0.5, 0.25, 1.0],
        [1.0, 0.5, 0.25, 1.0],
    ]
)
# Outcome labels (regression targets), one per row of `inputs`.
labels = torch.tensor([0.0, 1.0])

In [None]:
# Pair every input sequence with its label so both are retrieved together.
dataset = TensorDataset(inputs, labels)
# Serve the dataset one sample per batch (the DataLoader default, spelled out here);
# shuffling is not enabled, so samples arrive in dataset order.
dataloader = DataLoader(dataset, batch_size=1)

In [None]:
# max_epochs=2000 caps training at 2000 epochs (one epoch = one full pass over the dataloader).
trainer = L.Trainer(max_epochs=2000) # create a trainer
trainer.fit(model, train_dataloaders=dataloader) # train `model` on the batches served by `dataloader`

INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: 
  | Name | Type | Params
------------------------------
------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name | Type | Params
------------------------------
------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=2000` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2000` reached.


In [None]:
# Predict for the first training sequence (its label is 0.); detach() returns the value without the autograd graph.
model(torch.tensor([0., 0.5, 0.25, 1.])).detach()

tensor(0.4263)