In [2]:
import torch

print(torch.backends.mps.is_available()) # MPS can be used right now (PyTorch docs: requires macOS 12.3+ and an MPS-capable device)
print(torch.backends.mps.is_built()) # this PyTorch build was compiled with MPS support (does not by itself mean a device is usable)

True
True


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

import lightning as L
from torch.utils.data import TensorDataset, DataLoader

In [18]:
class LSTM(L.LightningModule):
    """Hand-written LSTM with a single unit whose weights and biases are scalars.

    Each gate owns two weights (one applied to the short-term memory, one
    applied to the current input value) and one bias:

    * ``wlr1``/``wlr2``/``blr1`` -- fraction of the long-term memory to keep.
    * ``wpr1``/``wpr2``/``bpr1`` -- fraction of the candidate memory to add.
    * ``wp1``/``wp2``/``bp1``    -- the candidate long-term memory itself.
    * ``wo1``/``wo2``/``bo1``    -- fraction of the long-term memory to output.
    """

    def __init__(self):
        super().__init__()

        # Weights start as draws from a standard normal; biases start at zero.
        mean = torch.tensor(0.0)
        std = torch.tensor(1.0)

        self.wlr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wlr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.blr1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.wpr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wpr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bpr1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.wp1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wp2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bp1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.wo1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wo2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bo1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

    def lstm_unit(self, input_value, long_memory, short_memory):
        """Run one LSTM step.

        Args:
            input_value: value observed at the current step.
            long_memory: long-term memory carried in from the previous step.
            short_memory: short-term memory carried in from the previous step.

        Returns:
            ``[updated_long_memory, updated_short_memory]``.
        """
        # The input is weighted by wlr2 here (not wpr2), so this gate has its
        # own input weight and wlr2 actually takes part in training.
        long_remember_percent = torch.sigmoid(
            (short_memory * self.wlr1) + (input_value * self.wlr2) + self.blr1
        )
        potential_remember_percent = torch.sigmoid(
            (short_memory * self.wpr1) + (input_value * self.wpr2) + self.bpr1
        )
        potential_memory = torch.tanh(
            (short_memory * self.wp1) + (input_value * self.wp2) + self.bp1
        )
        # Keep a fraction of the old long-term memory and add a fraction of the candidate.
        updated_long_memory = (
            (long_memory * long_remember_percent)
            + (potential_remember_percent * potential_memory)
        )
        output_percent = torch.sigmoid(
            (short_memory * self.wo1) + (input_value * self.wo2) + self.bo1
        )
        updated_short_memory = torch.tanh(updated_long_memory) * output_percent

        return [updated_long_memory, updated_short_memory]

    def forward(self, input):
        """Unroll the unit over the first four values of ``input``.

        Both memories start at 0. Returns the short-term memory after the
        fourth step, which serves as the prediction.
        """
        long_memory = 0
        short_memory = 0

        # All four indices are read up front, so a sequence shorter than four
        # values still fails with an IndexError before any step runs.
        for day_value in (input[0], input[1], input[2], input[3]):
            long_memory, short_memory = self.lstm_unit(day_value, long_memory, short_memory)

        return short_memory

    def configure_optimizers(self):
        """Optimise every parameter with Adam at a learning rate of 0.1."""
        return Adam(self.parameters(), lr=0.1)

    def training_step(self, batch, batch_idx):
        """Compute and log the squared error for one batch.

        Only the first sequence of the batch (``input_i[0]``) is used. The
        prediction is logged as ``out_0`` when the label equals 0 and as
        ``out_1`` otherwise.
        """
        input_i, label_i = batch
        output_i = self.forward(input_i[0])
        loss = (output_i - label_i) ** 2

        self.log("train_loss", loss)

        metric_name = "out_0" if label_i == 0 else "out_1"
        self.log(metric_name, output_i)

        return loss
    

In [19]:
# Fresh, untrained hand-written LSTM (weights are random draws, biases are zero).
model = LSTM()

def predict(model):
    """Print the model's prediction next to the observed value for both companies.

    Args:
        model: callable that maps a 1-D tensor of four daily values to a
            tensor prediction (the result is detached before printing).
    """
    # "\n" yields the intended leading blank line; the former "\C" was an
    # invalid escape sequence that printed a literal backslash.
    print("\nCompare observed and predicted values")
    print("Company A: Observed = 0, Predicted:", model(torch.tensor([0,0.5,0.25,1])).detach())

    print("Company B: Observed = 1, Predicted =",model(torch.tensor([1,0.5,0.25,1])).detach())
    
# Baseline: predictions from the model before any training has run.
predict(model)

\Compare observed and predicted values
Company A: Observed = 0, Predicted: tensor(-0.4742)
Company B: Observed = 1, Predicted = tensor(-0.5620)


In [20]:
# One row per company (Company A, then Company B); each row holds four daily values.
inputs = torch.tensor([[0,0.5,0.25,1],[1,0.5,0.25,1]])
# Observed target for each row, in the same order as `inputs`.
labels = torch.tensor([0,1])

# Pair every sequence with its label. DataLoader is left on its defaults
# (PyTorch documents batch_size=1), so each batch carries a single sequence,
# which training_step reads as input_i[0].
dataset = TensorDataset(inputs,labels)
dataloader = DataLoader(dataset)

In [22]:
# Train the current `model` on the two-sample dataloader for a fixed budget of 300 epochs.
trainer = L.Trainer(max_epochs=300)
trainer.fit(model,train_dataloaders=dataloader)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type | Params
--------------------------------------
  | other params | n/a  | 12    
--------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
/Users/ms/anaconda3/envs/neuroimagen/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/Users/ms/anaconda3/envs/neuroimagen/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_ste

Epoch 299: 100%|██████████| 2/2 [00:00<00:00, 60.54it/s, v_num=12]

`Trainer.fit` stopped: `max_epochs=300` reached.


Epoch 299: 100%|██████████| 2/2 [00:00<00:00, 49.59it/s, v_num=12]


In [23]:
# Predictions after the 300-epoch fit.
predict(model)

\Compare observed and predicted values
Company A: Observed = 0, Predicted: tensor(0.0007)
Company B: Observed = 1, Predicted = tensor(0.9851)


In [9]:
# Path of the best checkpoint recorded by the existing trainer's checkpoint callback
# (requires `trainer` from an earlier cell to be defined).
path_to_best_checkpoint = trainer.checkpoint_callback.best_model_path

# New trainer with a larger epoch budget, meant for resuming from that checkpoint.
trainer = L.Trainer(max_epochs=5000)
# NOTE: the resume call below is commented out, so this cell does not train the model any further.
# trainer.fit(model,train_dataloaders=dataloader,ckpt_path=path_to_best_checkpoint)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [10]:
# Print the predictions of the current `model` again.
predict(model)

\Compare observed and predicted values
Company A: Observed = 0, Predicted: tensor(0.2347)
Company B: Observed = 1, Predicted = tensor(0.2824)


In [11]:
from lightning.pytorch.utilities.types import OptimizerLRScheduler


class LightningLSTM(L.LightningModule):
    """Sequence model backed by ``nn.LSTM`` with one input feature and one hidden value."""

    def __init__(self):
        super().__init__()
        # input_size: number of features per time step (a single stock value).
        # hidden_size: number of values the LSTM emits per time step.
        self.lstm = nn.LSTM(input_size=1, hidden_size=1)

    def forward(self, input):
        """Return the LSTM output at the final time step of ``input``."""
        # Reshape the sequence into a column: one row per time step, one feature per row.
        as_column = input.view(len(input), 1)

        # The first result holds the output for every time step; the second
        # (final hidden/cell state) is not needed here.
        per_step_output, _ = self.lstm(as_column)

        # The last row is the value produced after the whole sequence was read.
        return per_step_output[-1]

    def configure_optimizers(self):
        """Optimise every parameter with Adam at a learning rate of 0.1."""
        return Adam(self.parameters(), lr=0.1)

    def training_step(self, batch, batch_idx):
        """Compute and log the squared error for the first sequence of the batch."""
        input_i, label_i = batch
        output_i = self.forward(input_i[0])
        loss = (output_i - label_i) ** 2

        self.log("train_loss", loss)

        # Log the prediction under a key that depends on the observed label.
        metric_name = "out_0" if label_i == 0 else "out_1"
        self.log(metric_name, output_i)

        return loss

In [14]:
# Rebind `model` to the nn.LSTM-based implementation; `predict` only calls the model, so it is reused as-is.
model = LightningLSTM()

# Predictions before this model has been trained.
predict(model)

\Compare observed and predicted values
Company A: Observed = 0, Predicted: tensor([0.2397])
Company B: Observed = 1, Predicted = tensor([0.2169])


In [15]:
# Each epoch has only two batches, so log every 2 steps rather than relying on the
# default interval (the earlier run warned that the default of 50 exceeds the batch count).
trainer = L.Trainer(max_epochs=300,log_every_n_steps=2)
trainer.fit(model,train_dataloaders=dataloader)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name | Type | Params
------------------------------
0 | lstm | LSTM | 16    
------------------------------
16        Trainable params
0         Non-trainable params
16        Total params
0.000     Total estimated model params size (MB)
/Users/ms/anaconda3/envs/neuroimagen/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Epoch 299: 100%|██████████| 2/2 [00:00<00:00, 176.81it/s, v_num=11]

`Trainer.fit` stopped: `max_epochs=300` reached.


Epoch 299: 100%|██████████| 2/2 [00:00<00:00, 134.55it/s, v_num=11]


In [17]:
# Predictions from the nn.LSTM-based model after training.
predict(model)

\Compare observed and predicted values
Company A: Observed = 0, Predicted: tensor([0.0001])
Company B: Observed = 1, Predicted = tensor([0.9850])
