In [2]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
from torch.optim import Adam

import lightning as L 
from torch.utils.data import TensorDataset, DataLoader

In [15]:
class LSTMbyHand(L.LightningModule) :
    def __init__(self) : 
        super().__init__()
        mean = torch.tensor(0.0)
        std = torch.tensor(1.0)
        self.wlr1 = nn.Parameter(torch.normal(mean = mean, std = std), requires_grad = True)
        self.wlr2 = nn.Parameter(torch.normal(mean = mean, std = std), requires_grad = True)
        self.blr1 = nn.Parameter(torch.tensor(0.), requires_grad = True)

        self.wpr1 = nn.Parameter(torch.normal(mean = mean, std = std), requires_grad = True)
        self.wpr2 = nn.Parameter(torch.normal(mean = mean, std = std), requires_grad = True)
        self.bpr1 = nn.Parameter(torch.tensor(0.), requires_grad = True)
        
        self.wp1 = nn.Parameter(torch.normal(mean = mean, std = std), requires_grad = True)
        self.wp2 = nn.Parameter(torch.normal(mean = mean, std = std), requires_grad = True)
        self.bp1 = nn.Parameter(torch.tensor(0.), requires_grad = True)
        
        self.wo1 = nn.Parameter(torch.normal(mean = mean, std = std), requires_grad = True)
        self.wo2 = nn.Parameter(torch.normal(mean = mean, std = std), requires_grad = True)
        self.bo1 = nn.Parameter(torch.tensor(0.), requires_grad = True)

    def lstm_unit(self, input_value, long_memory, short_memory) : 
        long_remember_percent = torch.sigmoid((short_memory*self.wlr1) + (input_value*self.wlr2) + self.blr1)  
        potential_remember_percent = torch.sigmoid((short_memory*self.wpr2) + (input_value*self.wpr2) + self.bpr1)
        potential_memory = torch.tanh((short_memory*self.wp1) + (input_value*self.wp2) + self.bp1)
        updated_long_memory = (long_memory*long_remember_percent) + (potential_remember_percent*potential_memory)
        
        output_percent = torch.sigmoid((short_memory*self.wo1) + (input_value*self.wo2) + self.bo1)
        updated_short_memory = torch.tanh(updated_long_memory) * output_percent

        return updated_long_memory, updated_short_memory
    
    def forward(self, input_value) : 
        long_memory, short_memory = 0, 0 
        day1, day2, day3, day4 = input_value
        long_memory, short_memory = self.lstm_unit(day1, long_memory, short_memory)
        long_memory, short_memory = self.lstm_unit(day2, long_memory, short_memory)
        long_memory, short_memory = self.lstm_unit(day3, long_memory, short_memory)
        long_memory, short_memory = self.lstm_unit(day4, long_memory, short_memory)

        return short_memory

    def configure_optimizers(self) : 
        return Adam(self.parameters()) 

    def training_step(self, batch, batch_idx) : 
        input_i, label_i = batch 
        output_i = self.forward(input_i[0])
        loss = (output_i - label_i)**2 

        self.log("train_loss : ", loss) 
        if label_i == 0 : 
            self.log("output_0 : ", output_i)
        else : 
            self.log("output_1 : ", output_i)

        return loss 

In [16]:
model = LSTMbyHand()
print("Company A : observed = 0, predicted = ", model(torch.tensor([0., 0.5, 0.25, 1.])).detach())
print("Company B : observed = 1, predicted = ", model(torch.tensor([1., 0.5, 0.25, 1.])).detach())

Company A : observed = 0, predicted =  tensor(0.0551)
Company B : observed = 1, predicted =  tensor(0.0630)


In [22]:
inputs = torch.tensor([
    [0., 0.5, 0.25, 1.], 
    [1., 0.5, 0.25, 1.]
])
labels = torch.tensor([0., 1.])

dataset = TensorDataset(inputs, labels)
dataloader = DataLoader(dataset)

In [23]:
trainer = L.Trainer(max_epochs=2000)
trainer.fit(model, train_dataloaders=dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/a  | 12     | n/a 
---------------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
c:\Users\ghora\AppData\Local\Programs\Python\Python312\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
c:\Users\ghora\AppData\Local\Programs\Python\Python312\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Tr

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=2000` reached.


In [28]:
print("Company A: Observed = 0, prdicted = ", model(torch.tensor([0., 0.5, 0.25, 1])).detach())
print("Company cd B B: Observed = 1, prdicted = ", model(torch.tensor([1., 0.5, 0.25, 1.])).detach())

Company A: Observed = 0, prdicted =  tensor(0.0029)
Company B: Observed = 1, prdicted =  tensor(0.9293)


```sh
cd lighting_logs 
tensorboard --logdir=lighting_logs/
```

then visit `http://localhost:6006/`

In [31]:
path_to_best_checkpoint = trainer.checkpoint_callback.best_model_path
trainer = L.Trainer(max_epochs = 3000)
trainer.fit(model, train_dataloaders= dataloader, ckpt_path=path_to_best_checkpoint)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at d:\vscode\deep learning\PyTorch\BAM\lightning_logs\version_11\checkpoints\epoch=1999-step=4000.ckpt
c:\Users\ghora\AppData\Local\Programs\Python\Python312\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:362: The dirpath has changed from 'd:\\vscode\\deep learning\\PyTorch\\BAM\\lightning_logs\\version_11\\checkpoints' to 'd:\\vscode\\deep learning\\PyTorch\\BAM\\lightning_logs\\version_12\\checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.

  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/a  | 12     | n/a 
---------------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=3000` reached.


In [32]:
print("Company A: Observed = 0, prdicted = ", model(torch.tensor([0., 0.5, 0.25, 1])).detach())
print("Company cd B B: Observed = 1, prdicted = ", model(torch.tensor([1., 0.5, 0.25, 1.])).detach())

Company A: Observed = 0, prdicted =  tensor(0.0005)
Company cd B B: Observed = 1, prdicted =  tensor(0.9607)


In [33]:
path_to_best_checkpoint = trainer.checkpoint_callback.best_model_path
trainer = L.Trainer(max_epochs = 5000)
trainer.fit(model, train_dataloaders= dataloader, ckpt_path=path_to_best_checkpoint)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at d:\vscode\deep learning\PyTorch\BAM\lightning_logs\version_12\checkpoints\epoch=2999-step=6000.ckpt
c:\Users\ghora\AppData\Local\Programs\Python\Python312\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:362: The dirpath has changed from 'd:\\vscode\\deep learning\\PyTorch\\BAM\\lightning_logs\\version_12\\checkpoints' to 'd:\\vscode\\deep learning\\PyTorch\\BAM\\lightning_logs\\version_13\\checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.

  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/a  | 12     | n/a 
---------------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5000` reached.


In [34]:
print("Company A: Observed = 0, prdicted = ", model(torch.tensor([0., 0.5, 0.25, 1])).detach())
print("Company cd B B: Observed = 1, prdicted = ", model(torch.tensor([1., 0.5, 0.25, 1.])).detach())

Company A: Observed = 0, prdicted =  tensor(3.3784e-05)
Company cd B B: Observed = 1, prdicted =  tensor(0.9841)


In [37]:
class LightningLSTTM(L.LightningModule) : 
    def __init__(self) : 
        super().__init__() 
        self.lstm = nn.LSTM(input_size = 1, hidden_size=1)

    def forward(self, inputs) : 
        input_trans = inputs.view(len(inputs), 1)
        lstm_out, temp = self.lstm(input_trans)

        prediction = lstm_out[-1]
        return prediction
    
    def configure_optimizers(self) : 
        return Adam(self.parameters(), lr = 0.1)
    
    def training_step(self, batch, batch_idx) : 
        input_i, label_i = batch 
        output_i = self.forward(input_i[0])
        loss = (output_i - label_i)**2 

        self.log("train_loss : ", loss) 
        if label_i == 0 : 
            self.log("output_0 : ", output_i)
        else : 
            self.log("output_1 : ", output_i)

        return loss  

In [38]:
new_model = LightningLSTTM()

print("Company A: Observed = 0, prdicted = ", new_model(torch.tensor([0., 0.5, 0.25, 1])).detach())
print("Company cd B B: Observed = 1, prdicted = ", new_model(torch.tensor([1., 0.5, 0.25, 1.])).detach())

Company A: Observed = 0, prdicted =  tensor([-0.2061])
Company cd B B: Observed = 1, prdicted =  tensor([-0.2120])


In [40]:
trainer = L.Trainer(max_epochs=3000, log_every_n_steps=2)
trainer.fit(new_model, train_dataloaders= dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name | Type | Params | Mode 
--------------------------------------
0 | lstm | LSTM | 16     | train
--------------------------------------
16        Trainable params
0         Non-trainable params
16        Total params
0.000     Total estimated model params size (MB)
1         Modules in train mode
0         Modules in eval mode
c:\Users\ghora\AppData\Local\Programs\Python\Python312\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=3000` reached.


In [41]:
print("Company A: Observed = 0, prdicted = ", new_model(torch.tensor([0., 0.5, 0.25, 1])).detach())
print("Company cd B B: Observed = 1, prdicted = ", new_model(torch.tensor([1., 0.5, 0.25, 1.])).detach())

Company A: Observed = 0, prdicted =  tensor([-0.0005])
Company cd B B: Observed = 1, prdicted =  tensor([0.9947])
