In [1]:
import torch
import torch.nn as nn
import torch.optim as optim



In [2]:
import torch.nn.functional as F

In [3]:
import lightning as L
from torch.utils.data import TensorDataset, DataLoader

In [4]:
class LSTMbyHand(L.LightningModule):
    def __init__(self):
        super().__init__()
        mean= torch.tensor(0.0)
        std = torch.tensor(1.0)

        self.wlr1= nn.Parameter(torch.normal(mean=mean, std= std), requires_grad=True)
        self.wlr2= nn.Parameter(torch.normal(mean=mean, std= std), requires_grad=True)
        self.blr1= nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.wpr1= nn.Parameter(torch.normal(mean=mean, std= std), requires_grad=True)
        self.wpr2= nn.Parameter(torch.normal(mean=mean, std= std), requires_grad=True)
        self.bpr1= nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.wp1= nn.Parameter(torch.normal(mean=mean, std= std), requires_grad=True)
        self.wp2= nn.Parameter(torch.normal(mean=mean, std= std), requires_grad=True)
        self.bp1= nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.wo1= nn.Parameter(torch.normal(mean=mean, std= std), requires_grad=True)
        self.wo2= nn.Parameter(torch.normal(mean=mean, std= std), requires_grad=True)
        self.bo1= nn.Parameter(torch.tensor(0.), requires_grad=True)


    def lstm_unit(self, input_value, long_memory, short_memory):
        long_remember_percent= torch.sigmoid((short_memory * self.wlr1) + (input_value* self.wlr2) + self.blr1)
        long_memory*=long_remember_percent

        long_memory_potential= torch.tanh((short_memory* self.wp1)+(input_value* self.wp2)+ self.bp1)
        long_memory_potential_remember_percent= torch.sigmoid((short_memory* self.wpr1)+(input_value* self.wpr2)+ self.bpr1)
        long_memory+=long_memory_potential * long_memory_potential_remember_percent

        short_memory_potential= torch.tanh(long_memory)
        short_memory_potential_remember_percent= torch.sigmoid((short_memory* self.wo1)+(input_value* self.wo2)+ self.bo1)
        short_memory+=short_memory_potential * short_memory_potential_remember_percent

        return ([long_memory, short_memory])


    def forward(self, input):
        long_memory=0
        short_memory=0
        day1= input[0]
        day2= input[1]
        day3= input[2]
        day4= input[3]

        long_memory, short_memory= self.lstm_unit(day1, long_memory, short_memory)
        long_memory, short_memory= self.lstm_unit(day2, long_memory, short_memory)
        long_memory, short_memory= self.lstm_unit(day3, long_memory, short_memory)
        long_memory, short_memory= self.lstm_unit(day4, long_memory, short_memory)

        return short_memory


    def configure_optimizers(self):
        return optim.Adam(self.parameters())
    
    def training_step(self, batch, batch_idx):
        input_i, label_i= batch
        output_i= self.forward(input_i[0])
        loss= (output_i- label_i)**2
        self.log("train_loss", loss)

In [5]:
model= LSTMbyHand()
print(model(torch.tensor([0., 0.5, 0.25, 1.])).detach()) #com A
print(model(torch.tensor([1., 0.5, 0.25, 1.])).detach()) #com B

tensor(0.6791)
tensor(1.0331)


Till here, we predicted without training

Let's train

In [6]:
inputs= torch.tensor([[0., 0.5, 0.25, 1.], [1., 0.5, 0.25, 1.]])
labels= torch.tensor([0., 1.])

dataset= TensorDataset(inputs,labels)
dataloader= DataLoader(dataset)

In [7]:
trainer= L.Trainer(max_epochs=3000)
trainer.fit(model, train_dataloaders=dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/a  | 12     | n/a 
---------------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
c:\Users\Sanjana Rayarala\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\Sanjana Rayarala\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (2) is smaller than t

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=3000` reached.


In [8]:
print(model(torch.tensor([0., 0.5, 0.25, 1.])).detach()) #com A
print(model(torch.tensor([1., 0.5, 0.25, 1.])).detach()) #com B

tensor(0.6791)
tensor(1.0331)


Not closer to outputs, so let's add epochs!!

In [9]:
path_to_best_checkpoint= trainer.checkpoint_callback.best_model_path

In [18]:
trainer= L.Trainer(max_epochs=5000)
trainer.fit(model, train_dataloaders=dataloader, ckpt_path=path_to_best_checkpoint)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at c:\ML\NN_from_scratch\lstm\lightning_logs\version_4\checkpoints\epoch=2999-step=6000.ckpt
c:\Users\Sanjana Rayarala\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:362: The dirpath has changed from 'c:\\ML\\NN_from_scratch\\lstm\\lightning_logs\\version_4\\checkpoints' to 'c:\\ML\\NN_from_scratch\\lstm\\lightning_logs\\version_5\\checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.

  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/a  | 12     | n/a 
---------------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estima

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5000` reached.


In [19]:
path_to_best_checkpoint= trainer.checkpoint_callback.best_model_path

In [20]:
# Load the best model checkpoint
best_model = LSTMbyHand.load_from_checkpoint(path_to_best_checkpoint)
best_model.eval()  # Set the model to evaluation mode

LSTMbyHand()

In [24]:
# Input tensor (1 sample, sequence length = 4)
input_data = torch.tensor([1., 0.5, 0.25, 1.])

# Ensure the model is on the same device as the input
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_model.to(device)
input_data = input_data.to(device)


In [25]:
with torch.no_grad():
    prediction = best_model(input_data)

print("Predicted Output:", prediction.item())  # Convert to scalar


Predicted Output: 1.0330582857131958


In [23]:
print("Checkpoint Path:", path_to_best_checkpoint)

Checkpoint Path: c:\ML\NN_from_scratch\lstm\lightning_logs\version_5\checkpoints\epoch=4999-step=10000.ckpt


In [26]:
trainer.save_checkpoint("final_model.ckpt")


In [None]:
best_model = LSTMbyHand.load_from_checkpoint("final_model.ckpt")
best_model.eval()  # Set to evaluation mode


In [51]:
class LightningLSTM(L.LightningModule):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=3)
        self.fc = nn.Linear(3, 1)
        
    def forward(self,input):
        input_tensor = input.view(1, -1, 1)
        lstm_out, temp= self.lstm(input_tensor)

        prediction=lstm_out[:,-1,:]
        prediction= self.fc(prediction)
        return prediction
    
    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=0.01)
    
    def training_step(self, batch, batch_idx):
        input_i, label_i = batch
        output_i= self.forward(input_i[0])
        loss = F.mse_loss(output_i, label_i)
        '''
        self.log('train_loss', loss)

        if label_i==0:
            self.log("out_0", output_i)
        else:
            self.log("out_1", output_i)'''
        return loss

In [39]:
# Original tensor
input_tensor = torch.tensor([0., 0.5, 0.25, 1.])

# Reshaping to (batch_size, seq_len, input_size)
input_tensor = input_tensor.view(1, -1, 1)  # (1, 4, 1)

print(input_tensor.shape)  # Output: torch.Size([1, 4, 1])

torch.Size([1, 4, 1])


In [52]:
model= LightningLSTM()
output=model(torch.tensor([0.,0.5,0.25, 1.]))
print(output.detach())

tensor([[0.2727]])


In [59]:
trainer= L.Trainer(max_epochs=5000, log_every_n_steps=2)
trainer.fit(model, train_dataloaders=dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name | Type   | Params | Mode 
----------------------------------------
0 | lstm | LSTM   | 72     | train
1 | fc   | Linear | 4      | train
----------------------------------------
76        Trainable params
0         Non-trainable params
76        Total params
0.000     Total estimated model params size (MB)
2         Modules in train mode
0         Modules in eval mode
c:\Users\Sanjana Rayarala\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

  loss = F.mse_loss(output_i, label_i)
`Trainer.fit` stopped: `max_epochs=5000` reached.


In [56]:
path_to_best_checkpoint= trainer.checkpoint_callback.best_model_path
# Load the best model checkpoint
best_model =LightningLSTM.load_from_checkpoint(path_to_best_checkpoint)
best_model.eval()  # Set the model to evaluation mode
# Input tensor (1 sample, sequence length = 4)
input_data = torch.tensor([1., 0.5, 0.25, 1.])

# Ensure the model is on the same device as the input
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_model.to(device)
input_data = input_data.to(device)
with torch.no_grad():
    prediction = best_model(input_data)

In [57]:
prediction

tensor([[0.5002]])

In [58]:
path_to_best_checkpoint

'c:\\ML\\NN_from_scratch\\lstm\\lightning_logs\\version_9\\checkpoints\\epoch=999-step=2000.ckpt'