In [12]:
import torch   # Create tensor 
import torch.nn as nn # to weights and biases as part of neural network 
from torch.optim import Adam    # Adam is to fit the data, similar to stochastic gradient descent 


In [14]:
import lightning as L
from torch.utils.data import TensorDataset, DataLoader # Makes things easier 

In [16]:
class LSTMbyHand(L.LightningModule):
    """A single LSTM unit with scalar weights, written out by hand.

    Every weight and bias is an individual scalar ``nn.Parameter``. The unit is
    unrolled over the values of an input sequence in ``forward`` and trained
    with a squared-error loss in ``training_step``.

    Parameter naming (``w*1`` multiplies the short-term memory, ``w*2``
    multiplies the current input value, ``b*1`` is the bias):
        wlr1, wlr2, blr1 -- forget gate (fraction of long-term memory kept)
        wpr1, wpr2, bpr1 -- input gate (fraction of potential memory kept)
        wp1,  wp2,  bp1  -- potential (candidate) long-term memory
        wo1,  wo2,  bo1  -- output gate (fraction of memory exposed as output)
    """

    def __init__(self):
        """Create the twelve trainable scalars of the LSTM unit."""
        super().__init__()

        # Weights are sampled from a standard normal distribution N(0, 1).
        # Note: such samples are NOT confined to [0, 1]; they may be negative
        # or larger than 1. Biases start at exactly 0.
        mean = torch.tensor(0.0)
        std = torch.tensor(1.0)

        # Forget gate.
        self.wlr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wlr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.blr1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        # Input gate: how much of the potential memory to keep.
        self.wpr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wpr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bpr1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        # Input gate: the potential (candidate) long-term memory itself.
        self.wp1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wp2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bp1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        # Output gate.
        self.wo1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wo2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bo1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

    def lstm_unit(self, input_value, long_memory, short_memory):
        """Run one LSTM step.

        Args:
            input_value: the current value of the sequence.
            long_memory: long-term memory (cell state) from the previous step.
            short_memory: short-term memory (hidden state) from the previous step.

        Returns:
            A list ``[updated_long_memory, updated_short_memory]``.
        """
        # Forget gate: fraction of the existing long-term memory to remember.
        long_remember_percent = torch.sigmoid(
            (short_memory * self.wlr1) + (input_value * self.wlr2) + self.blr1
        )

        # Input gate: build a potential memory and decide how much of it to add.
        potential_remember_percent = torch.sigmoid(
            short_memory * self.wpr1 + input_value * self.wpr2 + self.bpr1
        )
        potential_memory = torch.tanh(
            short_memory * self.wp1 + input_value * self.wp2 + self.bp1
        )
        updated_long_memory = (
            long_remember_percent * long_memory
            + potential_remember_percent * potential_memory
        )

        # Output gate: the new short-term memory is a gated view of the
        # updated long-term memory.
        output_percent = torch.sigmoid(
            short_memory * self.wo1 + input_value * self.wo2 + self.bo1
        )
        updated_short_memory = torch.tanh(updated_long_memory) * output_percent

        return [updated_long_memory, updated_short_memory]

    def forward(self, input):
        """Unroll the LSTM unit over every value of ``input``.

        Both memories start at 0 and the unit is applied once per element, in
        order. Sequences of any length are accepted (the notebook's data uses
        four values per sequence); an empty sequence returns the initial
        short-term memory, 0.

        Args:
            input: an iterable of scalar values, e.g. a 1-D tensor.

        Returns:
            The short-term memory after the last step, used as the prediction.
        """
        long_memory = 0
        short_memory = 0

        for step_value in input:
            long_memory, short_memory = self.lstm_unit(step_value, long_memory, short_memory)

        return short_memory

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with default settings."""
        return Adam(self.parameters())

    def training_step(self, batch, batch_idx):
        """Compute the squared-error loss for one batch and log progress.

        Only the first sequence of the batch (``input_i[0]``) is used, and
        ``label_i`` is tested directly in an ``if``, so this step is written
        for batches that contain a single example.

        Logs ``train_loss`` on every step, plus the raw prediction under
        ``out_0`` when the label equals 0 (company A) or ``out_1`` otherwise
        (company B).
        """
        input_i, label_i = batch
        output_i = self.forward(input_i[0])
        loss = (output_i - label_i)**2

        self.log("train_loss", loss)

        if(label_i==0):
            self.log("out_0", output_i) # company A
        else:
            self.log("out_1", output_i) # company B
        return loss


In [18]:
## Build a freshly initialised (untrained) network and look at its first guess.
model = LSTMbyHand()

print("Compare observed and peridicated values")
# Company A's four input values; .detach() returns the prediction without its autograd graph.
company_a_days = torch.tensor([0., 0.5, 0.25, 1.])
print("company A observed =0, predicted =", model(company_a_days).detach())

Compare observed and peridicated values
company A observed =0, predicted = tensor(-0.0281)


In [9]:
## Import the logger from the same `lightning` package that provides `L.Trainer`
## and `L.LightningModule`; objects from the separate `pytorch_lightning`
## package should not be mixed with it.
from lightning.pytorch.loggers import TensorBoardLogger

## Writes TensorBoard event files under lightning_logs/my_model/.
## NOTE: a logger only takes effect once it is passed to a Trainer,
## e.g. L.Trainer(..., logger=logger).
logger = TensorBoardLogger("lightning_logs", name="my_model")

In [20]:
## Re-create the network (new random weights) and repeat the untrained check.
input_data = torch.tensor([0., 0.5, 0.25, 1.])  # float literals give a floating-point tensor
model = LSTMbyHand()

print("Compare observed and predicted values")

# Prediction for Company A, returned without its autograd graph.
predicted_value = model(input_data).detach()
print("Company A observed = 0, predicted =", predicted_value)


Compare observed and predicted values
Company A observed = 0, predicted = tensor(0.5878)


In [22]:
## create the training data for the neural network.
# One row per company: four input values each, paired with the value to predict.
inputs = torch.tensor([
    [0., 0.5, 0.25, 1.],  # Company A
    [1., 0.5, 0.25, 1.],  # Company B
])
labels = torch.tensor([0., 1.])

dataset = TensorDataset(inputs, labels)
# DataLoader defaults spelled out: one example per batch, in dataset order.
dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

In [24]:
## Train for 2000 epochs. The model's optimizer is Adam with its default
## learning rate (0.001); that small step size makes learning slow.
trainer = L.Trainer(max_epochs=2000)
trainer.fit(model=model, train_dataloaders=dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3050 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/a  | 12     | n/a 
---------------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
C:\Users\nithi\anaconda3\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425

Training: |                                                                                                | 0…

`Trainer.fit` stopped: `max_epochs=2000` reached.


In [27]:
## Evaluate the model on both training sequences.
company_a_days = torch.tensor([0., 0.5, 0.25, 1.])
company_b_days = torch.tensor([1., 0.5, 0.25, 1.])

print("\nNow let's compare the observed and predicted values...")
print("Company A: Observed = 0, Predicted =", model(company_a_days).detach())
print("Company B: Observed = 1, Predicted =", model(company_b_days).detach())


Now let's compare the observed and predicted values...
Company A: Observed = 0, Predicted = tensor(0.0008)
Company B: Observed = 1, Predicted = tensor(0.9505)


In [29]:
## Import the logger from the same `lightning` package that provides `L.Trainer`
## and `L.LightningModule`; objects from the separate `pytorch_lightning`
## package should not be mixed with it.
from lightning.pytorch.loggers import TensorBoardLogger

## Writes TensorBoard event files under lightning_logs/my_model/.
## NOTE: a logger only takes effect once it is passed to a Trainer,
## e.g. L.Trainer(..., logger=logger).
logger = TensorBoardLogger("lightning_logs", name="my_model")

In [None]:
## Step 1: ask the previous trainer's checkpoint callback where it saved its
## checkpoint (by default, "best" simply means "most recent").
path_to_checkpoint = trainer.checkpoint_callback.best_model_path
print("The new trainer will start where the last left off, and the check point data is here: " + path_to_checkpoint + "\n")

## Step 2: build a new Trainer with a higher epoch limit. The previous limit
## was 2000, so max_epochs=3000 allows 1000 additional epochs.
trainer = L.Trainer(max_epochs=3000)

## Step 3: resume from the checkpoint so training picks up where it left off.
trainer.fit(model=model, train_dataloaders=dataloader, ckpt_path=path_to_checkpoint)

In [None]:
## Evaluate the model on both training sequences.
company_a_days = torch.tensor([0., 0.5, 0.25, 1.])
company_b_days = torch.tensor([1., 0.5, 0.25, 1.])

print("\nNow let's compare the observed and predicted values...")
print("Company A: Observed = 0, Predicted =", model(company_a_days).detach())
print("Company B: Observed = 1, Predicted =", model(company_b_days).detach())

In [None]:
!tensorboard --logdir=lightning_log/

In [None]:
## Step 1: ask the previous trainer's checkpoint callback where it saved its
## checkpoint (by default, "best" simply means "most recent").
path_to_checkpoint = trainer.checkpoint_callback.best_model_path
print("The new trainer will start where the last left off, and the check point data is here: " + path_to_checkpoint + "\n")

## Step 2: build a new Trainer with a higher epoch limit. The previous limit
## was 3000, so max_epochs=5000 allows 2000 additional epochs.
trainer = L.Trainer(max_epochs=5000)

## Step 3: resume from the checkpoint so training picks up where it left off.
trainer.fit(model=model, train_dataloaders=dataloader, ckpt_path=path_to_checkpoint)

In [34]:
## Evaluate the model on both training sequences.
company_a_days = torch.tensor([0., 0.5, 0.25, 1.])
company_b_days = torch.tensor([1., 0.5, 0.25, 1.])

print("\nNow let's compare the observed and predicted values...")
print("Company A: Observed = 0, Predicted =", model(company_a_days).detach())
print("Company B: Observed = 1, Predicted =", model(company_b_days).detach())


Now let's compare the observed and predicted values...
Company A: Observed = 0, Predicted = tensor(0.0008)
Company B: Observed = 1, Predicted = tensor(0.9505)


In [None]:
!tensorboard --logdir=lightning_log/

In [None]:
import torch

# Quick check: does PyTorch report a usable CUDA device?
cuda_available = torch.cuda.is_available()
print(cuda_available)


In [None]:
import torch

## Report the GPU setup; device details are only queried when CUDA is available.
has_gpu = torch.cuda.is_available()
print("GPU Available:", has_gpu)
if has_gpu:
    gpu_details = (
        ("GPU Name:", torch.cuda.get_device_name(0)),
        ("CUDA Version:", torch.version.cuda),
        ("PyTorch Version:", torch.__version__),
        ("GPU Count:", torch.cuda.device_count()),
    )
    for label, value in gpu_details:
        print(label, value)


In [None]:
import torch

# Environment summary, one value per line:
# PyTorch version, whether cuDNN is enabled, whether CUDA is available.
environment_facts = (
    torch.__version__,
    torch.backends.cudnn.enabled,
    torch.cuda.is_available(),
)
for fact in environment_facts:
    print(fact)
