In [1]:
#!pip install torch==1.10.0 torchvision==0.11.1 torchtext==0.11.0 torchaudio==0.10.0 --quiet

#!pip install pytorch-lightning==1.5.2 --quiet

In [2]:
import pytorch_lightning as pl

import torch

from torch import nn, optim

from torch.autograd import Variable

import pytorch_lightning as pl

from pytorch_lightning.callbacks import ModelCheckpoint

from torch.utils.data import DataLoader

print("torch version:",torch.__version__)

print("pytorch ligthening version:",pl.__version__)

torch version: 1.10.0+cu102
pytorch ligthening version: 1.5.2




In [3]:
#build a truth table that matches the XOR gate. We will use the variables to create the dataset
xor_inputs = [Variable(torch.Tensor([0, 0])),

          Variable(torch.Tensor([0, 1])),

          Variable(torch.Tensor([1, 0])),

          Variable(torch.Tensor([1, 1]))]

in the code above, we created four tensors, and each tensor had two values—that is, it had two features, A and B. We are ready with all the input features. A total of four rows are ready to be fed to our XOR model.

In [4]:
#Since the input features are ready, it's time to build our target variables
xor_targets = [Variable(torch.Tensor([0])),

           Variable(torch.Tensor([1])),

           Variable(torch.Tensor([1])),

           Variable(torch.Tensor([0]))]

In [5]:
#Inputs and targets will be ready in the final step of preparing our dataset. It's time to create a data loader
xor_data = list(zip(xor_inputs, xor_targets)) # creating a dataset that is a list of tuples, and each tuple has two values- 
#first values are the two features/inputs, and the second values are the target values for the given input

train_loader = DataLoader(xor_data, batch_size=1)

Data loaders in PyTorch Lightning look for two main things—the key and the value, which in our case are the features and target values. We are then using the DataLoader module from torch.utils.data to wrap the xor_data and create a Python iterable over the XOR data

Every model we build using PyTorch Lightning must be inherited from a class called LightningModule.
In simple terms, we can say that PyTorch LightningModule is the same as PyTorch nn.Module but with added life cycle methods and other operations. 

Any PyTorch Lightning model needs at least two life cycle methods—one for the training loop to train the model (called training_step), and another to configure an optimizer for the model (called configure_optimizers). In addition to these two life cycle methods, we also use the forward method. This is where we take in the input data and pass it to the model.

Our XOR MLP model building follows this process, and we will go over each step in detail, as follows:  

 - Initializing the model  
 - Mapping inputs to the model  
 - Configuring the optimizer  
 - Setting up training parameters  

# Initializing the model

In [6]:

class XORModel(pl.LightningModule):  #creating a class called XOR that inherits from PyTorch LightningModule
    
    
    def __init__(self):

        super(XORModel,self).__init__()


        #set up the hidden layers
        self.input_layer = nn.Linear(2, 4) #1st layer - 2 inputs and returns 4 outputs
        self.output_layer = nn.Linear(4,1)

        #Initialize the activation function
        self.sigmoid = nn.Sigmoid()

        #Initialize the loss function
        self.loss = nn.MSELoss()
    

# Mapping inputs to the model

In [7]:
# Define the forward method, which takes the inputs and generates the model's output
# The forward method acts as a mapper or medium where data is passed between multiple layers and the activation function

def forward(self, input):

    #print("INPUT:", input.shape)

    x = self.input_layer(input)

    #print("FIRST:", x.shape)

    x = self.sigmoid(x)

    #print("SECOND:", x.shape)

    output = self.output_layer(x)

    #print("THIRD:", output.shape)

    return output

# Configuring the optimizer

All optimizers in PyTorch Lightning can be configured in a life cycle method called **configure_optimizers**. In this method, one or multiple optimizers can be configured

In [8]:
def configure_optimizers(self):

    params = self.parameters() #model parameters can be accessed by using the self object with the self.parameters() method

    optimizer = optim.Adam(params=params, lr = 0.01)

    return optimizer

# Setting up training parameters


In [9]:
#This is where all the model training occurs
def training_step(self, batch, batch_idx): 
    '''
    batch: Data that is being passed in the data loader is accessed in batches. 
           This consists of two items: one is the input/features data, and the other item is targets.
    
    batch_idx: This is the index number or the sequence number for the batche of data.
    
    '''
    
    xor_input, xor_target = batch

    #print("XOR INPUT:", xor_input.shape)

    #print("XOR TARGET:", xor_target.shape)

    outputs = self(xor_input)

    #print("XOR OUTPUT:", outputs.shape)

    loss = self.loss(outputs, xor_target)

    return loss

In the preceding method, we are accessing our inputs and targets from the batch and then passing the inputs to the self method. When the input is passed to the self method, that indirectly invokes our forward method, which returns the XOR multilayer NN output. We are using the MSE loss function to calculate the loss and return the loss value for this method.

In short, inputs and targets passed to batch --> self method --> invokes forward method --> mapping of Input-output b/w layers --> finally loss value as output

In [10]:
class XORModel(pl.LightningModule):
    def __init__(self):

        super(XORModel,self).__init__()
        
        self.input_layer = nn.Linear(2, 4)
        self.output_layer = nn.Linear(4,1)
        self.sigmoid = nn.Sigmoid()
        self.loss = nn.MSELoss()
        
    # input to output mapping
    def forward(self, input):

        #print("INPUT:", input.shape)
        x = self.input_layer(input)
        #print("FIRST:", x.shape)

        x = self.sigmoid(x)
        #print("SECOND:", x.shape)
        output = self.output_layer(x)

        #print("THIRD:", output.shape)
        return output
    
    # set up optimizer
    def configure_optimizers(self):

        params = self.parameters()
        optimizer = optim.Adam(params=params, lr = 0.01)
        return optimizer

    # model is trained and you get loss value
    def training_step(self, batch, batch_idx):

        xor_input, xor_target = batch

        #print("XOR INPUT:", xor_input.shape)
        #print("XOR TARGET:", xor_target.shape)
        outputs = self(xor_input)

        #print("XOR OUTPUT:", outputs.shape)
        loss = self.loss(outputs, xor_target)
        return loss

We are using sigmoid as our activation function, MSE as our loss function, and Adam as our optimizer  
Backpropagation, clearing gradients, or optimizer parameter updates and many other things are taken care of by the PyTorch Lightning framework.

# Training the model

All models built in PyTorch Lightning can be trained using a Trainer class.  
Everything is taken care inside trainer class - looping over the dataset, backpropagation, clearing gradients, 
and the optimizer step.  
Also, the Trainer class supports many other functionalities that help us to build our model easily, and 
some of those functionalities are various callbacks, model checkpoints, early stopping, dev runs for unit testing,
support for GPUs and TPUs, loggers, logs, epochs, and many more. 


In [11]:
from pytorch_lightning.utilities.types import TRAIN_DATALOADERS

checkpoint_callback = ModelCheckpoint()
model = XORModel()

#creating a trainer object for 100 epochs 
trainer = pl.Trainer(max_epochs=100, callbacks=[checkpoint_callback])

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [12]:

trainer.fit(model, train_dataloaders=train_loader)



2023-05-15 18:44:43.159320: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-15 18:44:43.159352: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.

  | Name         | Type    | Params
-----------------------------------------
0 | input_layer  | Linear  | 12    
1 | output_layer | Linear  | 5     
2 | sigmoid      | Sigmoid | 0     
3 | loss         | MSELoss | 0     
-----------------------------------------
17        Trainable params
0         Non-trainable params
17        Total params
0.000     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

In **PyTorch Lightning, one advantage** we see is whenever we train a model multiple times, **all the different model versions are saved to disk in a default folder called lightning_logs,** and once all the models with different versions are made ready, we always have the opportunity to load the different model versions from the files and compare the results.

In [13]:
ls lightning_logs/

[0m[01;34mversion_0[0m/  [01;34mversion_1[0m/


Within these version subfolders, we have all the information about the model being trained and built, which can be easily loaded, and predictions can be performed. Files within these folders have some useful information, such as hyperparameters, which are saved as hparams.yaml, and we also have a subfolder called checkpoints.

In [14]:
ls lightning_logs/*/

lightning_logs/version_0/:
[0m[01;34mcheckpoints[0m/                                           hparams.yaml
events.out.tfevents.1684167297.ip-10-215-9-65.23862.0

lightning_logs/version_1/:
[01;34mcheckpoints[0m/                                           hparams.yaml
events.out.tfevents.1684176284.ip-10-215-9-65.25799.0


# Loading the model

identifying the latest version of a model can be done using checkpoint_callback

In [15]:
print(checkpoint_callback.best_model_path)



/home/ju9yt3r/FOLDER_PYTORCH/TEST/lightning_logs/version_1/checkpoints/epoch=99-step=399.ckpt


Loading the model from the checkpoint can easily be done using the load_from_checkpoint method from the model object by passing the model checkpoint path

In [16]:
train_model = model.load_from_checkpoint(checkpoint_callback.best_model_path)

In [17]:
test = torch.utils.data.DataLoader(xor_inputs, batch_size=1)

for val in xor_inputs:
    _ = train_model(val)
    print([int(val[0]),int(val[1])], int(_.round()))

[0, 0] 1
[0, 1] 0
[1, 0] 1
[1, 1] 0


In [18]:
xor_inputs

[tensor([0., 0.]), tensor([0., 1.]), tensor([1., 0.]), tensor([1., 1.])]