In [1]:
%%capture
! pipinstall lightning  

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import lightning as L
from torch.utils.data import TensorDataset,DataLoader
import matplotlib.pyplot as plt

from torchvision import datasets,transforms

In [3]:
transform=transforms.Compose([transforms.ToTensor(),])


In [4]:
train=datasets.MNIST(root="./data",train=True,download=True,transform=transform)
test=datasets.MNIST(root="./data",train=False,download=True,transform=transform)


In [24]:
dl=DataLoader(train,batch_size=100,shuffle=True,num_workers=3)
dltest=DataLoader(test,batch_size=1000,num_workers=3)

In [6]:
class Net(L.LightningModule):
    def __init__(self):
        super().__init__()
        L.seed_everything(seed=42)
        self.convolve=nn.Conv2d(in_channels=1,out_channels=1,kernel_size=3)
        self.maxpooling=nn.MaxPool2d(kernel_size=2,stride=2)
        self.pooltohidden=nn.Linear(in_features=13*13,out_features=1) #1 neuron out, (28-3-0)/1 +1 ie (inp-kernel+padding)/stride+bias
        self.hiddentoout=nn.Linear(in_features=1,out_features=10) #The max pooled matrix is half that of feature map
        self.loss=nn.CrossEntropyLoss()

    def forward(self,x):
        feat=self.convolve(x)
        feat=F.relu(feat)
        pool=self.maxpooling(feat)
        pool=torch.flatten(pool,1)
        xi=self.pooltohidden(pool)
        yi=F.relu(xi)
        out=self.hiddentoout(yi)
        return out
    def configure_optimizers(self):
        return Adam(self.parameters(), lr=0.001)

    def training_step(self,batch,batch_idx):
        x,y=batch
        out=self.forward(x)
        loss=self.loss(out,y)
        return loss
        

In [7]:
model=Net()


Seed set to 42


In [8]:
trainer=L.Trainer(max_epochs=100)
trainer.fit(model, train_dataloaders=dl)

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type             | Params | Mode 
----------------------------------------------------------
0 | convolve     | Conv2d           | 10     | train
1 | maxpooling   | MaxPool2d        | 0      | train
2 | pooltohidden | Linear           | 170    | train
3 | hiddentoout  | Linear           | 20     | train
4 | loss         | CrossEntropyLoss | 0      | train
----------------------------------------------------------
200       Trainable params
0         Non-trainable params
200       Total params
0.001     Total estimated model params size (MB)
5         Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.


In [9]:
for batch_num, (image, label) in enumerate(dl):
    
    prediction = model(image)
    
    probabilities = torch.softmax(prediction, dim=1)

    predicted_label = torch.argmax(probabilities, dim=1)
    
    print("Predicted label:", predicted_label)
    print("Original label:", label)
    print("\n")


Predicted label: tensor([3, 1, 2, 7, 0, 7, 0, 1, 9, 2, 9, 1, 4, 7, 1, 4, 0, 7, 6, 1, 1, 1, 9, 1,
        3, 0, 7, 2, 1, 1, 1, 3, 3, 3, 2, 0, 0, 1, 1, 0, 7, 7, 8, 6, 7, 3, 0, 2,
        0, 1, 6, 4, 3, 4, 2, 4, 1, 1, 1, 2, 2, 1, 3, 4])
Original label: tensor([5, 1, 0, 7, 5, 7, 0, 3, 9, 3, 7, 8, 8, 4, 8, 4, 0, 3, 2, 5, 5, 1, 9, 1,
        8, 0, 7, 0, 4, 5, 0, 0, 3, 9, 0, 5, 0, 1, 4, 6, 9, 4, 5, 6, 3, 2, 6, 6,
        0, 8, 6, 4, 5, 1, 2, 8, 1, 1, 8, 0, 3, 8, 8, 9])


Predicted label: tensor([1, 8, 7, 6, 6, 3, 3, 4, 6, 1, 7, 2, 7, 2, 9, 0, 7, 3, 7, 0, 3, 3, 3, 3,
        1, 1, 2, 0, 1, 0, 3, 9, 9, 3, 1, 0, 9, 1, 0, 1, 4, 7, 2, 3, 4, 0, 0, 1,
        4, 3, 8, 3, 2, 6, 2, 1, 1, 3, 1, 7, 2, 7, 0, 6])
Original label: tensor([1, 8, 7, 2, 5, 3, 0, 7, 6, 1, 7, 0, 9, 5, 9, 0, 7, 0, 7, 6, 2, 5, 6, 8,
        9, 8, 2, 2, 1, 0, 5, 2, 4, 5, 5, 8, 4, 5, 5, 8, 4, 7, 0, 3, 4, 5, 0, 1,
        4, 5, 8, 2, 0, 6, 2, 3, 1, 3, 9, 7, 2, 7, 3, 6])


Predicted label: tensor([2, 6, 6, 1, 0, 1, 8, 1, 6, 3, 9, 3, 8

In [10]:
path_to_checkpoint = trainer.checkpoint_callback.best_model_path
path_to_checkpoint

'/teamspace/studios/this_studio/signa/chapter_06/lightning_logs/version_21/checkpoints/epoch=99-step=93800.ckpt'

In [11]:
trainer=L.Trainer(max_epochs=700)
trainer.fit(model, train_dataloaders=dl,ckpt_path=path_to_checkpoint)

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at /teamspace/studios/this_studio/signa/chapter_06/lightning_logs/version_21/checkpoints/epoch=99-step=93800.ckpt
/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:362: The dirpath has changed from '/teamspace/studios/this_studio/signa/chapter_06/lightning_logs/version_21/checkpoints' to '/teamspace/studios/this_studio/signa/chapter_06/lightning_logs/version_22/checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.

  | Name         | Type             | Params | Mode 
-----------------------------------------------------

Training: |          | 0/? [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

`Trainer.fit` stopped: `max_epochs=700` reached.


In [20]:
for batch_num, (image, label) in enumerate(dl):
    
    prediction = model(image)
    
    probabilities = torch.softmax(prediction, dim=1)

    predicted_label = torch.argmax(probabilities, dim=1)
    
    print("Predicted label:", predicted_label)
    print("Original label:", label)
    print("\n")


Predicted label: tensor([8, 0, 9, 7, 8, 6, 8, 9, 0, 3, 9, 9, 0, 9, 7, 6, 3, 8, 7, 2, 7, 9, 3, 6,
        1, 1, 1, 3, 1, 9, 4, 1, 3, 2, 0, 8, 8, 1, 9, 8, 3, 1, 8, 7, 9, 6, 1, 6,
        6, 3, 3, 2, 3, 3, 9, 4, 7, 3, 1, 8, 2, 0, 7, 1])
Original label: tensor([8, 0, 4, 7, 5, 6, 1, 9, 5, 0, 9, 4, 0, 9, 9, 6, 8, 8, 7, 3, 9, 9, 3, 6,
        3, 1, 1, 5, 1, 7, 4, 1, 3, 2, 0, 1, 8, 2, 9, 5, 3, 3, 8, 7, 4, 6, 2, 6,
        6, 3, 5, 2, 1, 4, 3, 4, 7, 3, 3, 4, 0, 3, 7, 1])


Predicted label: tensor([6, 3, 3, 7, 2, 7, 9, 4, 9, 0, 9, 1, 3, 7, 3, 7, 9, 2, 7, 7, 1, 7, 4, 9,
        0, 4, 4, 0, 2, 6, 8, 0, 3, 1, 3, 1, 3, 8, 9, 0, 1, 1, 1, 0, 0, 8, 7, 1,
        3, 6, 7, 1, 6, 8, 2, 9, 8, 3, 0, 8, 3, 7, 0, 0])
Original label: tensor([6, 0, 8, 7, 6, 7, 9, 4, 7, 0, 4, 1, 5, 7, 8, 7, 9, 6, 7, 9, 8, 7, 4, 4,
        0, 4, 4, 0, 6, 6, 5, 3, 3, 4, 2, 1, 0, 6, 9, 0, 5, 1, 1, 3, 5, 4, 7, 1,
        8, 6, 7, 5, 6, 5, 3, 9, 5, 3, 2, 8, 3, 9, 2, 3])


Predicted label: tensor([9, 1, 1, 7, 0, 1, 3, 1, 6, 9, 1, 3, 1

In [21]:
torch.sum(torch.eq(torch.tensor(label), predicted_label)) / len(predicted_label)

  torch.sum(torch.eq(torch.tensor(label), predicted_label)) / len(predicted_label)


tensor(0.4062)

In [22]:
for batch_num, (image, label) in enumerate(dltest):
    
    prediction = model(image)
    
    probabilities = torch.softmax(prediction, dim=1)

    predicted_label = torch.argmax(probabilities, dim=1)
    
    print("Predicted label:", predicted_label)
    print("Original label:", label)
    print("\n")


Predicted label: tensor([7, 6, 1, 3, 4, 1, 4, 4, 8, 7, 3, 2, 9, 8, 1, 3, 9, 7, 0, 4, 9, 2, 0, 3,
        9, 3, 7, 4, 0, 1, 1, 1, 3, 3, 9, 2, 9, 1, 2, 1, 1, 7, 9, 3, 8, 3, 1, 6,
        1, 4, 2, 0, 3, 8, 6, 3, 1, 1, 9, 4, 7, 8, 4, 0, 7, 4, 6, 4, 3, 2, 7, 2,
        2, 9, 1, 7, 3, 1, 4, 7, 7, 2, 2, 7, 1, 1, 7, 8, 6, 1, 3, 6, 9, 3, 1, 1,
        4, 4, 6, 7, 6, 3, 1, 9, 4, 9, 0, 1, 9, 4, 3, 9, 3, 9, 7, 4, 4, 1, 9, 1,
        3, 1, 7, 6, 7, 4, 3, 8, 8, 3, 2, 2, 2, 9, 3, 1, 3, 1, 6, 4, 2, 7, 1, 1,
        7, 1, 8, 6, 0, 8, 4, 1, 3, 3, 4, 8, 6, 3, 3, 4, 4, 6, 0, 4, 3, 0, 4, 3,
        1, 4, 4, 9, 6, 3, 3, 9, 1, 3, 1, 3, 1, 3, 3, 2, 3, 9, 2, 3, 0, 1, 1, 1,
        3, 4, 3, 1, 1, 2, 4, 3, 2, 6, 1, 1, 1, 8, 9, 3, 0, 7, 4, 1, 9, 1, 9, 8,
        8, 3, 4, 3, 7, 0, 6, 7, 1, 0, 8, 9, 1, 7, 8, 3, 0, 9, 7, 9, 2, 6, 4, 1,
        3, 3, 1, 9, 1, 3, 2, 6, 9, 2, 4, 1, 9, 3, 7, 7, 0, 8, 6, 2, 8, 8, 7, 7,
        4, 1, 3, 4, 3, 3, 3, 3, 1, 9, 9, 9, 1, 4, 3, 4, 6, 7, 4, 1, 9, 6, 6, 4,
        1, 0, 1, 6, 9, 

In [23]:
torch.sum(torch.eq(torch.tensor(label), predicted_label)) / len(predicted_label)

  torch.sum(torch.eq(torch.tensor(label), predicted_label)) / len(predicted_label)


tensor(0.5030)