# lightning

> using pytorch-lightning to train and deploy PyTorch models

Lightning has multiple libraries that trade off structure and simplicity against expert-level control.

This notebook walks through the core workflow with a small MNIST autoencoder: define the model, load the data, train, and deploy.

In [None]:
import os
import torch
import torch.nn as nn
import torch.utils.data as data
import torchvision as tv
import lightning as L

## model

In [None]:

# Layer sizes for the autoencoder, named once so the encoder and decoder stay mirror images.
INPUT_DIM = 28 * 28  # flattened input size fed to the first encoder layer
HIDDEN_DIM = 128  # width of the single hidden layer on each side
LATENT_DIM = 3  # size of the bottleneck embedding

# encoder: INPUT_DIM -> HIDDEN_DIM -> LATENT_DIM
encoder = nn.Sequential(
    nn.Linear(INPUT_DIM, HIDDEN_DIM),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM, LATENT_DIM),
)
# decoder: LATENT_DIM -> HIDDEN_DIM -> INPUT_DIM
decoder = nn.Sequential(
    nn.Linear(LATENT_DIM, HIDDEN_DIM),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM, INPUT_DIM),
)


In [None]:
# Show the encoder's layers; the cell's last expression is rendered as its repr.
encoder

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=3, bias=True)
)

In [None]:
# Show the decoder's layers; the cell's last expression is rendered as its repr.
decoder

Sequential(
  (0): Linear(in_features=3, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=784, bias=True)
)

In [None]:
class LitAutoEncoder(L.LightningModule):
    """Autoencoder trained with a mean-squared-error reconstruction loss.

    Each batch is unpacked as ``(inputs, targets)``; the inputs are flattened to
    ``(batch_size, -1)``, passed through ``encoder`` then ``decoder``, and the
    decoder output is compared against the flattened inputs. The targets are unused.

    Args:
        encoder: module applied to the flattened inputs.
        decoder: module applied to the encoder output.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder, self.decoder = encoder, decoder

    def _reconstruction_loss(self, batch):
        """Return the MSE between the flattened inputs and their reconstruction."""
        x, _ = batch  # second element of the batch is not needed for reconstruction
        x = x.view(x.size(0), -1)
        x_hat = self.decoder(self.encoder(x))
        return nn.functional.mse_loss(x_hat, x)

    def training_step(self, batch, batch_idx):
        """Compute, log (as ``train_loss``) and return the loss for one training batch."""
        loss = self._reconstruction_loss(batch)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute, log (as ``val_loss``) and return the loss for one validation batch.

        Without this hook Lightning skips the validation loop even when a
        validation dataloader is passed to ``Trainer.fit``.
        """
        loss = self._reconstruction_loss(batch)
        self.log("val_loss", loss)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with learning rate 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)


In [None]:
# Wrap the encoder/decoder pair in the Lightning module that will be trained.
model = LitAutoEncoder(encoder=encoder, decoder=decoder)

## data

In [None]:
# Names of everything torchvision's datasets package exports.
dataset_names = tv.datasets.__all__

# Emit the heading followed by one name per line in a single print call.
print("Available datasets:", *dataset_names, sep="\n")


Available datasets:
LSUN
LSUNClass
ImageFolder
DatasetFolder
FakeData
CocoCaptions
CocoDetection
CIFAR10
CIFAR100
EMNIST
FashionMNIST
QMNIST
MNIST
KMNIST
StanfordCars
STL10
SUN397
SVHN
PhotoTour
SEMEION
Omniglot
SBU
Flickr8k
Flickr30k
Flowers102
VOCSegmentation
VOCDetection
Cityscapes
ImageNet
Caltech101
Caltech256
CelebA
WIDERFace
SBDataset
VisionDataset
USPS
Kinetics
HMDB51
UCF101
Places365
Kitti
INaturalist
LFWPeople
LFWPairs
KittiFlow
Sintel
FlyingChairs
FlyingThings3D
HD1K
Food101
DTD
FER2013
GTSRB
CLEVRClassification
OxfordIIITPet
PCAM
Country211
FGVCAircraft
EuroSAT
RenderedSST2
Kitti2012Stereo
Kitti2015Stereo
CarlaStereo
Middlebury2014Stereo
CREStereo
FallingThingsStereo
SceneFlowStereo
SintelStereo
InStereo2k
ETH3DStereo
wrap_dataset_for_transforms_v2


In [None]:
# Preprocessing pipeline applied to every sample: convert to a tensor first,
# then normalize the single channel with mean 0.5 and std 0.5.
to_tensor = tv.transforms.ToTensor()
normalize = tv.transforms.Normalize((0.5,), (0.5,))
transform = tv.transforms.Compose([to_tensor, normalize])

In [None]:
# Fetch the MNIST training split into the current directory (downloading it
# if it is not already there) and attach the preprocessing pipeline.
mnist_data = tv.datasets.MNIST(
    root=".",
    train=True,
    download=True,
    transform=transform,
)


In [None]:
# split the data into train and validation sets (55000/5000); a seeded generator
# makes the split identical on every run so results stay comparable across reruns
SPLIT_SEED = 42
mnist_train, mnist_val = data.random_split(
    mnist_data,
    [55000, 5000],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)


In [None]:
# define the dataloaders for the train and validation sets, with a batch size of 64
BATCH_SIZE = 64
# reshuffle the training samples every epoch so batches differ between epochs
mnist_train_loader = data.DataLoader(mnist_train, batch_size=BATCH_SIZE, shuffle=True)
# validation order does not affect the metric, so it is left unshuffled
mnist_val_loader = data.DataLoader(mnist_val, batch_size=BATCH_SIZE)


## train

In [None]:
# Build the trainer; the run is capped by a step budget rather than an epoch count.
MAX_STEPS = 1000
trainer = L.Trainer(max_steps=MAX_STEPS)

Trainer will use only 1 of 6 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=6)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
/home/ma/miniconda3/envs/myl/lib/python3.10/site-packages/lightning/fabric/plugins/environments/slurm.py:191: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/ma/miniconda3/envs/myl/lib/python3.10/site-pac ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Start training on the train loader; the validation loader is handed over as well.
trainer.fit(
    model,
    train_dataloaders=mnist_train_loader,
    val_dataloaders=mnist_val_loader,
)

/home/ma/miniconda3/envs/myl/lib/python3.10/site-packages/lightning/pytorch/trainer/configuration_validator.py:72: You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.
You are using a CUDA device ('NVIDIA A100-PCIE-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 100 K 
1 | decoder | Sequential | 101 K 
---------------------------------------
202 K     Trainable params
0         Non-trainable params
202 K     Total params
0.810     Total estimated model params size (MB)
/home/ma/miniconda3/envs/myl/lib/python3.10/site-packages/lightning/pytorch

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=1000` reached.


## deploy

In [None]:
# load checkpoint: ask the trainer where its checkpoint callback saved the model
# instead of hard-coding a `lightning_logs/version_N/...` path, which changes on every run
checkpoint = trainer.checkpoint_callback.best_model_path
autoencoder = LitAutoEncoder.load_from_checkpoint(checkpoint, encoder=encoder, decoder=decoder)

# choose your trained nn.Module
encoder = autoencoder.encoder
encoder.eval()

# embed 4 fake images!
# torch.rand gives initialised values; torch.Tensor(4, 784) would be uninitialised memory
device = next(encoder.parameters()).device  # build the batch on the encoder's device
fake_image_batch = torch.rand(4, 28 * 28, device=device)
with torch.no_grad():  # inference only, so skip building the autograd graph
    embeddings = encoder(fake_image_batch)
print("⚡" * 20, "\nPredictions (4 image embeddings):\n", embeddings, "\n", "⚡" * 20)

⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡ 
Predictions (4 image embeddings):
 tensor([[-1.8337,  0.7712, -1.8188],
        [-0.0949,  1.6177, -2.2496],
        [ 0.3169, -0.3814, -3.2711],
        [ 2.8560, -1.0492, -3.4495]], device='cuda:0',
       grad_fn=<AddmmBackward0>) 
 ⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡⚡


In [None]:
#| hide
# Run nbdev's export step for this project; kept as the final cell of the notebook.
import nbdev; nbdev.nbdev_export()