In [None]:
!pip install pytorch-lightning

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch-lightning
  Downloading pytorch_lightning-1.8.3.post1-py3-none-any.whl (798 kB)
[K     |████████████████████████████████| 798 kB 39.6 MB/s 
Collecting tensorboardX>=2.2
  Downloading tensorboardX-2.5.1-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 71.5 MB/s 
[?25hCollecting lightning-utilities==0.3.*
  Downloading lightning_utilities-0.3.0-py3-none-any.whl (15 kB)
Collecting torchmetrics>=0.7.0
  Downloading torchmetrics-0.10.3-py3-none-any.whl (529 kB)
[K     |████████████████████████████████| 529 kB 50.9 MB/s 
Collecting fire
  Downloading fire-0.4.0.tar.gz (87 kB)
[K     |████████████████████████████████| 87 kB 2.2 MB/s 
Building wheels for collected packages: fire
  Building wheel for fire (setup.py) ... [?25l[?25hdone
  Created wheel for fire: filename=fire-0.4.0-py2.py3-none-any.whl size=115943 sha256=9529dfde77a46b5dabd89ce

In [None]:
from torchvision import datasets, transforms
import pytorch_lightning as pl
from torch.utils.data import DataLoader, Subset
import torch

In [None]:
class CIFARDatamodule(pl.LightningDataModule):
  """Lightning data module serving small, fixed slices of CIFAR-10.

  Training uses the first 100 images of the CIFAR-10 training split and
  validation uses images 100-149 of the CIFAR-10 test split.
  """

  def __init__(self, batch_size = 32):
    """Remember the batch size shared by the train and validation loaders."""
    super().__init__()
    self.batch_size = batch_size

  def setup(self, stage = None):
    """Download CIFAR-10 into ``data/`` if needed and build both subsets.

    ``stage`` is accepted but not used.
    """
    # Normalizing with mean 0 and std 1 leaves the ToTensor output unchanged.
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0), (1)),
    ])

    full_train = datasets.CIFAR10(root='data/', train=True, download=True, transform=preprocessing)
    self.train_dataset = Subset(full_train, torch.arange(100))

    full_test = datasets.CIFAR10(root='data/', train=False, download=True, transform=preprocessing)
    self.test_dataset = Subset(full_test, torch.arange(100, 150))

  def train_dataloader(self):
    """Return a shuffled loader over the training subset."""
    return DataLoader(self.train_dataset, shuffle=True, batch_size=self.batch_size)

  def val_dataloader(self):
    """Return an unshuffled loader over the validation subset."""
    return DataLoader(self.test_dataset, shuffle=False, batch_size=self.batch_size)

In [None]:
# Data module with the default batch size spelled out.
data_module = CIFARDatamodule(batch_size=32)

In [None]:
# Downloads CIFAR-10 into data/ (if it is missing) and builds the train/validation subsets.
data_module.setup()

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting data/cifar-10-python.tar.gz to data/
Files already downloaded and verified


In [None]:
# 100 training samples with batch size 32 -> 4 batches (the last one is partial).
len(data_module.train_dataloader())

4

## Model

In [None]:
from torch import nn
import torch.nn.functional as F
from torch import optim
from torch import flatten
import torchmetrics

In [None]:
class CIFARModel(pl.LightningModule):
  """Baseline fully connected classifier without any regularization.

  Three linear layers (input_size -> 5000 -> 5000 -> num_classes) with ReLU
  activations, trained with cross-entropy and plain SGD.
  """

  def __init__(self, input_size, num_classes):
    """Create the layers, the loss and the train/validation metrics.

    Args:
      input_size: Number of features per sample after flattening.
      num_classes: Number of target classes (size of the output layer).
    """
    super().__init__()

    self.input_size = input_size

    self.loss_function = nn.CrossEntropyLoss()

    self.fc1 = nn.Linear(in_features=input_size, out_features=5000)
    self.fc2 = nn.Linear(in_features=5000, out_features=5000)
    self.fc3 = nn.Linear(in_features=5000, out_features=num_classes)

    # Separate metric objects for the training and validation phases.
    self.train_acc = torchmetrics.Accuracy()
    self.val_acc = torchmetrics.Accuracy()

    self.train_macro_f1 = torchmetrics.F1Score(num_classes=num_classes, average='macro')
    self.val_macro_f1 = torchmetrics.F1Score(num_classes=num_classes, average='macro')

  def forward(self, x):
    """Flatten each sample and return the unnormalized class scores (logits)."""
    flat = x.view(x.shape[0], self.input_size)
    hidden = F.relu(self.fc1(flat))
    hidden = F.relu(self.fc2(hidden))
    return self.fc3(hidden)

  def configure_optimizers(self):
    """Return the SGD optimizer (learning rate 0.01) used for training."""
    return optim.SGD(self.parameters(), lr=0.01)

  def training_step(self, train_batch, batch_idx):
    """Compute the loss of one training batch and log loss, accuracy and macro F1."""
    images, targets = train_batch

    logits = self.forward(images.float())
    batch_loss = self.loss_function(logits, targets)
    self.log('train_loss', batch_loss, on_step=True, on_epoch=True)

    # The metrics are fed class probabilities rather than raw logits.
    probabilities = F.softmax(logits, dim=1)

    self.train_acc(probabilities, targets)
    self.log('train_acc', self.train_acc, on_step=False, on_epoch=True)

    self.train_macro_f1(probabilities, targets)
    self.log('train_macro_f1', self.train_macro_f1, on_step=False, on_epoch=True)

    return batch_loss

  def validation_step(self, val_batch, batch_idx):
    """Compute the loss of one validation batch and log loss, accuracy and macro F1."""
    images, targets = val_batch

    logits = self.forward(images.float())
    batch_loss = self.loss_function(logits, targets)
    self.log('val_loss', batch_loss, on_step=True, on_epoch=True)

    # The metrics are fed class probabilities rather than raw logits.
    probabilities = F.softmax(logits, dim=1)

    self.val_acc(probabilities, targets)
    self.log('val_acc', self.val_acc, on_step=False, on_epoch=True)

    self.val_macro_f1(probabilities, targets)
    self.log('val_macro_f1', self.val_macro_f1, on_step=False, on_epoch=True)

    return batch_loss

In [None]:
# Flattened input of 32 * 32 * 3 features, 10 output classes.
baseline_model = CIFARModel(input_size=32 * 32 * 3, num_classes=10)

In [None]:
from pytorch_lightning.loggers import TensorBoardLogger
%load_ext tensorboard
%tensorboard --logdir lightning_logs

<IPython.core.display.Javascript object>

In [None]:
# Runs of the baseline are written under lightning_logs/baseline model/.
logger = TensorBoardLogger(save_dir="lightning_logs", name="baseline model")

In [None]:
# Log on every step: with only 4 batches per epoch a larger interval would skip most steps.
trainer = pl.Trainer(
    max_epochs=20,
    log_every_n_steps=1,
    logger=logger,
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
trainer.fit(model=baseline_model, datamodule=data_module)

## 1. Dropout


In [None]:
# Dropout layer that zeroes each element with probability 0.5 while in training mode.
dropout = nn.Dropout(p=0.5)

In [None]:
# Random 2 x 16 example batch. NOTE: the name `input` shadows the Python builtin.
input = torch.randn((2, 16))
input

tensor([[-0.4153,  0.5765,  1.3138, -0.9096, -0.1709, -0.3551, -0.1638, -0.4360,
         -1.3161, -0.6418, -0.9394, -0.8136,  0.3982, -0.2507,  0.6167, -1.3745],
        [ 1.0730,  0.9870,  3.4673,  0.8975, -0.5919, -0.9460, -0.3522,  0.3968,
         -0.2841, -0.9168,  0.0637, -1.6933, -0.6962,  0.5111,  0.5673,  0.6995]])

In [None]:
# The module is in training mode here: roughly half of the entries are zeroed and the
# surviving ones are scaled by 1 / (1 - p) = 2 so the expected value stays unchanged.
output = dropout(input)
output

tensor([[-0.0000,  0.0000,  0.0000, -1.8191, -0.3419, -0.7103, -0.3277, -0.0000,
         -0.0000, -1.2836, -0.0000, -0.0000,  0.7965, -0.5015,  1.2333, -2.7490],
        [ 0.0000,  1.9740,  6.9345,  0.0000, -0.0000, -0.0000, -0.7045,  0.0000,
         -0.0000, -0.0000,  0.0000, -0.0000, -1.3924,  0.0000,  0.0000,  0.0000]])

## 2. Batch normalization


In [None]:
# Fresh random 2 x 16 batch (2 samples, 16 features) for the batch-norm demo.
input = torch.randn((2, 16))
input

tensor([[ 0.0317, -0.9995, -2.3557,  1.9492,  0.5856, -0.0583,  1.3619,  1.2541,
          1.2997,  1.8475, -0.0250,  0.2789, -0.7169, -1.5039,  0.3239,  0.0203],
        [ 0.3078, -0.7498,  0.5348, -1.8424,  2.1859,  1.0459, -0.3558, -0.6544,
          0.2015, -0.7225, -0.2437, -0.8881, -0.0224,  0.4053, -0.8769, -0.0203]])

In [None]:
# Both statistics are taken over all elements of the tensor, not per feature.
print(f"Mean: {input.mean()}")
print(f"Std dev: {input.std()}")

Mean: 0.04994945973157883
Std dev: 1.065797209739685


In [None]:
# Batch normalization over 16 features; each feature is normalized across the batch.
batch_norm = nn.BatchNorm1d(num_features=16)

In [None]:
# A freshly built BatchNorm1d is in training mode, so it normalizes with the statistics
# of this batch. With only two samples, each feature ends up at roughly -1 / +1.
normalized = batch_norm(input)
normalized

tensor([[-0.9997, -0.9997, -1.0000,  1.0000, -1.0000, -1.0000,  1.0000,  1.0000,
          1.0000,  1.0000,  0.9996,  1.0000, -1.0000, -1.0000,  1.0000,  0.9881],
        [ 0.9997,  0.9997,  1.0000, -1.0000,  1.0000,  1.0000, -1.0000, -1.0000,
         -1.0000, -1.0000, -0.9996, -1.0000,  1.0000,  1.0000, -1.0000, -0.9881]],
       grad_fn=<NativeBatchNormBackward0>)

In [None]:
# The mean is ~0 as expected. The std is slightly above 1 because batch norm divides by
# the biased (1/N) per-feature batch variance, whereas Tensor.std() defaults to the
# unbiased (1/(N-1)) estimate, here taken over all 32 elements.
print(f"Mean: {normalized.mean()}")
print(f"Std dev: {normalized.std()}")

Mean: -1.30385160446167e-08
Std dev: 1.0151780843734741


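## 3. L2 regularization

L2 regularization (weight decay) penalizes large weights by adding a term proportional to their squared norm to the loss; in PyTorch it is enabled through the optimizer's `weight_decay` argument.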

## 1. Dropout model


In [None]:
class CIFARDropoutModel(pl.LightningModule):
  """Fully connected CIFAR classifier regularized with dropout.

  Same layout as the baseline (input_size -> 5000 -> 5000 -> num_classes),
  with a shared Dropout(0.5) applied to the output of each hidden linear layer.
  """

  def __init__(self, input_size, num_classes):
    """Create the layers, the dropout module, the loss and the metrics.

    Args:
      input_size: Number of features per sample after flattening.
      num_classes: Number of target classes (size of the output layer).
    """
    super().__init__()

    self.loss_function = nn.CrossEntropyLoss()
    self.input_size = input_size

    self.fc1 = nn.Linear(in_features=input_size, out_features=5000)
    self.fc2 = nn.Linear(in_features=5000, out_features=5000)
    self.fc3 = nn.Linear(in_features=5000, out_features=num_classes)
    self.dropout = nn.Dropout(p=0.5)

    # Separate metric objects for the training and validation phases.
    self.train_acc = torchmetrics.Accuracy()
    self.val_acc = torchmetrics.Accuracy()

    self.train_macro_f1 = torchmetrics.F1Score(num_classes=num_classes, average='macro')
    self.val_macro_f1 = torchmetrics.F1Score(num_classes=num_classes, average='macro')

  def forward(self, x):
    """Flatten each sample and return the unnormalized class scores (logits)."""
    flat = x.view(x.shape[0], self.input_size)
    # Dropout is applied to the linear output, before the ReLU.
    hidden = F.relu(self.dropout(self.fc1(flat)))
    hidden = F.relu(self.dropout(self.fc2(hidden)))
    return self.fc3(hidden)

  def configure_optimizers(self):
    """Return the SGD optimizer (learning rate 0.01) used for training."""
    return optim.SGD(self.parameters(), lr=0.01)

  def training_step(self, train_batch, batch_idx):
    """Compute the loss of one training batch and log loss, accuracy and macro F1."""
    images, targets = train_batch

    logits = self.forward(images.float())
    batch_loss = self.loss_function(logits, targets)
    self.log('train_loss', batch_loss, on_step=True, on_epoch=True)

    # The metrics are fed class probabilities rather than raw logits.
    probabilities = F.softmax(logits, dim=1)

    # NOTE: unlike the other models in this notebook, train accuracy is also logged per step.
    self.train_acc(probabilities, targets)
    self.log('train_acc', self.train_acc, on_step=True, on_epoch=True)

    self.train_macro_f1(probabilities, targets)
    self.log('train_macro_f1', self.train_macro_f1, on_step=False, on_epoch=True)

    return batch_loss

  def validation_step(self, val_batch, batch_idx):
    """Compute the loss of one validation batch and log loss, accuracy and macro F1."""
    images, targets = val_batch

    logits = self.forward(images.float())
    batch_loss = self.loss_function(logits, targets)
    self.log('val_loss', batch_loss, on_step=True, on_epoch=True)

    # The metrics are fed class probabilities rather than raw logits.
    probabilities = F.softmax(logits, dim=1)

    self.val_acc(probabilities, targets)
    self.log('val_acc', self.val_acc, on_step=False, on_epoch=True)

    self.val_macro_f1(probabilities, targets)
    self.log('val_macro_f1', self.val_macro_f1, on_step=False, on_epoch=True)

    return batch_loss

In [None]:
# Flattened input of 32 * 32 * 3 features, 10 output classes.
dropout_model = CIFARDropoutModel(input_size=32 * 32 * 3, num_classes=10)

In [None]:
from pytorch_lightning.loggers import TensorBoardLogger

In [None]:
# Runs of the dropout model are written under lightning_logs/dropout model/.
logger = TensorBoardLogger(save_dir="lightning_logs", name="dropout model")

In [None]:
# Log on every step: with only 4 batches per epoch a larger interval would skip most steps.
trainer = pl.Trainer(
    max_epochs=100,
    log_every_n_steps=1,
    logger=logger,
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
trainer.fit(model=dropout_model, datamodule=data_module)

## 2. Batch normalization model


In [None]:
class CIFARBatchNormModel(pl.LightningModule):
  """Fully connected CIFAR classifier with batch normalization.

  Layout: input_size -> 5000 -> 5000 -> num_classes, where each hidden layer
  is followed by ReLU and then BatchNorm1d. Trained with cross-entropy and SGD.
  """

  def __init__(self, input_size, num_classes):
    """Create the layers, the batch-norm modules, the loss and the metrics.

    Args:
      input_size: Number of features per sample after flattening.
      num_classes: Number of target classes (size of the output layer).
    """
    super().__init__()

    self.input_size = input_size
    self.loss_function = nn.CrossEntropyLoss()

    self.fc1 = nn.Linear(input_size, 5000)
    self.batch_norm_1 = nn.BatchNorm1d(5000)
    self.fc2 = nn.Linear(5000, 5000)
    self.batch_norm_2 = nn.BatchNorm1d(5000)
    self.fc3 = nn.Linear(5000, num_classes)

    # Separate metric objects for the training and validation phases.
    self.train_acc = torchmetrics.Accuracy()
    self.val_acc = torchmetrics.Accuracy()

    self.train_macro_f1 = torchmetrics.F1Score(num_classes=num_classes, average='macro')
    self.val_macro_f1 = torchmetrics.F1Score(num_classes=num_classes, average='macro')

  def forward(self, x):
    """Flatten each sample and return the unnormalized class scores (logits).

    Logits are returned in both training and evaluation mode. Apply
    ``F.softmax(logits, dim=1)`` to the result when probabilities are needed.
    """
    x = x.view(x.shape[0], self.input_size)
    x = self.fc1(x)
    x = F.relu(x)
    x = self.batch_norm_1(x)
    x = self.fc2(x)
    x = F.relu(x)
    x = self.batch_norm_2(x)
    x = self.fc3(x)
    # No softmax here: nn.CrossEntropyLoss expects raw logits and applies
    # log-softmax itself, so normalizing in eval mode would make the validation
    # loss wrong and lead to softmax being applied twice in validation_step.
    return x

  def configure_optimizers(self):
    """Return the SGD optimizer (learning rate 0.01) used for training."""
    optimizer = optim.SGD(self.parameters(), lr = 0.01)
    return optimizer

  def training_step(self, train_batch, batch_idx):
    """Compute the loss of one training batch and log loss, accuracy and macro F1.

    All values are logged as per-epoch aggregates only.
    """
    inputs, labels = train_batch

    outputs = self.forward(inputs.float())
    loss = self.loss_function(outputs, labels)

    self.log('train_loss', loss, on_step=False, on_epoch=True)

    # The metrics are fed class probabilities rather than raw logits.
    outputs = F.softmax(outputs, dim=1)

    self.train_acc(outputs, labels)
    self.log('train_acc', self.train_acc, on_epoch=True, on_step=False)

    self.train_macro_f1(outputs, labels)
    self.log('train_macro_f1', self.train_macro_f1, on_epoch=True, on_step=False)

    return loss

  def validation_step(self, val_batch, batch_idx):
    """Compute the loss of one validation batch and log loss, accuracy and macro F1.

    All values are logged as per-epoch aggregates only.
    """
    inputs, labels = val_batch

    outputs = self.forward(inputs.float())
    loss = self.loss_function(outputs, labels)

    # Logged exactly once per batch; a second self.log('val_loss', ...) call
    # would only feed the same value into the epoch aggregate again.
    self.log('val_loss', loss, on_step=False, on_epoch=True)

    # The metrics are fed class probabilities rather than raw logits.
    outputs = F.softmax(outputs, dim=1)

    self.val_acc(outputs, labels)
    self.log('val_acc', self.val_acc, on_epoch=True, on_step=False)

    self.val_macro_f1(outputs, labels)
    self.log('val_macro_f1', self.val_macro_f1, on_epoch=True, on_step=False)

    return loss

In [None]:
# Flattened input of 32 * 32 * 3 features, 10 output classes.
batch_norm_model = CIFARBatchNormModel(input_size=32 * 32 * 3, num_classes=10)

In [None]:
from pytorch_lightning.loggers import TensorBoardLogger

In [None]:
# Runs of the batch-norm model are written under lightning_logs/batch norm model/.
logger = TensorBoardLogger(save_dir="lightning_logs", name="batch norm model")

In [None]:
# Train on a single GPU. `gpus=1` is deprecated in favour of the
# accelerator/devices pair, which selects the same hardware.
trainer = pl.Trainer(accelerator="gpu", devices=1, logger=logger, max_epochs=100)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
trainer.fit(model=batch_norm_model, datamodule=data_module)

## 3. L2 regularization model

In [None]:
class CIFARRegularizedModel(pl.LightningModule):
  """Fully connected CIFAR classifier regularized with L2 weight decay.

  Same layout as the baseline (input_size -> 5000 -> 5000 -> num_classes with
  ReLU activations); the regularization comes from the optimizer's
  ``weight_decay`` setting rather than from the network itself.
  """

  def __init__(self, input_size, num_classes):
    """Create the layers, the loss and the train/validation metrics.

    Args:
      input_size: Number of features per sample after flattening.
      num_classes: Number of target classes (size of the output layer).
    """
    super().__init__()

    self.input_size = input_size
    self.loss_function = nn.CrossEntropyLoss()

    self.fc1 = nn.Linear(input_size, 5000)
    self.fc2 = nn.Linear(5000, 5000)
    self.fc3 = nn.Linear(5000, num_classes)

    # Separate metric objects for the training and validation phases.
    self.train_acc = torchmetrics.Accuracy()
    self.val_acc = torchmetrics.Accuracy()

    self.train_macro_f1 = torchmetrics.F1Score(num_classes=num_classes, average='macro')
    self.val_macro_f1 = torchmetrics.F1Score(num_classes=num_classes, average='macro')

  def forward(self, x):
    """Flatten each sample and return the unnormalized class scores (logits)."""
    x = x.view(x.shape[0], self.input_size)
    x = self.fc1(x)
    x = F.relu(x)
    x = self.fc2(x)
    x = F.relu(x)
    x = self.fc3(x)
    return x

  def configure_optimizers(self):
    """Return SGD with L2 regularization (lr 1e-3, weight_decay 1e-4).

    The optimizer must be returned: if this hook returns None, Lightning runs
    the fit without any optimizer and the weights are never updated.
    """
    optimizer = torch.optim.SGD(self.parameters(), lr=1e-3, weight_decay=1e-4)
    return optimizer

  def training_step(self, train_batch, batch_idx):
    """Compute the loss of one training batch and log loss, accuracy and macro F1.

    All values are logged as per-epoch aggregates only.
    """
    inputs, labels = train_batch

    outputs = self.forward(inputs.float())
    loss = self.loss_function(outputs, labels)

    self.log('train_loss', loss, on_step=False, on_epoch=True)

    # The metrics are fed class probabilities rather than raw logits.
    outputs = F.softmax(outputs, dim=1)

    self.train_acc(outputs, labels)
    self.log('train_acc', self.train_acc, on_epoch=True, on_step=False)

    self.train_macro_f1(outputs, labels)
    self.log('train_macro_f1', self.train_macro_f1, on_epoch=True, on_step=False)

    return loss

  def validation_step(self, val_batch, batch_idx):
    """Compute the loss of one validation batch and log loss, accuracy and macro F1.

    All values are logged as per-epoch aggregates only.
    """
    inputs, labels = val_batch

    outputs = self.forward(inputs.float())
    loss = self.loss_function(outputs, labels)

    # Logged exactly once per batch; a second self.log('val_loss', ...) call
    # would only feed the same value into the epoch aggregate again.
    self.log('val_loss', loss, on_step=False, on_epoch=True)

    # The metrics are fed class probabilities rather than raw logits.
    outputs = F.softmax(outputs, dim=1)

    self.val_acc(outputs, labels)
    self.log('val_acc', self.val_acc, on_epoch=True, on_step=False)

    self.val_macro_f1(outputs, labels)
    self.log('val_macro_f1', self.val_macro_f1, on_epoch=True, on_step=False)

    return loss

In [None]:
# Instantiate the weight-decay model defined above (not the batch-norm one),
# so the "regularized model" run actually trains the L2-regularized network.
regularized_model = CIFARRegularizedModel(32*32*3, 10)

In [None]:
from pytorch_lightning.loggers import TensorBoardLogger

In [None]:
# Runs of the regularized model are written under lightning_logs/regularized model/.
logger = TensorBoardLogger(save_dir="lightning_logs", name="regularized model")

In [None]:
# Train on a single GPU. `gpus=1` is deprecated in favour of the
# accelerator/devices pair, which selects the same hardware.
trainer = pl.Trainer(accelerator="gpu", devices=1, logger=logger, max_epochs=10)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
trainer.fit(model=regularized_model, datamodule=data_module)