In [1]:
# !pip install pytorch-lightning torchmetrics

## Download the datasets

In [2]:
import os
import tarfile
from torchvision.datasets.utils import download_url

NUM_WORKERS = os.cpu_count() // 1

# Flag for downloading the datasets
DOWNLOADED = True
if not DOWNLOADED:
    # Dowload the dataset
    dataset_url = "https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz"
    download_url(dataset_url, '.')
    # Extract from archive
    with tarfile.open('./cifar10.tgz', 'r:gz') as tar:
        tar.extractall(path='./data')
    data_dir = './data/cifar10'
    print(os.listdir(data_dir))
    classes = os.listdir(data_dir + "/train")
    print(classes)

### Compute the mean and std of images in the datasets

In [3]:
import torch
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder


# data_path = './data/cifar10'

# transform_img = transforms.Compose(
#             [transforms.Grayscale(num_output_channels=1),
#              transforms.ToTensor()]
#         )

# image_data = ImageFolder(
#   root=data_path, transform=transform_img
# )

# image_data_loader = DataLoader(
#   image_data, 
#   # batch size is whole datset
#   batch_size=len(image_data), 
#   shuffle=False, 
#   num_workers=NUM_WORKERS)

# def mean_std(loader):
#   images, lebels = next(iter(loader))
#   # shape of images = [b,c,w,h]
#   mean, std = images.mean([0,2,3]), images.std([0,2,3])
#   return mean, std

# IMAGES_MEAN, IMAGES_STD = mean_std(image_data_loader)
IMAGES_MEAN, IMAGES_STD = torch.tensor([0.4814]), torch.tensor([0.2391])

## Create Lightning Data Module (subclass)

In [4]:
import torchmetrics
from torch import nn, optim
import pytorch_lightning as pl
import torch.nn.functional as F
from pytorch_lightning import Trainer
from torch.utils.data import random_split
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

class CifarDataset(pl.LightningDataModule):
    def __init__(self, data_dir: str = None, batch_size: int = 512, num_workers = NUM_WORKERS):
        super().__init__()
        self.data_dir = data_dir or os.getcwd()
        self.num_workers = num_workers
        self.batch_size = batch_size
        self.train_transform = transforms.Compose(
            [transforms.RandomHorizontalFlip(p=0.5),
             transforms.RandomVerticalFlip(p=0.5),
             transforms.Grayscale(num_output_channels=1),
             transforms.ToTensor(),
             transforms.Normalize((IMAGES_MEAN), (IMAGES_STD))]
        )
        self.test_transform = transforms.Compose(
            [transforms.Grayscale(num_output_channels=1),
             transforms.ToTensor(),
             transforms.Normalize((IMAGES_MEAN), (IMAGES_STD))]
        )

    def prepare_data(self):
        self.train = ImageFolder(
            self.data_dir+'/train', transform=self.train_transform)

    def setup(self, train_ratio: float = 0.8, stage=None):
        if stage == 'fit' or stage is None:
            train_amount = int(len(self.train) * train_ratio)
            self.train, self.val = random_split(
                self.train, [train_amount, len(self.train) - train_amount])
        if stage == 'test' or stage is None:
            self.test = ImageFolder(self.data_dir+'/test',
                                    transform=self.test_transform)

    def train_dataloader(self):
        return DataLoader(self.train, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True, shuffle=True,)

    def val_dataloader(self):
        return DataLoader(self.val, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True)

    def test_dataloader(self):
        return DataLoader(self.test, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True)


## Create Lightning Module subclass (model)

In [5]:
class MultiLayerPerceptronModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        '''
        Conv: 
        W = Image width
        H = Image height
        F = Kernel width or height
        P = Kernel padding
        S = Stride
        ((W-F+2*P)/S)+1 * ((H-F+2*P)/S)+1
       
        Pooling: 
        n: input size
        f: filter size
        s: stride
        (n - f) / s + 1 * (n - f) / s + 1
        '''
        conv2_params = {'kernel_size': (3, 3),
                        'stride'     : (1, 1),
                        }
        act_func = nn.Sigmoid()

        self.layer1 = nn.Sequential(
            # Input : (1, 32, 32)
            nn.Conv2d(in_channels=1, out_channels=16, **conv2_params),
            nn.BatchNorm2d(16),
            act_func
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=64, **conv2_params),
            nn.BatchNorm2d(64),
            act_func
        )
        self.fc1 = nn.Sequential(
            nn.Linear(64*14*14, 784),
            nn.BatchNorm1d(784),
            act_func
        )
        self.fc2 = nn.Sequential(
            nn.Linear(784, 196),
            nn.BatchNorm1d(196),
            act_func
        )
        self.fc3 = nn.Sequential(
            nn.Linear(196, 64),
            nn.BatchNorm1d(64),
            act_func
        )
        self.out = nn.Linear(64, 10)
        self.loss = nn.CrossEntropyLoss()
        self.lr = float(1e-3)
        self.train_acc = torchmetrics.Accuracy()
        self.val_acc = torchmetrics.Accuracy()
        self.test_acc = torchmetrics.Accuracy()

    def forward(self, X):
        X = self.layer1(X)  # -> (1, 30, 30)
        X = self.layer2(X)  # -> (1, 28, 28)
        X = nn.MaxPool2d(2)(X)  # -> (1, 14, 14)
        X = nn.Dropout(p=0.4)(X)
        batch_size, _, _, _ = X.size()
        X = X.view(batch_size, -1)
        X = self.fc1(X)
        X = self.fc2(X)
        X = self.fc3(X)
        X = self.out(X)
        return F.log_softmax(X, dim=1)

    def configure_optimizers(self):
        return optim.AdamW(self.parameters(), lr=self.lr, weight_decay=1e-5)

    def training_step(self, train_batch, batch_idx):
        x, y = train_batch
        preds = self.forward(x)
        train_loss = self.loss(preds, y)
        self.train_acc(preds, y)
        # Logging the loss
        self.log('train_acc', self.train_acc, on_epoch=False, on_step=True)
        batch_dictionary={
            #REQUIRED: It ie required for us to return "loss"
            "loss": train_loss,
            #optional for batch logging purposes
            "log": self.log,
            # info to be used at epoch end 
            "accuracy": self.train_acc,
        }
        return batch_dictionary

    def validation_step(self, valid_batch, batch_idx):
        x, y = valid_batch
        preds = self.forward(x)
        val_loss = self.loss(preds, y)
        self.val_acc(preds, y)
        # Logging the loss
        self.log('val_acc', self.val_acc, on_epoch=True, on_step=True)
        batch_dictionary={
            #REQUIRED: It ie required for us to return "loss"
            "loss": val_loss,
            #optional for batch logging purposes
            "log": self.log,
            # info to be used at epoch end 
            "accuracy": self.val_acc,
        }
        return batch_dictionary

    def test_step(self, test_batch, batch_idx):
      x, y = test_batch
      preds = self.forward(x)
      loss = self.loss(preds, y)
      self.test_acc(preds, y)
      # By default logs it per epoch (weighted average over batches), and returns it afterwards
      self.log("test_acc", self.test_acc)

## Trainer configuration

In [6]:
data_path = 'data/cifar10'
model_cp_path = 'Model Checkpoints'
model_cp_filename = 'Cifar10_model'
data_module = CifarDataset(data_path)
pretrained_filename = os.path.join(model_cp_path, model_cp_filename)
trainer = Trainer(
    # default_root_dir=os.path.join(checkpoint_path, save_name),  # Where to save models
    # We run on a single GPU
    max_epochs=70, 
    auto_lr_find=True, 
    gpus=-1,
    auto_scale_batch_size=True, 
    check_val_every_n_epoch=5, 
    detect_anomaly=True,
    precision=16,
    # How many epochs to train for i
    # callbacks=[ModelCheckpoint(dirpath=model_cp_path, filename=model_cp_filename, save_top_k=1, 
                        # monitor="val_acc", mode='max', every_n_epochs=5, save_on_train_epoch_end=True), 
               # Save the best checkpoint based on the maximum val_acc recorded. Saves only weights and not optimizer
        # EarlyStopping(monitor='val_acc', min_delta=0.0001, patience=5, mode='max')
    # ],  # Log learning rate every epoch
)  # In case your notebook crashes due to the progress bar, consider increasing the refresh rate
if os.path.isfile(pretrained_filename + '.ckpt'):
  print(f"Found pretrained model at {pretrained_filename}, loading...")
  # Automatically loads the model with the saved hyperparameters
  model = MultiLayerPerceptronModel.load_from_checkpoint(pretrained_filename + '.ckpt')
else:
  pl.seed_everything(42)  # To be reproducable
  model = MultiLayerPerceptronModel()
trainer.fit(model, data_module)
# model = MultiLayerPerceptronModel.load_from_checkpoint(checkpoint_path=pretrained_filename + '.ckpt')

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Global seed set to 42
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Missing logger folder: D:\Github\5218\Assignment 1\lightning_logs

  | Name      | Type             | Params
-----------------------------------------------
0 | layer1    | Sequential       | 192   
1 | layer2    | Sequential       | 9.4 K 
2 | fc1       | Sequential       | 9.8 M 
3 | fc2       | Sequential       | 154 K 
4 | fc3       | Sequential       | 12.7 K
5 | out       | Linear           | 650   
6 | loss      | CrossEntropyLoss | 0     
7 | train_acc | Accuracy         | 0     
8 | val_acc   | Accuracy         | 0     
9 | test_acc  | Accuracy         | 0     
-----------------------------------------------
10.0 M    Trainable params
0         Non-trainable params
10.0 M    Total params
20.028    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 42


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Activation
* Log 0: Sigmoid, AdamW, 0.4
* Log 1: ReLU, AdamW, 0.4
* Log 2: Swish, AdamW, 0.4

Optimizer:
* Log 3: Adam, Swish, 0.4
* Log 4: SGD, Swish, 0.4
* Log 5: AdamW, Swish, 0.4

Dropout:
* Log 6: 0.2, AdamW, Swish
* Log 7: 0.3, AdamW, Swish
* Log 8: 0.4, AdamW, Swish

In [7]:
trainer.test(model, data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.6384000182151794}
--------------------------------------------------------------------------------


[{'test_acc': 0.6384000182151794}]

In [8]:
# import shutil
# folder_name = 'lightning_logs'
# shutil.make_archive(folder_name, 'zip', folder_name)

In [9]:
import webbrowser

webbrowser.open('https://youtu.be/5dwxGvmUG90?t=53')

True