In [None]:
# !pip install pytorch-lightning torchmetrics

In [None]:
import os
import tarfile
from torchvision.datasets.utils import download_url

# Manual switch: set to False on a machine that does not have the images yet
# to fetch and unpack the CIFAR-10 archive.
DOWNLOADED = True
if not DOWNLOADED:
    # Download the dataset archive into the current directory
    dataset_url = "https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz"
    download_url(dataset_url, '.')
    # Unpack the archive under ./data
    with tarfile.open('./cifar10.tgz', 'r:gz') as archive:
        archive.extractall(path='./data')
    data_dir = './data/cifar10'
    # Show what was extracted: first the top-level folders ...
    print(os.listdir(data_dir))
    # ... then the entries of the training folder (one per class).
    classes = os.listdir(f"{data_dir}/train")
    print(classes)

In [8]:
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder


data_dir = './data/cifar10'

# Un-normalised single-channel tensors: this pass exists only to measure the
# statistics that the training pipeline normalises with later.
to_gray_tensor = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
])

# NOTE(review): the root passed here is the folder that holds the train/ and
# test/ sub-folders, so the labels ImageFolder assigns are presumably the split
# names rather than the classes; mean_std ignores labels, so only the pixels matter.
image_data = ImageFolder(data_dir, transform=to_gray_tensor)

# batch size is the whole dataset, so a single batch contains every image
image_data_loader = DataLoader(
    image_data,
    batch_size=len(image_data),
    shuffle=False,
    num_workers=0,
)

def mean_std(loader):
    """Compute the per-channel mean and standard deviation over every batch.

    The statistics are accumulated across all batches the loader yields, so
    the result no longer depends on the loader using a single batch that
    contains the whole dataset.

    Args:
        loader: iterable of ``(images, labels)`` pairs. ``images`` must be a
            4-D tensor laid out as ``[batch, channel, height, width]``; the
            labels are ignored.

    Returns:
        Tuple ``(mean, std)`` of 1-D tensors with one entry per channel, in
        the dtype of the first image batch. ``std`` is the sample standard
        deviation (divides by ``n - 1``), matching ``Tensor.std``'s default.

    Raises:
        ValueError: if the loader yields no batches.
    """
    import torch  # local import: the notebook cells only import torch sub-modules

    reduce_dims = [0, 2, 3]  # everything except the channel axis
    channel_sum = None
    channel_sq_sum = None
    values_per_channel = 0
    result_dtype = None

    for images, _labels in loader:
        if result_dtype is None:
            result_dtype = images.dtype
        # Accumulate in float64 so summing tens of millions of pixels does not
        # lose precision.
        batch_sum = images.sum(dim=reduce_dims, dtype=torch.float64)
        batch_sq_sum = (images * images).sum(dim=reduce_dims, dtype=torch.float64)
        if channel_sum is None:
            channel_sum, channel_sq_sum = batch_sum, batch_sq_sum
        else:
            channel_sum = channel_sum + batch_sum
            channel_sq_sum = channel_sq_sum + batch_sq_sum
        values_per_channel += images.shape[0] * images.shape[2] * images.shape[3]

    if channel_sum is None:
        raise ValueError("mean_std() received a loader that yields no batches")

    mean = channel_sum / values_per_channel
    # Sample variance from the running sums; with a single value per channel the
    # division by zero produces nan, as Tensor.std does.
    variance = (channel_sq_sum - values_per_channel * mean ** 2) / (values_per_channel - 1)
    # Rounding can push a zero variance slightly below zero; clamp before sqrt.
    std = variance.clamp(min=0).sqrt()
    return mean.to(result_dtype), std.to(result_dtype)

# Channel statistics of the grayscale images; the data module's
# transforms.Normalize step reads these two module-level tensors.
IMAGE_MEAN, IMAGE_STD = mean_std(image_data_loader)

mean and std: 
 tensor([0.4814]) tensor([0.2391])


In [None]:
import torchmetrics
from torch import nn, optim
import pytorch_lightning as pl
import torch.nn.functional as F
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import random_split, DataLoader

# Debugging aid: asks CUDA to run kernel launches synchronously so a device-side
# error surfaces at the call that caused it. It slows GPU execution, so remove it
# for real training runs.
# NOTE(review): presumably this has to be set before CUDA is first initialised
# to have any effect - confirm. `os` comes from the first cell's imports.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


class CifarDataset(pl.LightningDataModule):
    """Lightning data module serving the folder-based CIFAR-10 images in grayscale.

    ``data_dir`` must contain ``train/`` and ``test/`` sub-folders in
    ``ImageFolder`` layout. The training folder is split into a training and a
    validation part; only the training part is augmented with random flips,
    while validation and test images go through the deterministic pipeline.

    Attributes:
        transforms: training pipeline (random flips + grayscale + normalise).
        eval_transforms: deterministic pipeline used for validation and test.
        train / val / test: the datasets, ``None`` until ``setup()`` has run.
    """

    def __init__(self, data_dir: str = None, batch_size: int = 32, num_workers: int = 4):
        """Store the loader settings and build the two transform pipelines.

        Args:
            data_dir: dataset root; defaults to the current working directory.
            batch_size: batch size used by all three dataloaders.
            num_workers: worker processes used by all three dataloaders.
        """
        super().__init__()
        self.data_dir = data_dir or os.getcwd()
        self.num_workers = num_workers
        self.batch_size = batch_size

        # Deterministic preprocessing shared by every split.
        base_steps = [
            transforms.Grayscale(num_output_channels=1),
            transforms.ToTensor(),
            transforms.Normalize((IMAGE_MEAN), (IMAGE_STD)),
        ]
        # Evaluation data must not be randomly flipped, otherwise validation and
        # test scores change from run to run.
        self.eval_transforms = transforms.Compose(base_steps)
        # Training pipeline: augmentation first, then the shared steps.
        self.transforms = transforms.Compose(
            [transforms.RandomHorizontalFlip(p=0.5),
             transforms.RandomVerticalFlip(p=0.5)] + base_steps
        )

        # Filled in by setup().
        self.train = None
        self.val = None
        self.test = None

    def prepare_data(self):
        """Verify that the image folders exist.

        No datasets are assigned here: Lightning documents this hook as running
        on a single process only, so state set here would be missing elsewhere.

        Raises:
            FileNotFoundError: if ``train/`` or ``test/`` is missing.
        """
        for split in ('train', 'test'):
            split_dir = self.data_dir + '/' + split
            if not os.path.isdir(split_dir):
                raise FileNotFoundError(f"Expected image folder not found: {split_dir}")

    def setup(self, train_ratio: float = 0.8, stage=None):
        """Build the train/val/test datasets (only once).

        Args:
            train_ratio: fraction of the training folder used for training; the
                remainder becomes the validation set.
            stage: accepted for Lightning compatibility; all three datasets are
                built regardless of the stage.
        """
        # NOTE(review): guard in case a Lightning version passes the stage name
        # positionally, which would land in the train_ratio slot - confirm.
        if isinstance(train_ratio, str):
            stage, train_ratio = train_ratio, 0.8

        # Lightning may call setup() once per stage; splitting again would shrink
        # the training set and reshuffle which images count as validation.
        if self.train is not None:
            return

        from torch.utils.data import Subset

        train_dir = self.data_dir + '/train'
        augmented = ImageFolder(train_dir, transform=self.transforms)
        # Same files in the same order, but without augmentation, for validation.
        plain = ImageFolder(train_dir, transform=self.eval_transforms)

        train_amount = int(len(augmented) * train_ratio)
        train_part, val_part = random_split(
            augmented, [train_amount, len(augmented) - train_amount])

        self.train = train_part
        self.val = Subset(plain, val_part.indices)
        self.test = ImageFolder(self.data_dir + '/test', transform=self.eval_transforms)

    def __len__(self):
        """Number of training samples; only valid after ``setup()`` has run."""
        return len(self.train)

    def train_dataloader(self):
        """Shuffled loader over the training split."""
        return DataLoader(self.train, shuffle=True, batch_size=self.batch_size, num_workers=self.num_workers)

    def val_dataloader(self):
        """Loader over the validation split, in fixed order."""
        return DataLoader(self.val, batch_size=self.batch_size, num_workers=self.num_workers)

    def test_dataloader(self):
        """Loader over the test folder, in fixed order."""
        return DataLoader(self.test, batch_size=self.batch_size, num_workers=self.num_workers)


In [None]:
class MultiLayerPerceptronModel(pl.LightningModule):
    """Image classifier for 1x32x32 grayscale inputs and 10 classes.

    Despite the name, the network starts with two 3x3 convolutions, followed by
    max-pooling, dropout and three fully connected layers.

    Output-size rules behind the layer dimensions (per spatial dimension):
        Conv:    floor((W - F + 2*P) / S) + 1
                 W = input size, F = kernel size, P = padding, S = stride
        Pooling: floor((n - f) / s) + 1
                 n = input size, f = filter size, s = stride

    Args:
        lr: learning rate handed to the Adam optimizer.
    """

    def __init__(self, lr: float = 0.001):
        super().__init__()
        # Stores `lr` in checkpoints so load_from_checkpoint can rebuild the model.
        self.save_hyperparameters()
        self.layer1 = nn.Sequential(
            # Input : (1, 32, 32)
            nn.Conv2d(in_channels=1, out_channels=16,
                      kernel_size=3, stride=1),
            nn.BatchNorm2d(16),
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=64,
                      kernel_size=3, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        # Registered as sub-modules so that train()/eval() reaches them. A Dropout
        # created inside forward() is always in training mode and would keep
        # dropping activations during validation and testing.
        self.pool = nn.MaxPool2d(2)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Sequential(
            nn.Linear(64*14*14, 784),
            nn.BatchNorm1d(784),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(784, 196),
            nn.BatchNorm1d(196),
            nn.ReLU()
        )
        self.fc3 = nn.Sequential(
            nn.Linear(196, 64),
            nn.BatchNorm1d(64),
            nn.ReLU()
        )
        self.out = nn.Linear(64, 10)
        # forward() already returns log-probabilities, so the matching criterion
        # is NLLLoss; CrossEntropyLoss would apply log-softmax a second time.
        self.loss = nn.NLLLoss()
        self.lr = lr
        # NOTE(review): newer torchmetrics releases reportedly require
        # Accuracy(task="multiclass", num_classes=10) - confirm against the
        # installed version.
        self.train_acc = torchmetrics.Accuracy()
        self.val_acc = torchmetrics.Accuracy()
        self.test_acc = torchmetrics.Accuracy()

    def forward(self, X):
        """Return per-class log-probabilities of shape (N, 10) for a batch X of shape (N, 1, 32, 32)."""
        X = self.layer1(X)   # -> (N, 16, 30, 30)
        X = self.layer2(X)   # -> (N, 64, 28, 28)
        X = self.pool(X)     # -> (N, 64, 14, 14)
        X = self.dropout(X)
        X = X.view(X.size(0), -1)  # flatten to (N, 64*14*14)
        X = self.fc1(X)
        X = self.fc2(X)
        X = self.fc3(X)
        X = self.out(X)
        return F.log_softmax(X, dim=1)

    def configure_optimizers(self):
        """Adam over all parameters with the configured learning rate."""
        return optim.Adam(self.parameters(), lr=self.lr)

    def _shared_step(self, batch, metric):
        """Run one batch through the model, update `metric`, and return the loss."""
        x, y = batch
        log_probs = self(x)
        loss = self.loss(log_probs, y)
        # Feed class indices to the metric: the raw outputs are log-probabilities
        # (all <= 0), which a metric expecting probabilities may reject.
        metric(log_probs.argmax(dim=1), y)
        return loss

    def training_step(self, train_batch, batch_idx):
        """Compute the training loss for one batch and log loss and accuracy per step."""
        train_loss = self._shared_step(train_batch, self.train_acc)
        self.log('train_loss', train_loss, on_epoch=False, on_step=True)
        self.log('train_acc', self.train_acc, on_epoch=False, on_step=True)
        # Only the loss is returned: the previous dict also carried the bound
        # self.log method and the metric object, which Lightning has no use for.
        return {"loss": train_loss}

    def validation_step(self, valid_batch, batch_idx):
        """Compute the validation loss for one batch and log epoch-level loss and accuracy."""
        val_loss = self._shared_step(valid_batch, self.val_acc)
        self.log('val_loss', val_loss, on_epoch=True, on_step=False)
        # Logged as 'val_acc' because that is the key the ModelCheckpoint
        # callback in this notebook monitors.
        self.log('val_acc', self.val_acc, on_epoch=True, on_step=False)
        return {"loss": val_loss}

    def test_step(self, test_batch, batch_idx):
        """Log loss and accuracy for one test batch."""
        test_loss = self._shared_step(test_batch, self.test_acc)
        self.log("test_loss", test_loss)
        # By default logs it per epoch (weighted average over batches), and returns it afterwards
        self.log("test_acc", self.test_acc)

In [None]:
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint

def train_model(data_module, checkpoint_path, save_name=None, max_epochs=1, **kwargs):
    """Train (or load) a MultiLayerPerceptronModel and evaluate it.

    Inputs:
        data_module - Data module providing the train/val/test dataloaders.
        checkpoint_path - Directory that holds pretrained checkpoints and run folders.
        save_name (optional) - Name used for the run folder and for the pretrained
            checkpoint file "<checkpoint_path>/<save_name>.ckpt". A trailing ".ckpt"
            is ignored. Defaults to the model class name.
        max_epochs (optional) - Number of epochs to train for.
        **kwargs - Forwarded to the MultiLayerPerceptronModel constructor.

    Returns:
        (model, result) where result is {"test": <test accuracy>, "val": <validation accuracy>}.
    """
    if save_name is None:
        save_name = MultiLayerPerceptronModel.__name__
    # Accept "name.ckpt" as well as "name"; otherwise the lookup below would
    # search for "name.ckpt.ckpt".
    if save_name.endswith(".ckpt"):
        save_name = save_name[:-len(".ckpt")]

    # Create a PyTorch Lightning trainer with the checkpoint and LR-monitor callbacks
    # NOTE(review): auto_lr_find / auto_scale_batch_size presumably only take
    # effect through trainer.tune(), which is not called here - confirm.
    trainer = pl.Trainer(
        default_root_dir=os.path.join(checkpoint_path, save_name),  # Where to save models
        max_epochs=max_epochs,  # How many epochs to train for
        auto_lr_find=True,
        # We run on a single GPU
        accelerator='gpu',
        devices=1,
        auto_scale_batch_size=True,
        # Validate every epoch: with a larger interval than max_epochs the
        # validation loop never runs and the checkpoint callback has no metric.
        check_val_every_n_epoch=1,
        detect_anomaly=True,
        callbacks=[
            ModelCheckpoint(
                save_weights_only=True, mode="max", monitor="val_acc"
            ),  # Save the best checkpoint based on the maximum val_acc recorded. Saves only weights and not optimizer
            LearningRateMonitor("epoch"),  # Log learning rate every epoch
        ],
        progress_bar_refresh_rate=1,
    )  # In case your notebook crashes due to the progress bar, consider increasing the refresh rate
    trainer.logger._log_graph = True  # If True, we plot the computation graph in tensorboard
    trainer.logger._default_hp_metric = None  # Optional logging argument that we don't need

    pl.seed_everything(42)  # To be reproducible

    # Check whether pretrained model exists. If yes, load it and skip training
    pretrained_filename = os.path.join(checkpoint_path, save_name + ".ckpt")
    if os.path.isfile(pretrained_filename):
        print(f"Found pretrained model at {pretrained_filename}, loading...")
        # Automatically loads the model with the saved hyperparameters
        model = MultiLayerPerceptronModel.load_from_checkpoint(pretrained_filename)
        # fit() is skipped on this path, so the datasets have to be built here.
        # NOTE(review): the validation split drawn here need not match the one
        # used when the checkpoint was trained - treat "val" with care.
        data_module.prepare_data()
        data_module.setup()
    else:
        model = MultiLayerPerceptronModel(**kwargs)
        trainer.fit(model, data_module)
        best_model_path = trainer.checkpoint_callback.best_model_path
        if best_model_path:
            # Load best checkpoint after training
            model = MultiLayerPerceptronModel.load_from_checkpoint(best_model_path)
        else:
            print("No checkpoint was saved during training; evaluating the in-memory model.")

    # Test best model on validation and test set. DataLoaders (not the raw
    # datasets) are passed positionally, because the keyword for this argument
    # differs between Lightning versions.
    val_result = trainer.test(model, data_module.val_dataloader(), verbose=False)
    test_result = trainer.test(model, data_module.test_dataloader(), verbose=False)
    result = {"test": test_result[0]["test_acc"], "val": val_result[0]["test_acc"]}

    return model, result

In [None]:
data_path = 'data/cifar10'
data_module = CifarDataset(data_path)
model_cp_path = 'Model checkpoints'
model_cp_filename = 'Cifar10_model.ckpt'
# train_model builds "<save_name>.ckpt" itself, so pass the name without the
# extension; passing the full file name made it look for "Cifar10_model.ckpt.ckpt".
model_cp_name = os.path.splitext(model_cp_filename)[0]
# Keep the trained model and its accuracies instead of discarding them.
model, result = train_model(data_module, model_cp_path, model_cp_name)
result

In [None]:
# from os.path import exists
# from datetime import datetime
# from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

# # Data module
# data_path = 'data/cifar10'
# data_module = CifarDataset(data_path)
# model_cp_path = 'Model checkpoints'
# model_cp_filename = 'Cifar10_model.ckpt'

# # Load the saved model from checkpoints if the folder is not empty
# if len(os.listdir(model_cp_path)) > 0:
#     model = MultiLayerPerceptronModel().load_from_checkpoint(
#     checkpoint_path=os.path.join(model_cp_path, model_cp_filename))
# else:
#     model = MultiLayerPerceptronModel()
#     # Callbacks
#     checkpoint_callback = ModelCheckpoint(
#           dirpath=model_cp_path, monitor='val_acc', mode='max', auto_insert_metric_name=False, save_top_k=1, verbose=False)

# # ckpt_path=os.path.join(model_cp_path, model_cp_filename
# trainer = pl.Trainer(max_epochs=50, fast_dev_run=False, auto_lr_find=True, accelerator='gpu', devices=1,
#                      auto_scale_batch_size=True, check_val_every_n_epoch=10, detect_anomaly=True)
# trainer.fit(model, data_module)
# trainer.save_checkpoint(f'{model_cp_path}/{model_cp_filename}')

In [None]:
# Load the TensorBoard notebook extension and open the dashboard inline.
# NOTE(review): train_model sets the Trainer's default_root_dir to
# <checkpoint_path>/<save_name>, so the run logs are presumably written below
# that folder rather than ./lightning_logs - confirm the --logdir value.
%load_ext tensorboard
%tensorboard --logdir lightning_logs