# imports

In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from pl_bolts.datamodules import CIFAR10DataModule
from pl_bolts.transforms.dataset_normalizations import cifar10_normalization
from pytorch_lightning import LightningModule, Trainer, seed_everything
from pytorch_lightning.callbacks import LearningRateMonitor, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

from torch.optim.lr_scheduler import OneCycleLR
from torch.optim.swa_utils import AveragedModel, update_bn
from torchmetrics.functional import accuracy

# Fix the global random seed up front so repeated runs of the notebook are comparable
# (Lightning reports this as "Global seed set to 1").
seed_everything(1)

# Directory the dataset is downloaded to / read from; override it with the
# PATH_DATASETS environment variable, otherwise the current directory is used.
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")

# Legacy alias holding the same value as PATH_DATASETS. Kept so any cell that still
# refers to `Path` continues to work; prefer PATH_DATASETS in new code.
Path = PATH_DATASETS

# Use at most one GPU, and a larger batch when one is available.
AVAIL_GPUS = min(1, torch.cuda.device_count())
BATCH_SIZE = 256 if AVAIL_GPUS else 64
# Half of the logical CPUs for data loading. os.cpu_count() may return None when the
# count cannot be determined, so fall back to 1 (-> 0 workers) instead of raising.
NUM_WORKERS = (os.cpu_count() or 1) // 2

print("Available GPUS:", AVAIL_GPUS)
print("Batch size:", BATCH_SIZE)
print("Num Workers:", NUM_WORKERS)

# Device used when the network is built outside of the Lightning Trainer.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Global seed set to 1


Available GPUS: 1
Batch size: 256
Num Workers: 6


## import Cifar10 dataset

transformations for the training dataset:
 - pad the image by 4 pixels on each side, then crop a random 32x32 patch from it
 - randomly flip image horizontally
 - convert data to tensor
 - normalize data

In [2]:
# Augmentation and preprocessing pipeline applied to every training image.
train_transforms = torchvision.transforms.Compose(
    transforms=[
        # Pad by 4 px, then cut a random 32x32 window out of the padded image.
        torchvision.transforms.RandomCrop(size=32, padding=4),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ToTensor(),
        cifar10_normalization(),
    ],
)

transformations for the test and validation dataset:
 - convert data to tensor
 - normalize data (with the same normalization used for the training data)

In [3]:
# Deterministic preprocessing for validation/test images: no augmentation,
# only tensor conversion followed by the CIFAR-10 normalization.
test_transforms = torchvision.transforms.Compose(
    transforms=[
        torchvision.transforms.ToTensor(),
        cifar10_normalization(),
    ],
)

load cifar 10 dataset

In [4]:
# Data module that serves the CIFAR-10 splits to the Trainer.
cifar10_data_module = CIFAR10DataModule(
    # Augmented pipeline for training; validation and test share the plain one.
    train_transforms=train_transforms,
    val_transforms=test_transforms,
    test_transforms=test_transforms,
    # Storage location and loader settings come from the configuration cell.
    data_dir=PATH_DATASETS,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)

  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(


Define the neural network

In [6]:
# Using a modified version of Resnet 18

def create_network(num_classes=10):
    """Build an untrained ResNet-18 adapted to small (32x32) input images.

    The torchvision ResNet-18 is modified in two places so that the input
    resolution is kept through the stem (the layer summary shows 64x32x32 after
    both the first convolution and the former max-pool position):

    * ``conv1`` is replaced by a 3x3, stride-1, padding-1 convolution.
    * ``maxpool`` is replaced by ``nn.Identity``.

    Args:
        num_classes: number of output classes of the final layer. Defaults to 10
            (CIFAR-10), which reproduces the previous hard-coded behaviour.

    Returns:
        The modified network, moved to the module-level ``device``.
    """
    network = torchvision.models.resnet18(pretrained=False, num_classes=num_classes)
    network.conv1 = nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    network.maxpool = nn.Identity()
    # nn.Module.to() moves the parameters in place and returns the module itself.
    return network.to(device)



Check the output dimensions of each layer

In [8]:
from torchsummary import summary

# Print a per-layer table (output shape, parameter count) for one 3x32x32 input image.
summary(create_network(), input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
          Identity-4           [-1, 64, 32, 32]               0
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
              ReLU-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
             ReLU-10           [-1, 64, 32, 32]               0
       BasicBlock-11           [-1, 64, 32, 32]               0
           Conv2d-12           [-1, 64, 32, 32]          36,864
      BatchNorm2d-13           [-1, 64, 32, 32]             128
             ReLU-14           [-1, 64,

In [9]:
class NeuralNetwork(LightningModule):
    """Lightning module that trains a classification backbone with SGD and a one-cycle LR schedule.

    Args:
        model: backbone network mapping an input batch to per-class scores.
        lr: learning rate handed to the SGD optimizer.
            NOTE(review): the OneCycleLR scheduler drives the learning rate during
            training, so this value probably has little or no effect — confirm.
        max_lr: peak learning rate of the one-cycle schedule. Defaults to 0.1,
            the value that used to be hard-coded in ``configure_optimizers``.
    """

    def __init__(self, model, lr=0.05, max_lr=0.1):
        super().__init__()

        # Keep the backbone out of the hyper-parameters: it is an nn.Module whose
        # weights are stored with the module anyway, and capturing it in hparams
        # would put the whole network into the hyper-parameter dump.
        # Consequence: `self.hparams` only contains `lr` and `max_lr`, so the
        # backbone must be passed again when re-creating the module from a checkpoint.
        self.save_hyperparameters(ignore=["model"])
        self.model = model

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1) for batch ``x``."""
        out = self.model(x)
        return F.log_softmax(out, dim=1)

    def training_step(self, batch, batch_idx):
        """Compute, log (``train_loss``) and return the NLL loss of one training batch."""
        x, y = batch
        log_probs = self(x)
        loss = F.nll_loss(log_probs, y)
        self.log("train_loss", loss)
        return loss

    def evaluate(self, batch, stage=None):
        """Compute loss and accuracy for ``batch`` and log them when ``stage`` is given.

        Args:
            batch: ``(inputs, targets)`` pair.
            stage: prefix for the logged metric names (``"val"`` / ``"test"``);
                nothing is logged when it is ``None`` or empty.
        """
        x, y = batch
        log_probs = self(x)

        loss = F.nll_loss(log_probs, y)
        preds = torch.argmax(log_probs, dim=1)
        acc = accuracy(preds, y)

        if stage:
            self.log(f"{stage}_loss", loss, prog_bar=True)
            self.log(f"{stage}_acc", acc, prog_bar=True)

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` and ``val_acc`` for one validation batch."""
        self.evaluate(batch, "val")

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` and ``test_acc`` for one test batch."""
        self.evaluate(batch, "test")

    def configure_optimizers(self):
        """Create the SGD optimizer and a OneCycleLR scheduler that is stepped every batch."""
        optimizer = torch.optim.SGD(
            self.parameters(),
            lr=self.hparams.lr,
            momentum=0.9,
            weight_decay=5e-4,
        )

        # NOTE(review): 45000 is a hard-coded training-set size. Confirm it matches
        # the number of samples the datamodule really trains on; if the real number
        # of batches per epoch differs, the schedule length (epochs * steps_per_epoch)
        # does not line up with the actual number of optimizer steps.
        steps_per_epoch = 45000 // BATCH_SIZE

        scheduler_dict = {
            "scheduler": OneCycleLR(
                optimizer,
                max_lr=self.hparams.max_lr,
                epochs=self.trainer.max_epochs,
                steps_per_epoch=steps_per_epoch,
            ),
            # Advance the schedule after every optimizer step, not once per epoch.
            "interval": "step",
        }
        return {"optimizer": optimizer, "lr_scheduler": scheduler_dict}

In [12]:
# Wrap a freshly built backbone in the Lightning module.
model = NeuralNetwork(model=create_network(), lr=0.05)
# NOTE(review): the datamodule is also handed to trainer.fit()/trainer.test() below,
# so this attribute looks redundant — confirm before removing.
model.datamodule = cifar10_data_module

define trainer:

In [13]:
# Trainer: up to 100 epochs on the available GPU (if any), logging to TensorBoard.
trainer = Trainer(
    gpus=AVAIL_GPUS,
    max_epochs=100,
    progress_bar_refresh_rate=10,
    logger=TensorBoardLogger("lightning_logs/"),
    callbacks=[
        # Record the learning rate at every optimizer step.
        LearningRateMonitor(logging_interval="step"),
        # Stop training early based on the logged validation loss.
        EarlyStopping(monitor="val_loss"),
    ],
)

  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [14]:
os.environ['TENSORBOARD_BINARY'] = '/home/paul/anaconda3/bin/tensorboard'
# Start tensorboard.
%reload_ext tensorboard
%tensorboard --logdir lightning_logs/

Reusing TensorBoard on port 6006 (pid 3877), started 4:49:01 ago. (Use '!kill 3877' to kill it.)

train network

In [15]:
# Run the training loop (with validation) on the CIFAR-10 data module.
trainer.fit(model, datamodule=cifar10_data_module)

Files already downloaded and verified
Files already downloaded and verified


  rank_zero_deprecation(
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
2022-02-09 22:53:08.563693: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-02-09 22:53:08.563723: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.

  | Name  | Type   | Params
---------------------------------
0 | model | ResNet | 11.2 M
---------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.696    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 1


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [16]:
# Evaluate the trained model on the test split; logs test_loss and test_acc.
trainer.test(model=model, datamodule=cifar10_data_module)

  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.8100000023841858, 'test_loss': 0.6268109083175659}
--------------------------------------------------------------------------------


[{'test_loss': 0.6268109083175659, 'test_acc': 0.8100000023841858}]