In [5]:
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import time
from typing import Type, Any, Callable, Union, List, Optional
from torch import Tensor 
from torchsummary import summary
if torch.cuda.is_available():
    print("Using GPUs")
    device = torch.device("cuda") 
else:
    device = torch.device("cpu")

Using GPUs


In [2]:
torch.manual_seed(43)
batch_size = 128

### for CIFAR 10
# stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
## for CIFAR 100
stats = ((0.507, 0.487, 0.441), (0.267, 0.256, 0.276))

transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(*stats),
    torchvision.transforms.RandomCrop(32, padding=4, padding_mode='constant'),
    torchvision.transforms.RandomHorizontalFlip(p=0.5)
])

train_set = torchvision.datasets.CIFAR100(root="data", train=True, download=True, transform=transform)
train_size = len(train_set)
test_set = torchvision.datasets.CIFAR100(root="data", train=False, download=True, transform=transform)
test_set, validation_set = torch.utils.data.random_split(test_set, [5000, 5000])
test_size = len(test_set)
validation_size = len(validation_set)


train_loader = torch.utils.data.DataLoader(train_set, batch_size, shuffle=True, num_workers=4, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size, num_workers=4, pin_memory=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size, num_workers=4, pin_memory=True)

data_loaders = {"train": train_loader, "test": test_loader, "validation": validation_loader}
dataset_sizes = {"train": train_size, "test": test_size, "validation": validation_size}
print(dataset_sizes)

Files already downloaded and verified
Files already downloaded and verified
{'train': 50000, 'test': 5000, 'validation': 5000}


In [6]:
### from https://pytorch.org/hub/pytorch_vision_resnet/

class BasicBlock(nn.Module):

    def __init__(self, inplanes, planes, stride=1, down=False):
        super().__init__()
            
        self.conv1 = nn.Conv2d(inplanes, planes, stride=stride, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(planes)
        
        self.downsample = None
        
        if down:
            self.downsample = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride)
        

    def forward(self, x: Tensor) -> Tensor:
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        
        if self.downsample is not None:
            identity = self.downsample(x)
        
        out += identity
        out = self.relu(out)
        
        return out

    

class ResNet(nn.Module):
    def __init__(self, model_n, num_classes: int = 10):
        super().__init__()

        self.residual_layers = nn.ModuleList([])
        self.model_n = model_n

        ### begining layers
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding='same')
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        
        
        ######## ResNet blocks [16, 32, 64]
        ### first block, 16 channels
        for i in range(self.model_n):
            self.residual_layers.append(BasicBlock(16, 16).to(device))
            
        
        ### second block, 32 channels
        for i in range(self.model_n):
            if i == 0:
                self.residual_layers.append(BasicBlock(16, 32, stride=2, down=True).to(device))
            else:
                self.residual_layers.append(BasicBlock(32, 32).to(device))
                
                
        ### third block, 64 channels
        for i in range(self.model_n):
            if i == 0:
                self.residual_layers.append(BasicBlock(32, 64, stride=2, down=True).to(device))
                self.inplanes = 64
            else:
                self.residual_layers.append(BasicBlock(64, 64).to(device))
        
    
        ### output layers
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)


    def forward(self, x: Tensor) -> Tensor:

        ### begining layers
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        
        ##### ResNet blocks
        for i, layer in enumerate(self.residual_layers):
            x = layer (x)
            
            
        ### output layers
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x


In [9]:
#### Train Configurations, based on DSNet and ResNet paper
model_n = 3
epochs = 100
milestones = [int(epochs*0.5), int(epochs*0.75)]
momentum = 0.9
weight_decay = 0.0005
gamma = 0.1
lr = 0.1

model = ResNet(model_n, num_classes=100)
model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

summary(model, (3, 32, 32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             448
       BatchNorm2d-2           [-1, 16, 32, 32]              32
              ReLU-3           [-1, 16, 32, 32]               0
            Conv2d-4           [-1, 16, 32, 32]           2,320
       BatchNorm2d-5           [-1, 16, 32, 32]              32
              ReLU-6           [-1, 16, 32, 32]               0
            Conv2d-7           [-1, 16, 32, 32]           2,320
       BatchNorm2d-8           [-1, 16, 32, 32]              32
              ReLU-9           [-1, 16, 32, 32]               0
       BasicBlock-10           [-1, 16, 32, 32]               0
           Conv2d-11           [-1, 16, 32, 32]           2,320
      BatchNorm2d-12           [-1, 16, 32, 32]              32
             ReLU-13           [-1, 16, 32, 32]               0
           Conv2d-14           [-1, 16,

In [8]:
### Train loop + validation/ also test at the end
print("Configuration: ", "model:ResNet(small)", " model_n:", model_n, " batch size:", batch_size, 
      " optimizer:SGD", " lr:", lr, " epochs:", epochs)

print("----------------------------- Train --------------------------------")
for epoch in range(epochs):
    start_time = time.time()
    print("Epoch {}/{}".format(epoch+1, epochs))
    print("-" * 30)
    
    
    epoch_loss = {"train": 0.0, "validation": 0.0}
    epoch_acc = {"train": 0.0, "validation": 0.0}
    
    running_loss = {"train": 0.0, "validation": 0.0}
    running_corrects = {"train": 0, "validation": 0}
    
    for phase in ["train", "validation"]:
        if phase == "train":
            model.train(True)
        else:
            model.train(False)
        
        for data in data_loaders[phase]:
            inputs, labels = data 
            
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad() # clear all gradients
            
            outputs = model(inputs) # batch_size x num_classes
            _, preds = torch.max(outputs.data, 1) # values, indices
            loss = loss_fn(outputs, labels)
            
            if phase == "train":
                loss.backward()  # compute gradients
                optimizer.step() # update weights/biases
               
            running_loss[phase] += loss.data.item() * inputs.size(0)
            running_corrects[phase] += torch.sum(preds == labels.data).item()
        
        epoch_loss[phase] = running_loss[phase] / dataset_sizes[phase]
        epoch_acc[phase] =  running_corrects[phase] / dataset_sizes[phase]

    # Visualize the loss and accuracy values.
    print({
        'time': np.round(time.time()-start_time, 5),
        'train_loss': np.round(epoch_loss["train"], 5),
        'train_acc': np.round(epoch_acc["train"], 5),
        'val_loss': np.round(epoch_loss["validation"], 5),
        'val_acc': np.round(epoch_acc["validation"], 5),
    })
    
    scheduler.step()

    
    
### evaluating the model with test set
print("----------------------------- Test --------------------------------")
with torch.no_grad():
    model.eval()
    running_loss = 0
    running_corrects = 0

    for data in test_loader:
        inputs, labels = data 

        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad() # clear all gradients

        outputs = model(inputs) # batch_size x num_classes
        _, preds = torch.max(outputs.data, 1) # values, indices
        loss = loss_fn(outputs, labels)

        running_loss += loss.data.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data).item()

    # Visualize the loss and accuracy values.
    print({
    'time': np.round(time.time()-start_time, 5),
    'test_loss': np.round(running_loss/ dataset_sizes['test'], 5),
    'test_acc': np.round(running_corrects/ dataset_sizes['test'], 5),
    })

Configuration:  model:ResNet(small)  model_n: 3  batch size: 128  optimizer:SGD  lr: 0.1  epochs: 100
----------------------------- Train --------------------------------
Epoch 1/100
------------------------------
{'time': 10.54983, 'train_loss': 4.06348, 'train_acc': 0.0686, 'val_loss': 3.83767, 'val_acc': 0.0992}
Epoch 2/100
------------------------------
{'time': 8.51099, 'train_loss': 3.53918, 'train_acc': 0.14928, 'val_loss': 3.57399, 'val_acc': 0.1622}
Epoch 3/100
------------------------------
{'time': 8.29049, 'train_loss': 3.1133, 'train_acc': 0.2219, 'val_loss': 3.05406, 'val_acc': 0.2442}
Epoch 4/100
------------------------------
{'time': 8.52674, 'train_loss': 2.78957, 'train_acc': 0.28572, 'val_loss': 2.75431, 'val_acc': 0.2986}
Epoch 5/100
------------------------------
{'time': 8.45359, 'train_loss': 2.53348, 'train_acc': 0.33596, 'val_loss': 2.51307, 'val_acc': 0.3344}
Epoch 6/100
------------------------------
{'time': 8.64396, 'train_loss': 2.3373, 'train_acc': 0.380

{'time': 7.80876, 'train_loss': 0.91364, 'train_acc': 0.7226, 'val_loss': 1.61954, 'val_acc': 0.5756}
Epoch 57/100
------------------------------
{'time': 7.88094, 'train_loss': 0.89883, 'train_acc': 0.72752, 'val_loss': 1.63311, 'val_acc': 0.5918}
Epoch 58/100
------------------------------
{'time': 8.80987, 'train_loss': 0.89606, 'train_acc': 0.72724, 'val_loss': 1.57855, 'val_acc': 0.5944}
Epoch 59/100
------------------------------
{'time': 8.1734, 'train_loss': 0.88586, 'train_acc': 0.72914, 'val_loss': 1.51004, 'val_acc': 0.608}
Epoch 60/100
------------------------------
{'time': 7.99855, 'train_loss': 0.87878, 'train_acc': 0.73248, 'val_loss': 1.55224, 'val_acc': 0.5984}
Epoch 61/100
------------------------------
{'time': 7.92424, 'train_loss': 0.87025, 'train_acc': 0.73726, 'val_loss': 1.60786, 'val_acc': 0.5812}
Epoch 62/100
------------------------------
{'time': 8.22886, 'train_loss': 0.86627, 'train_acc': 0.7363, 'val_loss': 1.719, 'val_acc': 0.5608}
Epoch 63/100
--------