In [1]:
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
import torch.nn.functional as F
from torch import Tensor
import torch.utils.checkpoint as cp
from torch.utils.data import TensorDataset, DataLoader
from collections import OrderedDict
import numpy as np
import ast
import time
from typing import Type, Any, Callable, Union, List, Optional
from torch import Tensor 
from torchsummary import summary
if torch.cuda.is_available():
    print("Using GPUs")
    device = torch.device("cuda") 
else:
    device = torch.device("cpu")

Using GPUs


In [2]:
torch.manual_seed(43)
batch_size = 32

### for CIFAR 10
# stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
## for CIFAR 100
stats = ((0.507, 0.487, 0.441), (0.267, 0.256, 0.276))

transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(*stats),
    torchvision.transforms.RandomCrop(32, padding=4, padding_mode='constant'),
    torchvision.transforms.RandomHorizontalFlip(p=0.5)
])

train_set = torchvision.datasets.CIFAR100(root="data", train=True, download=True, transform=transform)
train_size = len(train_set)
test_set = torchvision.datasets.CIFAR100(root="data", train=False, download=True, transform=transform)
test_set, validation_set = torch.utils.data.random_split(test_set, [5000, 5000])
test_size = len(test_set)
validation_size = len(validation_set)


train_loader = torch.utils.data.DataLoader(train_set, batch_size, shuffle=True, num_workers=4, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size, num_workers=4, pin_memory=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size, num_workers=4, pin_memory=True)

data_loaders = {"train": train_loader, "test": test_loader, "validation": validation_loader}
dataset_sizes = {"train": train_size, "test": test_size, "validation": validation_size}
print(dataset_sizes)

Files already downloaded and verified
Files already downloaded and verified
{'train': 50000, 'test': 5000, 'validation': 5000}


In [3]:
# Implementation from (with modifications): 
# https://github.com/pytorch/vision/blob/6db1569c89094cf23f3bc41f79275c45e9fcb3f3/torchvision/models/densenet.py#L126

class _DenseLayer(nn.Module):
    def __init__(self, num_input_features, growth_rate, bn_size):
        super(_DenseLayer, self).__init__()
        self.add_module('norm1', nn.BatchNorm2d(num_input_features)),
        self.add_module('relu1', nn.ReLU(inplace=True)),
        self.add_module('conv1', nn.Conv2d(num_input_features, bn_size *
                                           growth_rate, kernel_size=1, stride=1,
                                           bias=False)),
        self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)),
        self.add_module('relu2', nn.ReLU(inplace=True)),
        self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate,
                                           kernel_size=3, stride=1, padding=1,
                                           bias=False)),

    def bn_function(self, inputs):
        # type: (List[Tensor]) -> Tensor
        concated_features = torch.cat(inputs, 1)
        bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features)))  # noqa: T484
        return bottleneck_output

    # todo: rewrite when torchscript supports any
    def any_requires_grad(self, input):
        # type: (List[Tensor]) -> bool
        for tensor in input:
            if tensor.requires_grad:
                return True
        return False


    # torchscript does not yet support *args, so we overload method
    # allowing it to take either a List[Tensor] or single Tensor
    def forward(self, input):  # noqa: F811
        if isinstance(input, Tensor):
            prev_features = [input]
        else:
            prev_features = input

        bottleneck_output = self.bn_function(prev_features)

        return self.conv2(self.relu2(self.norm2(bottleneck_output)))


class _DenseBlock(nn.ModuleDict):
    _version = 2

    def __init__(self, num_layers, num_input_features, bn_size, growth_rate):
        super(_DenseBlock, self).__init__()
        for i in range(num_layers):
            layer = _DenseLayer(
                num_input_features + i * growth_rate,
                growth_rate=growth_rate,
                bn_size=bn_size
            )
            self.add_module('denselayer%d' % (i + 1), layer)

    def forward(self, init_features):
        features = [init_features]
        for name, layer in self.items():
            new_features = layer(features)
            features.append(new_features)
        return torch.cat(features, 1)


class _Transition(nn.Sequential):
    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        self.add_module('norm', nn.BatchNorm2d(num_input_features))
        self.add_module('relu', nn.ReLU(inplace=True))
        self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
                                          kernel_size=1, stride=1, bias=False))
        self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))


class DenseNet(nn.Module):
    r"""Densenet-BC model class, based on
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
    Args:
        growth_rate (int) - how many filters to add each layer (`k` in paper)
        block_config (list of 4 ints) - how many layers in each pooling block
        num_init_features (int) - the number of filters to learn in the first convolution layer
        bn_size (int) - multiplicative factor for number of bottle neck layers
          (i.e. bn_size * k features in the bottleneck layer)
        num_classes (int) - number of classification classes
          but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_
    """

    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, num_classes=1000):

        super(DenseNet, self).__init__()

        # First convolution
        # Modified from the original DenseNet implementation to mimic Resnet settings on CIFAR
        self.features = nn.Sequential(OrderedDict([
            ('conv0', nn.Conv2d(3, num_init_features, kernel_size=3, stride=1,
                                padding='same', bias=False)),
            ('norm0', nn.BatchNorm2d(num_init_features)),
            ('relu0', nn.ReLU(inplace=True))
        ]))

        # Each denseblock
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(
                num_layers=num_layers,
                num_input_features=num_features,
                bn_size=bn_size,
                growth_rate=growth_rate
            )
            self.features.add_module('denseblock%d' % (i + 1), block)
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                trans = _Transition(num_input_features=num_features,
                                    num_output_features=num_features // 2)
                self.features.add_module('transition%d' % (i + 1), trans)
                num_features = num_features // 2

        # Final batch norm
        self.features.add_module('norm5', nn.BatchNorm2d(num_features))

        # Linear layer
        self.classifier = nn.Linear(num_features, num_classes)

        # Official init from torch repo.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        features = self.features(x)
        out = F.relu(features, inplace=True)
        out = F.adaptive_avg_pool2d(out, (1, 1))
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        return out


In [4]:
#### Train Configurations, based on DSNet and ResNet paper
model_n = 8
epochs = 100 ### should be 180
milestones = [int(epochs*0.5), int(epochs*0.75)]
momentum = 0.9
weight_decay = 0.0001
gamma = 0.1
lr = 0.1

model = DenseNet(growth_rate=16, block_config=(2 * model_n, 2 * model_n, 2 * model_n),
                 num_init_features=16, bn_size=2, num_classes=100)

model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

# summary(model, (3, 32, 32))
print('Total Number of Parameters:', sum(p.numel() for p in model.parameters()))

Total Number of Parameters: 771228


In [5]:
### Train loop + validation/ also test at the end
print("Configuration: ", "model:DenseNet", " model_n:", model_n, " batch size:", batch_size, 
      " optimizer:SGD", " lr:", lr, " epochs:", epochs)

all_epoch_loss = {"train": [], "validation": []}
all_epoch_acc = {"train":  [], "validation": []}

print("----------------------------- Train --------------------------------")
for epoch in range(epochs):
    start_time = time.time()
    print("Epoch {}/{}".format(epoch+1, epochs))
    print("-" * 30)
    
    
    epoch_loss = {"train": 0.0, "validation": 0.0}
    epoch_acc = {"train": 0.0, "validation": 0.0}
    
    running_loss = {"train": 0.0, "validation": 0.0}
    running_corrects = {"train": 0, "validation": 0}
    
    for phase in ["train", "validation"]:
        if phase == "train":
            model.train(True)
        else:
            model.train(False)
        
        for data in data_loaders[phase]:
            inputs, labels = data 
            
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad() # clear all gradients
            
            outputs = model(inputs) # batch_size x num_classes
            _, preds = torch.max(outputs.data, 1) # values, indices
            loss = loss_fn(outputs, labels)
            
            if phase == "train":
                loss.backward()  # compute gradients
                optimizer.step() # update weights/biases
               
            running_loss[phase] += loss.data.item() * inputs.size(0)
            running_corrects[phase] += torch.sum(preds == labels.data).item()
        
        all_epoch_loss[phase].append(running_loss[phase] / dataset_sizes[phase])
        all_epoch_acc[phase].append(running_corrects[phase] / dataset_sizes[phase])
        
        epoch_loss[phase] = running_loss[phase] / dataset_sizes[phase]
        epoch_acc[phase] =  running_corrects[phase] / dataset_sizes[phase]

    # Visualize the loss and accuracy values.
    print({
        'time': np.round(time.time()-start_time, 5),
        'train_loss': np.round(epoch_loss["train"], 5),
        'train_acc': np.round(epoch_acc["train"], 5),
        'val_loss': np.round(epoch_loss["validation"], 5),
        'val_acc': np.round(epoch_acc["validation"], 5),
    })
    
    scheduler.step()

    
    
with open('DenseNet_3_bs_32_opt_SGD_lr_1_epochs_100_loss.txt', 'w') as f_loss:
    print(all_epoch_loss, file=f_loss)
    
with open('DenseNet_3_bs_32_opt_SGD_lr_1_epochs_100_acc.txt', 'w') as f_acc:
    print(all_epoch_acc, file=f_acc)
    
### evaluating the model with test set
print("----------------------------- Test --------------------------------")
with torch.no_grad():
    model.eval()
    running_loss = 0
    running_corrects = 0

    for data in test_loader:
        inputs, labels = data 

        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad() # clear all gradients

        outputs = model(inputs) # batch_size x num_classes
        _, preds = torch.max(outputs.data, 1) # values, indices
        loss = loss_fn(outputs, labels)

        running_loss += loss.data.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data).item()

    # Visualize the loss and accuracy values.
    print({
    'time': np.round(time.time()-start_time, 5),
    'test_loss': np.round(running_loss/ dataset_sizes['test'], 5),
    'test_acc': np.round(running_corrects/ dataset_sizes['test'], 5),
    })

Configuration:  model:DenseNet  model_n: 3  batch size: 32  optimizer:SGD  lr: 0.1  epochs: 100
----------------------------- Train --------------------------------
Epoch 1/100
------------------------------
{'time': 52.19063, 'train_loss': 3.85947, 'train_acc': 0.1021, 'val_loss': 3.54879, 'val_acc': 0.1582}
Epoch 2/100
------------------------------
{'time': 56.99864, 'train_loss': 3.18135, 'train_acc': 0.21472, 'val_loss': 2.79594, 'val_acc': 0.2902}
Epoch 3/100
------------------------------
{'time': 50.74312, 'train_loss': 2.65927, 'train_acc': 0.31388, 'val_loss': 2.45695, 'val_acc': 0.3634}
Epoch 4/100
------------------------------
{'time': 50.92215, 'train_loss': 2.33018, 'train_acc': 0.384, 'val_loss': 2.20637, 'val_acc': 0.417}
Epoch 5/100
------------------------------
{'time': 50.85698, 'train_loss': 2.1224, 'train_acc': 0.4294, 'val_loss': 2.04856, 'val_acc': 0.444}
Epoch 6/100
------------------------------
{'time': 50.48864, 'train_loss': 1.96578, 'train_acc': 0.4653, '

{'time': 53.62423, 'train_loss': 0.58722, 'train_acc': 0.81952, 'val_loss': 1.25875, 'val_acc': 0.6718}
Epoch 57/100
------------------------------
{'time': 50.28523, 'train_loss': 0.57894, 'train_acc': 0.82018, 'val_loss': 1.25995, 'val_acc': 0.674}
Epoch 58/100
------------------------------
{'time': 51.42764, 'train_loss': 0.5793, 'train_acc': 0.82048, 'val_loss': 1.28879, 'val_acc': 0.6684}
Epoch 59/100
------------------------------
{'time': 51.82786, 'train_loss': 0.57338, 'train_acc': 0.8237, 'val_loss': 1.27279, 'val_acc': 0.6788}
Epoch 60/100
------------------------------
{'time': 50.79615, 'train_loss': 0.56608, 'train_acc': 0.82654, 'val_loss': 1.28329, 'val_acc': 0.674}
Epoch 61/100
------------------------------
{'time': 51.74843, 'train_loss': 0.56578, 'train_acc': 0.82378, 'val_loss': 1.27563, 'val_acc': 0.6744}
Epoch 62/100
------------------------------
{'time': 58.88578, 'train_loss': 0.56257, 'train_acc': 0.82702, 'val_loss': 1.26969, 'val_acc': 0.6758}
Epoch 63/100

In [5]:
### Train loop + validation/ also test at the end
print("Configuration: ", "model:DenseNet", " model_n:", model_n, " batch size:", batch_size, 
      " optimizer:SGD", " lr:", lr, " epochs:", epochs)

all_epoch_loss = {"train": [], "validation": []}
all_epoch_acc = {"train":  [], "validation": []}

print("----------------------------- Train --------------------------------")
for epoch in range(epochs):
    start_time = time.time()
    print("Epoch {}/{}".format(epoch+1, epochs))
    print("-" * 30)
    
    
    epoch_loss = {"train": 0.0, "validation": 0.0}
    epoch_acc = {"train": 0.0, "validation": 0.0}
    
    running_loss = {"train": 0.0, "validation": 0.0}
    running_corrects = {"train": 0, "validation": 0}
    
    for phase in ["train", "validation"]:
        if phase == "train":
            model.train(True)
        else:
            model.train(False)
        
        for data in data_loaders[phase]:
            inputs, labels = data 
            
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad() # clear all gradients
            
            outputs = model(inputs) # batch_size x num_classes
            _, preds = torch.max(outputs.data, 1) # values, indices
            loss = loss_fn(outputs, labels)
            
            if phase == "train":
                loss.backward()  # compute gradients
                optimizer.step() # update weights/biases
               
            running_loss[phase] += loss.data.item() * inputs.size(0)
            running_corrects[phase] += torch.sum(preds == labels.data).item()
        
        all_epoch_loss[phase].append(running_loss[phase] / dataset_sizes[phase])
        all_epoch_acc[phase].append(running_corrects[phase] / dataset_sizes[phase])
        
        epoch_loss[phase] = running_loss[phase] / dataset_sizes[phase]
        epoch_acc[phase] =  running_corrects[phase] / dataset_sizes[phase]

    # Visualize the loss and accuracy values.
    print({
        'time': np.round(time.time()-start_time, 5),
        'train_loss': np.round(epoch_loss["train"], 5),
        'train_acc': np.round(epoch_acc["train"], 5),
        'val_loss': np.round(epoch_loss["validation"], 5),
        'val_acc': np.round(epoch_acc["validation"], 5),
    })
    
    scheduler.step()

    
    
with open('DenseNet_8_bs_32_opt_SGD_lr_1_epochs_100_loss.txt', 'w') as f_loss:
    print(all_epoch_loss, file=f_loss)
    
with open('DenseNet_8_bs_32_opt_SGD_lr_1_epochs_100_acc.txt', 'w') as f_acc:
    print(all_epoch_acc, file=f_acc)
    
### evaluating the model with test set
print("----------------------------- Test --------------------------------")
with torch.no_grad():
    model.eval()
    running_loss = 0
    running_corrects = 0

    for data in test_loader:
        inputs, labels = data 

        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad() # clear all gradients

        outputs = model(inputs) # batch_size x num_classes
        _, preds = torch.max(outputs.data, 1) # values, indices
        loss = loss_fn(outputs, labels)

        running_loss += loss.data.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data).item()

    # Visualize the loss and accuracy values.
    print({
    'time': np.round(time.time()-start_time, 5),
    'test_loss': np.round(running_loss/ dataset_sizes['test'], 5),
    'test_acc': np.round(running_corrects/ dataset_sizes['test'], 5),
    })

Configuration:  model:DenseNet  model_n: 8  batch size: 32  optimizer:SGD  lr: 0.1  epochs: 100
----------------------------- Train --------------------------------
Epoch 1/100
------------------------------
{'time': 99.05934, 'train_loss': 3.88425, 'train_acc': 0.10126, 'val_loss': 3.52518, 'val_acc': 0.1496}
Epoch 2/100
------------------------------
{'time': 94.28528, 'train_loss': 3.05189, 'train_acc': 0.23892, 'val_loss': 2.7674, 'val_acc': 0.2902}
Epoch 3/100
------------------------------
{'time': 93.84855, 'train_loss': 2.4272, 'train_acc': 0.36184, 'val_loss': 2.20881, 'val_acc': 0.4086}
Epoch 4/100
------------------------------
{'time': 98.72011, 'train_loss': 2.06806, 'train_acc': 0.44188, 'val_loss': 1.86049, 'val_acc': 0.4874}
Epoch 5/100
------------------------------
{'time': 93.03579, 'train_loss': 1.82793, 'train_acc': 0.49776, 'val_loss': 1.78385, 'val_acc': 0.5122}
Epoch 6/100
------------------------------
{'time': 92.31454, 'train_loss': 1.65924, 'train_acc': 0.53

{'time': 96.75016, 'train_loss': 0.09206, 'train_acc': 0.97208, 'val_loss': 1.38779, 'val_acc': 0.7312}
Epoch 57/100
------------------------------
{'time': 92.63182, 'train_loss': 0.08469, 'train_acc': 0.97506, 'val_loss': 1.39219, 'val_acc': 0.7306}
Epoch 58/100
------------------------------
{'time': 91.4026, 'train_loss': 0.08568, 'train_acc': 0.97382, 'val_loss': 1.40717, 'val_acc': 0.7256}
Epoch 59/100
------------------------------
{'time': 98.89914, 'train_loss': 0.07843, 'train_acc': 0.97708, 'val_loss': 1.43774, 'val_acc': 0.7222}
Epoch 60/100
------------------------------
{'time': 95.61185, 'train_loss': 0.07745, 'train_acc': 0.9771, 'val_loss': 1.41178, 'val_acc': 0.7254}
Epoch 61/100
------------------------------
{'time': 94.03176, 'train_loss': 0.07394, 'train_acc': 0.97786, 'val_loss': 1.46184, 'val_acc': 0.726}
Epoch 62/100
------------------------------
{'time': 98.7251, 'train_loss': 0.07087, 'train_acc': 0.97892, 'val_loss': 1.48168, 'val_acc': 0.7224}
Epoch 63/100