In [None]:
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import time
from typing import Type, Any, Callable, Union, List, Optional
from torch import Tensor 
from torchsummary import summary
if torch.cuda.is_available():
    print("Using GPUs")
    device = torch.device("cuda") 
else:
    device = torch.device("cpu")

In [None]:
torch.manual_seed(43)
batch_size = 128

### for CIFAR 10
# stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
## for CIFAR 100
stats = ((0.507, 0.487, 0.441), (0.267, 0.256, 0.276))

transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(*stats),
    torchvision.transforms.RandomCrop(32, padding=4, padding_mode='constant'),
    torchvision.transforms.RandomHorizontalFlip(p=0.5)
])

train_set = torchvision.datasets.CIFAR100(root="data", train=True, download=True, transform=transform)
train_size = len(train_set)
test_set = torchvision.datasets.CIFAR100(root="data", train=False, download=True, transform=transform)
test_set, validation_set = torch.utils.data.random_split(test_set, [5000, 5000])
test_size = len(test_set)
validation_size = len(validation_set)


train_loader = torch.utils.data.DataLoader(train_set, batch_size, shuffle=True, num_workers=4, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size, num_workers=4, pin_memory=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size, num_workers=4, pin_memory=True)

data_loaders = {"train": train_loader, "test": test_loader, "validation": validation_loader}
dataset_sizes = {"train": train_size, "test": test_size, "validation": validation_size}
print(dataset_sizes)

In [None]:
from models import ResNet, DenseNet, DSNet

#### Train Configurations, based on DSNet and ResNet paper
model_n = 3
epochs = 100 ### should be 180
milestones = [int(epochs*0.5), int(epochs*0.75)]
momentum = 0.9
weight_decay = 0.0005
gamma = 0.1
lr = 0.1

model = DSNet(model_n, num_classes=100, device=device)
# model = ResNet(model_n, num_classes=100, device=device)
# model = DenseNet(growth_rate=16, block_config=(2 * model_n, 2 * model_n, 2 * model_n),
#                  num_init_features=16, bn_size=2, num_classes=100)

model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

summary(model, (3, 32, 32))
print('Total Number of Parameters:', sum(p.numel() for p in model.parameters()))

In [78]:
### Train loop + validation/ also test at the end
print("Configuration: ", "model:DSNet(small)", " model_n:", model_n, " batch size:", batch_size, 
      " optimizer:SGD", " lr:", lr, " epochs:", epochs)

print("----------------------------- Train --------------------------------")
for epoch in range(epochs):
    start_time = time.time()
    print("Epoch {}/{}".format(epoch+1, epochs))
    print("-" * 30)
    
    
    epoch_loss = {"train": 0.0, "validation": 0.0}
    epoch_acc = {"train": 0.0, "validation": 0.0}
    
    running_loss = {"train": 0.0, "validation": 0.0}
    running_corrects = {"train": 0, "validation": 0}
    
    for phase in ["train", "validation"]:
        if phase == "train":
            model.train(True)
        else:
            model.train(False)
        
        for data in data_loaders[phase]:
            inputs, labels = data 
            
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad() # clear all gradients
            
            outputs = model(inputs) # batch_size x num_classes
            _, preds = torch.max(outputs.data, 1) # values, indices
            loss = loss_fn(outputs, labels)
            
            if phase == "train":
                loss.backward()  # compute gradients
                optimizer.step() # update weights/biases
               
            running_loss[phase] += loss.data.item() * inputs.size(0)
            running_corrects[phase] += torch.sum(preds == labels.data).item()
        
        epoch_loss[phase] = running_loss[phase] / dataset_sizes[phase]
        epoch_acc[phase] =  running_corrects[phase] / dataset_sizes[phase]

    # Visualize the loss and accuracy values.
    print({
        'time': np.round(time.time()-start_time, 5),
        'train_loss': np.round(epoch_loss["train"], 5),
        'train_acc': np.round(epoch_acc["train"], 5),
        'val_loss': np.round(epoch_loss["validation"], 5),
        'val_acc': np.round(epoch_acc["validation"], 5),
    })
    
    scheduler.step()

    
    
### evaluating the model with test set
print("----------------------------- Test --------------------------------")
with torch.no_grad():
    model.eval()
    running_loss = 0
    running_corrects = 0

    for data in test_loader:
        inputs, labels = data 

        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad() # clear all gradients

        outputs = model(inputs) # batch_size x num_classes
        _, preds = torch.max(outputs.data, 1) # values, indices
        loss = loss_fn(outputs, labels)

        running_loss += loss.data.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data).item()

    # Visualize the loss and accuracy values.
    print({
    'time': np.round(time.time()-start_time, 5),
    'test_loss': np.round(running_loss/ dataset_sizes['test'], 5),
    'test_acc': np.round(running_corrects/ dataset_sizes['test'], 5),
    })

Configuration:  model:DSNet(small)  model_n: 3  batch size: 128  optimizer:SGD  lr: 0.1  epochs: 100
----------------------------- Train --------------------------------
Epoch 1/100
------------------------------
{'time': 11.32642, 'train_loss': 3.89058, 'train_acc': 0.0945, 'val_loss': 3.61582, 'val_acc': 0.133}
Epoch 2/100
------------------------------
{'time': 11.20832, 'train_loss': 3.235, 'train_acc': 0.20014, 'val_loss': 3.18643, 'val_acc': 0.2176}
Epoch 3/100
------------------------------
{'time': 10.97142, 'train_loss': 2.83238, 'train_acc': 0.27616, 'val_loss': 2.91107, 'val_acc': 0.2578}
Epoch 4/100
------------------------------
{'time': 11.09191, 'train_loss': 2.53902, 'train_acc': 0.33254, 'val_loss': 2.4892, 'val_acc': 0.3456}
Epoch 5/100
------------------------------
{'time': 11.14418, 'train_loss': 2.33153, 'train_acc': 0.3804, 'val_loss': 2.6593, 'val_acc': 0.3234}
Epoch 6/100
------------------------------
{'time': 11.02948, 'train_loss': 2.16312, 'train_acc': 0.41

{'time': 10.41584, 'train_loss': 0.82713, 'train_acc': 0.74666, 'val_loss': 1.61879, 'val_acc': 0.5844}
Epoch 57/100
------------------------------
{'time': 11.00377, 'train_loss': 0.82289, 'train_acc': 0.74832, 'val_loss': 1.53706, 'val_acc': 0.5928}
Epoch 58/100
------------------------------
{'time': 14.88287, 'train_loss': 0.80805, 'train_acc': 0.75368, 'val_loss': 1.54498, 'val_acc': 0.597}
Epoch 59/100
------------------------------
{'time': 10.92331, 'train_loss': 0.80554, 'train_acc': 0.75294, 'val_loss': 1.52728, 'val_acc': 0.6054}
Epoch 60/100
------------------------------
{'time': 11.10722, 'train_loss': 0.79652, 'train_acc': 0.75496, 'val_loss': 1.52894, 'val_acc': 0.6074}
Epoch 61/100
------------------------------
{'time': 10.60187, 'train_loss': 0.78485, 'train_acc': 0.7585, 'val_loss': 1.57973, 'val_acc': 0.592}
Epoch 62/100
------------------------------
{'time': 10.82366, 'train_loss': 0.77702, 'train_acc': 0.75928, 'val_loss': 1.57175, 'val_acc': 0.5996}
Epoch 63/10