# Imports and setting up libraries

In [1]:
# autoreload in mode 2 re-imports modules before each cell executes, so edits
# made to local .py files are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [2]:
# mount Google Drive inside the Colab filesystem at /content/gdrive
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
# importing necessary libraries
import importlib
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms
from tqdm.auto import tqdm

In [4]:
!pip install torchinfo
from torchinfo import summary



In [5]:
# switch the working directory to the project folder on Drive; the relative
# './data' dataset root used below resolves against it (and presumably the
# local `models` package is found here as well — confirm)
os.chdir('/content/gdrive/MyDrive/Deep Learning/pytorch-experiments/Vision/CIFAR-10')

In [6]:
# pick the CUDA device when one is visible to PyTorch, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [7]:
# ensuring reproducibility of code: seed every random number generator the
# notebook may draw from, each exactly once
random_seed = 42
# Python's built-in RNG
random.seed(random_seed)
# NumPy's global RNG (drives the shuffle behind the train/validation split)
np.random.seed(random_seed)
# PyTorch's default generator
torch.manual_seed(random_seed)
# every CUDA device, which includes the current one
torch.cuda.manual_seed_all(random_seed)
# ask cuDNN for deterministic algorithms and disable its benchmark-based
# autotuner, which may otherwise choose different kernels between runs
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Loading data

In [8]:
# architecture identifier: used as the module name under `models`, as the
# attribute fetched from that module, and inside the checkpoint file name
arch = "densenetlike"

In [9]:
# some hyperparameters for the network
# samples per mini-batch, shared by the train, validation and test loaders
batch_size = 32
# fraction of the training set held out for validation
val_size = 0.1
# number of train + validation passes in the main loop
epochs = 100
# initial learning rate passed to AdamW
learning_rate = 3e-3
# passed to AdamW as weight_decay (this is not the LR-schedule factor)
decay = 0.1
# milestones handed to MultiStepLR; the scheduler is stepped once per epoch and
# its multiplicative factor (gamma = 0.5) is set where the scheduler is built
opt_milestones = [50, 75]

In [10]:
# defining transforms and data augmentation
# per-channel mean / std used to normalise every split
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.2023, 0.1994, 0.2010)

# training pipeline: random 32x32 crop after 4 pixels of padding and a random
# horizontal flip as augmentation, then tensor conversion and normalisation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# validation pipeline: no augmentation, only tensor conversion + normalisation
valid_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# test pipeline: same steps as validation, built as its own object
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

In [11]:
# loading data
# Three CIFAR-10 dataset objects rooted at ./data (downloaded when missing).
# The train and validation objects both read the training split and differ
# only in the transform they apply; the test object reads the test split.
cifar10 = torchvision.datasets.CIFAR10

trainset = cifar10(root='./data', train=True, download=True,
                   transform=train_transform)

validset = cifar10(root='./data', train=True, download=True,
                   transform=valid_transform)

testset = cifar10(root='./data', train=False, download=True,
                  transform=test_transform)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [12]:
# validation split
# Shuffle all training indices once, then give the first `val_size` fraction
# to validation and the remainder to training.
num_train = len(trainset)
split = int(np.floor(val_size * num_train))
indices = list(range(num_train))
np.random.shuffle(indices)

valid_idx = indices[:split]
train_idx = indices[split:]

# each sampler draws only from its own subset of indices
train_sampler, valid_sampler = SubsetRandomSampler(train_idx), SubsetRandomSampler(valid_idx)

In [13]:
# defining dataloaders
# settings common to all three loaders
loader_kwargs = dict(batch_size=batch_size, num_workers=2)

# train / validation read disjoint index subsets through their samplers
trainloader = torch.utils.data.DataLoader(trainset, sampler=train_sampler, **loader_kwargs)
validloader = torch.utils.data.DataLoader(validset, sampler=valid_sampler, **loader_kwargs)
# the test set is iterated in its stored order
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)

# Network definitions

In [14]:
# importing model to be used
# Load the module `models.<arch>` and take the attribute named `arch` from it
# as the network constructor. importlib.import_module is the supported API for
# programmatic imports and returns the submodule itself, so the
# `__import__(..., fromlist=[...])` workaround is not needed.
model_file = importlib.import_module(f'models.{arch}')
base_arch = getattr(model_file, arch)

In [15]:
# moving net to GPU and summary of model architecture
# build the network and place it on the selected device in one step
# (assumes base_arch() is an nn.Module, whose .to() returns the module itself)
net = base_arch().to(device)
# per-layer summary for one batch-sized input of 3x32x32 images
summary(net, input_size=(batch_size, 3, 32, 32))

Layer (type:depth-idx)                             Output Shape              Param #
densenetlike                                       --                        --
├─ModuleList: 1-1                                  --                        --
├─DenseBlock: 1                                    --                        --
│    └─ModuleList: 2-1                             --                        --
│    └─ModuleList: 2-2                             --                        --
├─AutoConv2d_BN: 1-2                               [32, 24, 32, 32]          --
│    └─Conv2d: 2-3                                 [32, 24, 32, 32]          1,824
│    └─BatchNorm2d: 2-4                            [32, 24, 32, 32]          48
├─ModuleList: 1-1                                  --                        --
│    └─densechunk: 2-5                             [32, 24, 16, 16]          --
│    │    └─DenseBlock: 3-1                        [32, 60, 32, 32]          73,656
│    │    └─TransitionBlock:

In [16]:
# defining loss, optimizer and lr scheduler
# cross-entropy over the class logits
criterion = nn.CrossEntropyLoss()
# AdamW over all network parameters; `decay` is its weight-decay coefficient
optimizer = optim.AdamW(
    net.parameters(),
    lr=learning_rate,
    weight_decay=decay,
)
# multiply the learning rate by 0.5 at each milestone in `opt_milestones`
scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=opt_milestones,
    gamma=0.5,
)

# Training

In [17]:
def train(epoch):
    """Train the global `net` for one epoch and print summary statistics.

    Iterates once over the global `trainloader`, optimising `net` with the
    global `criterion` and `optimizer`, then advances the global `scheduler`
    by one step (the schedule is per epoch, not per batch).

    Args:
        epoch: zero-based epoch index; printed as ``epoch + 1``.

    Returns:
        tuple: ``(running_loss, train_accuracy)`` where ``running_loss`` is the
        sum of the per-batch loss values and ``train_accuracy`` is the
        percentage of correctly classified samples. ``train_accuracy`` is NaN
        when the loader yields no samples, instead of raising
        ZeroDivisionError.
    """
    net.train()

    running_loss = 0.0
    train_total = 0
    train_correct = 0

    for inputs, labels in tqdm(trainloader):
        # each batch is an (inputs, labels) pair; move both to the target device
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # keep track of loss and accuracy; detach() keeps the bookkeeping out
        # of the autograd graph without going through the legacy `.data` field
        running_loss += loss.item()
        predicted = torch.argmax(outputs.detach(), dim=1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    scheduler.step()  # only after every epoch

    # print statistics
    if train_total:
        train_accuracy = 100 * (train_correct / train_total)
    else:
        # nothing was seen this epoch: report "undefined" rather than crash
        train_accuracy = float('nan')
    print("Epoch", epoch + 1)
    print(f'    Loss of the network on the {train_total} training images: {running_loss}')
    print(f'    Accuracy of the network on the {train_total} training images: {train_accuracy}%')

    return running_loss, train_accuracy

In [18]:
def validation():
    """Evaluate the global `net` on the global `validloader` and print accuracy.

    The network is put in eval mode and gradients are disabled for the pass.

    Returns:
        float: percentage of correctly classified validation samples, or NaN
        when the loader yields no samples (for example with ``val_size = 0``)
        instead of raising ZeroDivisionError.
    """
    valid_correct = 0
    valid_total = 0

    net.eval()
    # since we're not training, we don't need to calculate the gradients
    with torch.no_grad():
        for images, labels in validloader:
            images, labels = images.to(device), labels.to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest score is what we choose as prediction
            predicted = torch.argmax(outputs, dim=1)
            valid_total += labels.size(0)
            valid_correct += (predicted == labels).sum().item()

    if valid_total:
        valid_accuracy = 100 * (valid_correct / valid_total)
    else:
        # empty validation split: accuracy is undefined
        valid_accuracy = float('nan')
    print(f'    Accuracy of the network on the {valid_total} validation images: {valid_accuracy}%')

    return valid_accuracy

In [19]:
# training

# each iteration runs one training pass (which also steps the LR scheduler)
# and then one evaluation pass over the validation loader
for epoch in range(epochs):  # loop over the dataset multiple times
    train(epoch)
    validation()

print('Finished Training')

HBox(children=(FloatProgress(value=0.0, max=1407.0), HTML(value='')))


Epoch 1
    Loss of the network on the 45000 training images: 2319.394626915455
    Accuracy of the network on the 45000 training images: 38.80222222222222%
    Accuracy of the network on the 5000 validation images: 50.2%


HBox(children=(FloatProgress(value=0.0, max=1407.0), HTML(value='')))

KeyboardInterrupt: ignored

In [None]:
# saving model
# The checkpoint goes under the same Drive project folder the notebook switched
# into with os.chdir, using the same 'MyDrive' spelling as that call so both
# cells address one location.
model_save_name = f'cifar{arch}.pt'
save_dir = "/content/gdrive/MyDrive/Deep Learning/pytorch-experiments/Vision/CIFAR-10/models/trained models"
# torch.save does not create missing folders, so make sure the target exists
os.makedirs(save_dir, exist_ok=True)
path = os.path.join(save_dir, model_save_name)
# this pickles the whole module object (not just a state_dict), so loading it
# later needs the class definition from the `models` package to be importable
torch.save(net, path)

# Testing

In [None]:
# loading model
# Same Drive project folder and 'MyDrive' spelling as the os.chdir call at the
# top of the notebook.
model_save_name = f'cifar{arch}.pt'
PATH = os.path.join(
    "/content/gdrive/MyDrive/Deep Learning/pytorch-experiments/Vision/CIFAR-10/models/trained models",
    model_save_name,
)
# `device` is already a torch.device, so it is passed to map_location directly.
# The file holds a fully pickled module: only load checkpoints you created
# yourself, since unpickling can execute arbitrary code.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects whole-module pickles — pass weights_only=False there; confirm
# against the installed version.
model = torch.load(PATH, map_location=device)

In [None]:
# moving model to GPU
# the trailing semicolon suppresses the module repr in the cell output;
# NOTE(review): likely redundant when the load cell already maps the
# checkpoint onto `device` — harmless either way
model.to(device);

In [None]:
# evaluating on test set
model.eval()
total = 0
correct = 0
# inference only, so gradient tracking is switched off for the whole pass
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        # forward pass: one row of class scores per image
        outputs = model(images)
        # the predicted class is the index of the largest score in each row
        predicted = outputs.data.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the {total} test images: {100 * correct / total}')