In [2]:
import torch
import torchvision
from torchvision import transforms
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
from torch.autograd import Variable
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
data_train = torchvision.datasets.CIFAR10('.', train = True, transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                                                                             transforms.RandomAffine(degrees = (-10, 10), translate = (0.0, 0.1)),
                                                                                             transforms.ToTensor()]), download = True)
data_test = torchvision.datasets.CIFAR10('.', train = True, transform = transforms.ToTensor(), download = True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./cifar-10-python.tar.gz to .
Files already downloaded and verified


In [4]:
def make_dataloader(dataset, pct, batch_size):
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    np.random.shuffle(indices)
    split = int(np.floor(pct * dataset_size))
    train_indices, valid_indices = indices[split:], indices[:split]
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    valid_sampler = torch.utils.data.SubsetRandomSampler(valid_indices)

    train_loader = DataLoader(dataset, batch_size = batch_size, sampler = train_sampler)
    valid_loader = DataLoader(dataset, batch_size = batch_size, sampler = valid_sampler)

    return train_loader, valid_loader

In [5]:
train_loader, valid_loader = make_dataloader(data_train, 0.2, 256)

In [37]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_layer = nn.Sequential(
            # Conv Block 1
            nn.Conv2d(in_channels = 3, out_channels = 32, kernel_size = 3, padding = 1).cuda(),
            nn.BatchNorm2d(32).cuda(),
            nn.ReLU().cuda(),
            nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3, padding = 1).cuda(),
            nn.ReLU().cuda(),
            nn.MaxPool2d(kernel_size = 2, stride = 2).cuda(),

            # Conv Block 2
            nn.Conv2d(in_channels = 64, out_channels = 128, kernel_size = 3, padding = 1).cuda(),
            nn.BatchNorm2d(128).cuda(),
            nn.ReLU().cuda(),
            nn.Conv2d(in_channels = 128, out_channels = 128, kernel_size = 3, padding = 1).cuda(),
            nn.ReLU().cuda(),
            nn.MaxPool2d(kernel_size = 2, stride = 2).cuda(),

            # Conv Block 3
            nn.Conv2d(in_channels = 128, out_channels = 256, kernel_size = 3, padding = 1).cuda(),
            nn.BatchNorm2d(256).cuda(),
            nn.ReLU().cuda(),
            nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 3, padding = 1).cuda(),
            nn.ReLU().cuda(),
            nn.MaxPool2d(kernel_size = 2, stride= 2).cuda(),

            # Conv Block 4
            nn.Conv2d(in_channels = 256,  out_channels = 512, kernel_size = 3, padding = 1).cuda(),
            nn.BatchNorm2d(512).cuda(),
            nn.ReLU().cuda(),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = 1).cuda(),
            nn.BatchNorm2d(512).cuda(),
            nn.ReLU().cuda(),
            nn.MaxPool2d(kernel_size = 2, stride= 2).cuda()
        )

        self.fc_layer = nn.Sequential(
            nn.Dropout(p = 0.3).cuda(),
            nn.Linear(2048, 2048).cuda(),
            nn.ReLU().cuda(),
            nn.Linear(2048, 1024).cuda(),
            nn.ReLU().cuda(),
            nn.Dropout(p = 0.3).cuda(),
            nn.Linear(1024, 1024).cuda(),
            nn.ReLU().cuda(),
            nn.Dropout(p = 0.3).cuda(),
            nn.Linear(1024, 100).cuda(),
            nn.ReLU().cuda(),
            nn.Dropout(p = 0.3),
            nn.Linear(100, 10).cuda()
        )
    
    def forward(self, x):
         x = self.conv_layer(x)
         x = x.view(x.shape[0], -1)
         x = self.fc_layer(x)
         return x

In [38]:
model = Network()

In [44]:
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-6)

In [40]:
trainingloss = []
trainingacc = []
validationloss = []
validationacc = []

In [41]:
def accuracy(output, y):
    _, preds = torch.max(output, dim = 1)
    return torch.tensor(torch.sum(preds == y).item() / len(preds))

In [42]:
use_cuda = True
use_cuda, torch.cuda.is_available()

(True, True)

Trained for 20 epochs with learning rate of 1e-3
Trained for 20 epochs with learning rate of 1e-6

In [45]:
epochs = 20
for e in range(epochs):
    train_acc = torch.tensor(0, dtype = torch.float32)
    train_loss = torch.tensor(0, dtype = torch.float32)
    train_cnt = torch.tensor(0, dtype = torch.float32)

    model.train()
    for (i, batch) in enumerate(train_loader):
        # Unwrap the batch
        X, y = batch
        X, y = Variable(X), Variable(y)
        # Transfer to GPU
        if use_cuda and torch.cuda.is_available():
            X = X.cuda()
            y = y.cuda()
        
        # Forward pass
        output = model(X)

        # Calculate the loss
        loss = loss_func(output, y)

        # Calculate the gradient of loss wrt the params
        optimizer.zero_grad()
        loss.backward()

        # Subtract the gradients
        optimizer.step()

        # Accumulate the history (NOTE that this is the batch loss and batch accuracy)
        batch_size = X.shape[0]
        train_cnt += batch_size
        acc = accuracy(output, y)
        train_loss = train_loss + loss * batch_size
        train_acc = train_acc + acc * batch_size

    train_loss = train_loss / train_cnt
    train_acc = train_acc / train_cnt
    trainingloss.append(train_loss)
    trainingacc.append(train_acc)

    model.eval()
    with torch.no_grad():
        val_loss = torch.tensor(0, dtype = torch.float32)
        val_acc = torch.tensor(0, dtype = torch.float32)
        val_cnt = torch.tensor(0, dtype = torch.float32)

        for (i, batch) in enumerate(valid_loader):
            # Unwrap the batch
            X, y = batch
            X, y = Variable(X), Variable(y)
            # Transfer to GPU
            if use_cuda and torch.cuda.is_available():
                X = X.cuda()
                y = y.cuda()

            # Forward pass
            output = model(X)
            # Calculate the loss
            loss = loss_func(output, y)
            
            # Accumulate the history
            batch_size = X.shape[0]
            val_cnt = val_cnt + batch_size
            val_loss = val_loss + loss * batch_size
            acc = accuracy(output, y)
            val_acc = val_acc + acc * batch_size
        
        val_loss = val_loss / val_cnt
        val_acc = val_acc / val_cnt
        validationloss.append(val_loss)
        validationacc.append(val_acc)
    print(f"Training Loss - {train_loss : 0.5f} Training Accuracy - {train_acc : 0.5f} Validation Loss - {val_loss : 0.5f} Validation Accuracy - {val_acc : 0.5f}")

Training Loss -  0.36569 Training Accuracy -  0.88300 Validation Loss -  0.49166 Validation Accuracy -  0.84430
Training Loss -  0.35161 Training Accuracy -  0.88722 Validation Loss -  0.48870 Validation Accuracy -  0.84570
Training Loss -  0.35687 Training Accuracy -  0.88475 Validation Loss -  0.48301 Validation Accuracy -  0.84580
Training Loss -  0.34708 Training Accuracy -  0.88832 Validation Loss -  0.47444 Validation Accuracy -  0.85110
Training Loss -  0.34637 Training Accuracy -  0.88690 Validation Loss -  0.46474 Validation Accuracy -  0.85090
Training Loss -  0.34264 Training Accuracy -  0.88893 Validation Loss -  0.46788 Validation Accuracy -  0.85040
Training Loss -  0.33714 Training Accuracy -  0.89155 Validation Loss -  0.46284 Validation Accuracy -  0.85020
Training Loss -  0.33821 Training Accuracy -  0.88963 Validation Loss -  0.46497 Validation Accuracy -  0.85190
Training Loss -  0.33620 Training Accuracy -  0.89157 Validation Loss -  0.45748 Validation Accuracy -  

Compared to previous experiments this seems decent enought. Now let us take the same model and the same training process and train on the full dataset so that we dont waste data.

In [51]:
train_loader, valid_loader = make_dataloader(data_train, 0, 256)

In [52]:
model = Network()

In [62]:
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-6)

In [58]:
trainingloss = []
trainingacc = []
validationloss = []
validationacc = []

In [59]:
def accuracy(output, y):
    _, preds = torch.max(output, dim = 1)
    return torch.tensor(torch.sum(preds == y).item() / len(preds))

In [60]:
use_cuda = True
use_cuda, torch.cuda.is_available()

(True, True)

In [63]:
epochs = 20
for e in range(epochs):
    train_acc = torch.tensor(0, dtype = torch.float32)
    train_loss = torch.tensor(0, dtype = torch.float32)
    train_cnt = torch.tensor(0, dtype = torch.float32)

    model.train()
    for (i, batch) in enumerate(train_loader):
        # Unwrap the batch
        X, y = batch
        X, y = Variable(X), Variable(y)
        # Transfer to GPU
        if use_cuda and torch.cuda.is_available():
            X = X.cuda()
            y = y.cuda()
        
        # Forward pass
        output = model(X)

        # Calculate the loss
        loss = loss_func(output, y)

        # Calculate the gradient of loss wrt the params
        optimizer.zero_grad()
        loss.backward()

        # Subtract the gradients
        optimizer.step()

        # Accumulate the history (NOTE that this is the batch loss and batch accuracy)
        batch_size = X.shape[0]
        train_cnt += batch_size
        acc = accuracy(output, y)
        train_loss = train_loss + loss * batch_size
        train_acc = train_acc + acc * batch_size

    train_loss = train_loss / train_cnt
    train_acc = train_acc / train_cnt
    trainingloss.append(train_loss)
    trainingacc.append(train_acc)
    print(f"Training Loss - {train_loss : 0.5f} Training Accuracy - {train_acc : 0.5f}")

Training Loss -  0.32279 Training Accuracy -  0.89446
Training Loss -  0.31562 Training Accuracy -  0.89674
Training Loss -  0.30873 Training Accuracy -  0.89918
Training Loss -  0.30521 Training Accuracy -  0.90114
Training Loss -  0.30359 Training Accuracy -  0.90146
Training Loss -  0.29878 Training Accuracy -  0.90232
Training Loss -  0.29378 Training Accuracy -  0.90490
Training Loss -  0.29339 Training Accuracy -  0.90382
Training Loss -  0.29249 Training Accuracy -  0.90594
Training Loss -  0.28693 Training Accuracy -  0.90600
Training Loss -  0.29170 Training Accuracy -  0.90472
Training Loss -  0.28490 Training Accuracy -  0.90760
Training Loss -  0.28460 Training Accuracy -  0.90742
Training Loss -  0.28473 Training Accuracy -  0.90698
Training Loss -  0.28057 Training Accuracy -  0.90796
Training Loss -  0.27763 Training Accuracy -  0.90886
Training Loss -  0.27889 Training Accuracy -  0.90812
Training Loss -  0.28296 Training Accuracy -  0.90744
Training Loss -  0.27942 Tra

Time to find the results on the official test set.


In [64]:
test_loader = DataLoader(data_test, batch_size = 256)

In [65]:
test_acc = torch.tensor(0, dtype = torch.float32)
test_cnt = torch.tensor(0, dtype = torch.float32)
for batch in test_loader:
    X, y = batch
    X = Variable(X)
    y = Variable(y)
    if use_cuda and torch.cuda.is_available():
        X = X.cuda()
        y = y.cuda()
    model.eval()
    with torch.no_grad():
        batch_size = X.shape[0]
        output = model(X)
        acc = accuracy(output, y)
        test_acc += acc * batch_size
        test_cnt += batch_size
test_acc = test_acc / test_cnt
print(f"Test accurary - {test_acc.item() : 0.5f}")

Test accurary -  0.93168
