In [3]:
#Data loading in PyTorch, using torchvision package. In here, we will be loading the CIFAR10 dataset.
import torch
import torch.nn as nn
from torchvision.datasets import CIFAR10 #Load the dataset using torchvision
from torchvision.transforms import transforms #Define transformations to be applied on the image
from torch.utils.data import DataLoader#Create an instance of the DataLoader to hold the images

#Import the Adam optimizer, which is used to train the model
from torch.optim import Adam

from torch.autograd import Variable
import numpy as np

In [4]:
#Below is the Unit class, introduced for modularity: convolution, batch normalization and ReLU are grouped into one
#separate module, and many copies of that module are stacked in the SimpleNet class. This keeps the code clean and compact.
#The Unit class consists of Conv2d, BatchNorm2d and ReLU. Batch normalization essentially normalizes its inputs to
#have zero mean and unit variance, which usually gives a noticeable boost to the accuracy of CNN models.

In [5]:
class Unit(nn.Module):
    """Reusable Conv2d -> BatchNorm2d -> ReLU building block.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.
    """

    def __init__(self, in_channels, out_channels):
        super(Unit, self).__init__()

        # 3x3 kernel with stride 1 and padding 1: the spatial size of the
        # feature map is left unchanged, only the channel count changes.
        self.conv = nn.Conv2d(in_channels=in_channels, kernel_size=3, out_channels=out_channels, stride=1, padding=1)
        self.bn = nn.BatchNorm2d(num_features=out_channels)
        self.relu = nn.ReLU()

    def forward(self, input):
        """Apply convolution, batch normalization and ReLU, in that order."""
        # Fixed: the original passed the misspelled name `intput` here, which
        # raised NameError on every forward pass.
        output = self.conv(input)
        output = self.bn(output)
        output = self.relu(output)
        return output

In [6]:
#This SimpleNet class use above Unit class as sub-modules.
class SimpleNet(nn.Module):
    """CNN classifier built from 14 stacked Unit blocks.

    Layout: 3 units -> max-pool -> 4 units -> max-pool -> 4 units -> max-pool
    -> 3 units -> average-pool -> linear classifier.

    Args:
        num_classes: number of output logits produced by the final linear layer.
    """

    def __init__(self, num_classes=10):
        super(SimpleNet, self).__init__()

        self.unit1 = Unit(in_channels=3, out_channels=32)
        self.unit2 = Unit(in_channels=32, out_channels=32)
        self.unit3 = Unit(in_channels=32, out_channels=32)

        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.unit4 = Unit(in_channels=32, out_channels=64)
        self.unit5 = Unit(in_channels=64, out_channels=64)
        self.unit6 = Unit(in_channels=64, out_channels=64)
        self.unit7 = Unit(in_channels=64, out_channels=64)

        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.unit8 = Unit(in_channels=64, out_channels=128)
        self.unit9 = Unit(in_channels=128, out_channels=128)
        self.unit10 = Unit(in_channels=128, out_channels=128)
        self.unit11 = Unit(in_channels=128, out_channels=128)

        self.pool3 = nn.MaxPool2d(kernel_size=2)

        self.unit12 = Unit(in_channels=128, out_channels=128)
        self.unit13 = Unit(in_channels=128, out_channels=128)
        self.unit14 = Unit(in_channels=128, out_channels=128)

        self.avgpool = nn.AvgPool2d(kernel_size=4)

        # Chain every layer except the final linear layer, in execution order.
        # The layers stay registered under their own attribute names as well,
        # so the state_dict keys are the same as in the original definition.
        self.net = nn.Sequential(
            self.unit1, self.unit2, self.unit3, self.pool1,
            self.unit4, self.unit5, self.unit6, self.unit7, self.pool2,
            self.unit8, self.unit9, self.unit10, self.unit11, self.pool3,
            self.unit12, self.unit13, self.unit14, self.avgpool,
        )

        # The last unit outputs 128 channels. For 32x32 inputs the three 2x2
        # max-pools reduce the feature map to 4x4 (assuming Unit preserves the
        # spatial size) and the 4x4 average pool reduces it to 1x1, so the
        # linear layer receives 1*1*128 = 128 input features.
        self.fc = nn.Linear(in_features=128, out_features=num_classes)

    # Fixed: `forward` used to be indented inside `__init__`, which made it a
    # local function that was never attached to the class, so calling the
    # model failed. It is now a regular method.
    def forward(self, input):
        """Return the class logits for a batch of images."""
        output = self.net(input)
        # Flatten the pooled feature map to 128 features per sample.
        output = output.view(-1, 128)
        output = self.fc(output)
        return output
        
#This model contains 14 convolution layers, 14 ReLU layers, 14 batch normalization layers, 4 pooling layers and 1 linear layer.        

In [17]:
#Transformations applied to every training image, bundled into a single pipeline with transforms.Compose:
#  - RandomHorizontalFlip randomly flips the image horizontally
#  - RandomCrop pads the image by 4 pixels and randomly crops a 32x32 patch out of it
#  - ToTensor converts the image into a tensor usable by PyTorch
#  - Normalize with mean 0.5 and std 0.5 per channel makes the pixel values range between -1 and +1
train_transformations = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(size=32, padding=4),
    #ToTensor and Normalize must come last, in this order.
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

train_transformations

Compose(
    RandomHorizontalFlip(p=0.5)
    RandomCrop(size=(32, 32), padding=4)
    ToTensor()
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
)

In [16]:
#Build the CIFAR10 training split (train=True) under ./data, downloading it if needed,
#with the training transformations applied to every image.
train_set = CIFAR10(
    root="./data",
    train=True,
    download=True,
    transform=train_transformations,
)
train_set

Files already downloaded and verified


Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               RandomHorizontalFlip(p=0.5)
               RandomCrop(size=(32, 32), padding=4)
               ToTensor()
               Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
           )

In [18]:
#Number of images per batch
batch_size = 32
#Loader for the training set: shuffled batches of batch_size images, loaded by 4 worker processes
train_loader = DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
train_loader

<torch.utils.data.dataloader.DataLoader at 0x12cd54950>

In [19]:
#Transformations for the test set: only tensor conversion and normalization, no random augmentation
test_transformations = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])
test_transformations

Compose(
    ToTensor()
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
)

In [20]:
#Build the CIFAR10 test split under ./data; train must be False here to select the test images
test_set = CIFAR10(
    root="./data",
    train=False,
    download=True,
    transform=test_transformations,
)
test_set

Files already downloaded and verified


Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
           )

In [21]:
#Loader for the test set: same batch size as training, but without shuffling (shuffle=False)
test_loader = DataLoader(
    dataset=test_set,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)
test_loader

<torch.utils.data.dataloader.DataLoader at 0x128f39b50>

In [13]:
#Steps for training the model:
#1. Import the Adam optimizer (from torch.optim import Adam)
#2. Instantiate the model and create the optimizer and loss function
#3. Write a function to adjust the learning rate
#4. Write functions to save and evaluate the model
#5. Write the training function

In [15]:
#Check whether GPU (CUDA) support is available. The resulting flag is read later to decide
#whether the model and the image/label batches are moved to the GPU.
cuda_avail = torch.cuda.is_available()
cuda_avail

False

In [25]:
#Instantiate the network with 10 output classes
model = SimpleNet(num_classes=10)
#Move the model to the GPU when CUDA is available
if cuda_avail:
    model.cuda()
#Adam optimizer over all model parameters, plus cross-entropy as the loss function
optimizer = Adam(params=model.parameters(), lr=0.001, weight_decay=0.0001)
loss_fn = nn.CrossEntropyLoss()


CrossEntropyLoss()

In [26]:
#Create a learning rate adjustment function that divides the learning rate by 10 every 30 epochs
def adjust_learning_rate(epoch):
    """Set the learning rate of every param group of the global optimizer.

    The rate starts at 0.001 and is divided by a further factor of 10 each
    time `epoch` passes one of the thresholds 30, 60, 90, 120, 150 and 180.
    """
    base_lr = 0.001

    #(epoch threshold, divisor) pairs, highest threshold first so that the
    #first match is the strongest reduction that applies.
    schedule = (
        (180, 1000000),
        (150, 100000),
        (120, 10000),
        (90, 1000),
        (60, 100),
        (30, 10),
    )

    new_lr = base_lr
    for threshold, divisor in schedule:
        if epoch > threshold:
            new_lr = base_lr / divisor
            break

    for group in optimizer.param_groups:
        group["lr"] = new_lr

In [27]:
def save_models(epoch):
    """Save the global model's state_dict to a file named after `epoch`."""
    checkpoint_path = "cifar10model_{}.model".format(epoch)
    weights = model.state_dict()
    torch.save(weights, checkpoint_path)
    print("Checkpoint saved")

In [35]:
#To evaluate the accuracy of the model on the test set.
def test():
    model.eval()
    test_acc = 0.0
    #We iterate over the test loader, each time, we will move the images and labels to the GPU, if GPU availble, we will 
    #wrap the images and labels in a Variable. The images then passed into the model to obtain predictions. The maximum predictions
    #is picked and then compared to the actual class to obtain the accuracy. Finally, return the average accuracy.
    for i, (images, labels) in enumerate(test_loader):
        
        if cuda_avail:
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
        
        #Predict classes using images from the test set
        outputs = model(images)
        _,prediction = torch.max(outputs.data, 1)
        prediction = prediction.cpu().numpy()
        ##problem
        test_acc += torch.sum(torch.from_numpy(prediction).cuda() == labels.data)
        
        
    #Compute the average acc and loss over all 10000 test images
    test_acc = test_acc / 10000
    
    return test_acc

In [37]:
def train(num_epochs):
    """Train the global model for `num_epochs` epochs.

    Each epoch runs one pass over train_loader, calls adjust_learning_rate,
    evaluates with test(), saves a checkpoint through save_models whenever the
    test accuracy exceeds the best value seen so far, and prints the metrics.

    Args:
        num_epochs: number of passes over the training loader.
    """
    best_acc = 0.0

    for epoch in range(num_epochs):
        model.train()
        #Metrics are accumulated as plain Python numbers (via .item()) rather
        #than tensors, so the averages and the printed values are ordinary floats.
        correct = 0
        loss_sum = 0.0
        seen = 0

        #We loop over the loader for the training set
        for images, labels in train_loader:

            #Move images and labels to GPU if GPU support is available
            if cuda_avail:
                images = images.cuda()
                labels = labels.cuda()

            #Clear all accumulated gradients. Gradients accumulate across
            #backward() calls, so they must be reset for each new batch;
            #otherwise a previous batch would leak gradients into this one.
            optimizer.zero_grad()

            #Predict classes using images from the train set
            outputs = model(images)

            #Compute the loss based on the predictions and actual labels
            loss = loss_fn(outputs, labels)

            #Backpropagate the loss
            loss.backward()

            #Adjust parameters according to the computed gradients
            optimizer.step()

            #loss is the batch mean, so weight it by the batch size to be able
            #to average over all samples at the end of the epoch.
            batch_count = images.size(0)
            loss_sum += loss.item() * batch_count
            _, prediction = torch.max(outputs.detach(), 1)

            #Sum up the number of correct predictions in the batch
            correct += (prediction == labels).sum().item()
            seen += batch_count

        #Call the learning rate adjustment function
        adjust_learning_rate(epoch)

        #Average over the samples actually seen instead of a hard-coded 50000
        train_acc = correct / seen if seen else 0.0
        train_loss = loss_sum / seen if seen else 0.0

        #Find the test accuracy
        test_acc = test()

        #Keep track of the best accuracy; save the model whenever the current
        #test accuracy is greater than the best so far.
        if test_acc > best_acc:
            save_models(epoch)
            best_acc = test_acc

        #Print the metrics
        print("Epoch {}, Train Accuracy:{}, TrainLoss: {}, Test Accuracy: {}".format(epoch, train_acc, train_loss, test_acc))
      

    
#problem
#if __name__ == "__main__":
    #train(200)
