In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

In [16]:
def weights_init(m):
    if type(m) == nn.Linear:
        m.weight.data.normal_(0.0, 1e-3)
        m.bias.data.fill_(0.)

def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


In [3]:
#--------------------------------
# Device configuration
#--------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device: %s'%device)


Using device: cuda


In [15]:
#--------------------------------
# Hyper-parameters
#--------------------------------
input_size = 32 * 32 * 3
hidden_size = [50]
num_classes = 10
num_epochs = 10
batch_size = 200
learning_rate = 1e-3
learning_rate_decay = 0.95
reg=0.001
num_training= 49000
num_validation =1000
train = True


In [5]:
!unzip datasets.zip

Archive:  datasets.zip
   creating: datasets/cifar-10-batches-py/
  inflating: datasets/cifar-10-batches-py/batches.meta  
  inflating: datasets/cifar-10-batches-py/data_batch_1  
  inflating: datasets/cifar-10-batches-py/data_batch_2  
  inflating: datasets/cifar-10-batches-py/data_batch_3  
  inflating: datasets/cifar-10-batches-py/data_batch_4  
  inflating: datasets/cifar-10-batches-py/data_batch_5  
 extracting: datasets/cifar-10-batches-py/readme.html  
  inflating: datasets/cifar-10-batches-py/test_batch  


In [17]:
#-------------------------------------------------
# Load the CIFAR-10 dataset
#-------------------------------------------------
norm_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                     ])
cifar_dataset = torchvision.datasets.CIFAR10(root='datasets/',
                                           train=True,
                                           transform=norm_transform,
                                           download=True)

test_dataset = torchvision.datasets.CIFAR10(root='datasets/',
                                          train=False,
                                          transform=norm_transform
                                          )

Files already downloaded and verified


In [18]:
#-------------------------------------------------
# Prepare the training and validation splits
#-------------------------------------------------
mask = list(range(num_training))
train_dataset = torch.utils.data.Subset(cifar_dataset, mask)
mask = list(range(num_training, num_training + num_validation))
val_dataset = torch.utils.data.Subset(cifar_dataset, mask)


In [19]:

#-------------------------------------------------
# Data loader
#-------------------------------------------------
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                           batch_size=batch_size,
                                           shuffle=False)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)



In [None]:
#======================================================================================
# Q4: Implementing multi-layer perceptron in PyTorch
#======================================================================================
# So far we have implemented a two-layer network using numpy by explicitly
# writing down the forward computation and deriving and implementing the
# equations for backward computation. This process can be tedious to extend to
# large network architectures
#
# Popular deep-learning libraries like PyTorch and Tensorflow allow us to
# quickly implement complicated neural network architectures. They provide
# pre-defined layers which can be used as building blocks to define our
# network. They also enable automatic-differentiation, which allows us to
# define only the forward pass and let the libraries perform back-propagation
# using automatic differentiation.
#
# In this question we will implement a multi-layer perceptron using the PyTorch
# library.  Please complete the code for the MultiLayerPerceptron, training and
# evaluating the model. Once you can train the two layer model, experiment with
# adding more layers and report your observations
#--------------------------------------------------------------------------------------


In [25]:
#-------------------------------------------------
# Fully connected neural network with one hidden layer
#-------------------------------------------------
class MultiLayerPerceptron(nn.Module):
    def __init__(self, input_size, hidden_layers, num_classes):
        super(MultiLayerPerceptron, self).__init__()
        #################################################################################
        # TODO: Initialize the modules required to implement the mlp with the layer     #
        # configuration. input_size --> hidden_layers[0] --> hidden_layers[1] .... -->  #
        # hidden_layers[-1] --> num_classes                                             #
        # Make use of linear and relu layers from the torch.nn module                   #
        #################################################################################
        
        layers = [] #Use the layers list to store a variable number of layers
        
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        layers.append(nn.Linear(input_size,hidden_layers[0]))
        layers.append(nn.ReLU())
        
        if len(hidden_layers)>1:
          for i in range(len(hidden_layers)-1):
            layers.append(nn.Linear(hidden_layers[i], hidden_layers[i+1]))
            layers.append(nn.ReLU())
        
        layers.append(nn.Linear(hidden_layers[len(hidden_layers)-1],num_classes))
        layers.append(nn.Softmax(dim=1))

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Enter the layers into nn.Sequential, so the model may "see" them
        # Note the use of * in front of layers
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        #################################################################################
        # TODO: Implement the forward pass computations                                 #
        # Note that you do not need to use the softmax operation at the end.            #
        # Softmax is only required for the loss computation and the criterion used below#
        # nn.CrossEntropyLoss() already integrates the softmax and the log loss together#
        #################################################################################
        
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        out = self.layers(x)
        

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        
        return out

In [20]:
model = MultiLayerPerceptron(input_size, hidden_size, num_classes).to(device)
# Print model's state_dict



In [21]:
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

Model's state_dict:
layers.0.weight 	 torch.Size([50, 3072])
layers.0.bias 	 torch.Size([50])
layers.2.weight 	 torch.Size([10, 50])
layers.2.bias 	 torch.Size([10])


In [24]:
if train:
    model.apply(weights_init)
    model.train() #set dropout and batch normalization layers to training mode
    
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg)

    # Train the model
    lr = learning_rate
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)
            #################################################################################
            # TODO: Implement the training code                                             #
            # 1. Pass the images to the model                                               #
            # 2. Compute the loss using the output and the labels.                          #
            # 3. Compute gradients and update the model using the optimizer                 #
            # Use examples in https://pytorch.org/tutorials/beginner/pytorch_with_examples.html
            #################################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            images = images.view(images.shape[0], -1)
            optimizer.zero_grad()
            logits = model.forward(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

        # Code to update the lr
        lr *= learning_rate_decay
        update_lr(optimizer, lr)
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                ####################################################
                # TODO: Implement the evaluation code              #
                # 1. Pass the images to the model                  #
                # 2. Get the most confident predicted class        #
                ####################################################
                # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
                images = images.view(images.shape[0], -1)
                logits = model.forward(images)
                loss = criterion(logits, labels)
                _, predicted = torch.max(logits, 1)

                # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            print('Validataion accuracy is: {} %'.format(100 * correct / total))

    ##################################################################################
    # TODO: Now that you can train a simple two-layer MLP using above code, you can  #
    # easily experiment with adding more layers and different layer configurations   #
    # and let the pytorch library handle computing the gradients                     #
    #                                                                                #
    # Experiment with different number of layers (at least from 2 to 5 layers) and   #
    # record the final validation accuracies Report your observations on how adding  #
    # more layers to the MLP affects its behavior. Try to improve the model          #
    # configuration using the validation performance as the guidance. You can        #
    # experiment with different activation layers available in torch.nn, adding      #
    # dropout layers, if you are interested. Use the best model on the validation    #
    # set, to evaluate the performance on the test set once and report it            #
    ##################################################################################

    # Save the model checkpoint
    torch.save(model.state_dict(), 'model.ckpt')

else:
    # Run the test code once you have your by setting train flag to false
    # and loading the best model

    best_model = None
    best_model = torch.load('model.ckpt')
    
    model.load_state_dict(best_model)
    
    # Test the model
    model.eval() #set dropout and batch normalization layers to evaluation mode
    
    # In test phase, we don't need to compute gradients (for memory efficiency)
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            ####################################################
            # TODO: Implement the evaluation code              #
            # 1. Pass the images to the model                  #
            # 2. Get the most confident predicted class        #
            ####################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            images = images.view(images.shape[0], -1)
            logits = model.forward(images)
            loss = criterion(logits, labels)
            _, predicted = torch.max(logits, 1)
            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            if total == 1000:
                break

        print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))



Epoch [1/10], Step [100/245], Loss: 2.1102
Epoch [1/10], Step [200/245], Loss: 2.1018
Validataion accuracy is: 38.0 %
Epoch [2/10], Step [100/245], Loss: 2.0656
Epoch [2/10], Step [200/245], Loss: 2.0607
Validataion accuracy is: 40.8 %
Epoch [3/10], Step [100/245], Loss: 2.0553
Epoch [3/10], Step [200/245], Loss: 2.0381
Validataion accuracy is: 41.4 %
Epoch [4/10], Step [100/245], Loss: 2.0232
Epoch [4/10], Step [200/245], Loss: 2.0390
Validataion accuracy is: 43.7 %
Epoch [5/10], Step [100/245], Loss: 2.0482
Epoch [5/10], Step [200/245], Loss: 2.0058
Validataion accuracy is: 42.1 %
Epoch [6/10], Step [100/245], Loss: 2.0266
Epoch [6/10], Step [200/245], Loss: 2.0016
Validataion accuracy is: 45.3 %
Epoch [7/10], Step [100/245], Loss: 2.0101
Epoch [7/10], Step [200/245], Loss: 1.9957
Validataion accuracy is: 43.5 %
Epoch [8/10], Step [100/245], Loss: 2.0030
Epoch [8/10], Step [200/245], Loss: 2.0037
Validataion accuracy is: 43.9 %
Epoch [9/10], Step [100/245], Loss: 1.9660
Epoch [9/10],

In [26]:
#Above is the one hidden layer fully connected neural network
train = False

In [27]:
if train:
    model.apply(weights_init)
    model.train() #set dropout and batch normalization layers to training mode
    
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg)

    # Train the model
    lr = learning_rate
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)
            #################################################################################
            # TODO: Implement the training code                                             #
            # 1. Pass the images to the model                                               #
            # 2. Compute the loss using the output and the labels.                          #
            # 3. Compute gradients and update the model using the optimizer                 #
            # Use examples in https://pytorch.org/tutorials/beginner/pytorch_with_examples.html
            #################################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            images = images.view(images.shape[0], -1)
            optimizer.zero_grad()
            logits = model.forward(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

        # Code to update the lr
        lr *= learning_rate_decay
        update_lr(optimizer, lr)
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                ####################################################
                # TODO: Implement the evaluation code              #
                # 1. Pass the images to the model                  #
                # 2. Get the most confident predicted class        #
                ####################################################
                # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
                images = images.view(images.shape[0], -1)
                logits = model.forward(images)
                loss = criterion(logits, labels)
                _, predicted = torch.max(logits, 1)

                # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            print('Validataion accuracy is: {} %'.format(100 * correct / total))

    ##################################################################################
    # TODO: Now that you can train a simple two-layer MLP using above code, you can  #
    # easily experiment with adding more layers and different layer configurations   #
    # and let the pytorch library handle computing the gradients                     #
    #                                                                                #
    # Experiment with different number of layers (at least from 2 to 5 layers) and   #
    # record the final validation accuracies Report your observations on how adding  #
    # more layers to the MLP affects its behavior. Try to improve the model          #
    # configuration using the validation performance as the guidance. You can        #
    # experiment with different activation layers available in torch.nn, adding      #
    # dropout layers, if you are interested. Use the best model on the validation    #
    # set, to evaluate the performance on the test set once and report it            #
    ##################################################################################

    # Save the model checkpoint
    torch.save(model.state_dict(), 'model.ckpt')

else:
    # Run the test code once you have your by setting train flag to false
    # and loading the best model

    best_model = None
    best_model = torch.load('model.ckpt')
    
    model.load_state_dict(best_model)
    
    # Test the model
    model.eval() #set dropout and batch normalization layers to evaluation mode
    
    # In test phase, we don't need to compute gradients (for memory efficiency)
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            ####################################################
            # TODO: Implement the evaluation code              #
            # 1. Pass the images to the model                  #
            # 2. Get the most confident predicted class        #
            ####################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            images = images.view(images.shape[0], -1)
            logits = model.forward(images)
            loss = criterion(logits, labels)
            _, predicted = torch.max(logits, 1)
            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            if total == 1000:
                break

        print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))

Accuracy of the network on the 1000 test images: 44.7 %


In [76]:
#Hyperparameter Tuning
#--------------------------------
# Hyper-parameters
#--------------------------------
input_size = 32 * 32 * 3
hidden_size = [2048,1365,910,606,404]
num_classes = 10
num_epochs = 10
batch_size = 200
learning_rate = 1e-3  #2e-3,3e-3
learning_rate_decay = 0.95
reg=0.001
num_training= 49000
num_validation =1000
train = True
        
model = MultiLayerPerceptron(input_size, hidden_size, num_classes).to(device)    

if train:
    model.apply(weights_init)
    model.train() #set dropout and batch normalization layers to training mode
    
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg)

    # Train the model
    lr = learning_rate
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            images = images.view(images.shape[0], -1)
            optimizer.zero_grad()
            logits = model.forward(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

        # Code to update the lr
        lr *= learning_rate_decay
        update_lr(optimizer, lr)
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
                images = images.view(images.shape[0], -1)
                logits = model.forward(images)
                loss = criterion(logits, labels)
                _, predicted = torch.max(logits, 1)

                # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            print('Validataion accuracy is: {} %'.format(100 * correct / total))

    # Save the model checkpoint
    torch.save(model.state_dict(), 'model.ckpt')
    print("Learning Rate:",learning_rate)
    print("Hidden Size", hidden_size)
else:
    # Run the test code once you have your by setting train flag to false
    # and loading the best model

    best_model = None
    best_model = torch.load('model.ckpt')
    
    model.load_state_dict(best_model)
    
    # Test the model
    model.eval() #set dropout and batch normalization layers to evaluation mode
    
    # In test phase, we don't need to compute gradients (for memory efficiency)
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            images = images.view(images.shape[0], -1)
            logits = model.forward(images)
            loss = criterion(logits, labels)
            _, predicted = torch.max(logits, 1)
            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            if total == 1000:
                break

        print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))
        print("Learning Rate:",learning_rate)
        print("Hidden Size", hidden_size)

Epoch [1/10], Step [100/245], Loss: 2.3026
Epoch [1/10], Step [200/245], Loss: 2.3026
Validataion accuracy is: 7.8 %
Epoch [2/10], Step [100/245], Loss: 2.3026
Epoch [2/10], Step [200/245], Loss: 2.3025
Validataion accuracy is: 7.8 %
Epoch [3/10], Step [100/245], Loss: 2.3026
Epoch [3/10], Step [200/245], Loss: 2.3025
Validataion accuracy is: 7.8 %
Epoch [4/10], Step [100/245], Loss: 2.3026
Epoch [4/10], Step [200/245], Loss: 2.3026
Validataion accuracy is: 7.8 %
Epoch [5/10], Step [100/245], Loss: 2.3026
Epoch [5/10], Step [200/245], Loss: 2.3026
Validataion accuracy is: 7.8 %
Epoch [6/10], Step [100/245], Loss: 2.3025
Epoch [6/10], Step [200/245], Loss: 2.3027
Validataion accuracy is: 7.9 %
Epoch [7/10], Step [100/245], Loss: 2.3026
Epoch [7/10], Step [200/245], Loss: 2.3026
Validataion accuracy is: 7.8 %
Epoch [8/10], Step [100/245], Loss: 2.3026
Epoch [8/10], Step [200/245], Loss: 2.3027
Validataion accuracy is: 7.8 %
Epoch [9/10], Step [100/245], Loss: 2.3026
Epoch [9/10], Step [2

In [None]:
#Learning Rate 1e-3
'[50,50]Accuracy of the network on the 1000 test images: 44.1 %'
'[600,600]Accuracy of the network on the 1000 test images: 45.9 %'
'[100,100]Accuracy of the network on the 1000 test images: 46.4 %'

In [70]:
print(model)

MultiLayerPerceptron(
  (layers): Sequential(
    (0): Linear(in_features=3072, out_features=1536, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1536, out_features=768, bias=True)
    (3): ReLU()
    (4): Linear(in_features=768, out_features=384, bias=True)
    (5): ReLU()
    (6): Linear(in_features=384, out_features=192, bias=True)
    (7): ReLU()
    (8): Linear(in_features=192, out_features=10, bias=True)
    (9): Softmax(dim=1)
  )
)


In [75]:
606*2/3

404.0