#Finding optimal hyper-parameters for CIFAR10 Images

#Student Name: zijun wu

#Student id: 1488834

In [38]:
import torch
import torchvision
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

epochs = 5
batch_size_train = 128
batch_size_test = 1000
learning_rate = 1e-3
momentum = 0.5
log_interval = 100
optimizer_name="Adam"

random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

# Checking GPU availability
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)


cuda:0


## Divide CIFAR10 into training, validation and test sets
## Use DataLoader iterator for loading data in batches

In [39]:
from torch.utils.data import random_split


CIFAR10_training = torchvision.datasets.CIFAR10('/CIFAR10_dataset/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]))

CIFAR10_test_set = torchvision.datasets.CIFAR10('/CIFAR10_dataset/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]))

# create a training and a validation set
CIFAR10_training_set, CIFAR10_validation_set = random_split(CIFAR10_training, [45000, 5000])


train_loader = torch.utils.data.DataLoader(CIFAR10_training_set,batch_size=batch_size_train, shuffle=True)

validation_loader = torch.utils.data.DataLoader(CIFAR10_validation_set,batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(CIFAR10_test_set,batch_size=batch_size_test, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


In [0]:
# Multiple Linear regression
class MultipleLinearRegression(nn.Module):
    def __init__(self):
        super(MultipleLinearRegression, self).__init__()
        self.fc = nn.Linear(32*32*3, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [0]:
# Following code appears at:  https://lirnli.wordpress.com/2017/09/03/one-hot-encoding-in-pytorch/
class One_Hot(nn.Module):
    def __init__(self, depth):
        super(One_Hot,self).__init__()
        self.depth = depth
        self.ones = torch.sparse.torch.eye(depth).to(device)
    def forward(self, X_in):
        X_in = X_in.long()
        return self.ones.index_select(0,X_in.data)
    def __repr__(self):
        return self.__class__.__name__ + "({})".format(self.depth)

In [0]:
def train(multi_linear_model, learning_rate=0.0001, momentum=0.5, epochs=2, optimizer_name="Adam"):
  multi_linear_model.train()
  if optimizer_name == "Adam":
      optimizer = optim.Adam(multi_linear_model.parameters(), lr=learning_rate, weight_decay=0.01)
      
  elif optimizer_name == "SGD":
      optimizer = optim.SGD(multi_linear_model.parameters(), lr=learning_rate, momentum=momentum,weight_decay=0.01)
    
  for epoch in range(1, epochs + 1):
    for batch_idx, (data, target) in enumerate(train_loader):
      data = data.to(device)
      target = target.to(device)
      optimizer.zero_grad()
      output = multi_linear_model(data)
      loss = F.mse_loss(output, one_hot(target)) # notice the use of view_as
      loss.backward()
      optimizer.step()
      error = loss.item();
    print('EPOCH {} completed. learning_rate= {:.6f}, Training Loss: {:.4f}'.format( epoch,learning_rate,error))
  return error


In [0]:
def validation(multi_linear_model):
  multi_linear_model.eval()
  validation_loss = 0
  correct = 0
  with torch.no_grad(): # notice the use of no_grad
    for data, target in validation_loader:
      data = data.to(device)
      target = target.to(device)
      output = multi_linear_model(data)
      pred = output.data.max(1, keepdim=True)[1]
      correct += pred.eq(target.data.view_as(pred)).sum()
      validation_loss += F.mse_loss(output, one_hot(target), size_average=False).item()
  validation_loss /= len(validation_loader.dataset)
  print('Validation set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(validation_loss, correct, len(validation_loader.dataset), 100. * correct / len(validation_loader.dataset)))
  return 100. * correct / len(validation_loader.dataset)

In [0]:
def test(multi_linear_model):
  multi_linear_model.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      data = data.to(device)
      target = target.to(device)
      output = multi_linear_model(data)
      test_loss += F.mse_loss(output, one_hot(target), size_average=False).item()
      pred = output.data.max(1, keepdim=True)[1]
      correct += pred.eq(target.data.view_as(pred)).sum()
  test_loss /= len(test_loader.dataset)
  print('Test set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))
  return 100. * correct / len(test_loader.dataset)

In [0]:
def tune_hyper_parameter():
  # -- Your code goes here --

  import numpy as np
  np.random.seed(1)
  ## Perform your hyper-parameter search for Adam
  adam_grid_value = np.random.uniform(low=0.00001, high=0.0001, size=(10,))
  # adam_grid_value = [0.000109, 0.000108, 0.000107, 0.000106, 0.000105]
  adam_accuracy = 0
  adam_test_accuracy = 0
  adam_lr_value = 0
  for value in adam_grid_value:
    multi_linear_model = MultipleLinearRegression().to(device)
    train(multi_linear_model, epochs=5, learning_rate=value)
    accuracy = validation(multi_linear_model)
    if accuracy > adam_accuracy:
      adam_accuracy = accuracy
      adam_lr_value = value
      adam_test_accuracy = test(multi_linear_model)

  print('Best performance: Validation Accuracy={:.0f}%, Test Accuracy={:.0f}%, with Adam optimizer learning_rate={}'.format(adam_accuracy, adam_test_accuracy, adam_lr_value))

  
  ## Perform your hyper-parameter search for SGD   
  SGD_grid_value = {"lr": np.random.uniform(low=0.0001, high=0.0009, size=(10,)),
                    "momentum": np.random.uniform(low=0.95, high=0.99, size=(5,))}
  SGD_accuracy = 0
  SGD_test_accuracy = 0
  SGD_lr_value = 0
  SGD_monentum_value = 0
  for lr in SGD_grid_value["lr"]:
    for momentum in SGD_grid_value["momentum"]:
      multi_linear_model = MultipleLinearRegression().to(device)
      train(multi_linear_model, learning_rate=lr, momentum=momentum, epochs=5, optimizer_name="SGD")
      accuracy = validation(multi_linear_model)
      if accuracy > SGD_accuracy:
        SGD_accuracy = accuracy
        SGD_lr_value = lr
        SGD_monentum_value = momentum
        SGD_test_accuracy = test(multi_linear_model)

  print('Best performance: Validation Accuracy={:.0f}%,  Test Accuracy={:.0f}%, with SGD optimizer learning_rate={} and momentum={}'.format(SGD_accuracy, SGD_test_accuracy, SGD_lr_value, SGD_monentum_value))

    
  ##Final output will be like:
  
  #Best performance: Validation Accuracy=38% , with Adam optimizer learning_rate=0.??????
  
  #or
  
  #Best performance: Validation Accuracy=37% , with SGD optimizer learning_rate=0.?????? and momentum=0.???
   

In [46]:
##Final Block
##Keep the output block of this section while submitting your solution 
##The last line of the output must contain the accuracy and best configuration information
multi_linear_model = MultipleLinearRegression().to(device)
one_hot = One_Hot(10).to(device)
validation(multi_linear_model)
tune_hyper_parameter()
        





Validation set: Avg. loss: 1.8523, Accuracy: 479/5000 (9%)

EPOCH 1 completed. learning_rate= 0.000048, Training Loss: 0.0876
EPOCH 2 completed. learning_rate= 0.000048, Training Loss: 0.0862
EPOCH 3 completed. learning_rate= 0.000048, Training Loss: 0.0846
EPOCH 4 completed. learning_rate= 0.000048, Training Loss: 0.0863
EPOCH 5 completed. learning_rate= 0.000048, Training Loss: 0.0810
Validation set: Avg. loss: 0.8150, Accuracy: 1920/5000 (38%)

Test set: Avg. loss: 0.8167, Accuracy: 3812/10000 (38%)

EPOCH 1 completed. learning_rate= 0.000075, Training Loss: 0.0872
EPOCH 2 completed. learning_rate= 0.000075, Training Loss: 0.0917
EPOCH 3 completed. learning_rate= 0.000075, Training Loss: 0.0800
EPOCH 4 completed. learning_rate= 0.000075, Training Loss: 0.0815
EPOCH 5 completed. learning_rate= 0.000075, Training Loss: 0.0793
Validation set: Avg. loss: 0.8064, Accuracy: 1875/5000 (37%)

EPOCH 1 completed. learning_rate= 0.000010, Training Loss: 0.1067
EPOCH 2 completed. learning_rate=