### First cell can be skipped if you are not working with: https://colab.research.google.com/

In [1]:
!pip3 install http://download.pytorch.org/whl/cu90/torch-0.3.1-cp36-cp36m-linux_x86_64.whl 
!pip3 install torchvision

Collecting torch==0.3.1 from http://download.pytorch.org/whl/cu90/torch-0.3.1-cp36-cp36m-linux_x86_64.whl
[?25l  Downloading http://download.pytorch.org/whl/cu90/torch-0.3.1-cp36-cp36m-linux_x86_64.whl (547.8MB)
[K    78% |█████████████████████████       | 429.7MB 32.7MB/s eta 0:00:04[K    100% |████████████████████████████████| 547.8MB 25.5MB/s 
Installing collected packages: torch
Successfully installed torch-0.3.1
Collecting torchvision
[?25l  Downloading https://files.pythonhosted.org/packages/ca/0d/f00b2885711e08bd71242ebe7b96561e6f6d01fdb4b9dcf4d37e2e13c5e1/torchvision-0.2.1-py2.py3-none-any.whl (54kB)
[K    100% |████████████████████████████████| 61kB 2.1MB/s 
[?25hCollecting pillow>=4.1.1 (from torchvision)
[?25l  Downloading https://files.pythonhosted.org/packages/5f/4b/8b54ab9d37b93998c81b364557dff9f61972c0f650efa0ceaf470b392740/Pillow-5.1.0-cp36-cp36m-manylinux1_x86_64.whl (2.0MB)
[K    100% |████████████████████████████████| 2.0MB 8.0MB/s 
Installing collected packa

In [1]:
import torch
import math
import os

from torch import optim
from torch import Tensor
from torch.autograd import Variable
from torch import nn
from torchvision import datasets
import torchvision.transforms as transforms

In [2]:
def create_mnist_model():
  return nn.Sequential(
      nn.Linear(784, 100),
      nn.ReLU(),
      nn.Linear(100, 10)
  )

def train_model_sgd(model, train_input, train_target, nb_epochs = 150, mini_batch_size = 100, lr = 1e-1):
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(model.parameters(), lr)

  for e in range(0, nb_epochs):
      for b in range(0, train_input.size(0), mini_batch_size):
          output = model(train_input.narrow(0, b, mini_batch_size))
          loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
          model.zero_grad()
          loss.backward()
          optimizer.step()

def train_model_adam(model, train_input, train_target, nb_epochs = 150, mini_batch_size = 100, lr = 1e-3, beta1 = 0.9, beta2 = 0.999):
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr, betas = (beta1,beta2))

  for e in range(0, nb_epochs):
      for b in range(0, train_input.size(0), mini_batch_size):
          output = model(train_input.narrow(0, b, mini_batch_size))
          loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
          model.zero_grad()
          loss.backward()
          optimizer.step()

def train_model_amsgrad(model, train_input, train_target, nb_epochs = 150, mini_batch_size = 100, lr = 1e-3, beta1 = 0.9, beta2 = 0.999):
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr, betas = (beta1,beta2), amsgrad = True)

  for e in range(0, nb_epochs):
      for b in range(0, train_input.size(0), mini_batch_size):
          output = model(train_input.narrow(0, b, mini_batch_size))
          loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
          model.zero_grad()
          loss.backward()
          optimizer.step()
          
def compute_prc_errors(model, data_input, data_target, mini_batch_size = 100, test_prc = False):
  nb_data_errors = 0
  
  for b in range(0, data_input.size(0), mini_batch_size):
      output = model(data_input.narrow(0, b, mini_batch_size))
      _, predicted_classes = torch.max(output.data, 1)
      for k in range(0, mini_batch_size):
          if data_target.data[b + k] != predicted_classes[k]:
              nb_data_errors = nb_data_errors + 1
              
  # compute error rate for train/test set depending on flag test_prc
  if (test_prc):
    percentage = nb_data_errors/test_input.size(0) * 100
  else:
    percentage = nb_data_errors/train_input.size(0) * 100
  return percentage

def print_errors(mini_batch_size = 100):
  train_percent = compute_prc_errors(model, train_input, train_target,  mini_batch_size, test_prc = False)
  print('train error = {:0.2f}%'.format(train_percent))
  test_percent = compute_prc_errors(model, test_input, test_target, mini_batch_size, test_prc = True)
  print('test error = {:0.2f}%'.format(test_percent))

In [3]:
def get_data():
  
  root = './data'
  if not os.path.exists(root):
    os.mkdir(root)

  mnist_train_set = datasets.MNIST(root = root, train = True, download = True)
  mnist_test_set = datasets.MNIST(root = root, train = False, download = True)
#   train_loader = torch.utils.data.DataLoader(dataset=mnist_train_set, batch_size=mini_batch_size, shuffle=True)
#   test_loader = torch.utils.data.DataLoader(dataset=mnist_test_set, batch_size=mini_batch_size, shuffle=True)

  train_input = mnist_train_set.train_data.view(mnist_train_set.train_data.size(0),-1).float()
  train_target = mnist_train_set.train_labels
  test_input = mnist_test_set.test_data.view(mnist_test_set.test_data.size(0),-1).float()
  test_target = mnist_test_set.test_labels
  print("train_input.size(0)", train_input.size(0))
  print("test_input.size(0)", test_input.size(0))
  
  return train_input, train_target, test_input, test_target


In [4]:
def get_data_old():
  
  root = './data'
  if not os.path.exists(root):
    os.mkdir(root)

  trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
  mnist_train_set = datasets.MNIST(root = root, train = True, transform = trans, download = True)
  mnist_test_set = datasets.MNIST(root = root, train = False, transform = trans, download = True)
#   train_loader = torch.utils.data.DataLoader(dataset=mnist_train_set, batch_size=mini_batch_size, shuffle=True)
#   test_loader = torch.utils.data.DataLoader(dataset=mnist_test_set, batch_size=mini_batch_size, shuffle=True)

  train_input = mnist_train_set.train_data.view(mnist_train_set.train_data.size(0),-1).float()
  train_target = mnist_train_set.train_labels
  test_input = mnist_test_set.test_data.view(mnist_test_set.test_data.size(0),-1).float()
  test_target = mnist_test_set.test_labels
  print("train_input.size(0)", train_input.size(0))
  print("test_input.size(0)", test_input.size(0))
  
  return train_input, train_target, test_input, test_target

Load the train and test data for MNIST dataset, normalize

In [5]:
train_input, train_target, test_input, test_target = get_data()
# normalize the data
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

# converting Tensors into Variables before using themin model
train_input = Variable(train_input)
train_target = Variable(train_target)
test_input = Variable(test_input)
test_target = Variable(test_target)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!
train_input.size(0) 60000
test_input.size(0) 10000


Train the model with specific set of parameters, i.e lr=0.1, , it takes so much time, like 2 min GPU

In [6]:
model = create_mnist_model()
if torch.cuda.is_available():
  model = model.cuda()
  train_input, train_target,test_input, test_target= train_input.cuda(), train_target.cuda(),test_input.cuda(), test_target.cuda()
nb_epochs = 150
mini_batch = 100
lr = 1e-1

train_model_sgd(model, train_input, train_target, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = lr)

Print error rate on train and test set

In [7]:
# nb_epochs = 150, mini_batch = 100, lr = 1e-1
print_errors(mini_batch)

train error = 0.00%
test error = 2.07%


Train with new learning rates

In [8]:
# nb_epochs = 150, mini_batch = 100, lr = 1e-2
lr = 1e-2
train_model_sgd(model, train_input, train_target, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = lr)
print_errors(mini_batch)

train error = 0.00%
test error = 2.08%


In [9]:
# nb_epochs = 150, mini_batch = 100, lr = 1e-3
lr = 1e-3
train_model_sgd(model, train_input, train_target, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = lr)
print_errors(mini_batch)

train error = 0.00%
test error = 2.08%


In [10]:
# nb_epochs = 150, mini_batch = 100, lr = 1e-6
lr = 1e-6
train_model_sgd(model, train_input, train_target, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = lr)
print_errors(mini_batch)

train error = 0.00%
test error = 2.08%


In [11]:
# nb_epochs = 150, mini_batch = 100, lr = 1
lr = 1
train_model_sgd(model, train_input, train_target, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = lr)
print_errors(mini_batch)

train error = 35.22%
test error = 36.86%


In [12]:
# nb_epochs = 150, mini_batch = 100, lr = 0.5
lr = 0.5
train_model_sgd(model, train_input, train_target, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = lr)
print_errors(mini_batch)

train error = 32.70%
test error = 35.00%
