# Problem 3

Use this notebook to write your code for problem 3.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## 3D - Convolutional network

As in problem 2, we have provided code that loads and preprocesses the MNIST data for you.

In [None]:
# load MNIST data into PyTorch format
import torch
import torchvision
import torchvision.transforms as transforms

# number of images per mini-batch, shared by the training and test loaders
batch_size = 32

# MNIST training split, downloaded into the data/ folder.
# transforms.ToTensor() turns each image into a float tensor and rescales
# pixel values from 0-255 to 0-1.0; batching is done later by the DataLoader.
mnist_training_data = torchvision.datasets.MNIST(
    root='data/',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# training batches are reshuffled every time the loader is iterated
training_data_loader = torch.utils.data.DataLoader(
    dataset=mnist_training_data,
    batch_size=batch_size,
    shuffle=True,
)

# MNIST test split, same preprocessing, served in a fixed order
mnist_test_data = torchvision.datasets.MNIST(
    root='data/',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
test_data_loader = torch.utils.data.DataLoader(
    dataset=mnist_test_data,
    batch_size=batch_size,
    shuffle=False,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 121816451.44it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 33178771.25it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 156220722.76it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 13646510.58it/s]


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [None]:
# look at the number of batches per epoch for training and validation
print(f'{len(training_data_loader)} training batches')
# Count samples from the dataset itself: batches * batch_size overcounts
# whenever the final batch is only partially filled.
print(f'{len(training_data_loader.dataset)} training samples')
print(f'{len(test_data_loader)} validation batches')

1875 training batches
60000 training samples
313 validation batches


In [None]:
# sample model
import torch.nn as nn

# Baseline CNN: two conv -> ReLU -> max-pool -> dropout stages followed by a
# two-layer fully connected classifier over the 10 digit classes.
model = nn.Sequential(
    # stage 1: 1 input channel -> 8 feature maps
    nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(0.5),

    # stage 2: 8 -> 8 feature maps
    nn.Conv2d(in_channels=8, out_channels=8, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(0.5),

    # classifier head on the 200 flattened features
    nn.Flatten(),
    nn.Linear(in_features=8 * 25, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=10),
    # no softmax here: PyTorch implementation of cross-entropy loss includes softmax layer
)

In [None]:
# Walk the learnable tensors in registration order and show each one's shape.
# (The loop variable stays `p`: it remains defined after the loop and a later
# cell reads it.)
for p in model.parameters():
    print(p.shape)

torch.Size([8, 1, 3, 3])
torch.Size([8])
torch.Size([8, 8, 3, 3])
torch.Size([8])
torch.Size([64, 200])
torch.Size([64])
torch.Size([10, 64])
torch.Size([10])


In [None]:
# Tally the scalar entries across every parameter tensor of the model.
count = 0
for p in model.parameters():
    # numel() is the element count of the tensor, i.e. the product of its dims
    n_params = p.numel()
    count = count + n_params
print(f'total params: {count}')

total params: 14178


In [None]:
# Loss and optimizer for a multi-class classification problem
import torch.optim as optim
# RMSprop over all model parameters, with no hyperparameters overridden
optimizer = optim.RMSprop(params=model.parameters())
# categorical cross-entropy computed from the network outputs
criterion = nn.CrossEntropyLoss()

In [None]:
# Train the model for n_epochs epochs, iterating on the data in batches
n_epochs = 10

# store metrics: one row per epoch
training_accuracy_history = np.zeros([n_epochs, 1])
training_loss_history = np.zeros([n_epochs, 1])
validation_accuracy_history = np.zeros([n_epochs, 1])
validation_loss_history = np.zeros([n_epochs, 1])

for epoch in range(n_epochs):
    # report progress against n_epochs so the banner stays correct if it changes
    print(f'Epoch {epoch+1}/{n_epochs}:', end='')
    train_total = 0
    train_correct = 0
    # train (train mode keeps dropout active)
    model.train()
    for i, data in enumerate(training_data_loader):
        images, labels = data
        optimizer.zero_grad()
        # forward pass
        output = model(images)
        # calculate categorical cross entropy loss
        loss = criterion(output, labels)
        # backward pass
        loss.backward()
        optimizer.step()

        # track training accuracy: predicted class = index of the largest output
        _, predicted = torch.max(output.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
        # track training loss (summed per batch, averaged after the loop)
        training_loss_history[epoch] += loss.item()
        # progress update after 180 batches (~1/10 epoch for batch size 32)
        if i % 180 == 0: print('.',end='')
    # NOTE: per-batch losses are averaged with equal weight, so a smaller
    # final batch counts as much as a full one.
    training_loss_history[epoch] /= len(training_data_loader)
    training_accuracy_history[epoch] = train_correct / train_total
    print(f'\n\tloss: {training_loss_history[epoch,0]:0.4f}, acc: {training_accuracy_history[epoch,0]:0.4f}',end='')

    # validate
    test_total = 0
    test_correct = 0
    with torch.no_grad():
        # eval mode disables dropout for the validation pass
        model.eval()
        for i, data in enumerate(test_data_loader):
            images, labels = data
            # forward pass
            output = model(images)
            # find accuracy
            _, predicted = torch.max(output.data, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
            # find loss
            loss = criterion(output, labels)
            validation_loss_history[epoch] += loss.item()
        validation_loss_history[epoch] /= len(test_data_loader)
        validation_accuracy_history[epoch] = test_correct / test_total
    print(f', val loss: {validation_loss_history[epoch,0]:0.4f}, val acc: {validation_accuracy_history[epoch,0]:0.4f}')

Epoch 1/10:...........
	loss: 0.6516, acc: 0.7884, val loss: 0.1586, val acc: 0.9552
Epoch 2/10:...........
	loss: 0.4088, acc: 0.8720, val loss: 0.1800, val acc: 0.9496
Epoch 3/10:...........
	loss: 0.3858, acc: 0.8797, val loss: 0.1453, val acc: 0.9558
Epoch 4/10:...........
	loss: 0.3809, acc: 0.8823, val loss: 0.1391, val acc: 0.9595
Epoch 5/10:...........
	loss: 0.3743, acc: 0.8857, val loss: 0.1364, val acc: 0.9595
Epoch 6/10:...........
	loss: 0.3605, acc: 0.8892, val loss: 0.1474, val acc: 0.9601
Epoch 7/10:...........
	loss: 0.3478, acc: 0.8943, val loss: 0.2071, val acc: 0.9449
Epoch 8/10:...........
	loss: 0.3642, acc: 0.8898, val loss: 0.2053, val acc: 0.9456
Epoch 9/10:...........
	loss: 0.3616, acc: 0.8902, val loss: 0.1517, val acc: 0.9584
Epoch 10/10:...........
	loss: 0.3650, acc: 0.8888, val loss: 0.1834, val acc: 0.9518


Above, we output the training loss/accuracy as well as the validation loss and accuracy. Not bad! Let's see if you can do better.

## **My Model**

In [None]:
import torch.nn as nn
import numpy as np
import torch.optim as optim

In [None]:
# Ten evenly spaced dropout probabilities; both endpoints (0.1 and 1.0) are included.
probs = np.linspace(start=0.1, stop=1.0, num=10)

In [None]:
#Trying different dropout rates
# For every rate in probs a fresh model is trained for a single epoch and then
# evaluated on the test loader.

# store metrics: one row per dropout rate. Allocated once, before the loop, so
# results from earlier rates are kept instead of being reset on every iteration.
training_accuracy_history = np.zeros([len(probs), 1])
training_loss_history = np.zeros([len(probs), 1])
validation_accuracy_history = np.zeros([len(probs), 1])
validation_loss_history = np.zeros([len(probs), 1])

# idx is the row for this rate; the cell no longer depends on an `epoch`
# variable left behind by an earlier training cell.
for idx, pr in enumerate(probs):
  print('dropout: {}'.format(pr))
  model = nn.Sequential(
        nn.Conv2d(1, 16, kernel_size=(3,3)),
        nn.ReLU(),
        nn.MaxPool2d(2),
        nn.Dropout(p=pr),

        nn.Conv2d(16, 8, kernel_size=(3,3)),
        nn.BatchNorm2d(8),
        nn.ReLU(),
        nn.MaxPool2d(2),
        nn.Dropout(p=pr),

        nn.Flatten(),
        nn.Linear(25*8, 64),
        nn.ReLU(),
        nn.Linear(64, 10)
    )

  # new loss and optimizer for each freshly built model
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.RMSprop(model.parameters())

  train_total = 0
  train_correct = 0
  # train (one pass over the training data)
  model.train()
  for i, data in enumerate(training_data_loader):
      images, labels = data
      optimizer.zero_grad()
      # forward pass
      output = model(images)
      # calculate categorical cross entropy loss
      loss = criterion(output, labels)
      # backward pass
      loss.backward()
      optimizer.step()

      # track training accuracy
      _, predicted = torch.max(output.data, 1)
      train_total += labels.size(0)
      train_correct += (predicted == labels).sum().item()
      # track training loss
      training_loss_history[idx] += loss.item()
      # progress update after 180 batches (~1/10 epoch for batch size 32)
      if i % 180 == 0: print('.',end='')
  training_loss_history[idx] /= len(training_data_loader)
  training_accuracy_history[idx] = train_correct / train_total
  print(f'\n\tloss: {training_loss_history[idx,0]:0.4f}, acc: {training_accuracy_history[idx,0]:0.4f}',end='')

  # validate
  test_total = 0
  test_correct = 0
  with torch.no_grad():
      # eval mode disables dropout for the validation pass
      model.eval()
      for i, data in enumerate(test_data_loader):
          images, labels = data
          # forward pass
          output = model(images)
          # find accuracy
          _, predicted = torch.max(output.data, 1)
          test_total += labels.size(0)
          test_correct += (predicted == labels).sum().item()
          # find loss
          loss = criterion(output, labels)
          validation_loss_history[idx] += loss.item()
      validation_loss_history[idx] /= len(test_data_loader)
      validation_accuracy_history[idx] = test_correct / test_total
  print(f', val loss: {validation_loss_history[idx,0]:0.4f}, val acc: {validation_accuracy_history[idx,0]:0.4f}')


dropout: 0.1
...........
	loss: 0.2462, acc: 0.9266, val loss: 0.0728, val acc: 0.9775
dropout: 0.2
...........
	loss: 0.2673, acc: 0.9237, val loss: 0.0979, val acc: 0.9691
dropout: 0.30000000000000004
...........
	loss: 0.3554, acc: 0.8909, val loss: 0.1040, val acc: 0.9676
dropout: 0.4
...........
	loss: 0.4143, acc: 0.8719, val loss: 0.1171, val acc: 0.9635
dropout: 0.5
...........
	loss: 0.4921, acc: 0.8458, val loss: 0.1527, val acc: 0.9563
dropout: 0.6
...........
	loss: 0.6477, acc: 0.7896, val loss: 0.1758, val acc: 0.9472
dropout: 0.7000000000000001
...........
	loss: 0.8653, acc: 0.7180, val loss: 0.3300, val acc: 0.9195
dropout: 0.8
...........
	loss: 1.2097, acc: 0.5922, val loss: 0.4944, val acc: 0.8699
dropout: 0.9
...........
	loss: 2.0022, acc: 0.2823, val loss: 2.0161, val acc: 0.2158
dropout: 1.0
...........
	loss: 2.3028, acc: 0.1074, val loss: 84.4860, val acc: 0.0982


In [None]:
#final model
# Same layout as the models in the dropout sweep, with dropout fixed at 0.1.
model = nn.Sequential(
    # stage 1: 1 input channel -> 16 feature maps
    nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(0.1),

    # stage 2: 16 -> 8 feature maps, batch-normalised before the ReLU
    nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3),
    nn.BatchNorm2d(num_features=8),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(0.1),

    # classifier head on the 200 flattened features
    nn.Flatten(),
    nn.Linear(in_features=8 * 25, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=10),
)

# Print the shape of every parameter tensor of the current model. The loop
# header had been commented out, which left the print reading a stale `p`
# from an earlier cell.
for p in model.parameters():
    print(p.data.shape)
# our model has some # of parameters:
count = 0
for p in model.parameters():
    # number of scalar entries in this tensor = product of its dimensions
    n_params = np.prod(list(p.data.shape)).item()
    count += n_params
print(f'total params: {count}')

# loss and optimizer for the final model
criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters())

# Train the model for n_epochs epochs, iterating on the data in batches
n_epochs = 10

# store metrics: one row per epoch
training_accuracy_history = np.zeros([n_epochs, 1])
training_loss_history = np.zeros([n_epochs, 1])
validation_accuracy_history = np.zeros([n_epochs, 1])
validation_loss_history = np.zeros([n_epochs, 1])

for epoch in range(n_epochs):
    # report progress against n_epochs so the banner stays correct if it changes
    print(f'Epoch {epoch+1}/{n_epochs}:', end='')
    train_total = 0
    train_correct = 0
    # train (train mode keeps dropout active and batch norm in training mode)
    model.train()
    for i, data in enumerate(training_data_loader):
        images, labels = data
        optimizer.zero_grad()
        # forward pass
        output = model(images)
        # calculate categorical cross entropy loss
        loss = criterion(output, labels)
        # backward pass
        loss.backward()
        optimizer.step()

        # track training accuracy: predicted class = index of the largest output
        _, predicted = torch.max(output.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
        # track training loss (summed per batch, averaged after the loop)
        training_loss_history[epoch] += loss.item()
        # progress update after 180 batches (~1/10 epoch for batch size 32)
        if i % 180 == 0: print('.',end='')
    training_loss_history[epoch] /= len(training_data_loader)
    training_accuracy_history[epoch] = train_correct / train_total
    print(f'\n\tloss: {training_loss_history[epoch,0]:0.4f}, acc: {training_accuracy_history[epoch,0]:0.4f}',end='')

    # validate
    test_total = 0
    test_correct = 0
    with torch.no_grad():
        # eval mode for the validation pass
        model.eval()
        for i, data in enumerate(test_data_loader):
            images, labels = data
            # forward pass
            output = model(images)
            # find accuracy
            _, predicted = torch.max(output.data, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
            # find loss
            loss = criterion(output, labels)
            validation_loss_history[epoch] += loss.item()
        validation_loss_history[epoch] /= len(test_data_loader)
        validation_accuracy_history[epoch] = test_correct / test_total
    print(f', val loss: {validation_loss_history[epoch,0]:0.4f}, val acc: {validation_accuracy_history[epoch,0]:0.4f}')

torch.Size([10])
total params: 14850
Epoch 1/10:...........
	loss: 0.2303, acc: 0.9309, val loss: 0.0824, val acc: 0.9729
Epoch 2/10:...........
	loss: 0.1120, acc: 0.9650, val loss: 0.0858, val acc: 0.9716
Epoch 3/10:...........
	loss: 0.0983, acc: 0.9694, val loss: 0.0614, val acc: 0.9811
Epoch 4/10:...........
	loss: 0.0902, acc: 0.9720, val loss: 0.0765, val acc: 0.9763
Epoch 5/10:...........
	loss: 0.0854, acc: 0.9740, val loss: 0.0718, val acc: 0.9782
Epoch 6/10:...........
	loss: 0.0836, acc: 0.9743, val loss: 0.0506, val acc: 0.9851
Epoch 7/10:...........
	loss: 0.0813, acc: 0.9746, val loss: 0.0623, val acc: 0.9812
Epoch 8/10:...........
	loss: 0.0760, acc: 0.9767, val loss: 0.0498, val acc: 0.9845
Epoch 9/10:...........
	loss: 0.0756, acc: 0.9767, val loss: 0.0621, val acc: 0.9811
Epoch 10/10:...........
	loss: 0.0731, acc: 0.9786, val loss: 0.0469, val acc: 0.9869
