In [1]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from itertools import product
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms
from torchvision import datasets

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

%matplotlib inline

In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.BN1 = nn.BatchNorm2d(16) 
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.BN2 = nn.BatchNorm1d(120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.pool(self.conv1(x)))
        x = self.BN1(F.relu(self.pool(self.conv2(x))))
        x = x.view(-1, 16 * 5 * 5)
        x = self.BN2(F.relu(self.fc1(x)))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [3]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

trainset = datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [4]:
# def imshow(img):
#     img = img / 2 + 0.5     # unnormalize
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()

# # get some random training images
# dataiter = iter(trainloader)
# images, labels = dataiter.next()

# # show images
# imshow(torchvision.utils.make_grid(images))
# # print labels
# print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

In [5]:
### Hyperparameters

shuffle=True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
optims = {'Momentum_SGD': optim.SGD(net.parameters(), lr=lr), 
             'ADAM': optim.Adam(net.parameters(), lr=lr)}

parameters = OrderedDict(
batch_size=[100],
lr = [0.01],
# optimiser = list(optims.keys())
)

param_values = [v for v in parameters.values()]
print(param_values)

NameError: name 'net' is not defined

In [120]:
%%time

for batch_size, lr in product(*param_values):
    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=shuffle, num_workers=2)
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

    net = Net().to(device)
    optimizer = optim.Adam(net.parameters(), lr=lr)
#     optimizer = optims[optimiser]
    criterion = nn.CrossEntropyLoss()

    comment = f'cifar10 batch_size={batch_size} lr={lr} optimizer={optimiser}'
    print(comment)
    print(optimizer)
    tb = SummaryWriter(comment=comment)
    tb_count=0

    for epoch in range(2): 
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 100 == 99:   
                tb_count += 1
                tb.add_scalar('Running Loss', running_loss/100, tb_count)
                print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss / 100))
                running_loss = 0.0

        if epoch % 2 == 1:
            print('At the end of epoch %d' %(epoch+1))
            correct = 0
            total = 0
            with torch.no_grad():
                for data in trainloader:
                    images, labels = data[0].to(device), data[1].to(device)
                    outputs = net(images)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            tb.add_scalar('Train Accuracy', 100 * correct / total, epoch+1)
            print('Accuracy of the network on the 60000 train images: %d %%' % (100 * correct / total))

            with torch.no_grad():
                for data in testloader:
                    images, labels = data[0].to(device), data[1].to(device)
                    outputs = net(images)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            tb.add_scalar('Test Accuracy', 100 * correct / total, epoch+1)
            print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

    tb.close()
    print('Finished Training')

cifar10 batch_size=100 lr=0.01 optimizer=ADAM
Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.01
    weight_decay: 0
)
[1,   100] loss: 1.762
[1,   200] loss: 1.521
[1,   300] loss: 1.443
[1,   400] loss: 1.355
[1,   500] loss: 1.347
[2,   100] loss: 1.266
[2,   200] loss: 1.264
[2,   300] loss: 1.202
[2,   400] loss: 1.181
[2,   500] loss: 1.176
At the end of epoch 2
Accuracy of the network on the 60000 train images: 59 %
Accuracy of the network on the 10000 test images: 59 %
Finished Training
Wall time: 1min 4s


In [95]:
# %%time

# correct = 0
# total = 0
# with torch.no_grad():
#     for data in trainloader:
#         images, labels = data[0].to(device), data[1].to(device)
#         outputs = net(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

# print('Accuracy of the network on the 60000 train images: %d %%' % (
#     100 * correct / total))

# with torch.no_grad():
#     for data in testloader:
#         images, labels = data[0].to(device), data[1].to(device)
#         outputs = net(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

# print('Accuracy of the network on the 10000 test images: %d %%' % (
#     100 * correct / total))

In [96]:
# class_correct = list(0. for i in range(10))
# class_total = list(0. for i in range(10))
# with torch.no_grad():
#     for data in testloader:
#         images, labels = data[0].to(device), data[1].to(device)
#         outputs = net(images)
#         _, predicted = torch.max(outputs, 1)
#         c = (predicted == labels).squeeze()
#         for i in range(100):
#             label = labels[i]
#             class_correct[label] += c[i].item()
#             class_total[label] += 1


# for i in range(10):
#     print('Accuracy of %5s : %2d %%' % (
#         classes[i], 100 * class_correct[i] / class_total[i]))

In [57]:
# PATH = './cifar_net.pth'
# torch.save(net.state_dict(), PATH)

In [58]:
# net = Net()
# net.load_state_dict(torch.load(PATH))

In [59]:
# dataiter = iter(testloader)
# images, labels = dataiter.next()

# # print images
# imshow(torchvision.utils.make_grid(images[:4]))
# print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

In [60]:
# outputs = net(images)
# _, predicted = torch.max(outputs, 1)

# print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
#                               for j in range(4)))