In [17]:
# Use PyTorch to load the CIFAR-10 data and train a neural network that classifies images into 10 classes
# 1. load data
# 2. define model
# 3. train model
# 4. evaluate model
# 5. make prediction

import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

# 1. load data
# Convert each PIL image to a tensor, then normalize every channel with
# mean 0.5 and std 0.5 so the values end up in [-1, 1].
# The network's first layer takes 3 input channels, so mean and std are given
# explicitly per channel instead of relying on a single value being broadcast
# (torchvision documents both arguments as per-channel sequences).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Loader settings shared by the train and test loaders.
BATCH_SIZE = 4
NUM_WORKERS = 2

# load train data (downloaded into ./data on first use)
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
# load test data
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

# define classes: human-readable names, indexed by the integer label
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# define data loaders: training batches are reshuffled on every pass,
# test batches keep the dataset order
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=NUM_WORKERS)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=NUM_WORKERS)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# 2. define model

# define a convolutional neural network

class Net(nn.Module):
    """Small convolutional classifier: two conv + max-pool stages, then three linear layers.

    The first linear layer expects 16 * 5 * 5 features per sample, which is
    what a 3 x 32 x 32 input yields after the two 5x5 convolutions and the two
    2x2 poolings. The final layer produces 10 raw class scores.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 3 -> 6 -> 16 channels, both with 5x5 kernels.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head: affine maps (y = Wx + b) 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run a batch through the network and return the unnormalized class scores."""
        F = nn.functional
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=(2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)
        # Collapse everything but the batch dimension before the linear layers.
        flat = x.view(-1, self.num_flat_features(x))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first (batch) one."""
        count = 1
        for dim in x.shape[1:]:
            count = count * dim
        return count
    
net = Net()
print(net)

# 3. train model
# Loss: cross-entropy on the raw class scores.
# Optimizer: stochastic gradient descent with momentum over all network parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train for two full passes over the training loader.
for epoch in range(2):
    running_loss = 0.0  # loss accumulated since the last progress line
    for i, (inputs, labels) in enumerate(trainloader):
        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()

        # forward pass -> loss -> backward pass -> parameter update
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # loss.item() extracts the Python scalar held by the loss tensor.
        running_loss += loss.item()
        # Report the mean loss after every 2000th mini-batch, then start over.
        if (i + 1) % 2000 == 0:
            print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss/2000))
            running_loss = 0.0

print('Finished Training')

# save model
# Only the learned parameters (the state_dict) are written to disk.
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)

# 4. evaluate model

# load model
# Build a fresh network and restore the parameters that were just saved, so
# the evaluation below exercises the save/load round trip.
net = Net()
net.load_state_dict(torch.load(PATH))
# Switch to inference mode before evaluating. Net only contains convolution
# and linear layers, so this does not change its outputs; it is the
# conventional guard in case mode-dependent layers are added later.
net.eval()

# test on test data: take the first batch from the test loader.
# Use the built-in next(); DataLoader iterators do not provide a .next() method
# in current PyTorch releases.
dataiter = iter(testloader)
images, labels = next(dataiter)

# print images
# imshow(torchvision.utils.make_grid(images))
# print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

# make prediction
# No gradients are needed for inference, so skip building the autograd graph.
with torch.no_grad():
    outputs = net(images)
# torch.max over dim 1 returns (max value, index of max value); the index is
# the predicted class.
_, predicted = torch.max(outputs, 1)
# print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))

# test on whole test data
correct = 0  # number of samples whose predicted class equals the label
total = 0    # number of samples seen
with torch.no_grad():  # disable gradient calculation
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        # .item() converts the one-element count tensor to a Python number.
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

