Source: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py


Generally, when you have to deal with image, text, audio or video data, you can use standard python packages that load data into a numpy array. Then you can convert this array into a `torch.*Tensor`.

For images, packages such as `Pillow`, `OpenCV` are useful
For audio, packages such as `scipy` and `librosa`
For text, either raw Python or Cython based loading, or `NLTK` and `SpaCy` are useful
Specifically for vision, we have created a package called `torchvision`, that has data loaders for common datasets such as Imagenet, CIFAR10, MNIST, etc. and data transformers for images, viz., `torchvision.datasets` and `torch.utils.data.DataLoader`.

This provides a huge convenience and avoids writing boilerplate code.

For this tutorial, we will use the `CIFAR10` dataset. It has the classes: `‘airplane’, ‘automobile’, ‘bird’, ‘cat’, ‘deer’, ‘dog’, ‘frog’, ‘horse’, ‘ship’, ‘truck’`. The images in `CIFAR-10` are of size `3x32x32`, i.e. 3-channel color images of 32x32 pixels in size.
![CIFAR-10](https://pytorch.org/tutorials/_images/cifar10.png)

## Training an image classifier
We will do the following steps in order:

1. Load and normalizing the CIFAR10 training and test datasets using `torchvision`
2. Define a Convolutional Neural Network
3. Define a loss function
4. Train the network on the training data
5. Test the network on the test data


## 1. Loading and normalizing `CIFAR10`

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

The output of torchvision datasets are PILImage images of range `[0, 1]`. We transform them to Tensors of normalized range `[-1, 1]`. .. note:

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)

cuda:0


In [3]:
transform = transforms.Compose([
    transforms.ToTensor(), 
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 
])
transform

Compose(
    ToTensor()
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
)

In [4]:
# Downloading the dataset
train_ds = torchvision.datasets.CIFAR10(root = './data/', train=True, download = True, transform=transform)
test_ds = torchvision.datasets.CIFAR10(root = './data/', train=False, download = True, transform=transform)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [5]:
# Converting the dataset into a DataLoader
train_dl = torch.utils.data.DataLoader(train_ds, shuffle=True, batch_size= 32, num_workers=4)
test_dl = torch.utils.data.DataLoader(test_ds, shuffle=False, batch_size= 32, num_workers=4)

## Data Visualizations

In [6]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
def imshow(img):
    img = img/2 + 0.5
    npimg = img.numpy()
    plt.figure(figsize=(8,1))
    plt.imshow(np.transpose(npimg, (1,2,0)))
    plt.axis(False)
    plt.show()
    
dataiter = iter(train_dl)
images, labels = dataiter.next()

In [None]:
collage = torchvision.utils.make_grid(images)
imshow(collage)
print(' '.join('%5s' % classes[labels[j]] for j in range(8)))

# 2. Define a Convolutional Neural Network

In [7]:
import torch.nn as nn
import torch.nn.functional as F

In [8]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=(2,2))
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=32, kernel_size=5)
        self.fc1 = nn.Linear(in_features=32*5*5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 32*5*5)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x



Net(
  (conv1): Conv2d(3, 12, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 32, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [9]:
model = Net()
model.to(device)

Net(
  (conv1): Conv2d(3, 12, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 32, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

# 3. Define a Loss function and optimizer
Let’s use a Classification Cross-Entropy loss and SGD with momentum.

In [17]:
import torch.optim as optim

loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
epochs = 30

# 4. Train the network
This is when things start to get interesting. We simply have to loop over our data iterator, and feed the inputs to the network and optimize.

In [18]:
def fit (epochs, model, train_dl, loss_func, optimizer):
    for epoch in range(epochs):         #One epoch runs over the whole image set
        running_loss = 0.0
        for i, data in enumerate(train_dl, start=0):
            xb, yb = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()

            # Forward  + backward + optimize
            preds = model(xb)
            loss = loss_func(preds, yb)
            loss.backward()
            optimizer.step()

            #print statistics
            running_loss += loss.item()
            if i%500==499:
                print('[Epoch: %d, Batch: %5d] loss: %.3f' %(epoch, i+1, running_loss/1000))
                running_loss = 0.0
    print('Finished Training :) ')
        
   
        

In [21]:
fit (epochs, model, train_dl, loss_func, optimizer)

[Epoch: 0, Batch:   500] loss: 0.281
[Epoch: 0, Batch:  1000] loss: 0.288
[Epoch: 0, Batch:  1500] loss: 0.291
[Epoch: 1, Batch:   500] loss: 0.280
[Epoch: 1, Batch:  1000] loss: 0.286
[Epoch: 1, Batch:  1500] loss: 0.296
[Epoch: 2, Batch:   500] loss: 0.278
[Epoch: 2, Batch:  1000] loss: 0.289
[Epoch: 2, Batch:  1500] loss: 0.290
[Epoch: 3, Batch:   500] loss: 0.275
[Epoch: 3, Batch:  1000] loss: 0.281
[Epoch: 3, Batch:  1500] loss: 0.292
[Epoch: 4, Batch:   500] loss: 0.271
[Epoch: 4, Batch:  1000] loss: 0.288
[Epoch: 4, Batch:  1500] loss: 0.293
[Epoch: 5, Batch:   500] loss: 0.274
[Epoch: 5, Batch:  1000] loss: 0.280
[Epoch: 5, Batch:  1500] loss: 0.294
[Epoch: 6, Batch:   500] loss: 0.274
[Epoch: 6, Batch:  1000] loss: 0.277
[Epoch: 6, Batch:  1500] loss: 0.289
[Epoch: 7, Batch:   500] loss: 0.275
[Epoch: 7, Batch:  1000] loss: 0.281
[Epoch: 7, Batch:  1500] loss: 0.282
[Epoch: 8, Batch:   500] loss: 0.268
[Epoch: 8, Batch:  1000] loss: 0.282
[Epoch: 8, Batch:  1500] loss: 0.285
[

# 5. Saving and Loading the trained model
let us save the model so that next time we can start from here. 

In [None]:
# Saving the trained model
savepath = './cifar-stage1-gpu.pth'
torch.save(model.state_dict(), savepath)

In [10]:
savepath = './cifar-stage1-gpu.pth'
model = Net()
model.to(device)
model.load_state_dict(torch.load(savepath))

<All keys matched successfully>

# 6. Test the network on the training data
We have trained the network for 2 passes over the training dataset. But we need to check if the network has learnt anything at all.

We will check this by predicting the class label that the neural network outputs, and checking it against the ground-truth. If the prediction is correct, we add the sample to the list of correct predictions.
Okay, first step. Let us display an image from the test set to get familiar.

In [None]:
def grid_show(images, labels, classes, preds = None):
    """
    This function makes grid of images and labels given. 
    Both original labels and predictions can be given. 
    """
    no_of_images = images.shape[0]
    cols = 8
    rows = no_of_images // cols
    # Changing the channels of the image in order to fit it to the matplotlib format view
    images = images.cpu()
    images = np.array(images).transpose(0,2,3,1)
    # De Normalizing the images
    images = (images/2 + 0.5)*255
    

    plt.figure(figsize=(cols+3, rows+2))
    index = 0
    for i in range(rows):
        for j in range(cols):
            plt.subplot(rows, cols, index+1)
            plt.imshow(images[index].astype('uint8'))
            if preds is None:
                plt.title(str(classes[labels[index].item()]))
            else:
                plt.title(str(classes[labels[index].item()]) + '/' + str(classes[preds[index].item()]))
            plt.axis(False)
            index +=1
    plt.show()
    

In [None]:
def get_predictions(model, images):
    """
    Inference function for a single batch
    """
    with torch.no_grad():
        _, predicted = torch.max(model(images), 1)
    return predicted

In [None]:
def show_predictions(model, test_dl, classes, batches = 1):
    """Function to show the predictions in a grid format for mutiple batches."""
    print('Original/Predicted')
    dataiter = iter(test_dl)
    
    for i in range(batches):
        data = dataiter.next()
        print('Batch No: ', i+1)
        images, labels = data[0].to(device), data[1].to(device) 
        preds = get_predictions(model, images)
        grid_show(images, labels, classes, preds=preds)



        
    
    

In [None]:
show_predictions(model, test_dl, classes, batches=2)

## Calculating the accuracy over the whole testset
Now let us have a look at the whole dataset and try to find out how our model performs over the dataset. 

In [11]:
def get_accuracy(model, test_dl):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_dl:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return  100 * correct / total

In [23]:
get_accuracy(model, test_dl)

67.25

In [24]:
def get_class_accuracy(model, test_dl, classes):
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))
    with torch.no_grad():
        for data in test_dl:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            c = (predicted == labels).squeeze()
            for i in range(4):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1


    for i in range(10):
        print('Accuracy of %5s : %2d%%' % (
            classes[i], 100 * class_correct[i] / class_total[i]))

In [25]:
get_class_accuracy(model, test_dl, classes)

Accuracy of plane : 68%
Accuracy of   car : 87%
Accuracy of  bird : 46%
Accuracy of   cat : 48%
Accuracy of  deer : 62%
Accuracy of   dog : 65%
Accuracy of  frog : 60%
Accuracy of horse : 67%
Accuracy of  ship : 78%
Accuracy of truck : 82%
