In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
import matplotlib.pyplot as plt
import numpy as np
import random

In [None]:
# Select the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU. Printed so the choice is visible in the cell output.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
print(device)

In [None]:
# Preprocessing applied to every image: convert to a tensor, then normalize each
# RGB channel with mean 0.5 / std 0.5 so values land between -1 and 1 and the
# data range is not too big.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        (0.5, 0.5, 0.5),
        (0.5, 0.5, 0.5)
    )
])

# CIFAR-10 training and held-out test splits (downloaded into ./data if missing).
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=4, shuffle=True)
# The evaluation loader must wrap test_set (not train_set); otherwise the reported
# "test" accuracy is measured on images the network was trained on.
test_loader = torch.utils.data.DataLoader(test_set, batch_size=4, shuffle=False)
# Fixed-order batches of 100 training images, used to pick a sample image to visualise.
try_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=False)

# Human-readable class names, indexed by CIFAR-10 label id.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
# convolutional classifier used throughout the notebook
class ConvNeuralNet(nn.Module):
  """Two conv + max-pool stages followed by three fully connected layers.

  The first linear layer expects 128 * 6 * 6 flattened features. A 32x32 input
  produces exactly that: each unpadded 3x3 convolution trims 2 pixels per side
  and each 2x2 pool halves the size (32 -> 30 -> 15 -> 13 -> 6).
  """

  def __init__(self):
    super().__init__()  # nn.Module bookkeeping must be set up before layers are assigned

    # Feature extractor: 3 input channels (R, G, B) -> 64 learned filters -> 128
    # learned filters, both with 3x3 kernels.
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3)
    self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)

    # One max-pool (2x2 window, stride 2) reused after each convolution; it halves
    # height and width, cutting the parameter count of the layers that follow.
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    # Fully connected classifier head ending in 10 outputs (one per class).
    self.fc1 = nn.Linear(in_features=128 * 6 * 6, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)

  def forward(self, x):
    """Map a batch of images to per-class log-probabilities (log_softmax over dim 1)."""
    features = self.pool(F.relu(self.conv1(x)))
    features = self.pool(F.relu(self.conv2(features)))
    flat = torch.flatten(features, start_dim=1)  # keep the batch dimension, flatten the rest
    hidden = F.relu(self.fc1(flat))
    hidden = F.relu(self.fc2(hidden))
    return F.log_softmax(self.fc3(hidden), dim=1)

# Build the model and move its parameters to the selected device. Module.to()
# returns the module itself, so the chained call binds the same object to `net`.
net = ConvNeuralNet().to(device)
net  # last expression of the cell: echo the architecture, as the bare .to() call did

In [None]:
# NLLLoss consumes log-probabilities, which is what the network's log_softmax emits.
loss_function = nn.NLLLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
  running_loss = 0.0  # loss summed since the last progress line

  for i, (inputs, labels) in enumerate(train_loader):
    inputs = inputs.to(device)
    labels = labels.to(device)

    optimizer.zero_grad()                      # drop gradients left from the previous step
    loss = loss_function(net(inputs), labels)  # forward pass + loss
    loss.backward()                            # gradients for every trainable weight
    optimizer.step()                           # apply the update

    running_loss += loss.item()
    if (i + 1) % 2000 != 0:
      continue
    # Every 2000 mini-batches: report the mean loss over that window, then reset.
    print(f'[{epoch+1}/{epochs}, {i+1:5d}] loss: {running_loss / 2000:.3f}')
    running_loss = 0.0

print("Finished training.")

In [None]:
# method for testing it out of the training loop

def view_classification(image, probabilities):
  """Plot an image beside a horizontal bar chart of its class probabilities.

  Args:
    image: channel-first image tensor (C, H, W) normalized with mean 0.5 /
      std 0.5, as produced by this notebook's transform. May live on any device.
    probabilities: tensor of class probabilities; after squeezing it must hold
      one value per entry of the module-level `classes` tuple. May live on any
      device and may be attached to the autograd graph.

  Returns:
    None. A new matplotlib figure is created and drawn into.
  """
  # detach() + cpu() make the NumPy conversion valid for CUDA tensors and for
  # tensors that track gradients; Tensor.numpy() alone rejects both.
  probabilities = probabilities.detach().cpu().numpy().squeeze()
  class_positions = np.arange(len(classes))  # one bar per class name

  fig, (ax1, ax2) = plt.subplots(figsize=(6, 9), ncols=2)

  # matplotlib wants channel-last (H, W, C) data on the CPU.
  image = image.detach().cpu().permute(1, 2, 0)
  # Undo Normalize(0.5, 0.5); clamp guards against rounding just outside [0, 1].
  denormalized_image = (image / 2 + 0.5).clamp(0, 1)
  ax1.imshow(denormalized_image.numpy())
  ax1.axis('off')
  ax2.barh(class_positions, probabilities)
  ax2.set_aspect(0.1)
  ax2.set_yticks(class_positions)
  ax2.set_yticklabels(classes)
  ax2.set_title('Class Probability')
  ax2.set_xlim(0, 1.1)
  plt.tight_layout()

In [None]:
# Take the first batch from try_loader and classify one randomly chosen image from it.
images, _ = next(iter(try_loader))
# randrange excludes its upper bound, so the index is always valid for the batch.
# randint(0, 100) is inclusive and could return 100, one past the last image.
image_number = random.randrange(len(images))

image = images[image_number]
batched_image = image.unsqueeze(0).to(device)  # add a batch dimension of 1

with torch.no_grad():
    log_probabilities = net(batched_image)

# The network emits log-probabilities; exp() turns them back into probabilities.
probabilities = torch.exp(log_probabilities).squeeze().cpu()
view_classification(image, probabilities)

In [None]:
# test overall accuracy
correct = 0
total = 0
with torch.no_grad():  # inference only: no autograd graph is built
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        # Index of the largest log-probability per row is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated instead of a hard-coded count,
# so the message stays truthful whatever test_loader wraps.
print(f'Accuracy of the network on the {total} test images: {100 * correct // total} %')
# 84% accuracy, good but could be better with more layers?
# NOTE(review): confirm which dataset split test_loader wraps before trusting
# the 84% figure as held-out accuracy; re-run this cell to refresh it.