# Introduction

This basic guide to CNNs is based on the official PyTorch examples:
- [https://github.com/pytorch/examples/tree/master/mnist](https://github.com/pytorch/examples/tree/master/mnist). 
- [https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)

In [None]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms, models, utils

import matplotlib.pyplot as plt
import numpy as np

# Settings

In [None]:
# All tunable settings for the notebook, gathered in a single dictionary.
# Trailing comments record alternative values for the same setting.
args = {
    # Training and testing batch size
    "train_batch_size": 8,  # 64
    "test_batch_size": 8,  # 1000

    # How long to train for
    "epochs": 2,  # 100

    # Learning rate: "Speed" with which the optimizer adjusts weights
    "lr": 0.01,

    # Momentum: How quickly the weights respond to changing gradients
    "momentum": 0.5,

    # Whether to use CUDA or not
    "no_cuda": True,

    # Seed for reproducible training
    "seed": 1,

    # How often to spit out log / progress updates
    "log_interval": 10,

    # Whether to save the trained model
    "save_model": False,
}

# CUDA is used only when it has not been disabled AND a CUDA device is available
use_cuda = False if args["no_cuda"] else torch.cuda.is_available()

# Seed PyTorch's random number generator
torch.manual_seed(args["seed"])

# Device that tensors and models are moved to, chosen from `use_cuda`
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Extra keyword arguments for the dataloaders (only populated when using CUDA)
kwargs = {}
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}

# Get Data

## CIFAR-10

In [None]:
# Convert each image to a tensor, then shift/scale every channel with
# mean 0.5 and standard deviation 0.5
cifar10_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split: downloaded to ../data if necessary, served in shuffled batches
cifar10_trainset = datasets.CIFAR10(
    root='../data',
    train=True,
    download=True,
    transform=cifar10_transform,
)
cifar10_trainloader = torch.utils.data.DataLoader(
    cifar10_trainset,
    batch_size=args['train_batch_size'],
    shuffle=True,
    num_workers=2,
)

# Test split: same preprocessing, served in a fixed order
cifar10_testset = datasets.CIFAR10(
    root='../data',
    train=False,
    download=True,
    transform=cifar10_transform,
)
cifar10_testloader = torch.utils.data.DataLoader(
    cifar10_testset,
    batch_size=args['test_batch_size'],
    shuffle=False,
    num_workers=2,
)

# Human-readable class names, indexed by the integer label
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

### Visualize Some Images

In [None]:
def imshow(images):
    """Display a batch of images as a single grid in a matplotlib figure.

    Args:
        images: Batch of image tensors accepted by ``utils.make_grid``.
            The values are assumed to have been normalized with mean 0.5
            and std 0.5 (as ``cifar10_transform`` does); that normalization
            is undone before plotting.
    """
    img_grid = utils.make_grid(images)
    img = img_grid / 2 + 0.5     # unnormalize

    # Tensor.numpy() only works for CPU tensors that do not require grad, so
    # detach from the autograd graph and copy to host memory first. Both
    # calls are no-ops for a plain CPU tensor.
    npimg = img.detach().cpu().numpy()

    fig, ax = plt.subplots(figsize=(20, 10))
    # matplotlib expects (height, width, channels); the grid is (channels, height, width)
    ax.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

In [None]:
# Get some random training images (one iteration of the dataloader).
# The built-in next() is used because iterators expose __next__, not a
# .next() method, in Python 3.
dataiter = iter(cifar10_trainloader)
images, labels = next(dataiter)

imshow(images)

# Print the associated labels. Iterating over the labels themselves (rather
# than over the configured batch size) stays correct for a short batch.
print(' '.join('%5s' % classes[label] for label in labels.tolist()))
print(' ')
print('The size of the image batch is: {}'.format(images.shape))
print('This represents (batch_size, channels, height, width)')

# Model Definition

In [None]:
class CIFAR10Net(nn.Module):
    """Small convolutional classifier with ten outputs.

    Two (convolution -> ReLU -> max-pool) stages are followed by three fully
    connected layers. The final layer's output is returned as-is (no softmax).
    """

    def __init__(self, disp_size):
        """Create the layers.

        Args:
            disp_size: When truthy, ``forward`` prints the shape of the
                tensor after every stage.
        """
        super(CIFAR10Net, self).__init__()

        # Flag whether or not to print out information about the tensor
        self.disp_size = disp_size

        # Convolutions: nn.Conv2d(in_channels, out_channels, kernel_size)
        # Pooling (shared by both stages): nn.MaxPool2d(kernel_size, stride)
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Classifier head: nn.Linear(in_features, out_features)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def _report(self, template, x):
        """Print ``template`` filled in with the shape of ``x`` if printing is enabled."""
        if self.disp_size:
            print(template.format(x.shape))

    def forward(self, x):
        """Run a batch of images through the network and return the ten outputs per image."""
        self._report('x input size:\t\t\t{}', x)

        x = self.pool(F.relu(self.conv1(x)))
        self._report('x after first block:\t\t{}', x)

        x = self.pool(F.relu(self.conv2(x)))
        self._report('x after second block:\t\t{}', x)

        # Flatten to (batch, 16 * 5 * 5), the input width of the first linear layer
        x = x.view(-1, self.fc1.in_features)
        self._report('x after reshape:\t\t{}', x)

        x = F.relu(self.fc1(x))
        self._report('x after first linear layer:\t{}', x)

        x = F.relu(self.fc2(x))
        self._report('x after second linear layer:\t{}', x)

        x = self.fc3(x)
        self._report('x after third linear layer:\t{}', x)
        if self.disp_size:
            print(' ')
        return x


cifar10_net = CIFAR10Net(disp_size=True)

In [None]:
# Run the image batch through the network created with disp_size=True so that
# the tensor shape after every stage is printed; the output is kept in `temp`.
temp = cifar10_net(images)

In [None]:
# Feature extractor made of the network's first two child modules: the first
# convolution followed by the max-pooling layer (no ReLU in between).
cifar10_features = nn.Sequential(cifar10_net.conv1, cifar10_net.pool)

In [None]:
# Feature maps produced for the image batch by the truncated network
outputs = cifar10_features(images)

In [None]:
# Position (within the batch) of the image to inspect
target_img = 1

# Feature maps of that image as a NumPy array, detached from the autograd graph
output_numpy = outputs[target_img].detach().numpy()

# Left: the first feature map. Right: the input image, un-normalized and
# rearranged from (channels, height, width) to (height, width, channels).
fig, ax = plt.subplots(1, 2, figsize=(20, 10))
ax[0].imshow(output_numpy[0])
ax[1].imshow((images[target_img].numpy() / 2 + 0.5).transpose(1, 2, 0))

In [None]:
# `outputs` has one feature map per conv1 output channel (6 per image), which
# cannot be drawn as a single colour image. Show every feature map as its own
# grayscale tile instead: one row per input image, one column per channel.
feature_maps = outputs.detach().cpu()
num_channels, map_height, map_width = feature_maps.shape[1:]
feature_grid = utils.make_grid(
    feature_maps.reshape(-1, 1, map_height, map_width),
    nrow=num_channels,   # tiles per row -> all channels of one image share a row
    normalize=True,      # rescale activations into [0, 1] for display
    scale_each=True)     # ...separately for every tile

fig, ax = plt.subplots(figsize=(20, 10))
# matplotlib expects (height, width, channels)
ax.imshow(np.transpose(feature_grid.numpy(), (1, 2, 0)))
plt.show()

In [None]:
# Start again from a freshly initialized network, this time without the
# per-stage shape printing.
cifar10_net = CIFAR10Net(disp_size=False)
# Cross-entropy loss between the network outputs and the integer class labels
criterion = nn.CrossEntropyLoss()
# Stochastic gradient descent with momentum over all of the network's parameters.
# NOTE(review): lr and momentum are hard-coded here and do not come from
# args["lr"] / args["momentum"] — confirm this is intended.
optimizer = optim.SGD(cifar10_net.parameters(), lr=0.001, momentum=0.9)

In [None]:
for epoch in range(2):  # two full passes over the training set

    # Loss accumulated since the last progress report
    running_loss = 0.0

    # Mini-batches are counted from 1 so the reporting condition reads naturally
    for step, (inputs, labels) in enumerate(cifar10_trainloader, start=1):
        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update
        outputs = cifar10_net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Report the average loss once every 2000 mini-batches
        if step % 2000 != 0:
            continue
        print('[%d, %5d] loss: %.3f' %
              (epoch + 1, step, running_loss / 2000))
        running_loss = 0.0

print('Finished Training')

In [None]:
# Take the first batch of test images. The built-in next() is used because
# iterators expose __next__, not a .next() method, in Python 3.
dataiter = iter(cifar10_testloader)
images, labels = next(dataiter)

# Inference only, so no gradients need to be tracked
with torch.no_grad():
    outputs = cifar10_net(images)
# The predicted class is the index of the largest output for each image
_, predicted = torch.max(outputs, 1)

# print images
imshow(images)

# Iterate over the labels themselves (not the configured batch size) so a
# short batch cannot cause an out-of-range index.
print('GroundTruth: ', ' '.join('\t%5s' % classes[label] for label in labels.tolist()))
print('Predicted: ', ' '.join('\t%5s' % classes[label]
                              for label in predicted.tolist()))


In [None]:
# Running counts of correctly classified images and of all images seen
correct = 0
total = 0

# Evaluation only: gradients are not needed
with torch.no_grad():
    for images, labels in cifar10_testloader:
        outputs = cifar10_net(images)
        # Index of the largest output per image = predicted class
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.shape[0]
        correct += int(predicted.eq(labels).sum())

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

In [None]:
# Per-class tallies: correctly classified images and total images
class_correct = [0. for _ in classes]
class_total = [0. for _ in classes]

with torch.no_grad():
    for data in cifar10_testloader:
        images, labels = data
        outputs = cifar10_net(images)
        _, predicted = torch.max(outputs, 1)
        hits = (predicted == labels)

        # Count EVERY sample in the batch. (A fixed range(4) would silently
        # ignore all but the first four samples of each batch, and squeezing
        # the comparison would break on a batch of one.)
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += hit
            class_total[label] += 1


for i, class_name in enumerate(classes):
    if class_total[i] == 0:
        # Avoid a division by zero for a class that never appeared
        print('Accuracy of %5s : n/a (no test samples)' % class_name)
        continue
    print('Accuracy of %5s : %2d %%' % (
        class_name, 100 * class_correct[i] / class_total[i]))

# Train Function

In [None]:
def train(args, model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    The loss is the negative log-likelihood of the model output, so the model
    is expected to return log-probabilities.

    Args:
        args: Settings dictionary; only ``args["log_interval"]`` is read
            (number of batches between progress messages).
        model: Network to train; switched to training mode.
        device: Device each batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches.
        optimizer: Optimizer that updates the model's parameters.
        epoch: Epoch number, used only in the progress messages.
    """
    model.train()

    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # One optimization step: reset gradients, forward, backward, update
        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Only every `log_interval`-th batch is reported
        if step % args["log_interval"] != 0:
            continue

        samples_seen = step * len(inputs)
        dataset_size = len(train_loader.dataset)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, dataset_size, percent_done, loss.item()))

# Test Function

In [None]:
def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

# Create Dataloaders

## Custom Dataset

In [None]:
# Calculate the per-channel mean and standard deviation of the dataset
# See: https://forums.fast.ai/t/image-normalization-in-pytorch/7534/7
#
# The statistics are accumulated as running sums over ALL pixels rather than
# by averaging per-batch means/stds: averaging per-batch values gives a
# smaller final batch the same weight as a full one, and the mean of
# per-batch standard deviations is not the standard deviation of the data.

transform = transforms.Compose([
    transforms.ToTensor()
])

dataloader = torch.utils.data.DataLoader(
    datasets.ImageFolder('./data/tcv_snakes/train', transform=transform),
    batch_size=4096, shuffle=False)

pixel_count = 0       # number of values seen per channel
channel_sum = 0.0     # becomes an array of shape (channels,) after the first batch
channel_sq_sum = 0.0  # same, for the squared values

for batch_images, _ in dataloader:
    # shape (batch_size, channels, height, width)
    numpy_image = batch_images.numpy()

    pixel_count += numpy_image.shape[0] * numpy_image.shape[2] * numpy_image.shape[3]

    # Accumulate in float64 so that summing many values does not lose precision
    channel_sum = channel_sum + numpy_image.sum(axis=(0, 2, 3), dtype=np.float64)
    channel_sq_sum = channel_sq_sum + np.square(numpy_image).sum(axis=(0, 2, 3), dtype=np.float64)

# shape (channels,)
pop_mean = channel_sum / pixel_count

# Var[x] = E[x^2] - E[x]^2; clamp tiny negative values caused by rounding
pop_var0 = np.maximum(channel_sq_sum / pixel_count - pop_mean ** 2, 0.0)
pop_std0 = np.sqrt(pop_var0)

# Same with Bessel's correction (equivalent to ddof=1)
pop_std1 = np.sqrt(pop_var0 * pixel_count / (pixel_count - 1))

print('Calculated Mean: {}'.format(pop_mean))
print('Calculated STD: {}'.format(pop_std0))
print('Calculated STD (adjusted): {}'.format(pop_std1))

In [None]:
# Preprocessing for the custom dataset: convert each image to a tensor, then
# normalize every channel with the mean / standard deviation computed above
# (the unadjusted standard deviation `pop_std0` is the one used).
transform = transforms.Compose([
#    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=pop_mean, std=pop_std0)
])

In [None]:
# Loader for the training images of the custom dataset.
# The batch size is read from "train_batch_size": the settings dictionary
# defines no "batch_size" key, so looking that up would raise a KeyError.
train_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder('./data/tcv_snakes/train',
                         transform=transform),
    batch_size=args["train_batch_size"], shuffle=True, **kwargs)


# Loader for the test images of the custom dataset
test_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder('./data/tcv_snakes/test',
                         transform=transform),
    batch_size=args["test_batch_size"], shuffle=True, **kwargs)

# Create model and begin training

In [None]:
#model = inception().to(device)
# NOTE(review): `Net` is not defined anywhere in the visible source (the only
# model class shown is CIFAR10Net) — confirm where it is supposed to come from.
# train() and test() compute F.nll_loss on the model output, so whichever
# model is used here is expected to return log-probabilities.
model = Net().to(device)

# SGD with the learning rate and momentum taken from the settings dictionary
optimizer = optim.SGD(model.parameters(), lr=args["lr"], momentum=args["momentum"])

# Epochs are numbered from 1; the model is evaluated after every training epoch
for epoch in range(1, args["epochs"] + 1):
    train(args, model, device, train_loader, optimizer, epoch)
    test(args, model, device, test_loader)

# Optionally write the trained weights (state dict only) to "cnn.pt"
if (args["save_model"]):
    torch.save(model.state_dict(), "cnn.pt")
