Based on https://blog.algorithmia.com/convolutional-neural-nets-in-pytorch/

----

In [2]:
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms

----
# Data preparation

In [3]:
# set a standard random seed for reproducible results
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x104cbfd30>

In [4]:
#The compose function allows for multiple transforms
#transforms.ToTensor() converts our PILImage to a tensor of shape (C x H x W) in the range [0,1]
#transforms.Normalize(mean,std) normalizes a tensor to a (mean, std) for (R, G, B)

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_set = torchvision.datasets.CIFAR10(root='./cifardata', train=True, download=True, transform=transform)

test_set = torchvision.datasets.CIFAR10(root='./cifardata', train=False, download=True, transform=transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifardata/cifar-10-python.tar.gz
Files already downloaded and verified


In [5]:
# designate the possible labels for each image
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [6]:
from torch.utils.data.sampler import SubsetRandomSampler

#Training
n_training_samples = 20000
train_sampler = SubsetRandomSampler(np.arange(n_training_samples, dtype=np.int64))

#Validation
n_val_samples = 5000
val_sampler = SubsetRandomSampler(np.arange(n_training_samples, n_training_samples + n_val_samples, dtype=np.int64))

#Test
n_test_samples = 5000
test_sampler = SubsetRandomSampler(np.arange(n_test_samples, dtype=np.int64))

-----
# Designing a neural net (data preprocessing)
Pytorch makes it pretty easy to implement all of those feature engineering steps that we described above. We’ll be making use of 4 major functions in our CNN class:

1. torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding) – applies convolution
2. torch.nn.relu(x) – applies ReLU
3. torch.nn.MaxPool2d(kernel_size, stride, padding) – applies Max Pooling
4. torch.nn.Linear(in_features, out_features) – fully connected layer (multiply inputs by learned weights)

Writing CNN code in Pytorch can get a little complex, since everything is defined inside of one class. We’ll create a SimpleCNN class which inherits from the master torch.nn.Module class.

In [23]:
from torch.autograd import Variable
import torch.nn.functional as F

class SimpleCNN(torch.nn.Module):
    
    #Our batch shape for input x is (3, 32, 32)
    
    def __init__(self):
        super(SimpleCNN, self).__init__()
        
        #Input channels = 3, output channels = 18
        self.conv1 = torch.nn.Conv2d(3, 18, kernel_size=3, stride=1, padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        
        # Second layer
        #self.conv2 = torch.nn.Conv2d(3, 18, kernel_size=3, stride=1, padding=1)
        #self.pool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        
        #4608 input features, 64 output features (see sizing flow below)
        self.fc1 = torch.nn.Linear(18 * 16 * 16, 64)
        
        #64 input features, 10 output features for our 10 defined classes
        self.fc2 = torch.nn.Linear(64, 10)
        
    def forward(self, x):
        
        #Computes the activation of the first convolution
        #Size changes from (3, 32, 32) to (18, 32, 32)
        x = F.relu(self.conv1(x))
        
        #Size changes from (18, 32, 32) to (18, 16, 16)
        x = self.pool(x)
        
        #Reshape data to input to the input layer of the neural net
        #Size changes from (18, 16, 16) to (1, 4608)
        #Recall that the -1 infers this dimension from the other given dimension
        x = x.view(-1, 18 * 16 *16)
        
        #Computes the activation of the first fully connected layer
        #Size changes from (1, 4608) to (1, 64)
        x = F.relu(self.fc1(x))
        
        #Computes the second fully connected layer (activation applied later)
        #Size changes from (1, 64) to (1, 10)
        x = self.fc2(x)
        return(x)

In [17]:
def outputSize(in_size, kernel_size, stride, padding):

    output = int((in_size - kernel_size + 2*(padding)) / stride) + 1

    return(output)

---
# Training a neural net
Our basic flow is a training loop: each time we pass through the loop (called and “epoch”), we compute a forward pass on the network and implement backpropagation to adjust the weights. We’ll also record some other measurements like loss and time passed, so that we can analyze them as the net trains itself.

To start, we’ll define our data loaders using the samplers we created above.

In [18]:
#DataLoader takes in a dataset and a sampler for loading (num_workers deals with system level memory) 
def get_train_loader(batch_size):
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,sampler=train_sampler, num_workers=2)
    return(train_loader)

In [19]:
#Test and validation loaders have constant batch sizes, so we can define them directly
test_loader = torch.utils.data.DataLoader(test_set, batch_size=4, sampler=test_sampler, num_workers=2)
val_loader = torch.utils.data.DataLoader(train_set, batch_size=128, sampler=val_sampler, num_workers=2)

We’ll also define our loss and optimizer functions that the CNN will use to find the right weights. We’ll be using Cross Entropy Loss (Log Loss) as our loss function, which strongly penalizes high confidence in the wrong answer. The optimizer is the popular Adam algorithm (not a person!).

In [20]:
import torch.optim as optim

def createLossAndOptimizer(net, learning_rate=0.001):
    
    #Loss function
    loss = torch.nn.CrossEntropyLoss()
    
    #Optimizer
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    
    return(loss, optimizer)

Finally, we’ll define a function to train our CNN using a simple for loop.

In [21]:
import time

def trainNet(net, batch_size, n_epochs, learning_rate):
    
    #Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)
    
    #Get training data
    train_loader = get_train_loader(batch_size)
    n_batches = len(train_loader)
    
    #Create our loss and optimizer functions
    loss, optimizer = createLossAndOptimizer(net, learning_rate)
    
    #Time for printing
    training_start_time = time.time()
    
    #Loop for n_epochs
    for epoch in range(n_epochs):
        
        running_loss = 0.0
        print_every = n_batches // 10
        start_time = time.time()
        total_train_loss = 0
        
        for i, data in enumerate(train_loader, 0):
            
            #Get inputs
            inputs, labels = data
            
            #Wrap them in a Variable object
            inputs, labels = Variable(inputs), Variable(labels)
            
            #Set the parameter gradients to zero
            optimizer.zero_grad()
            
            #Forward pass, backward pass, optimize
            outputs = net(inputs)
            loss_size = loss(outputs, labels)
            loss_size.backward()
            optimizer.step()
            
            #Print statistics
            running_loss += loss_size.data[0]
            total_train_loss += loss_size.data[0]
            
            #Print every 10th batch of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                        epoch+1, int(100 * (i+1) / n_batches), running_loss / print_every, time.time() - start_time))
                #Reset running loss and time
                running_loss = 0.0
                start_time = time.time()
            
        #At the end of the epoch, do a pass on the validation set
        total_val_loss = 0
        for inputs, labels in val_loader:
            
            #Wrap tensors in Variables
            inputs, labels = Variable(inputs), Variable(labels)
            
            #Forward pass
            val_outputs = net(inputs)
            val_loss_size = loss(val_outputs, labels)
            total_val_loss += val_loss_size.data[0]
            
        print("Validation loss = {:.2f}".format(total_val_loss / len(val_loader)))
        
    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))

During each epoch of training, we pass data to the model in batches whose size we define when we call the training loop. Data is feature engineered using the SimpleCNN class we’ve defined, and then basic metrics are printed after a few passes. During each loop, we also calculate the loss on our validation set.

To actually train the net now only requires two lines of code:

In [22]:
CNN = SimpleCNN()
trainNet(CNN, batch_size=32, n_epochs=5, learning_rate=0.001)

===== HYPERPARAMETERS =====
batch_size= 32
epochs= 5
learning_rate= 0.001




Epoch 1, 10% 	 train_loss: 2.14 took: 1.27s
Epoch 1, 20% 	 train_loss: 1.91 took: 1.11s
Epoch 1, 30% 	 train_loss: 1.78 took: 1.09s
Epoch 1, 40% 	 train_loss: 1.70 took: 1.09s
Epoch 1, 50% 	 train_loss: 1.66 took: 1.08s
Epoch 1, 60% 	 train_loss: 1.56 took: 1.11s
Epoch 1, 70% 	 train_loss: 1.54 took: 1.18s
Epoch 1, 80% 	 train_loss: 1.49 took: 1.19s
Epoch 1, 90% 	 train_loss: 1.52 took: 1.23s




Validation loss = 1.38
Epoch 2, 10% 	 train_loss: 1.37 took: 1.45s
Epoch 2, 20% 	 train_loss: 1.32 took: 2.09s
Epoch 2, 30% 	 train_loss: 1.35 took: 2.16s
Epoch 2, 40% 	 train_loss: 1.37 took: 1.92s
Epoch 2, 50% 	 train_loss: 1.29 took: 1.77s
Epoch 2, 60% 	 train_loss: 1.37 took: 1.75s
Epoch 2, 70% 	 train_loss: 1.28 took: 1.74s
Epoch 2, 80% 	 train_loss: 1.31 took: 1.75s
Epoch 2, 90% 	 train_loss: 1.24 took: 1.73s
Validation loss = 1.27
Epoch 3, 10% 	 train_loss: 1.15 took: 1.77s
Epoch 3, 20% 	 train_loss: 1.17 took: 1.72s
Epoch 3, 30% 	 train_loss: 1.23 took: 1.74s
Epoch 3, 40% 	 train_loss: 1.15 took: 1.72s
Epoch 3, 50% 	 train_loss: 1.17 took: 1.75s
Epoch 3, 60% 	 train_loss: 1.17 took: 1.74s
Epoch 3, 70% 	 train_loss: 1.19 took: 1.84s
Epoch 3, 80% 	 train_loss: 1.16 took: 1.74s
Epoch 3, 90% 	 train_loss: 1.16 took: 1.73s
Validation loss = 1.18
Epoch 4, 10% 	 train_loss: 1.08 took: 2.02s
Epoch 4, 20% 	 train_loss: 1.10 took: 1.92s
Epoch 4, 30% 	 train_loss: 1.06 took: 1.78s
Epoch 4

In [24]:
CNN = SimpleCNN()
trainNet(CNN, batch_size=32, n_epochs=5, learning_rate=0.001)

===== HYPERPARAMETERS =====
batch_size= 32
epochs= 5
learning_rate= 0.001




Epoch 1, 10% 	 train_loss: 2.04 took: 1.39s
Epoch 1, 20% 	 train_loss: 1.80 took: 1.21s
Epoch 1, 30% 	 train_loss: 1.72 took: 1.35s
Epoch 1, 40% 	 train_loss: 1.60 took: 1.73s
Epoch 1, 50% 	 train_loss: 1.51 took: 1.70s
Epoch 1, 60% 	 train_loss: 1.49 took: 1.40s
Epoch 1, 70% 	 train_loss: 1.48 took: 1.70s
Epoch 1, 80% 	 train_loss: 1.46 took: 4.14s
Epoch 1, 90% 	 train_loss: 1.47 took: 1.25s




Validation loss = 1.37
Epoch 2, 10% 	 train_loss: 1.32 took: 1.75s
Epoch 2, 20% 	 train_loss: 1.36 took: 2.03s
Epoch 2, 30% 	 train_loss: 1.31 took: 1.93s
Epoch 2, 40% 	 train_loss: 1.28 took: 2.30s
Epoch 2, 50% 	 train_loss: 1.29 took: 2.83s
Epoch 2, 60% 	 train_loss: 1.25 took: 2.40s
Epoch 2, 70% 	 train_loss: 1.28 took: 1.80s
Epoch 2, 80% 	 train_loss: 1.28 took: 1.79s
Epoch 2, 90% 	 train_loss: 1.32 took: 2.70s
Validation loss = 1.26
Epoch 3, 10% 	 train_loss: 1.18 took: 3.83s
Epoch 3, 20% 	 train_loss: 1.18 took: 3.90s
Epoch 3, 30% 	 train_loss: 1.15 took: 3.25s
Epoch 3, 40% 	 train_loss: 1.18 took: 2.84s
Epoch 3, 50% 	 train_loss: 1.17 took: 2.20s
Epoch 3, 60% 	 train_loss: 1.16 took: 2.09s
Epoch 3, 70% 	 train_loss: 1.15 took: 2.11s
Epoch 3, 80% 	 train_loss: 1.19 took: 2.06s
Epoch 3, 90% 	 train_loss: 1.18 took: 2.17s
Validation loss = 1.19
Epoch 4, 10% 	 train_loss: 1.03 took: 2.25s
Epoch 4, 20% 	 train_loss: 1.05 took: 2.05s
Epoch 4, 30% 	 train_loss: 1.05 took: 2.31s
Epoch 4