In [2]:
#convolution between image and kernel:
#https://en.wikipedia.org/wiki/Kernel_(image_processing)
#https://en.wikipedia.org/wiki/Kernel_(image_processing)#Convolution
import numpy as np
import matplotlib.pyplot as plt

def convolve2d(image, kernel):
    """Slide ``kernel`` over ``image`` and sum the element-wise products.

    No padding is applied: only positions where the kernel lies completely
    inside the image are evaluated. The kernel is applied as given (it is not
    flipped before being multiplied with each window).

    Args:
        image: 2-D array of shape (ih, iw).
        kernel: 2-D array of shape (kh, kw).

    Returns:
        Array of shape (ih - kh + 1, iw - kw + 1) created with ``np.zeros``
        (default float dtype), one entry per window position.
    """
    kernel_rows, kernel_cols = kernel.shape
    image_rows, image_cols = image.shape

    # One output entry per position where the kernel fits entirely in the image.
    out_shape = (image_rows - kernel_rows + 1, image_cols - kernel_cols + 1)
    result = np.zeros(out_shape)

    for row, col in np.ndindex(*out_shape):
        window = image[row:row + kernel_rows, col:col + kernel_cols]
        result[row, col] = np.sum(window * kernel)

    return result

In [4]:
import os
import sys
# Append the sibling ../cifar10 directory (relative to the current working
# directory) to the module search path.
cwd = os.getcwd()
sys.path.append(f'{cwd}/../cifar10')

#Numpy is a linear algebra library
import numpy as np
# Matplotlib is a visualizations library 
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import utils
from torchvision import transforms
#CIFAR10 is the dataset class (imported from torchvision here) that loads the data, possibly a subset, from a local folder
from torchvision.datasets import CIFAR10

# Number of images per mini-batch produced by the DataLoader.
batch_size = 4

def show_image(img):
    """Display an image tensor with matplotlib.

    Args:
        img: tensor whose first axis is moved to the end before plotting
            (i.e. a channels-first image); it is rescaled with
            ``img / 2 + 0.5`` before being shown.
    """
    # Undo a (mean=0.5, std=0.5) normalization: x / 2 + 0.5.
    restored = img / 2 + 0.5
    # Reorder the axes from (0, 1, 2) to (1, 2, 0) so the first axis comes last.
    pixels = restored.numpy().transpose(1, 2, 0)
    plt.imshow(pixels)
    plt.show()

# Class names; a label index i corresponds to classes[i].
classes = (
    'plane',
    'car',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)

def load_data():
    """Build a DataLoader over the CIFAR10 dataset stored under ``../cifar10``.

    Each image is converted to a tensor and then normalized per channel with
    mean 0.5 and std 0.5. Batches contain ``batch_size`` images and are served
    in dataset order (no shuffling).

    Returns:
        A ``torch.utils.data.DataLoader`` over the dataset.
    """
    channel_mean = (0.5, 0.5, 0.5)
    channel_std = (0.5, 0.5, 0.5)

    # Convert the images to tensors, then normalize them.
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    dataset = CIFAR10(root='../cifar10', transform=preprocessing)
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
    )

In [10]:
# Random input of shape (2, 3, 8, 8): 2 samples with 3 channels of 8x8 values.
input_img = torch.rand(2, 3, 8, 8)

# 3 input channels -> 6 output channels, 5x5 kernel.
conv = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)

# With a 5x5 kernel and no padding, each 8x8 map shrinks to 4x4.
output = conv(input_img)

# Define a CNN

Here you will come into play. Try to define the necessary layers and build the forward pass of our model. Remember that the model's structure is:

* A conv layer with 3 channels as input, 6 channels as output, and a 5x5 kernel
* A 2x2 max-pooling layer (applied after each of the two conv layers)
* A conv layer with 6 channels as input, 16 channels as output, and a 5x5 kernel
* A linear layer with `16*5*5` (= 400) nodes
* A linear layer with 120 nodes
* A linear layer with 84 nodes
* A linear layer with 10 nodes

In [None]:
#1. DEFINE THE CNN HERE
class CNN(nn.Module):
    """Small convolutional classifier producing 10 output scores per sample.

    Layers: conv(3->6, 5x5) -> ReLU -> 2x2 max-pool -> conv(6->16, 5x5) ->
    ReLU -> 2x2 max-pool -> flatten -> linear(400->120) -> ReLU ->
    linear(120->84) -> ReLU -> linear(84->10).

    The first linear layer is sized for 16 feature maps of 5x5, which is what
    the convolution/pooling stack produces for 32x32 inputs (32 -> 28 -> 14 ->
    10 -> 5).
    """

    # Shape (channels, height, width) the conv stack must produce per sample
    # so that it matches the input size of fc1.
    _FEATURE_SHAPE = (16, 5, 5)

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    #define the forward pass for the child class CNN
    def forward(self, x):
        """Compute the class scores for ``x``.

        Args:
            x: image tensor with 3 channels whose spatial size yields 16x5x5
                feature maps after the two conv/pool stages.

        Returns:
            Tensor of shape (number of samples, 10) with unnormalized scores.

        Raises:
            ValueError: if the feature maps are not 16x5x5. Without this check
                the flattening below would silently regroup values from
                different samples into rows of 400 whenever the total element
                count happens to be divisible by 400.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        if tuple(x.shape[-3:]) != self._FEATURE_SHAPE:
            raise ValueError(
                'expected feature maps of shape %s after the conv layers, got %s; '
                'check the input image size'
                % (self._FEATURE_SHAPE, tuple(x.shape[-3:]))
            )

        # reshape (rather than view) also accepts non-contiguous tensors.
        x = x.reshape(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
       

In [None]:
#2. TRAIN THE MODEL HERE

def train(model, training_data, epochs=1, log_every=2000):
    """Train ``model`` in place with SGD and cross-entropy loss.

    Args:
        model: module whose parameters are optimized; called as
            ``model(inputs)``.
        training_data: iterable yielding ``(inputs, labels)`` pairs, one per
            mini-batch.
        epochs: number of full passes over ``training_data`` (default 1).
        log_every: print the average loss after every ``log_every``
            mini-batches (default 2000); 0 or None disables the progress
            output.

    Returns:
        None. Progress and a final 'Finished Training' line are printed.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    for epoch in range(epochs):  # loop over the dataset `epochs` times
        # Reset per epoch so a partial window left over at the end of one
        # epoch does not inflate the first average of the next one.
        running_loss = 0.0

        for i, (inputs, labels) in enumerate(training_data):
            # Clear the gradient buffers: gradients accumulate across
            # backward() calls, and the previous mini-batch must not leak in.
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if log_every and i % log_every == log_every - 1:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / log_every))
                running_loss = 0.0

    print('Finished Training')



def evaluate(model):
    """Show the first batch from ``load_data()`` with true and predicted labels.

    The images are displayed as a grid, then the ground-truth class names and
    the class names with the highest model score are printed.

    Args:
        model: module called as ``model(images)``; expected to return one
            score per entry of ``classes`` for every image.
    """
    # Use the built-in next(): DataLoader iterators in current PyTorch do not
    # provide a .next() method.
    images, labels = next(iter(load_data()))

    # print images
    show_image(utils.make_grid(images))
    # Iterate over the actual batch instead of assuming exactly 4 images.
    print('GroundTruth: ', ' '.join('%5s' % classes[idx] for idx in labels.tolist()))

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(images)

    # Index of the highest score along dimension 1 is the predicted class.
    _, predicted = torch.max(outputs, 1)

    print('Predicted: ', ' '.join('%5s' % classes[idx]
                              for idx in predicted.tolist()))



def main():
    """Load the data, train a new CNN on it and show sample predictions."""
    loader = load_data()
    net = CNN()

    train(net, loader)
    evaluate(net)


main()