# Practical computer vision

This notebook will take you through how to create your own dataset and use it to train a machine learning model to classify images.

Firstly, let's import the tools that we'll need

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn.functional as F
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader


And set up acceleration using a GPU if we can (don't worry about this code)

In [None]:
# Ask PyTorch whether a CUDA-capable GPU is usable on this machine.
use_cuda = torch.cuda.is_available()
# NOTE(review): the detected value is overwritten on the next line, so the
# notebook always runs on the CPU even when a GPU is present. Confirm this
# is intended before removing the override.
use_cuda = False
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

## Making the dataset

Here we will create a simple dataset 

We will create a folder called 'data'

Inside this we will make folders that contain images of different classes, and name those subfolders after the class of image that they contain.

E.g. we could put folders full of images of cats and dogs inside the data folder.

In [None]:
def create_csv(root=data_root, out_name='labels.csv'):
    """Index the images under ``root`` and write that index to a CSV file.

    Every immediate sub-folder of ``root`` is treated as one class. Each
    regular file directly inside a sub-folder becomes one row
    ``(file_path, label)``, where ``label`` is the position of the sub-folder
    in the order ``os.scandir`` yields it.

    Args:
        root: Directory containing one sub-folder per class.
        out_name: Name of the CSV file. It is written to ``root + out_name``
            (plain string concatenation), so ``root`` needs a trailing path
            separator for the file to land inside the folder.
    """
    subfolders = [entry.path for entry in os.scandir(root) if entry.is_dir()]

    # Collect plain dicts and build the DataFrame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for label, folder in enumerate(subfolders):
        rows.extend(
            {'file_path': entry.path, 'label': label}
            for entry in os.scandir(folder)
            if entry.is_file()
        )

    # Passing `columns` keeps the header even when no files were found.
    df = pd.DataFrame(rows, columns=['file_path', 'label'])
    df.to_csv(root + out_name, index=False)


# Build (or rebuild) the label index for the default data folder.
create_csv()

class ClassificationDataset(Dataset):
    """Dataset of ``(image, label)`` pairs listed in a CSV file.

    The CSV is expected to provide a ``file_path`` column (path of an image
    file) and a ``label`` column (class id of that image).
    """

    def __init__(self, csv=data_root + 'labels.csv', transform=None):
        """Read the sample index.

        Args:
            csv: Path of the CSV file to read.
            transform: Optional callable that receives the tuple
                ``(image, label)`` and returns a new ``(image, label)``.
        """
        self.csv = pd.read_csv(csv)
        self.data_size = len(self.csv)
        samples = zip(self.csv['file_path'].tolist(), self.csv['label'].tolist())
        # Maps a row position to its (file_path, label) pair.
        self.idx_to_data = dict(enumerate(samples))
        self.transform = transform

    def __len__(self):
        """Return the number of samples listed in the CSV."""
        return self.data_size

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, label)``.

        The image is always converted to 3-channel RGB before the transform
        runs, so grayscale, palette and RGBA files all yield the same layout.
        """
        filepath, label = self.idx_to_data[int(idx)]
        # Image.open is lazy and keeps the file open. convert() reads the
        # pixels into a new image, and the with-block then closes the file so
        # handles do not pile up while iterating over the dataset.
        with Image.open(filepath) as raw:
            img = raw.convert('RGB')
        if self.transform:
            img, label = self.transform((img, label))
        return img, label


## Preprocessing our data

The model we are going to create to map our inputs to our outputs has a fixed input size, so we need to resize any images that we will pass through it.

We need to make some transforms that will be called on each input, to prepare it for a forward pass through the model.

In [None]:
class SquareResize():
    """Scale an image so its shorter side equals ``output_size``, then crop a square.

    The aspect ratio is preserved by the resize. The crop keeps the
    ``output_size`` x ``output_size`` region anchored at the top-left corner.

    NOTE(review): the constructor accepts a tuple, but ``__call__`` only works
    with an int ``output_size`` (the arithmetic below fails on a tuple).
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))  # only int or tuple is accepted
        self.output_size = output_size

    def __call__(self, sample):
        """Resize and crop the image of an ``(image, label)`` sample.

        Args:
            sample: Tuple ``(image, label)``; ``image`` must expose ``size``,
                ``resize`` and ``crop`` the way a PIL image does.

        Returns:
            Tuple ``(image, label)`` with the processed image and the label
            passed through untouched.
        """
        image, label = sample
        target = self.output_size
        width, height = image.size  # PIL reports size as (width, height)

        if width > height:
            # Landscape: height becomes the target, width scales with it.
            scale = target / height
            new_size = (scale * width, target)
        elif height > width:
            # Portrait: width becomes the target, height scales with it.
            scale = target / width
            new_size = (target, scale * height)
        else:
            new_size = (target, target)

        # int() truncates any fractional part of the scaled dimension.
        new_size = tuple(int(side) for side in new_size)
        resized = image.resize(new_size)
        cropped = resized.crop((0, 0, target, target))
        return cropped, label

class ToTensor():
    """Turn the image of an ``(image, label)`` sample into a float tensor.

    Pixel values are divided by 255 and the axes are reordered with
    ``(2, 0, 1)``, i.e. the last axis of the input array becomes the first
    axis of the tensor. The label is returned unchanged.
    """

    def __call__(self, sample):
        picture, label = sample
        scaled = np.array(picture) / 255
        # The input needs three axes for this transpose to succeed.
        channels_first = np.transpose(scaled, (2, 0, 1))
        return torch.Tensor(channels_first), label
    
# Preprocessing pipeline applied to every (image, label) sample, in order:
# resize/crop to a 224x224 square, then convert the image to a tensor.
# (The original cell initialised `ytransforms`, a typo that made the first
# `mytransforms.append` raise NameError.)
mytransforms = transforms.Compose([
    SquareResize(224),
    ToTensor(),
])

Now we have the dataset and the transforms to preprocess them.

In [None]:
# Full dataset: every row of the labels CSV, preprocessed by `mytransforms`.
mydataset = ClassificationDataset(transform=mytransforms, csv=data_root + 'labels.csv')

# Partition sizes. `train_split` and `val_split` are defined outside this cell.
# NOTE(review): the validation size is int(val_split * data_size) minus the
# training size, so val_split appears to be the cumulative fraction at which
# the validation part ends (not the validation fraction itself). Confirm.
data_size = len(mydataset)
train_size = int(train_split * data_size)
val_size = int(val_split * data_size) - train_size
test_size = data_size - train_size - val_size  # whatever is left over

# Randomly assign samples to the three non-overlapping subsets.
train_data, val_data, test_data = torch.utils.data.random_split(
    mydataset,
    [train_size, val_size, test_size],
)

To feed these datasets to our models, we can create a dataloader.

A dataloader loads our examples from the datasets for us, and does some useful things for us including batching together input examples and shuffling the dataset.

## Creating the model to map inputs to outputs

In [None]:

class VGGClassifier(torch.nn.Module):
    """Classifier built from pretrained VGG-11 features plus a new dense head.

    ``features`` is the convolutional part of torchvision's VGG-11 and
    ``regressor`` is a fully connected head ending in ``Softmax``. The
    ``forward`` output therefore holds per-class probabilities, not logits,
    so it is not a suitable direct input for losses that expect logits
    (e.g. ``F.cross_entropy``).
    """

    def __init__(self, out_size):
        """Build the network.

        Args:
            out_size: Number of output classes.
        """
        super().__init__()
        nn = torch.nn
        # Per the original note the feature map is 512x7x7; that matches the
        # Linear input size used below.
        self.features = models.vgg11(pretrained=True).features
        head_layers = [
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 1024),
            nn.ReLU(),
            nn.Linear(1024, out_size),
            nn.Softmax(dim=1),
        ]
        self.regressor = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the forward pass and return the per-class probabilities."""
        feature_map = F.relu(self.features(x))
        flat = feature_map.reshape(-1, 512 * 7 * 7)  # one row per sample
        return self.regressor(flat)

    def _set_features_trainable(self, trainable):
        """Switch gradient tracking for every parameter of ``features``."""
        for weight in self.features.parameters():
            weight.requires_grad = trainable

    def freeze(self):
        """Stop the feature extractor's parameters from receiving gradients."""
        self._set_features_trainable(False)

    def unfreeze(self):
        """Let the feature extractor's parameters receive gradients again."""
        self._set_features_trainable(True)


Now let's create the final function to train, and also visualise the training process.

In [None]:

def train(epochs):
    """Train the global ``mymodel`` and plot the progress live.

    Reads the module-level names ``mymodel``, ``optimizer``, ``train_samples``
    (iterable of ``(x, y)`` batches), ``device`` and ``id_to_classname``.
    The left subplot shows the mean per-sample cost of each finished epoch;
    the right subplot shows the first image of the latest batch together with
    the class predicted for it.

    Args:
        epochs: Number of passes over ``train_samples``.
    """
    plt.close()
    mymodel.train()

    ecosts = []  # mean per-sample cost of every completed epoch

    plt.ion()
    fig = plt.figure(figsize=(10, 5))
    ax = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)

    plt.show()
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Cost')
    # One line artist that is updated in place, rather than a new line being
    # stacked on the axes after every epoch.
    cost_line, = ax.plot([], [], 'b', label='Train cost')
    ax.legend()

    ax2.axis('off')
    img_label_text = ax2.text(0, -5, '', fontsize=15)
    shown_image = None  # image artist currently displayed on ax2

    for e in range(epochs):
        ecost = 0
        seen = 0  # samples processed in this epoch
        for x, y in train_samples:
            x, y = x.to(device), y.to(device)
            x = x[:, :3, :, :]  # keep the first three channels only

            h = mymodel(x)  # calculate hypothesis
            # Assumes mymodel is a VGGClassifier, whose last layer is Softmax,
            # so h already holds probabilities. F.cross_entropy expects raw
            # logits and would apply a second softmax; the negative
            # log-likelihood of log(h) is the cross-entropy of h itself.
            # The clamp avoids log(0).
            log_probs = torch.log(h.clamp_min(1e-12))
            cost = F.nll_loss(log_probs, y, reduction='sum')  # calculate cost

            optimizer.zero_grad()  # zero gradients
            cost.backward()  # calculate derivatives
            optimizer.step()  # update parameters

            # Show the first image of the batch with its predicted class.
            y_ind = 0
            # detach + cpu so the conversion also works for GPU tensors.
            im = x[y_ind].detach().cpu().numpy().transpose(1, 2, 0)
            predicted_class = id_to_classname[h.max(1)[1][y_ind].item()]
            if shown_image is not None:
                shown_image.remove()  # do not pile up one image per batch
            shown_image = ax2.imshow(im)
            img_label_text.set_text('Predicted class: ' + predicted_class)
            fig.canvas.draw()

            ecost += cost.item()
            seen += x.size(0)

        # Average over the samples actually processed; max() guards against
        # an empty loader.
        ecost /= max(seen, 1)
        ecosts.append(ecost)
        cost_line.set_data(list(range(len(ecosts))), ecosts)
        ax.relim()
        ax.autoscale_view()
        fig.canvas.draw()

        print('Epoch', e, '\tCost', ecost)
        

def test():
    """Evaluate the global ``mymodel`` on ``test_samples`` and return its accuracy.

    Reads the module-level names ``mymodel``, ``test_samples`` (iterable of
    ``(x, y)`` batches) and ``device``.

    Returns:
        Fraction of correctly classified samples, rounded to 4 decimals, or
        ``0.0`` when ``test_samples`` yields no samples.
    """
    print('Started evaluation...')
    mymodel.eval()  # put model into evaluation mode

    correct = 0
    seen = 0
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for x, y in test_samples:
            x, y = x.to(device), y.to(device)
            x = x[:, :3, :, :]  # same channel handling as in train()
            h = mymodel(x)
            pred = h.argmax(dim=1)  # index of the highest-scoring class
            correct += pred.eq(y).sum().item()
            seen += y.size(0)

    if seen == 0:
        return 0.0  # avoid dividing by zero on an empty test set
    return round(correct / seen, 4)


Now let's actually do the training.

In [None]:
# Stage 1: freeze the model first (for VGGClassifier this disables gradients
# on the `features` parameters), then train for 20 epochs.
mymodel.freeze()
train(20)
# Optional stage 2 (currently disabled): unfreeze and train 5 more epochs.
#mymodel.unfreeze()
#train(5)

# Report the accuracy returned by test().
acc = test()
print('Test accuracy: ', acc)