# Building a Neural Network to recognize Quick Draw doodles

The goal of this notebook is to try out and evaluate the solutions proposed by Google AI and by other Kagglers for the classification problem posed by the Quick Draw dataset.

## Google AI suggestion

Google AI suggests building a neural network that introduces LSTM layers. The idea is to take the temporal order of the strokes into account by using layers that keep track of what happened before. They also avoid treating the input as an image: instead of 2D convolutions, they apply 1D convolutional layers to the stroke sequence. It's worth a try!

A TensorFlow tutorial of Google AI's solution is available here: https://www.tensorflow.org/tutorials/sequences/recurrent_quickdraw

## Considering the dataset as images only

A simpler idea is to interpret the coordinate vectors of the dataset as images only. We can then use a classical neural network architecture: a succession of 2D convolutional layers and max-pooling layers, ending with a flatten layer followed by a fully connected (FC) layer.

An example of such a solution in TensorFlow can be found here: https://www.kaggle.com/gaborfodor/black-white-cnn-lb-0-75/notebook

## Import Packages

In [15]:
import pandas as pd 
import numpy as np 
import ast
import matplotlib.pyplot as plt
import cv2
import os
import glob
#from tqdm import tqdm_notebook
import torch
import torchvision
from torchvision import models,transforms,datasets
import torch.nn as nn
import torch.nn.functional as F
import pickle
#tqdm_notebook().pandas()

%matplotlib inline

## Transforming data to image

In [3]:
BASE_SIZE = 256

def draw_cv2(raw_strokes, size=256, lw=6):
    """Rasterize a list of strokes into a single-channel uint8 image.

    Each stroke is indexed as ``stroke[0]`` (first coordinate of every point)
    and ``stroke[1]`` (second coordinate). Consecutive points are joined by a
    line of intensity 255 and thickness ``lw`` on a black
    ``BASE_SIZE`` x ``BASE_SIZE`` canvas.

    Returns the canvas unchanged when ``size`` equals ``BASE_SIZE``; otherwise
    returns it resized to ``size`` x ``size``.
    """
    canvas = np.zeros((BASE_SIZE, BASE_SIZE), np.uint8)
    for stroke in raw_strokes:
        # A stroke with n points yields n - 1 line segments.
        n_segments = len(stroke[0]) - 1
        for start in range(n_segments):
            end = start + 1
            p_from = (stroke[0][start], stroke[1][start])
            p_to = (stroke[0][end], stroke[1][end])
            cv2.line(canvas, p_from, p_to, 255, lw)
    if size == BASE_SIZE:
        return canvas
    return cv2.resize(canvas, (size, size))

In [4]:
def df_to_image_array(df, size, lw=6):
    """Render every entry of ``df.drawing`` into a stack of images.

    Each drawing is rasterized with ``draw_cv2`` at ``size`` x ``size`` and
    line width ``lw``; pixel values are then divided by 255.

    Returns a float32 array of shape ``(len(df), size, size)``.
    """
    n_rows = len(df)
    images = np.zeros((n_rows, size, size))
    for row, strokes in enumerate(df.drawing.values):
        images[row] = draw_cv2(strokes, size=size, lw=lw)
    scaled = images / 255.
    return scaled.reshape((n_rows, size, size)).astype(np.float32)

## Building Neural Network of a Kaggler 

In [5]:
class classifier_kaggle(nn.Module):
    """Small CNN classifier: two conv + max-pool stages, then two dense layers.

    ``forward`` expects input of shape ``(batch, 1, 32, 32)``: ``dense1`` takes
    3136 = 64 * 7 * 7 features, which is what the two conv/pool stages produce
    for a 32x32 single-channel image. The output is per-class log-probabilities
    (``log_softmax``), suitable for ``nn.NLLLoss``.

    Args:
        num_classes: number of output classes. When omitted, a notebook-level
            ``num_classes`` global is used if one exists (this is what earlier
            revisions of the class relied on); otherwise
            ``DEFAULT_NUM_CLASSES`` is used.
    """

    # Matches the 114-class model whose repr is shown in this notebook.
    DEFAULT_NUM_CLASSES = 114

    def __init__(self, num_classes=None):
        super(classifier_kaggle, self).__init__()
        if num_classes is None:
            # Legacy fallback so existing `classifier_kaggle()` calls keep working
            # without raising NameError on a fresh kernel.
            num_classes = globals().get("num_classes", self.DEFAULT_NUM_CLASSES)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=128, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3)
        self.dense1 = nn.Linear(3136, 1024)
        self.dense2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        batch_size = x.shape[0]
        x = self.conv1(x)
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = self.conv2(x)
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        # Flatten everything but the batch dimension for the dense layers.
        x = x.reshape(batch_size, -1)
        x = self.dense1(x)
        x = F.relu(x)
        x = self.dense2(x)
        # Explicit class dimension: the implicit-dim form is deprecated and
        # resolved to dim=1 for this 2D tensor anyway.
        x = F.log_softmax(x, dim=1)
        return x

In [99]:
def test(model,data_loader):
    model.train(False)

    running_corrects = 0.0
    running_loss = 0.0
    size = 0

    for data in data_loader:
        inputs, labels = data    
        bs = labels.size(0)
        
        if use_gpu:
            inputs.cuda()
        
        outputs = model(inputs)        
        loss = loss_fn(outputs, labels)
        print("outputs", outputs)
        print('labels', labels)

        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == labels.data.type(torch.LongTensor))
        running_loss += loss.data
         
        size += bs

    print('Test - Loss: {:.4f} Acc: {:.4f}'.format(running_loss / size, running_corrects.item() / size))

In [6]:
def train(model,data_loader,loss_fn,optimizer,n_epochs=1):
    """Train ``model`` for ``n_epochs`` passes over ``data_loader``.

    Args:
        model: network returning per-class scores of shape ``(batch, classes)``.
        data_loader: iterable yielding ``(inputs, labels)`` batches.
        loss_fn: loss callable applied to ``(outputs, labels)``. The reported
            epoch loss is the accumulated loss divided by the number of
            samples, so it is a per-sample average only for a sum-reducing loss.
        optimizer: optimizer stepping the parameters of ``model``.
        n_epochs: number of passes over the data.

    Returns:
        ``(loss_train, acc_train)``: two numpy arrays of length ``n_epochs``
        holding the per-epoch loss and accuracy. One summary line is printed
        per epoch. The model is left in training mode.
    """
    model.train(True)

    # Run on whatever device the model lives on. `tensor.cuda()` is not
    # in-place, so the previous `inputs.cuda()` call never moved anything.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    loss_train = np.zeros(n_epochs)
    acc_train = np.zeros(n_epochs)

    for epoch_num in range(n_epochs):
        running_corrects = 0
        running_loss = 0.0
        size = 0

        for inputs, labels in data_loader:
            if device is not None:
                inputs = inputs.to(device)
                labels = labels.to(device)
            labels = labels.long()

            outputs = model(inputs)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            _, preds = torch.max(outputs, 1)
            # Accumulate plain Python numbers so no tensors are kept alive.
            running_corrects += (preds == labels).sum().item()
            running_loss += loss.item()

            size += labels.size(0)

        # An empty loader reports NaN instead of dividing by zero.
        epoch_loss = running_loss / size if size else float("nan")
        epoch_acc = running_corrects / size if size else float("nan")
        loss_train[epoch_num] = epoch_loss
        acc_train[epoch_num] = epoch_acc
        print('Train - Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))

    return loss_train, acc_train

In [None]:
def test(model,data_loader):
    model.train(False)

    running_corrects = 0.0
    running_loss = 0.0
    size = 0
    all_preds = []
    all_labels = []

    for data in data_loader:
        inputs, labels = data    
        bs = labels.size(0)
        
        if use_gpu:
            inputs.cuda()
        
        outputs = model(inputs)        
        loss = loss_fn(outputs, labels)

        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == labels.data.type(torch.LongTensor))
        running_loss += loss.data
        
        all_preds = np.append(all_preds, preds.numpy())
        all_labels = np.append(all_labels, labels.numpy())
         
        size += bs
        
    return all_preds, all_labels

    print('Test - Loss: {:.4f} Acc: {:.4f}'.format(running_loss / size, running_corrects.item() / size))

## Loading Data

In [72]:
# TO BE FILLED IN
# Paths are built with os.path.join so this cell works on Windows, Linux and
# macOS alike (hard-coded backslashes only resolve on Windows).
# The empty last component gives PATH_TO_DATA a trailing separator, so code
# that appends a file name by plain string concatenation keeps working.
PATH_TO_DATA = os.path.join("..", "data", "train_simplified", "")
PATH_TO_MAPPING = os.path.join("..", "output", "map.p")
PATH_TO_MODEL = os.path.join("..", "output", "model_kaggle.pt")

In [57]:
# NOTE: pickle can execute arbitrary code while loading; only open a mapping
# file that you produced yourself or otherwise trust.
# The `with` block closes the file handle, which the bare `open()` call leaked.
with open(PATH_TO_MAPPING, "rb") as mapping_file:
    mapping = pickle.load(mapping_file)

In [40]:
# USED DATA
# whole_size: rows read from each class file; train_size: how many of those
# rows are used for training (the remaining rows form the test split).
train_size = 460
whole_size = 500
print(
    "We trained our model on {} classes, with {} images per class.".format(
        len(mapping), train_size
    )
)

We trained our model on 114 classes, with 460 images per class.


In [60]:
# One CSV file per class name found in `mapping`.
# os.path.join adds a separator only when PATH_TO_DATA does not already end
# with one, so the result no longer depends on a trailing slash in the setting.
files = [os.path.join(PATH_TO_DATA, key + ".csv") for key in mapping.keys()]

In [32]:
train_list, test_list = [], []

for file in files:
    # Only the first `whole_size` rows of each class file are read.
    df = pd.read_csv(file, index_col='key_id', nrows=whole_size)
    df = df.drop(columns=['countrycode', 'recognized', 'timestamp'])
    # The leading `train_size` rows go to training, the remainder to testing.
    train_list.append(df[:train_size])
    test_list.append(df[train_size:])

df_train = pd.concat(train_list, axis=0)
df_test = pd.concat(test_list, axis=0)

# The `drawing` column is read as text; parse each entry into a Python object.
df_train["drawing"] = df_train["drawing"].map(ast.literal_eval)
df_test["drawing"] = df_test["drawing"].map(ast.literal_eval)

In [62]:
# Translate each class name in `word` through `mapping` (presumably class name
# -> integer label; the labels are later cast to int64).
# The column is created with bracket syntax: assigning to a new attribute
# (`df.y = ...`) does not add a column -- pandas warns about it -- and the
# value is lost as soon as the frame is copied or sliced.
df_train["y"] = df_train["word"].replace(mapping)
df_test["y"] = df_test["word"].replace(mapping)

In [65]:
# Rasterize both splits at 32x32 and pick up the matching label vectors.
x_train, y_train = df_to_image_array(df=df_train, size=32), df_train.y
x_test, y_test = df_to_image_array(df=df_test, size=32), df_test.y

# One shape per line, in the order: x_train, y_train, x_test, y_test.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep="\n")

(52440, 32, 32)
(52440,)
(4560, 32, 32)
(4560,)


In [68]:
# True when PyTorch can see a CUDA device; used below to configure the data loaders.
use_gpu = torch.cuda.is_available()

In [69]:
bs = 64
# Worker process and pinned memory are only requested when a GPU is available.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_gpu else {}

# TensorDataset wraps the whole arrays at once instead of building one small
# tensor pair per sample in a Python list. Each item is still
# (image of shape (1, H, W) as float32, scalar int64 label).
train_dataset = torch.utils.data.TensorDataset(
    torch.from_numpy(np.asarray(x_train, dtype=np.float32)).unsqueeze(1),
    torch.from_numpy(np.array(y_train, dtype=np.int64)),
)
test_dataset = torch.utils.data.TensorDataset(
    torch.from_numpy(np.asarray(x_test, dtype=np.float32)).unsqueeze(1),
    torch.from_numpy(np.array(y_test, dtype=np.int64)),
)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=bs, shuffle=True, **kwargs)
# Evaluation does not benefit from shuffling; a fixed order keeps runs reproducible.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=bs, shuffle=False, **kwargs)

## Load Model

In [73]:
# torch.load unpickles the file and can execute arbitrary code while doing so:
# only load a checkpoint from a source you trust.
# The saved object is the whole model, so the `classifier_kaggle` class must be
# defined before this cell runs.
# map_location: with CUDA available the checkpoint loads exactly as before; on a
# CPU-only machine a checkpoint saved from a GPU is remapped to the CPU instead
# of failing.
model = torch.load(PATH_TO_MODEL, map_location=None if torch.cuda.is_available() else "cpu")
model.eval()

classifier_kaggle(
  (conv1): Conv2d(1, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1))
  (dense1): Linear(in_features=3136, out_features=1024, bias=True)
  (dense2): Linear(in_features=1024, out_features=114, bias=True)
)

In [76]:
# Summed (not averaged) negative log-likelihood over the batch.
# `reduction='sum'` is the current spelling of the deprecated `size_average=False`.
loss_fn = nn.NLLLoss(reduction='sum')
learning_rate = 1e-3
# Training calls kept for reference; `kaggle_model` is not defined in the
# visible cells of this notebook, which loads a saved model instead.
#optimizer_cl = torch.optim.SGD(kaggle_model.parameters(), lr=learning_rate)
#l_t, a_t = train(kaggle_model,train_loader,loss_fn,optimizer_cl,n_epochs = 10)



## Results

In [155]:
# Evaluate the loaded model on the test loader. The unpacking relies on the
# `test` definition that returns (predictions, labels).
all_preds, all_labels = test(model,test_loader)



In [178]:
# Per-class accuracy: among the test samples whose true label is a given class,
# the fraction that the model predicted as that class.
preds_arr = np.asarray(all_preds)
labels_arr = np.asarray(all_labels)

result_dic = {}
for key, value in mapping.items():
    pred_of_class = preds_arr[labels_arr == value]
    if len(pred_of_class) == 0:
        # No test sample for this class: report NaN instead of dividing by zero.
        result_dic[key] = float("nan")
    else:
        result_dic[key] = float(np.mean(pred_of_class == value))
    

In [179]:
# Bare expression: the notebook shows the per-class accuracy dict as the cell output.
result_dic

{'cookie': 0.525,
 'computer': 0.75,
 'compass': 0.775,
 'coffee cup': 0.5,
 'cloud': 0.65,
 'clock': 0.25,
 'clarinet': 0.425,
 'circle': 0.725,
 'church': 0.525,
 'chandelier': 0.45,
 'chair': 0.8,
 'cello': 0.775,
 'cell phone': 0.65,
 'ceiling fan': 0.475,
 'cat': 0.425,
 'castle': 0.475,
 'carrot': 0.575,
 'car': 0.55,
 'canoe': 0.75,
 'cannon': 0.55,
 'candle': 0.65,
 'campfire': 0.6,
 'camouflage': 0.175,
 'camera': 0.65,
 'camel': 0.6,
 'calendar': 0.55,
 'calculator': 0.6,
 'cake': 0.3,
 'cactus': 0.45,
 'butterfly': 0.775,
 'bush': 0.325,
 'bus': 0.75,
 'bulldozer': 0.65,
 'bucket': 0.85,
 'broom': 0.775,
 'broccoli': 0.65,
 'bridge': 0.525,
 'bread': 0.5,
 'brain': 0.5,
 'bracelet': 0.4,
 'bowtie': 0.85,
 'bottlecap': 0.275,
 'boomerang': 0.425,
 'book': 0.525,
 'blueberry': 0.35,
 'blackberry': 0.375,
 'birthday cake': 0.575,
 'bird': 0.175,
 'binoculars': 0.35,
 'bicycle': 0.775,
 'bench': 0.55,
 'belt': 0.6,
 'bee': 0.4,
 'bed': 0.425,
 'beard': 0.65,
 'bear': 0.45,
 'bea