# Classification based on Quickdraw Dataset

### Imports

In [24]:
import os
from PIL import Image
import json

import random
import numpy as np
import cairocffi as cairo

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.io import read_image, ImageReadMode
from torch.optim.lr_scheduler import StepLR

### Functions to preprocess the data

In [12]:
# Get the classes that will be used
def load_classes(file_path):
    res = {}
    count = 0
    for line in open(file_path, 'r'):
        res[count] = line.rstrip()
        count+=1
    return res

In [13]:
### The code is taken from the original GitHub of the QuickDrawDataset
def vector_to_raster(vector_images, side=64, line_diameter=16, padding=16, bg_color=(0,0,0), fg_color=(1,1,1)):
    
    original_side = 256.
    
    surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, side, side)
    ctx = cairo.Context(surface)
    ctx.set_antialias(cairo.ANTIALIAS_BEST)
    ctx.set_line_cap(cairo.LINE_CAP_ROUND)
    ctx.set_line_join(cairo.LINE_JOIN_ROUND)
    ctx.set_line_width(line_diameter)

    # scale to match the new size
    # add padding at the edges for the line_diameter
    # and add additional padding to account for antialiasing
    total_padding = padding * 2. + line_diameter
    new_scale = float(side) / float(original_side + total_padding)
    ctx.scale(new_scale, new_scale)
    ctx.translate(total_padding / 2., total_padding / 2.)

    raster_images = []
    for vector_image in vector_images:
        # clear background
        ctx.set_source_rgb(*bg_color)
        ctx.paint()
        
        bbox = np.hstack(vector_image).max(axis=1)
        offset = ((original_side, original_side) - bbox) / 2.
        offset = offset.reshape(-1,1)
        centered = [stroke + offset for stroke in vector_image]

        # draw strokes, this is the most cpu-intensive part
        ctx.set_source_rgb(*fg_color)        
        for xv, yv in centered:
            ctx.move_to(xv[0], yv[0])
            for x, y in zip(xv, yv):
                ctx.line_to(x, y)
            ctx.stroke()

        data = surface.get_data()
        raster_image = np.copy(np.asarray(data)[::4])
        raster_images.append(raster_image)
    
    return raster_images

In [14]:
# return the data as an array containing images as 1D arrays
def load_data(sample, n_images, dimension):
    sample_data = [json.loads(line) for line in open(f'data/full_simplified_{sample}.ndjson', 'r')]
    sample_data = random.sample(sample_data, k=n_images)
    vector_images = [drawing_data['drawing'] for drawing_data in sample_data]
    return np.array(vector_to_raster(vector_images, side=dimension))

In [15]:
# Saves the datasets as images in the "images" folder
def save_png(drawing_class, data, dimension):
    if not os.path.exists(f"images/{drawing_class}"): 
        os.makedirs(f"images/{drawing_class}")

    count = 0
    for image_arr in data:
        image_arr = np.reshape(image_arr, (dimension, -1))
        img = Image.fromarray(image_arr, "L")
        img.save(f"images/{drawing_class}/{drawing_class}_{count}.png")
        count += 1
    

In [16]:
def preprocess_data(classes, n_images, img_dim, train_prop=0.8, save_images = False):
  data_X = [] # To get the mean and standard deviation of the images
  train_file_names = []
  train_labels = []
  test_file_names = []
  test_labels = []
  
  for key, drawing_class in classes.items():
      print(f"Loading {drawing_class} data")
      data = load_data(drawing_class, n_images, dimension = img_dim)
      data_X.append(data)
      if(save_images):
        save_png(drawing_class, data, img_dim)
      file_names = [f"images/{drawing_class}/{drawing_class}_{i}.png" for i in range(len(data))]
      labels = np.full(len(data), key)
      train_file_names.append(file_names[:(int)(n_images*train_prop)])
      train_labels.append(labels[:(int)(n_images*train_prop)])
      test_file_names.append(file_names[(int)(n_images*train_prop):])
      test_labels.append(labels[(int)(n_images*train_prop):])
  
  # Compute the mean and standard deviation of the images
  data_X = np.array(data_X)
  mean = np.mean(data_X)
  std = np.std(data_X)

  # Save the file names and labels
  train_file_names = np.array(train_file_names).flatten()
  json.dump(train_file_names.tolist(), open("train_file_names.json", 'w'))
  train_labels = np.array(train_labels).flatten()
  json.dump(train_labels.tolist(), open("train_labels.json", 'w'))
  test_file_names = np.array(test_file_names).flatten()
  json.dump(test_file_names.tolist(), open("test_file_names.json", 'w'))
  test_labels = np.array(test_labels).flatten()
  json.dump(test_labels.tolist(), open("test_labels.json", 'w'))
  
  return mean, std

### Dataset class that represent the Dataset

In [101]:
class QuickDrawDataset(Dataset):
    def __init__(self, file_names, labels, transform=None):
        self.file_names = file_names
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = Image.open("images/axe/axe_0.png")
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

### Functions for the CNN

In [124]:
class Net(nn.Module):
    def __init__(self, n_classes):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, 3) # To increase maybe to 16
        self.conv2 = nn.Conv2d(8, 16, 3)# To increase maybe to 32
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(0.1)
        self.dropout2 = nn.Dropout(0.3)
        self.fc1 = nn.Linear(400, 128)
        self.fc2 = nn.Linear(128, n_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.dropout1(x)
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout2(x)
        x = torch.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        output = F.log_softmax(x)
        return output

In [136]:
def train(model, device, train_loader, optimizer, epoch, log_interval):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        print(output)
        print(target)
        loss = F.nll_loss(output, target[0])
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

### Main part

Variables describing the data

In [104]:
# Images will be 64x64
img_dim = 28
# Number of images taken for each animal
n_images = 1000
# Proportion used for to train the model
train_prop = 0.8
learning_rate = 0.001
gamma = 0.7

Store the classes in a dictionnary with their index

In [105]:
classes = load_classes("class_names.txt")
n_classes = len(classes)
print(classes)

{0: 'axe', 1: 'bicycle'}




The preprocess function do multiple thingd : <br>
- Build the png images from the json files. Like this the PyTorch Dataset object will search the images directly in the files
- Separate the dataset in train and test datasets by storing the names of the 
- Process the mean and the standard deviation and return it to later normalize the datasets

In [135]:
mean, std = preprocess_data(classes, n_images=n_images, img_dim=img_dim, train_prop=train_prop, save_images = False)

train_files = json.load(open("train_file_names.json", 'r'))
test_files = json.load(open("test_file_names.json", 'r'))

temp_train_labels = json.load(open("train_labels.json", 'r'))
train_labels = []
for label in temp_train_labels:
    a = np.zeros((n_classes,), dtype=int); 
    a[label] = 1 
    train_labels.append(a)

temp_test_labels = json.load(open("test_labels.json", 'r'))
test_labels = []
for label in temp_test_labels:
    a = np.zeros((n_classes,), dtype=int); 
    a[label] = 1 
    test_labels.append(a)

Loading axe data


Loading bicycle data


Create the Datasets

In [134]:
preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
        ])

train_dataset = QuickDrawDataset(train_files, train_labels, transform=preprocess)
test_dataset = QuickDrawDataset(test_files, test_labels, transform=preprocess)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = 64, shuffle=True)

In [107]:
image = Image.open("images/axe/axe_0.png")
image = preprocess(image)
print(image.shape)

torch.Size([1, 28, 28])


In [137]:
model = Net(n_classes)

optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

epochs = 10
log_interval = 10

for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, epoch, log_interval)
    scheduler.step()

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x25600 and 400x128)

For the datas :
- sort to keep only the recognized drawings

For the CNN:
- (batch normailzation)
- conv relu pooling conv relu pooling dropout

For vizualization and documentation:
- Confusion Matrix
- evolution of the error with epochs


### The model