# Quickdraw Data Preparation

This notebook creates `train-{sz}` and `test-{sz}` folders full of .png files rendered from the .csv files found in the `data/train` folder (and from `data/test_simplified.csv` for the test set).

All credit for this work goes to radekosmulski for his inspiration and code.
This code is based on code from a fast.ai MOOC that will be publicly available in Jan 2019

In [None]:
%matplotlib inline
import ast
import shutil
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL

from fastai import *
from fastai.vision import *

In [None]:
# Target image side length in pixels; also names the output folders train-{sz} and test-{sz}.
sz = 128
# Fraction of each CSV's row count to sample for the training set (0.01 keeps about 1%).
r = 0.01

In [None]:
### CAUTION ### The following code will delete any work previously done, be warned!
shutil.rmtree(f'data/train-{sz}', ignore_errors=True) # deleting whatever we have saved earlier
# Path.mkdir instead of os.makedirs: `os` is never imported explicitly in this notebook.
# No exist_ok, so this still fails loudly if the old folder could not be removed.
Path(f'data/train-{sz}').mkdir(parents=True) # best to have where to save the images

In [None]:
# https://www.kaggle.com/gaborfodor/greyscale-mobilenet-lb-0-892
BASE_SIZE = 256
def draw_cv2(raw_strokes, size=256, lw=4, time_color=False):
    """Rasterize a drawing into a square greyscale image.

    Parameters
    ----------
    raw_strokes : sequence of strokes. Each stroke is indexed as ``stroke[0]``
        (x coordinates) and ``stroke[1]`` (y coordinates); consecutive points
        are joined by line segments on a BASE_SIZE x BASE_SIZE canvas.
    size : side length in pixels of the returned image.
    lw : line width passed to ``cv2.line``.
    time_color : when True, later strokes are drawn darker (255 minus 13 per
        stroke index, with the index capped at 10); otherwise every stroke is 255.

    Returns
    -------
    np.ndarray of dtype uint8 with shape (size, size).
    """
    img = np.zeros((BASE_SIZE, BASE_SIZE), np.uint8)
    for t, stroke in enumerate(raw_strokes):
        # The colour depends only on the stroke index, so compute it once per stroke.
        color = 255 - min(t, 10) * 13 if time_color else 255
        xs, ys = stroke[0], stroke[1]
        for i in range(len(xs) - 1):
            cv2.line(img, (xs[i], ys[i]), (xs[i + 1], ys[i + 1]), color, lw)
    # 4px black margin on every side so lines touching the edge are not clipped;
    # the canvas is now (BASE_SIZE + 8) pixels square.
    img = cv2.copyMakeBorder(img,4,4,4,4,cv2.BORDER_CONSTANT)
    # Compare against the padded shape, not BASE_SIZE: otherwise size == BASE_SIZE
    # would hand back the 264x264 padded canvas instead of a size x size image.
    if img.shape[:2] != (size, size):
        return cv2.resize(img, (size, size), interpolation=cv2.INTER_LINEAR)
    return img

In [None]:
def save_ims_from_df(path):
    """Render a random sample of the recognized drawings in one CSV to PNG files.

    Reads the CSV at ``path``, keeps rows where ``recognized`` is True, samples
    ``r`` times the CSV's total row count from them, and writes each drawing as
    a 3-channel PNG to ``data/train-{sz}/{label}_{idx}.png``. ``label`` is the
    row's ``word`` with whitespace replaced by underscores and ``idx`` is the
    row's index in the CSV. Uses the notebook globals ``r`` and ``sz``.
    """
    df = pd.read_csv(path)
    recognized = df[df.recognized==True]
    # The sample size is based on the total row count; cap it so sampling cannot
    # ask for more rows than were recognized (which raises ValueError).
    n_keep = min(int(r * df.shape[0]), len(recognized))
    selected = recognized.sample(n_keep)
    for idx, drawing, word in zip(selected.index, selected.drawing, selected.word):
        # literal_eval parses the stroke-list literal without executing code from the CSV.
        strokes = ast.literal_eval(drawing)
        label = '_'.join(word.split())
        # Render at sz so the image size always matches the train-{sz} folder name.
        ary = draw_cv2(strokes, size=sz)
        rgb_ary = np.repeat(ary[:,:,None], 3, -1)  # greyscale -> 3 identical channels
        PIL.Image.fromarray(rgb_ary).save(f'data/train-{sz}/{label}_{idx}.png')

In [None]:
%%time
with ThreadPoolExecutor(12) as e: e.map(save_ims_from_df, Path('data/train').iterdir())

Sanity check

In [None]:
ls data/train-{sz} -lhS | head -n 10

In [None]:
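# Uncomment to preview one generated image. The numeric suffix is the sampled row index,
# so substitute a file name taken from the listing above.
# PIL.Image.open(f'data/train-{sz}/blackberry_59082.png')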

In [None]:
# Load the test-set CSV for a quick inspection before rendering it.
df = pd.read_csv('data/test_simplified.csv')

In [None]:
# Preview the first rows of the test CSV.
df.head()

In [None]:
# (rows, columns) of the test CSV.
df.shape

In [None]:
# CAUTION: wipes any previously rendered test images before recreating the folder.
shutil.rmtree(f'data/test-{sz}', ignore_errors=True)
# Path.mkdir instead of os.makedirs: `os` is never imported explicitly in this notebook.
# No exist_ok, so this still fails loudly if the old folder could not be removed.
Path(f'data/test-{sz}').mkdir(parents=True)

In [None]:
def save_test_ims_from_df(path):
    """Render every drawing in the CSV at ``path`` to a PNG file.

    Each row's ``drawing`` is rasterized with ``draw_cv2`` and saved as a
    3-channel PNG to ``data/test-{sz}/{key_id}.png``. Uses the notebook
    global ``sz``.
    """
    df = pd.read_csv(path)
    for key_id, drawing in zip(df.key_id, df.drawing):
        # literal_eval parses the stroke-list literal without executing code from the CSV.
        strokes = ast.literal_eval(drawing)
        # Render at sz so the image size always matches the test-{sz} folder name.
        ary = draw_cv2(strokes, size=sz)
        rgb_ary = np.repeat(ary[:,:,None], 3, -1)  # greyscale -> 3 identical channels
        PIL.Image.fromarray(rgb_ary).save(f'data/test-{sz}/{key_id}.png')

In [None]:
# Render every test drawing; %time reports how long the run takes.
%time save_test_ims_from_df('data/test_simplified.csv')

In [None]:
ls -lht data/test-128 | head -n 10

In [None]:
# Spot-check one rendered test image; the folder follows sz instead of a hard-coded 128.
PIL.Image.open(f'data/test-{sz}/9999968529902445.png')