Quick, Draw! is an online game developed by Google that challenges players to draw a picture of an object. You can play the game here: <https://quickdraw.withgoogle.com/>. The game prompts users to draw an image depicting a certain category, such as “marker,” “table,” etc. The aim of this competition is to build a better classifier for the existing Quick, Draw! dataset. The challenging part is that the data is very noisy.

In this kernel, I visualize a few sample drawings from every category in the train data set to get a sense of the type of data we are dealing with.


# Load Libraries

In [None]:
%matplotlib inline
import matplotlib.pylab
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
%pylab inline

import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from tqdm import tqdm_notebook
import ast

# Apply seaborn's "white" style to the figures created below.
sns.set_style("white")
# Use the "notebook" plotting context with fonts scaled by 1.5 and a default
# line width of 2.5.
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})

In [None]:
# List the files of the training directory; the category names are derived
# from the file names below.
train_path = '../input/train_simplified/'
files = os.listdir(train_path)

# A category name is whatever precedes the first dot of its file name.
categories = [file_name.partition('.')[0] for file_name in files]

print('Total number of categories: ',len(categories))
print('Few Example Categories',categories[:5])

# Reading data from all the categories

In [None]:
# Read the first 10 rows of every category file (indexed by the 'word' column)
# and combine them with a single concatenation. Growing a DataFrame with
# DataFrame.append inside the loop re-copies all accumulated rows on every
# iteration, and DataFrame.append is not available in recent pandas releases.
frames = [
    pd.read_csv(train_path + file, index_col='word', nrows=10)
    for file in tqdm_notebook(files)
]
# pd.concat rejects an empty list, so keep the empty-frame result in that case.
train_data = pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Show 10 randomly sampled rows as a quick sanity check of the loaded data.
train_data.sample(10)

In [None]:
# Turn the index back into regular columns so the data can be grouped by the
# 'word' column.
train_data = train_data.reset_index()

# Attach to every row the number of rows that share its word.
rows_per_word = train_data.groupby('word')['word']
train_data['word_count'] = rows_per_word.transform('count')

# Histogram of the per-row counts (no density estimate).
sns.distplot(train_data['word_count'], kde=False)
plt.title('Word Count Distribution in Train Set')

# Generating the Images

In [None]:
# Render one image per category: a strip of up to 9 sample drawings with the
# category name as the title. The results are gathered in `img_ar`, an array
# of shape (number of categories, height, width, 3), or None when there are no
# categories. `width` and `height` (pixel size of the last rendered figure) are
# left defined for the cells that follow.

# Make sure rows can be selected by category name. Compare with `!=`: the
# identity test `is not 'word'` depends on string interning and is not a
# reliable equality check.
if train_data.index.name != 'word':
    train_data = train_data.set_index('word')

samples_per_category = 9
category_images = []
for cat in tqdm_notebook(categories):
    df = train_data[train_data.index == cat]
    # Each 'drawing' cell is the textual form of a list of strokes; every
    # stroke unpacks into its x and y coordinate sequences.
    drawings = [
        ast.literal_eval(pts)
        for pts in df[:samples_per_category]['drawing'].values
    ]

    # Draw off-screen on an Agg canvas so nothing is displayed in the notebook.
    fig = Figure()
    ax = fig.subplots(1, samples_per_category)
    canvas = FigureCanvas(fig)

    # Hide the axes of every panel once, including panels that end up without
    # any stroke, instead of once per plotted stroke.
    for panel in ax:
        panel.axis('off')
    for i, drawing in enumerate(drawings):
        for x, y in drawing:
            ax[i].plot(x, y, marker='.')
    fig.suptitle(cat, fontsize=30)

    canvas.draw()       # draw the canvas, cache the renderer
    # np.frombuffer is the supported way to decode raw bytes (np.fromstring is
    # deprecated for binary input).
    # NOTE(review): newer Matplotlib releases appear to favour buffer_rgba()
    # over tostring_rgb() - confirm against the installed version.
    image = np.frombuffer(canvas.tostring_rgb(), dtype='uint8')
    width, height = fig.get_size_inches() * fig.get_dpi()
    category_images.append(image.reshape(int(height), int(width), 3))

# Stack once at the end; concatenating inside the loop copies the whole
# accumulated array on every iteration.
img_ar = np.stack(category_images) if category_images else None

# Visualizing all the images

In [None]:
# Show all category images in one grid, 20 images per row.
DataRange = np.absolute(img_ar).max()
EXTENT = [0, width, 0, height]
NORM = matplotlib.colors.Normalize(vmin=-DataRange, vmax=DataRange, clip=True)

grid_width = 20
# Round up so a final, partially filled row is still created; floor division
# dropped it and indexing past the last row raised an IndexError.
grid_height = max(1, -(-len(categories) // grid_width))
# squeeze=False keeps `axs` two-dimensional even when there is a single row.
# NOTE(review): figsize is given in inches as (width, height), while the values
# passed here are the pixel (height, width) of one image; kept unchanged to
# preserve the existing output size - confirm this is intended.
fig, axs = plt.subplots(
    grid_height,
    grid_width,
    squeeze=False,
    figsize=(img_ar.shape[1], img_ar.shape[2]),
)

# Hide every axis up front so unused cells of the last row stay blank.
for ax in axs.flat:
    ax.axis('off')

# axs.flat walks the grid row by row, i.e. image i lands in row i // grid_width
# and column i % grid_width.
for ax, img in zip(axs.flat, img_ar):
    ax.imshow(img, norm=NORM, extent=EXTENT, aspect=1, interpolation='none')

plt.show()

# You can open the above image in a new tab to get a better resolution. The image may take some time to load.
Alternatively, you can view each image individually, as shown below.

In [None]:
# Display the first image stored in img_ar on its own.
plt.imshow(img_ar[0])