# Introduction

The Quick Draw Dataset is a collection of 50 million drawings across 345 categories, contributed by players of the game Quick Draw. 

![alt text](https://raw.githubusercontent.com/googlecreativelab/quickdraw-dataset/master/preview.jpg)

# The  Raw Dataset

This table shows a description of the fields of each entry in the dataset

>Key | Type | Description
>--- | ---
>key_id 	| 64-bit unsigned integer  |	A unique identifier across all drawings.
> word |	string 	|Category the player was prompted to draw.
>recognized |	boolean 	|Whether the word was recognized by the game.
> timestamp 	| datetime 	| When the drawing was created.
> countrycode |	string |	A two letter country code 
> drawing |	string |	A JSON array representing the vector drawing

# Imports

In [1]:
import os
import io
import random
import glob
import math
import base64
import json
import numpy as np
import urllib.request
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
from IPython.display import HTML

In [2]:
import nltk
nltk.download('popular')
import jieba
import jieba.analyse
import jieba.posseg

[nltk_data] Downloading collection 'popular'
[nltk_data]    | 
[nltk_data]    | Downloading package cmudict to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/cmudict.zip.
[nltk_data]    | Downloading package gazetteers to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/gazetteers.zip.
[nltk_data]    | Downloading package genesis to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/genesis.zip.
[nltk_data]    | Downloading package gutenberg to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/gutenberg.zip.
[nltk_data]    | Downloading package inaugural to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/inaugural.zip.
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping corpora/movie_reviews.zip.
[nltk_data]    | Downloading package names to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/names.zip.
[nltk_data]    | Downloading package shakespeare to /root/nltk_data...
[nlt

# Create Chinese - English Dictionary

In [3]:
fd = open("/content/Chinese_Classes.txt", "r")
data = fd.read()
chinese_class = data.split("\n")
fd.close()

fd = open("/content/English_Classes.txt", "r")
data = fd.read()
english_class = data.split("\n")
fd.close()

chi_eng = dict(zip(chinese_class, english_class))
print(chi_eng)

{'航空母艦': 'aircraft carrier', '飛機': 'airplane', '鬧鐘': 'alarm clock', '救護車': 'ambulance', '天使': 'angel', '動物遷徙': 'animal migration', '螞蟻': 'ant', '砧': 'anvil', '蘋果': 'apple', '手臂': 'arm', '蘆筍': 'asparagus', '斧頭': 'axe', '背包': 'backpack', '香蕉': 'banana', '繃帶': 'bandage', '穀倉': 'barn', '棒球': 'baseball', '棒球棒': 'baseball bat', '籃子': 'basket', '籃球': 'basketball', '蝙蝠': 'bat', '浴缸': 'bathtub', '海灘': 'beach', '熊': 'bear', '鬍鬚': 'beard', '床': 'bed', '蜜蜂': 'bee', '腰帶': 'belt', '長椅': 'couch', '自行車': 'bicycle', '望遠鏡': 'binoculars', '鳥': 'bird', '生日蛋糕': 'birthday cake', '黑莓': 'blackberry', '藍莓': 'blueberry', '書': 'book', '迴旋鏢': 'boomerang', '瓶蓋': 'bottlecap', '領結': 'bowtie', '手鐲': 'bracelet', '腦': 'brain', '麵包': 'bread', '橋': 'bridge', '西蘭花': 'broccoli', '掃帚': 'broom', '桶': 'bucket', '推土機': 'bulldozer', '公共汽車': 'bus', '襯套': 'bush', '蝴蝶': 'butterfly', '仙人掌': 'cactus', '蛋糕': 'cake', '計算器': 'calculator', '日曆': 'calendar', '駱駝': 'camel', '相機': 'camera', '偽裝': 'camouflage', '營火': 'campfire', '蠟燭': 'cand

# Download the Dataset 

Loop over the classes and download the currospondent data. We only download 10 classes for visualization. 

In [57]:
!mkdir data
classes = ['sun', 'apple', 'house', 'star', 'diamond', 'car', 'tree', 'cat', 'camera', 'flower']

mkdir: cannot create directory ‘data’: File exists


In [46]:
def download():  
  #base link  
  base = 'https://storage.googleapis.com/quickdraw_dataset/full/'
  
  #download each class as json files 
  for c in classes:
    path = f'{base}raw/{c}.ndjson'
    print(path)
    urllib.request.urlretrieve(path, f'data/{c}.ndjson')

In [47]:
download() 

https://storage.googleapis.com/quickdraw_dataset/full/raw/tree.ndjson
https://storage.googleapis.com/quickdraw_dataset/full/raw/sun.ndjson
https://storage.googleapis.com/quickdraw_dataset/full/raw/apple.ndjson
https://storage.googleapis.com/quickdraw_dataset/full/raw/flower.ndjson
https://storage.googleapis.com/quickdraw_dataset/full/raw/car.ndjson
https://storage.googleapis.com/quickdraw_dataset/full/raw/star.ndjson
https://storage.googleapis.com/quickdraw_dataset/full/raw/house.ndjson
https://storage.googleapis.com/quickdraw_dataset/full/raw/camera.ndjson
https://storage.googleapis.com/quickdraw_dataset/full/raw/diamond.ndjson
https://storage.googleapis.com/quickdraw_dataset/full/raw/cat.ndjson


# Load to Memory 

Load the `drawing` information for each file. Each drawing contains a number of strokes and each stroke contain the array $[x, y, t]$ where $x,y$ are the coordinates as array and $t$ is the time stamps. 

In [48]:
drawings = []

files = os.listdir('data')

i = 0 

for file in files:
  contents = open(f'data/{file}', "r").read() 
  data = contents.split('\n')
  
  #load samples for each class 
  for h in data[:1]:
    drawings.append(json.loads(h)['drawing'])
  i += 1

In [49]:
#the first stroke of the first drawing
[x, y, t] = drawings[0][0]

# Animation

In [50]:
def create_animation(drawing, fps = 30, idx = 0, lw = 5): 
  
  seq_length = 0 
  
  xmax = 0 
  ymax = 0 
  
  xmin = math.inf
  ymin = math.inf
  
  #retreive min,max and the length of the drawing  
  for k in range(0, len(drawing)):
    x = drawing[k][0]
    y = drawing[k][1]

    seq_length += len(x)
    xmax = max([max(x), xmax]) 
    ymax = max([max(y), ymax]) 
    
    xmin = min([min(x), xmin]) 
    ymin = min([min(y), ymin]) 
    
  i = 0 
  j = 0
  
  # First set up the figure, the axis, and the plot element we want to animate
  fig = plt.figure()
  ax = plt.axes(xlim=(xmax+lw, xmin-lw), ylim=(ymax+lw, ymin-lw))
  ax.set_facecolor("white")
  line, = ax.plot([], [], lw=lw)

  #remove the axis 
  ax.grid = False
  ax.set_xticks([])
  ax.set_yticks([])
  
  # initialization function: plot the background of each frame
  def init():
      line.set_data([], [])
      return line, 

  # animation function.  This is called sequentially
  def animate(frame):    
    nonlocal i, j, line
    x = drawing[i][0]
    y = drawing[i][1]
    line.set_data(x[0:j], y[0:j])
    
    if j >= len(x):
      i +=1
      j = 0 
      line, = ax.plot([], [], lw=lw)
      
    else:
      j += 1
    return line,
  
  # call the animator.  blit=True means only re-draw the parts that have changed.
  anim = animation.FuncAnimation(fig, animate, init_func=init,
                                 frames= seq_length + len(drawing), blit=True)
  plt.close()
  
  # save the animation as an mp4.  
  anim.save(f'video.mp4', fps=fps, extra_args=['-vcodec', 'libx264'])

# Get The Nouns

In [51]:
posseg = jieba.posseg.POSTokenizer(tokenizer=None)
def ProperNounExtractor(text):
    sentences = jieba.posseg.POSTokenizer(tokenizer=None)
    words = posseg.cut(text)
    for word, tag in words:
        #print(word, tag)
        if tag == 'n':
            return word

# Run

In [52]:
text_1 = "右邊看到一輛車."
text_2 = "我走進屋."
text_3 = "我愛吃蘋果."
text_4 = "我有一隻貓"
nouns = ProperNounExtractor(text_2)
print(nouns)

Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.932 seconds.
Prefix dict has been built successfully.


屋


In [61]:
#create animation for a random drawing 
index = classes.index(chi_eng.get(nouns))
print(classes[index])
drawing = drawings[index]
create_animation(drawing)

house


In [62]:
video = io.open('video.mp4', 'r+b').read()

encoded = base64.b64encode(video)
HTML(data='''<video alt="video" autoplay loop>
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii')))