In [0]:
# Install the Kaggle CLI and place the API token at ~/.kaggle/kaggle.json.
!pip install kaggle --upgrade  > /dev/null
# NOTE(review): kaggle.json is cloned from a gist into ./data. Credentials fetched
# from a shared URL are easy to leak - consider uploading the token per session instead.
!git clone https://gist.github.com/soulitzer/810d10e3b42666b885715bb872b3ea10 data >/dev/null 2>&1
# Recreate ~/.kaggle from scratch so no stale token is left behind.
!rm -rf ~/.kaggle && mkdir ~/.kaggle/
!cp data/kaggle.json ~/.kaggle/kaggle.json
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [0]:
# Download the two competition files used in this notebook (CLI stdout is discarded).
!kaggle competitions download -c quickdraw-doodle-recognition -f test_simplified.csv > /dev/null
!kaggle competitions download -c quickdraw-doodle-recognition -f train_simplified.zip > /dev/null
# Alternative (disabled): download every file of the competition instead of the two above.
# !kaggle competitions download -c quickdraw-doodle-recognition

In [6]:
# Unzip - takes a couple min to run
!mkdir train_simple_csvs
# !mkdir train_raw_csvs
# !unzip train_raw.zip -d train_raw_csvs > /dev/null
!unzip train_simplified.zip -d train_simple_csvs > /dev/null

mkdir: cannot create directory ‘train_simple_csvs’: File exists
replace train_simple_csvs/fence.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


In [0]:
# dask.bag is used further down to map the stroke-to-image conversion over each class's drawings.
!pip install dask[bag] --upgrade > /dev/null

In [8]:
import os
from glob import glob
import re
import ast
import numpy as np 
import pandas as pd
from PIL import Image, ImageDraw 
from tqdm import tqdm
from dask import bag

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input
from keras.layers.normalization import BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.metrics import top_k_categorical_accuracy
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

Using TensorFlow backend.


In [0]:
# Label dictionary and global parameters.
# Every file in the training directory is treated as one class; the class name is
# the file name minus its last four characters, with spaces turned into underscores.
classfiles = os.listdir('./train_simple_csvs')
numstonames = {}
for class_index, file_name in enumerate(classfiles):
    numstonames[class_index] = file_name[:-4].replace(" ", "_")

num_classes = 340       # number of class files consumed when building the training set
imheight = 32           # height of the rasterized drawings, in pixels
imwidth = 32            # width of the rasterized drawings, in pixels
ims_per_class = 2000    # maximum number of drawings kept per class

In [0]:
# Conversion to image from sequence
def draw_it(strokes):
    """Rasterize one drawing into a small 2-D array.

    Parameters
    ----------
    strokes : str
        Python-literal string (parsed with ``ast.literal_eval``) holding a list
        of strokes. Element 0 of each stroke is the sequence of x coordinates
        and element 1 the sequence of y coordinates.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(imheight, imwidth)`` holding the pixel values divided
        by 255 (background drawn with value 255, strokes with value 0).
    """
    # Draw on a 256x256 canvas first, then shrink to the model's input size.
    image = Image.new("P", (256, 256), color=255)
    image_draw = ImageDraw.Draw(image)
    for stroke in ast.literal_eval(strokes):
        xs, ys = stroke[0], stroke[1]
        # Connect consecutive points; a single-point stroke produces no segment.
        for i in range(len(xs) - 1):
            image_draw.line([xs[i], ys[i], xs[i + 1], ys[i + 1]],
                            fill=0, width=5)
    # PIL's resize expects (width, height), not (height, width).
    image = image.resize((imwidth, imheight))
    return np.array(image) / 255.

In [11]:
# Get training and testing data
# Result: one 2-D array with a row per drawing; column 0 holds the class index and
# the remaining imheight*imwidth columns hold the flattened image.
train_grand = []
# Build the paths from `classfiles` (the list numstonames was built from) so that
# class index i is guaranteed to refer to numstonames[i].
class_paths = [os.path.join('./train_simple_csvs', name) for name in classfiles]
for i, c in enumerate(tqdm(class_paths[0: num_classes])):
    # Read 25% more rows than needed so enough remain after dropping unrecognized drawings.
    train = pd.read_csv(c, usecols=['drawing', 'recognized'], nrows=ims_per_class*5//4)
    train = train[train.recognized == True].head(ims_per_class)
    if train.empty:
        # Nothing usable for this class; skip it instead of failing on an empty reshape.
        continue
    imagebag = bag.from_sequence(train.drawing.values).map(draw_it)

    trainarray = np.array(imagebag.compute())
    # Use the actual row count: a class may have fewer than ims_per_class recognized drawings.
    trainarray = np.reshape(trainarray, (train.shape[0], -1))
    labelarray = np.full((train.shape[0], 1), i)
    trainarray = np.concatenate((labelarray, trainarray), axis=1)
    train_grand.append(trainarray)

# Stack the per-class blocks row-wise; works for any number of classes and for
# classes of different sizes. Row order is irrelevant because rows are shuffled later.
train_grand = np.concatenate(train_grand, axis=0)

del trainarray
del train

100%|██████████| 340/340 [10:34<00:00,  1.88s/it]


In [0]:
# Shuffle the rows once, then hold out the first `valfrac` share for validation.
valfrac = 0.1
cutpt = int(valfrac * train_grand.shape[0])

# A dedicated, seeded generator keeps the train/validation split reproducible
# across kernel restarts without touching NumPy's global random state.
split_seed = 42
np.random.RandomState(split_seed).shuffle(train_grand)

# Column 0 is the class index, the remaining columns are the flattened image.
y_train, X_train = train_grand[cutpt: , 0], train_grand[cutpt: , 1:]
y_val, X_val = train_grand[0:cutpt, 0], train_grand[0:cutpt, 1:]

del train_grand

In [13]:
# One-hot encode the labels and give the flattened images an explicit
# (rows, height, width, channel) layout.
image_shape = (imheight, imwidth, 1)

y_train = keras.utils.to_categorical(y_train, num_classes)
y_val = keras.utils.to_categorical(y_val, num_classes)
X_train = X_train.reshape((X_train.shape[0],) + image_shape)
X_val = X_val.reshape((X_val.shape[0],) + image_shape)

# Show the four shapes, one per line.
print(*(arr.shape for arr in (y_train, X_train, y_val, X_val)), sep=" \n ")

(612000, 340) 
 (612000, 32, 32, 1) 
 (68000, 340) 
 (68000, 32, 32, 1)


In [0]:
# Top-3 classification accuracy
def top_3_accuracy(x,y):
    """Metric wrapper: ``top_k_categorical_accuracy`` with ``k`` fixed to 3.

    ``x`` and ``y`` are forwarded unchanged as the first and second arguments.
    """
    return top_k_categorical_accuracy(x, y, k=3)

In [42]:
# Basic CNN model: two conv/pool stages, one dense hidden layer, softmax output.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu',
           input_shape=(imheight, imwidth, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),
    Flatten(),
    Dense(680, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy', top_3_accuracy])

# The callbacks are created in this cell so it runs on a fresh kernel without
# depending on objects that are only defined further down the notebook.
cnn_callbacks = [
    # Halve the learning rate when val_loss stops improving.
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3,
                      verbose=1, mode='auto', min_delta=0.005,
                      cooldown=5, min_lr=0.0001),
    # Stop when validation top-3 accuracy has not improved for 5 epochs.
    EarlyStopping(monitor='val_top_3_accuracy', mode='max', patience=5),
]

model.fit(x=X_train, y=y_train,
          batch_size = 512,
          epochs = 22,
          validation_data = (X_val, y_val),
          callbacks = cnn_callbacks,
          verbose = 1)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 32, 32, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 16, 16, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 64)          0         
_________________________________________________________________
dropout (Dropout)            (None, 8, 8, 64)          0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 4096)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 680)               2785960   
__________

<tensorflow.python.keras.callbacks.History at 0x7ff75137b358>

In [0]:
# MobileNet backbone (no pretrained weights, no classification top) followed by
# a softmax layer over the drawing classes.
# The input shape uses imheight/imwidth so it stays in sync with the rasterized images.
input_shape = (imheight, imwidth, 1)
custom_input = Input(shape=input_shape)
# Disabled experiment: 1x1 convolution mapping the single channel to 3 channels.
#inception_input = Conv2D(3, kernel_size=(1, 1), padding='same')(custom_input)

base_model = keras.applications.MobileNet(
    include_top=False, input_shape=input_shape, weights=None)

x = Flatten()(base_model(custom_input))
predictions = Dense(num_classes, activation="softmax")(x)

head_model = Model(custom_input, predictions)

# Disabled: freeze the backbone so only the new head is trained.
# for layer in base_model.layers:
#     layer.trainable = False

# head_model.summary()

In [0]:
# Compile head_model with Adam at an explicit learning rate of 0.002 and track
# plain accuracy alongside top-3 accuracy.
adam_optimizer = keras.optimizers.Adam(lr=0.002)
head_model.compile(
    loss='categorical_crossentropy',
    optimizer=adam_optimizer,
    metrics=['accuracy', top_3_accuracy],
)

In [59]:
# Callbacks
# Halve the learning rate (down to 0.0001 at most) when val_loss has not improved
# by at least 0.005 for 3 epochs, then wait 5 epochs before monitoring again.
reduceLROnPlat = ReduceLROnPlateau(
    monitor='val_loss',
    mode='auto',
    factor=0.5,
    patience=3,
    min_delta=0.005,
    cooldown=5,
    min_lr=0.0001,
    verbose=1,
)
# Stop training once validation top-3 accuracy has not improved for 5 epochs.
earlystop = EarlyStopping(monitor='val_top_3_accuracy', patience=5, mode='max')
callbacks = [reduceLROnPlat, earlystop]

# Training
fit_options = dict(
    batch_size=512,
    epochs=22,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1,
)
head_model.fit(x=X_train, y=y_train, **fit_options)

Train on 612000 samples, validate on 68000 samples
Epoch 1/22
Epoch 2/22
Epoch 3/22
Epoch 4/22
Epoch 5/22
Epoch 6/22
Epoch 7/22
Epoch 8/22
Epoch 9/22
Epoch 10/22
Epoch 11/22
Epoch 12/22
Epoch 13/22
Epoch 14/22
Epoch 15/22
Epoch 16/22
Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.0010000000474974513.
Epoch 17/22
Epoch 18/22
Epoch 19/22
Epoch 20/22
Epoch 21/22
Epoch 22/22


<tensorflow.python.keras.callbacks.History at 0x7ff735045390>

In [0]:
import pandas as pd
import os

# Load the CSV of one class and display the stroke string of its second drawing.
root_dir = './train_simple_csvs/'
target_label = 'The_Great_Wall_of_China'

# File names directly inside root_dir (empty list when the directory is missing).
files = next(os.walk(root_dir), (root_dir, [], []))[2]

df = None
for f in files:
    # Class label = file name up to the first dot, with spaces replaced by underscores.
    label = '_'.join(f.split('.')[0].split(' '))
    if label == target_label:
        f_dir = os.path.join(root_dir, f)
        df = pd.read_csv(f_dir)
        break

if df is None:
    # Fail loudly rather than continuing with an undefined or stale `df`.
    raise FileNotFoundError(
        "No file for class '{}' found in {}".format(target_label, root_dir))

df["drawing"].values[1]

In [0]:
# Install tensorboardcolab (pip output is discarded) and import the helper class
# and the Keras callback used by the training cell that follows.
!pip install tensorboardcolab >/dev/null 2>&1
from tensorboardcolab import TensorBoardColab, TensorBoardColabCallback

In [0]:
# Train `model`, optionally reporting to TensorBoard through tensorboardcolab.
USE_TENSORBOARD = True # Visualize training with Tensorboard
if USE_TENSORBOARD:
    tbc=TensorBoardColab()
    # Extra keyword arguments for fit(): verbose logging plus the TensorBoard callback.
    props = dict(verbose=1, callbacks=[TensorBoardColabCallback(tbc)])
else:
    props = dict(verbose=1)

# NOTE(review): X_train_alt, X_train_profile, X_val_alt and X_val_profile are not
# defined in any cell visible here, and the models built in the visible cells take a
# single image input. This call looks like it belongs to a three-input model -
# confirm where those arrays and that model come from before running.
history = model.fit(
    [X_train, X_train_alt, X_train_profile],
    y_train,
    batch_size = 128,
    epochs = 100,
    validation_data = ([X_val, X_val_alt, X_val_profile], y_val),
    **props
)

In [0]:
# Plot validation accuracy per epoch (requires `history` returned by a fit() call).

import matplotlib.pyplot as plt

# The metric key depends on the Keras version: newer releases record
# 'val_accuracy', older ones 'val_acc'. Use whichever is present.
val_acc_key = 'val_accuracy' if 'val_accuracy' in history.history else 'val_acc'

fig, ax = plt.subplots()
ax.plot(history.history[val_acc_key])
ax.set(title='Validation accuracy', xlabel='Epoch', ylabel=val_acc_key)
plt.show()

In [61]:
"""Save models"""

from google.colab import files

architecture_path, weights_path = "model.json", "model.h5"

# Architecture: serialize head_model to JSON, write it, then send it to the browser.
model_json = head_model.to_json()
with open(architecture_path, "w") as json_file:
    json_file.write(model_json)
files.download(architecture_path)

# Weights: store as HDF5 and download as well.
head_model.save_weights(weights_path)
files.download(weights_path)

print("Saved model to disk")

Saved model to disk


In [0]:
"""Load models"""

# Use the tf.keras loader: the model in this notebook is built with tensorflow.keras,
# and mixing in the standalone `keras` package for deserialization is not reliable.
from tensorflow.keras.models import model_from_json

# Load the JSON architecture; `with` guarantees the file is closed even on error.
with open('model (1).json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Load the trained weights into the rebuilt architecture.
loaded_model.load_weights("model (1).h5")
# Note: this rebinds `model`; the loaded model is not compiled, so call compile()
# before evaluate()/fit() (predict() works without it).
model = loaded_model

print("Loaded model from disk")

# Model Output Generator

In [0]:
"""Save and download DataFrame"""
from google.colab import files

# NOTE(review): `out_df` is not created in any cell visible here - confirm it is
# defined before this cell is run.
# Passing the path lets pandas open the file itself with consistent newline and
# encoding handling.
out_df.to_csv("out.csv", index=False)

files.download('out.csv')