<a href="https://colab.research.google.com/github/tohyongyao/AI-Project/blob/master/TYY_Quickdraw_challenge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Quickdraw

Name: Toh Yong Yao



In [1]:
!wget -qq https://www.dropbox.com/s/gdlb8dnjzcly51o/quickdraw.zip
  
!unzip -qq quickdraw.zip

!rm -r __MACOSX
!rm quickdraw.zip

!ls

quickdraw  sample_data


## Imports

In [0]:
import numpy as np

from glob import glob
import ntpath

import tensorflow as tf

from tensorflow.keras.layers import Input, Dense, Conv2D, BatchNormalization
from tensorflow.keras.layers import ReLU, Add, MaxPool2D, GlobalAvgPool2D
from tensorflow.keras.models import Model

In [0]:
file_names = glob('./quickdraw/*.npy')

In [6]:
# make some class names
class_names = []

for file in file_names:
  name = ntpath.basename(file)
  class_names.append(name[:-4])
  
  
print(class_names)

['ant', 'cactus', 'birthday cake', 'rainbow', 'eyeglasses', 'face', 'brain', 'cookie', 'pig', 'palm tree', 'ambulance', 'alarm clock', 'donut', 'lollipop', 'angel', 'cat', 'fish', 'banana', 'bee', 'postcard']


In [0]:
# get 1000 of each class

x_data =np.array([])
y_labels =np.array([])

for i, filename in enumerate(file_names):
  labels = [i for j in range(1000)]
  arr = np.load(filename)
  arr = arr[:1000]
  if len(x_data) == 0:
    x_data = arr
    y_labels = np.asarray(labels)
  else:
    x_data = np.concatenate((x_data, arr))
    y_labels = np.concatenate((y_labels, labels))

In [8]:
x_data.shape

(20000, 784)

## Shuffle and split
We should shuffle the data first to prevent having an entire class in the validation set.

In [0]:
# Shuffle
from sklearn.utils import shuffle
x_data, y_labels = shuffle(x_data, y_labels, random_state=3)

# Split
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x_data, y_labels, test_size=0.1, random_state=3)

In [11]:
x_train.shape

(18000, 784)

## Prepare data for network

Each image is array of 784. Need to reshape to 28, 28, 1.


In [0]:
image_size = 28
input_shape = (image_size, image_size)
num_classes = len(class_names)
learning_rate = 0.001
batch_size = 32
train_step = x_train.shape[0]/batch_size
valid_step = x_test.shape[0]/batch_size

# Reshape
x_train = x_train.reshape(x_train.shape[0], image_size, image_size).astype('float32')
x_test = x_test.reshape(x_test.shape[0], image_size, image_size).astype('float32')

# Normalize
x_train /= 255.0
x_test /= 255.0

# Convert
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [0]:
# Change Train Data to TF Data
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))

# Shuffle & Repeat 
train_dataset = train_dataset.shuffle(10000)
train_dataset = train_dataset.repeat(100)
train_dataset = train_dataset.batch(batch_size, drop_remainder=True)

In [0]:
# Change Validataion data to TF
valid_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
valid_dataset = valid_dataset.repeat()
valid_dataset = valid_dataset.batch(batch_size, drop_remainder=True)

## Model Resnet18

In [0]:
def Conv_BatchNorm(x, filters, kernel_size, strides):
    x = Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    return x

In [0]:
def projection_block(tensor, num_filters, strides):
    # left stream
    x = Conv_BatchNorm(tensor, filters=num_filters, kernel_size=1, strides=strides) #[v]
    x = Conv_BatchNorm(x, filters=num_filters, kernel_size=3, strides=1)
    x = Conv2D(filters=4*num_filters, kernel_size=1, strides=1)(x)  # notice: filters=4*num_filters
    x = BatchNormalization()(x)
 
    # right stream
    proj_x = Conv2D(filters=4*num_filters, kernel_size=1, strides=strides)(tensor)  # notice: filters=4*num_filters
    proj_x = BatchNormalization()(proj_x)
 
    x = Add()([x, proj_x])
    x = ReLU()(x)
    return x

In [0]:
def identity_block(orig_x, num_filters):
    x = Conv_BatchNorm(orig_x, filters=num_filters, kernel_size=1, strides=1)
    x = Conv_BatchNorm(x, filters=num_filters, kernel_size=3, strides=1)
    x = Conv2D(filters=4*num_filters, kernel_size=1, strides=1)(x)  # notice: filters=4*num_filters
    x = BatchNormalization()(x)
 
    x = Add()([x, orig_x])
    x = ReLU()(x)
    return x

In [0]:
def resnet_block(x, filters, reps, strides):
    x = projection_block(x, num_filters=filters, strides=strides)
    for _ in range(reps-1):
        x = identity_block(x, num_filters=filters)
    return x

In [0]:
Inp = Input(shape=(28, 28, 1),name="Inp")

x = Conv_BatchNorm(Inp, filters=64, kernel_size=7, strides=2)  
x = MaxPool2D(pool_size=3, strides=2, padding='same')(x)
 
x = resnet_block(x, filters=64, reps=2, strides=1)
x = resnet_block(x, filters=128, reps=2, strides=2)  
x = resnet_block(x, filters=256, reps=2, strides=2) 
x = resnet_block(x, filters=512, reps=2, strides=2) 
 
x = GlobalAvgPool2D()(x)  
 
output = Dense(20, activation='softmax')(x)  
 
model = Model(Inp, output,name='Resnet18')

In [0]:
# Optimizer
opt = tf.keras.optimizers.Adam(lr = learning_rate)

model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer= opt,
              metrics=['accuracy'])

## Train

In [24]:
epochs = 7

hist = model.fit(train_dataset,
                 steps_per_epoch=train_step,
                 batch_size = batch_size,
                 epochs=epochs,
                 verbose=1,
                 callbacks = None,
                 validation_data=valid_dataset, 
                 validation_steps=valid_step)

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


## Evaluate 

In [25]:
model.evaluate(valid_dataset, steps=valid_step)



[3.3005623817443848, 0.6101190447807312]