In [1]:
from __future__ import print_function
from future_builtins import map, filter, zip

In [2]:
import tensorflow as tf
import keras
import coremltools
import numpy as np

for name, package in [('tensorflow',tf), ('keras',keras), ('coremltools',coremltools), ('numpy', np)]:
    try:
        print(name + ' v' + package.__version__)
    except AttributeError:
        print(name + ' v?')

Using TensorFlow backend.


tensorflow v1.4.0
keras v2.1.1
coremltools v?
numpy v1.13.3


In [3]:
# Load newline-delimited list of categories

# f = open('quickdraw-categories.txt','r')
f = open('50-categories.txt','r')
categories = [line.rstrip() for line in f]
f.close()

filename_from_category = lambda s: '../../quickdraw/'+s+'.npy'
filenames = list(map(filename_from_category, categories))

In [4]:
# Test that all files exist according to categories
# in quickdraw-categories.txt

import os.path

all_exist = True
for filename in filenames:
    if not os.path.isfile(filename):
        print('file `{}` does not exist'.format(filename))
        all_exist = False
if all_exist:
    print('All {} files found!'.format(len(categories)))

All 50 files found!


In [5]:
image_size = (28,28)
samples_per_category = 500

In [27]:
def one_hot_vector(index):
    hot = np.zeros(len(categories),dtype=np.int8)
    hot[index] = 1
    return hot

def one_hot_array(index):
    is_column = False
    hot = np.zeros((1,len(categories)),dtype=np.int8)
    hot[0,index] = 1
    if is_column:
        return hot.T
    else:
        return hot

In [29]:
import random

def dataset_generator():
    categories_and_filenames = list(zip(categories, filenames))
    # Shuffle order
    random.shuffle(categories_and_filenames)
    for category_index, (category, filename) in enumerate(categories_and_filenames):
        label_one_hot_encoded = one_hot_array(category_index)
        category_images = np.load(filename)
        n_images = category_images.shape[0]
        sample_indices = random.sample(range(n_images), min(samples_per_category,n_images))
        for image in category_images[sample_indices]:
            image = np.matrix(image)
            image = np.expand_dims(image, axis=1)
            yield (image, label_one_hot_encoded)
        yield None

In [30]:
import random

def make_dataset():
    data = []
    target = []
    for category_index, (category, filename) in enumerate(zip(categories, filenames)):
        label_one_hot_encoded = one_hot_vector(category_index)
        category_images = np.load(filename)
        n_images = category_images.shape[0]
        for image in category_images[:samples_per_category]:
#             image = np.matrix(image)
#             image = np.expand_dims(image, axis=1)
            data.append(image)
            target.append(label_one_hot_encoded)
    data = np.array(data)
    target = np.array(target)
    print(data.shape)
    print(target.shape)
    return (data, target)

In [31]:
import keras
from keras.models import Sequential
from keras.layers import Reshape, Dense, BatchNormalization, Dropout, \
                         Conv2D, MaxPooling2D, Activation, Flatten, Lambda

### PARAMS ###
input_shape=image_size
n_filters=[32,64,64,256]
dense_sizes=[128,128]
output_size=len(categories)

batch_normalize = True

layer_activation='relu'
final_activation='softmax'

loss='categorical_crossentropy'
optimizer='rmsprop'
metrics=['categorical_accuracy']

batch_size=32
epochs=40
shuffle=False
verbose=2
    
def _make_convolution_layers(model):
    model.add(
        Reshape(
            (1, input_shape[0], input_shape[1]), 
            input_shape=(1, input_shape[0]*input_shape[1])
        )
    )
    
    model.add(
        Lambda(lambda x: x/127.5-1)
    )

    for filters in n_filters:
        # Add a single convolution layer
        model.add(
            Conv2D(
                filters=filters,
                kernel_size=(3,3),
                padding='same',
                activation=layer_activation
            )
        )
        # Add batch normalization to the convolution layer
        if batch_normalize:
            model.add(
                BatchNormalization(axis=1)
            )
        # Pool the layer
        model.add(
            MaxPooling2D(pool_size=(2,2), data_format='channels_first')
        )

    model.add(Flatten())

def _make_dense_layers(model):
    for size in dense_sizes:
        model.add(
            Dense(size, activation=layer_activation)
        )
        if batch_normalize:
            model.add(
                BatchNormalization(axis=1)
            )
    model.add(
        Dense(output_size, activation=final_activation)
    )

def one_hot_output(y_hat_raw):
    # predict() outputs floats. We want one-hot in most cases
    y_hat = np.zeros(y_hat_raw.shape, dtype=np.int)
    y_hat[np.arange(y_hat_raw.shape[0]), y_hat_raw.argmax(1)] = 1
    return y_hat    

model = Sequential()

_make_convolution_layers(model)
_make_dense_layers(model)

model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=metrics
)

In [34]:
data, target = make_dataset()
for image in data:
    image = np.matrix(image)
    image = np.expand_dims(image, axis=1)
print(data.shape)

(25000, 784)
(25000, 50)
(25000, 784)


In [20]:
hist = model.fit(
    x=data, 
    y=target, 
    batch_size=32, 
    epochs=40, 
    verbose=1, 
    callbacks=None, 
    validation_split=0.0, 
    validation_data=None, 
    shuffle=True, 
    class_weight=None, 
    sample_weight=None, 
    initial_epoch=0, 
    steps_per_epoch=None, 
    validation_steps=None
)

ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 25000 arrays: [array([[[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   ...

In [None]:
# from itertools import cycle

# samples = samples_per_category * len(categories)

# model.fit_generator(
#     cycle(dataset_generator()), 
#     steps_per_epoch=samples, 
#     epochs=40, 
#     verbose=1, 
#     callbacks=None, 
#     validation_data=None, 
#     validation_steps=None, 
#     class_weight=None, 
#     max_queue_size=10,
#     workers=1, 
#     use_multiprocessing=False, 
#     shuffle=True, 
#     initial_epoch=0
# )