In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 
import ast
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import pandas as pd
import numpy as np
import tensorflow as tf
import keras
from keras import backend as K
from keras import Model
from keras import optimizers
from keras.legacy import interfaces
from keras.utils.generic_utils import get_custom_objects
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Dropout, Flatten, Activation, GlobalAveragePooling2D, BatchNormalization
from keras.metrics import categorical_accuracy, top_k_categorical_accuracy, categorical_crossentropy
from keras.models import Sequential
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
from keras.optimizers import Adam
from keras.utils.training_utils import multi_gpu_model

from keras.optimizers import SGD
from keras import callbacks
from data import *
from keras_helper import *

from keras.applications import Xception
from keras.applications.xception import preprocess_input

Using TensorFlow backend.


In [2]:
def image_generator_xd(size, batchsize, lw=2,
                       df_path='../input/train_all.csv', time_color=True, preprocess_input=None,
                       channel=1, mixup=0, center=False):
    """Endlessly yield batches of rendered drawings read from a CSV file.

    The CSV is streamed in chunks of ``batchsize`` rows. When the file is
    exhausted it is opened again, so the generator never terminates.

    Args:
        size: Side length of the square images in each batch.
        batchsize: Rows per CSV chunk; the last chunk of a pass may be smaller.
        lw: Forwarded to ``draw_cv2_parts_opt`` as ``lw``.
        df_path: CSV with a JSON-encoded ``drawing`` column and, optionally,
            a ``word`` column.
        time_color: Accepted but not used by this function.
        preprocess_input: Optional callable applied to the float32 batch; its
            result is cast back to float32.
        channel: Size of the last axis of each batch.
        mixup: Accepted but not used by this function.
        center: Forwarded to ``draw_cv2_parts_opt`` as ``center``.

    Yields:
        ``(x, y)`` when the chunk has a ``word`` column, where ``y`` is the
        one-hot encoding of ``word`` over the global ``NCATS`` classes
        (presumably integer class ids -- confirm against the CSV); otherwise
        ``x`` alone. ``x`` is uint8 unless ``preprocess_input`` is given, in
        which case it is float32.
    """
    while True:
        for chunk in pd.read_csv(df_path, chunksize=batchsize):
            # The drawings are stored as JSON text; decode them per row.
            strokes_per_row = chunk['drawing'].apply(json.loads)

            images = np.zeros((len(chunk), size, size, channel), dtype=np.uint8)
            for row, strokes in enumerate(strokes_per_row.values):
                # Renderer output is assumed to broadcast to
                # (size, size, channel) -- see draw_cv2_parts_opt.
                images[row] = draw_cv2_parts_opt(strokes, size=size,
                                                 lw=lw, center=center)

            has_labels = 'word' in chunk
            labels = None
            if has_labels:
                labels = keras.utils.to_categorical(chunk.word, num_classes=NCATS)

            if preprocess_input is not None:
                images = preprocess_input(images.astype(np.float32)).astype(np.float32)

            if has_labels:
                yield images, labels
            else:
                yield images
            
def df_to_image_array_xd(df, size, lw=2, 
                         time_color=True, preprocess_input = None,
                         channel = 1, center = False):
    """Render every drawing in ``df`` into one image array.

    Unlike a naive ``df['drawing'] = ...`` implementation, this function does
    NOT modify ``df``: the JSON is decoded into a local list, so the same
    frame can be passed in repeatedly (e.g. when a notebook cell is re-run).

    Args:
        df: DataFrame with a ``drawing`` column. Each entry is either a JSON
            string or an already-decoded stroke list.
        size: Side length of the square output images.
        lw: Forwarded to ``draw_cv2_parts_opt`` as ``lw``.
        time_color: Accepted but not used by this function.
        preprocess_input: Optional callable applied to the float32 array; its
            result is cast back to float32.
        channel: Size of the last axis of the output.
        center: Forwarded to ``draw_cv2_parts_opt`` as ``center``.

    Returns:
        Array of shape ``(len(df), size, size, channel)``; uint8 when
        ``preprocess_input`` is None, float32 otherwise.
    """
    # The notebook has no explicit top-level `import json` (it presumably
    # arrives through a star import), so import it here to be self-contained.
    import json

    # Decode without writing back into the caller's frame; tolerate rows that
    # have already been decoded.
    drawings = [json.loads(d) if isinstance(d, str) else d
                for d in df['drawing'].values]

    x = np.zeros((len(drawings), size, size, channel), dtype=np.uint8)
    for i, raw_strokes in enumerate(drawings):
        x[i, :, :, :] = draw_cv2_parts_opt(raw_strokes, size=size,
                                           lw=lw, center=center)
    if preprocess_input is not None:
        # max() of a zero-size array raises, so report 0 for an empty frame.
        x_max = x.max() if x.size else 0
        print('x shape',x.shape, 'x max', x_max)
        x = preprocess_input(x.astype(np.float32)).astype(np.float32)
    return x

In [3]:
# Training configuration shared by the cells below.
# TOT = 35534421
TOT = 46613580 - 340000   # number of training rows budgeted for the whole run
EPOCHS = 70
size = 71                 # side length of the rendered images
batchsize = 340
lw = 6                    # line width handed to the renderer
channel = 3
# steps_per_epoch must be an integer, so use floor division. Note that TOT is
# divided by EPOCHS as well: all EPOCHS "epochs" together consume about TOT rows.
STEPS = TOT // (EPOCHS * batchsize)
NCATS = 340               # number of target classes
mixup = 0
center = False
optimizer = 'adam'        # 'sgd' or 'adam'
cb = 'raw'                # callback set: 'raw' or 'snap'

In [4]:
# Build an untrained (weights=None) Xception classifier with NCATS outputs and
# compile it with the optimizer selected in the configuration cell.
K.clear_session()
# model = Xception(input_shape=(size, size, channel), weights=None, classes=NCATS)
# Spatial dimensions are left as None so the input size is not fixed by the graph.
model = Xception(input_shape=(None,None,3), weights=None, classes=NCATS)

if optimizer == 'sgd':
    opt = SGD(0.002, momentum=0.9, nesterov=True)
    opt = NormalizedOptimizer(opt, normalization='l2')
elif optimizer == 'adam':
    opt = Adam(lr=0.0005)
else:
    # Fail here with a clear message instead of a NameError on `opt` below.
    raise ValueError("Unknown optimizer {!r}; expected 'sgd' or 'adam'".format(optimizer))

# model = multi_gpu_model(model, gpus=2)

# model.load_weights('models/xception128_parts2_lw6_balance_0_adam_parts.model')

model.compile(optimizer=opt, loss='categorical_crossentropy',
              metrics=[categorical_crossentropy, categorical_accuracy, top_3_accuracy])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, None, None, 3 864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, None, None, 3 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, None, None, 3 0           block1_conv1_bn[0][0]            
__________________________________________________________________________________________________
block1_con

In [5]:
# Load the hold-out set and materialise it fully in memory: one-hot labels
# plus the rendered (and preprocessed) image array.
valid_df = pd.read_csv('../input/valid.csv')
y_valid = keras.utils.to_categorical(valid_df.word, num_classes=NCATS)
x_valid = df_to_image_array_xd(
    valid_df, size,
    lw=lw, channel=channel, center=center,
    preprocess_input=preprocess_input,
)

x shape (34000, 71, 71, 3) x max 255


In [6]:
# Sanity check: array shapes and the in-memory footprint of the validation images.
print(x_valid.shape, y_valid.shape)
valid_gib = x_valid.nbytes / 1024.**3
print('Validation array memory {:.2f} GB'.format(valid_gib))

(34000, 71, 71, 3) (34000, 340)
Validation array memory 1.92 GB


In [7]:
# Endless training-batch generator over the full training CSV, driven by the
# shared configuration values.
train_datagen = image_generator_xd(
    size=size, batchsize=batchsize, lw=lw,
    df_path='../input/train_all.csv',
    channel=channel, mixup=mixup, center=center,
    preprocess_input=preprocess_input,
)

In [8]:
# Name of this run; used for the checkpoint file, the TensorBoard log directory
# and the submission file. The four placeholders are image size, line width,
# mixup and optimizer (the callback mode `cb` is not part of the name).
model_prefix = 'xception{}_parts_opt_lw{}_balance_{}_{}'.format(size, lw, mixup, optimizer)
if center:
    model_prefix += '_center'
if channel == 3:
    model_prefix += '_parts'
print(model_prefix)
check_path = "./models/{}.model".format(model_prefix)
# Create the checkpoint directory up front so the first save cannot fail on a
# missing folder after a full epoch of training.
os.makedirs(os.path.dirname(check_path), exist_ok=True)
fold = 9  # only consumed by the 'snap' callback builder below
if cb == 'raw':
    # All callbacks track validation accuracy (higher is better).
    callbks = [
        # Halve the learning rate after 5 epochs without a >= 0.005 gain.
        ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.5, patience=5,
                          min_delta=0.005, mode='max', cooldown=3, verbose=1),
        # Keep only the weights of the best epoch so far.
        callbacks.ModelCheckpoint(check_path,
                                    monitor='val_categorical_accuracy', 
                                    mode = 'max', 
                                  save_best_only=True, save_weights_only=True,
                                  verbose=1),
        callbacks.TensorBoard(log_dir='./log/{}'.format(model_prefix)),
        # Stop after 20 epochs without improvement.
        EarlyStopping(monitor='val_categorical_accuracy', mode='max',patience=20, verbose=1)
    ]
elif cb == 'snap':
    snapshot = SnapshotCallbackBuilder(nb_epochs=EPOCHS,
                                       nb_snapshots=3,
                                       size = size,
                                       init_lr=1e-3, 
                                       fold=fold)

    callbks = snapshot.get_callbacks(model_prefix = model_prefix)
else:
    # Fail here with a clear message instead of a NameError on `callbks` later.
    raise ValueError("Unknown callback mode {!r}; expected 'raw' or 'snap'".format(cb))

xception71_parts_opt_lw6_balance_0_adam_parts


In [None]:
# Train from the endless generator, validating on the in-memory hold-out set
# after every epoch. The returned History object is stored in `hists` so the
# list that is created here actually holds the run's training curves.
hists = []
hist = model.fit_generator(
    train_datagen, steps_per_epoch=STEPS, epochs=EPOCHS, verbose=1,
    validation_data=(x_valid, y_valid),
    callbacks = callbks
)
hists.append(hist)

Epoch 1/70

Epoch 00001: val_categorical_accuracy improved from -inf to 0.61291, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 2/70

Epoch 00002: val_categorical_accuracy improved from 0.61291 to 0.62682, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 3/70

Epoch 00003: val_categorical_accuracy improved from 0.62682 to 0.67844, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 4/70

Epoch 00004: val_categorical_accuracy did not improve from 0.67844
Epoch 5/70

Epoch 00005: val_categorical_accuracy improved from 0.67844 to 0.70797, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 6/70

Epoch 00006: val_categorical_accuracy improved from 0.70797 to 0.72306, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 7/70

Epoch 00007: val_categorical_accuracy did not improve from 0.72306
Epoch 8/70

Epoch 00008: val_categorical


Epoch 00020: val_categorical_accuracy improved from 0.79135 to 0.79268, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 21/70

Epoch 00021: val_categorical_accuracy did not improve from 0.79268
Epoch 22/70

Epoch 00022: val_categorical_accuracy improved from 0.79268 to 0.79421, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 23/70

Epoch 00023: val_categorical_accuracy improved from 0.79421 to 0.79462, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 24/70

Epoch 00024: val_categorical_accuracy improved from 0.79462 to 0.79624, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 25/70

Epoch 00025: val_categorical_accuracy improved from 0.79624 to 0.79632, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 26/70

Epoch 00026: val_categorical_accuracy did not improve from 0.79632
Epoch 27/70

Epoch 00027: val_categorical_


Epoch 00039: val_categorical_accuracy did not improve from 0.81362
Epoch 40/70

Epoch 00040: val_categorical_accuracy did not improve from 0.81362
Epoch 41/70

Epoch 00041: val_categorical_accuracy improved from 0.81362 to 0.81385, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 42/70

Epoch 00042: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.

Epoch 00042: val_categorical_accuracy improved from 0.81385 to 0.81471, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 43/70

Epoch 00043: val_categorical_accuracy improved from 0.81471 to 0.81585, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 44/70

Epoch 00044: val_categorical_accuracy improved from 0.81585 to 0.81638, saving model to ./models/xception71_parts_opt_lw6_balance_0_adam_parts.model
Epoch 45/70

Epoch 00045: val_categorical_accuracy did not improve from 0.81638
Epoch 46/70

Epoch 00046: val_categorical_a

In [None]:
# Load the competition test set and render it with the same settings that were
# used for the validation images.
test = pd.read_csv('../input/test_simplified.csv')
x_test = df_to_image_array_xd(test, size, lw=lw, 
                              preprocess_input=preprocess_input,
                             channel=channel, center = center)
print(test.shape, x_test.shape)
print('Test array memory {:.2f} GB'.format(x_test.nbytes / 1024.**3 ))

# Map class index -> class name, with spaces replaced by underscores.
np_classes = np.load('../input/classes.npy')
id2cat = {k: cat.replace(' ', '_') for k, cat in enumerate(np_classes)}


In [None]:
def doodle_predict(model, model_path, x_test):
    """Predict the test set with saved weights and hand the result to kaggle_util.

    Loads ``model_path`` into ``model``, predicts ``x_test``, converts the
    predictions to three category names per row and writes them, space
    separated, into the ``word`` column of the global ``test`` frame. The
    ``key_id``/``word`` columns are then passed to ``kaggle_util.save_result``
    with ``send=True``.

    Args:
        model: Model exposing ``load_weights`` and ``predict``.
        model_path: Path of the weights file to load.
        x_test: Input array, row-aligned with the global ``test`` frame.

    Relies on the globals ``test``, ``id2cat`` and ``model_prefix``.
    Returns None.
    """
    model.load_weights(model_path)
    probabilities = model.predict(x_test, batch_size=128, verbose=1)

    # Category ids per row (columns 'a', 'b', 'c'), then ids -> names.
    top3_names = preds2catids(probabilities).replace(id2cat)
    first, second, third = (top3_names[col] for col in ('a', 'b', 'c'))
    test['word'] = first + ' ' + second + ' ' + third
    submission = test[['key_id', 'word']]

    import kaggle_util
    result_path = '../result/{}.csv'.format(model_prefix)
    kaggle_util.save_result(submission,
                            result_path,
                            'quickdraw-doodle-recognition',
                            send=True, index=False)

In [None]:
# Predict the test set with the checkpointed weights and hand off the submission.
doodle_predict(model=model, model_path=check_path, x_test=x_test)