In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 
import ast
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import pandas as pd
import numpy as np
import tensorflow as tf
import keras
from keras import backend as K
from keras import Model
from keras import optimizers
from keras import layers
from keras.legacy import interfaces
from keras.utils.generic_utils import get_custom_objects
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Dropout, Flatten, Activation, GlobalAveragePooling2D, BatchNormalization
from keras.layers import GlobalMaxPooling2D
from keras.metrics import categorical_accuracy, top_k_categorical_accuracy, categorical_crossentropy
from keras.models import Sequential
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
from keras.optimizers import Adam

from keras.optimizers import SGD
from keras import callbacks
from data import *
from keras_helper import *

from keras.applications import MobileNetV2
from keras.applications.mobilenetv2 import preprocess_input

Using TensorFlow backend.


In [2]:
# --- Run configuration -------------------------------------------------------
TOT = 49673580   # presumably the total number of training rows -- TODO confirm
EPOCHS = 50      # number of training epochs
size = 64        # side length (pixels) of the square images fed to the network
batchsize = 640  # rows read per CSV chunk, i.e. samples per training batch
lw = 6           # stroke thickness handed to cv2.line when rasterising
channel = 1      # number of image channels (1 = single time-coloured plane)
# Batches per epoch. Floor division keeps this a whole number, as Keras expects
# an integer for steps_per_epoch. The trailing "3" scales each epoch down to a
# third of an even TOT / EPOCHS split.
STEPS = TOT // (EPOCHS * batchsize * 3)
NCATS = 340      # number of target classes (width of the softmax output)

In [9]:
def plotDoodles(x):
    """Show the leading images of ``x`` in the largest square grid that fits.

    Args:
        x: array-like batch of images indexed along axis 0. Each item is
            squeezed before display, so a trailing singleton channel is fine.

    With ``n = x.shape[0]`` images, a ``side x side`` grid is drawn where
    ``side = int(sqrt(n))``; images beyond ``side * side`` are not shown.
    Nothing is drawn when ``x`` is empty.
    """
    cnt = x.shape[0]
    side = int(math.sqrt(cnt))
    if side == 0:
        # plt.subplots(0, 0) raises, and there is nothing to display anyway.
        return

    # squeeze=False keeps `axs` two-dimensional even for a 1x1 grid, so the
    # axs[i, j] lookup below is always valid. No separate plt.figure() call is
    # needed: subplots() creates its own figure.
    fig, axs = plt.subplots(side, side, squeeze=False)
    for i in range(side):
        for j in range(side):
            ax = axs[i, j]
            ax.imshow(x[i * side + j].squeeze())
            ax.axis('off')
    plt.show()

def crop_center(image_data):
    """Crop a 2-D image to the bounding box of its positive pixels.

    Args:
        image_data: 2-D array; pixels with value > 0 count as content.

    Returns:
        The sub-array spanning the first to the last non-empty row and column
        (inclusive). If the image has no positive pixel at all, it is returned
        unchanged instead of raising on an empty bounding box.
    """
    non_empty_columns = np.where(image_data.max(axis=0) > 0)[0]
    non_empty_rows = np.where(image_data.max(axis=1) > 0)[0]

    # A blank canvas (e.g. a drawing made only of single-point strokes) has no
    # bounding box; there is nothing to crop.
    if non_empty_rows.size == 0 or non_empty_columns.size == 0:
        return image_data

    # np.where returns indices in ascending order, so the first/last entries
    # are the min/max of the bounding box.
    top, bottom = non_empty_rows[0], non_empty_rows[-1]
    left, right = non_empty_columns[0], non_empty_columns[-1]
    return image_data[top:bottom + 1, left:right + 1]

def add_padding(img, pad_l, pad_t, pad_r, pad_b):
    """Surround a 2-D image with zero-valued borders.

    Args:
        img: 2-D array to pad.
        pad_l: number of zero columns added on the left.
        pad_t: number of zero rows added on top.
        pad_r: number of zero columns added on the right.
        pad_b: number of zero rows added at the bottom.

    Returns:
        A new array of shape ``(height + pad_t + pad_b, width + pad_l + pad_r)``
        with ``img`` placed at row offset ``pad_t`` and column offset ``pad_l``.
        The result keeps the dtype of ``img``; padding never promotes the
        pixels to a wider type.
    """
    # np.pad takes one (before, after) pair per axis: rows first, then columns.
    return np.pad(img, ((pad_t, pad_b), (pad_l, pad_r)),
                  mode='constant', constant_values=0)

def center_image(imgnew, size):
    """Zero-pad ``imgnew`` so that it sits centred in a ``size`` x ``size`` frame.

    A dimension that is already ``size`` or larger is left alone. When the
    missing amount is odd, the extra pixel goes to the top / left side.

    Args:
        imgnew: 2-D image array.
        size: target side length in pixels.

    Returns:
        The padded image as produced by ``add_padding``.
    """
    rows, cols = imgnew.shape[0], imgnew.shape[1]
    top = bottom = left = right = 0

    if rows < size:
        missing_rows = size - rows
        top, bottom = math.ceil(missing_rows / 2), math.floor(missing_rows / 2)

    if cols < size:
        missing_cols = size - cols
        left, right = math.ceil(missing_cols / 2), math.floor(missing_cols / 2)

    return add_padding(imgnew, left, top, right, bottom)

def pad_center(image_data, size):
    """Crop ``image_data`` to its drawn content, then re-centre it.

    The image is first trimmed with ``crop_center`` and the result is padded
    back towards ``size`` x ``size`` by ``center_image``.
    """
    return center_image(crop_center(image_data), size)

def draw_cv2(raw_strokes, size=256, lw=6, time_color=True, center = True):
    """Rasterise a stroke list into a single-channel image.

    Args:
        raw_strokes: sequence of strokes; ``stroke[0]`` holds the x coordinates
            and ``stroke[1]`` the y coordinates of consecutive points.
        size: side length of the returned image; the BASE_SIZE canvas is
            resized when it differs.
        lw: line thickness passed to ``cv2.line``.
        time_color: when true, stroke ``t`` is drawn with intensity
            ``255 - min(t, 10) * 13`` (later strokes are darker, levelling off
            from the 11th stroke on); otherwise every stroke uses 255.
        center: when true, the result is cropped and re-centred via
            ``pad_center``.

    Returns:
        The rendered 2-D image.
    """
    canvas = np.zeros((BASE_SIZE, BASE_SIZE), np.uint8)

    for order, stroke in enumerate(raw_strokes):
        # The shade depends only on the stroke's position in the drawing.
        shade = 255 - min(order, 10) * 13 if time_color else 255
        for k in range(1, len(stroke[0])):
            start = (stroke[0][k - 1], stroke[1][k - 1])
            end = (stroke[0][k], stroke[1][k])
            cv2.line(canvas, start, end, shade, lw)

    if size != BASE_SIZE:
        canvas = cv2.resize(canvas, (size, size))

    return pad_center(canvas, size) if center else canvas

def draw_cv2_pointcnts(raw_strokes, size=256, lw=2, center = True):
    """Rasterise strokes with intensity encoding each stroke's point count.

    The stroke with the most points is drawn at 255; strokes with fewer points
    get darker, spread over a range of 200 intensity levels. If all strokes
    have the same number of points, every stroke is drawn at 255.

    Args:
        raw_strokes: sequence of strokes (``stroke[0]`` = xs, ``stroke[1]`` = ys).
        size: side length of the returned image.
        lw: line thickness passed to ``cv2.line``.
        center: when true, crop and re-centre the result via ``pad_center``.

    Returns:
        The rendered 2-D image.
    """
    counts = [len(s[0]) for s in raw_strokes]
    fewest = min(counts)
    most = max(counts)
    # Intensity lost per missing point; 0 disables the gradient.
    step = (200 / (most - fewest)) if most > fewest else 0

    canvas = np.zeros((BASE_SIZE, BASE_SIZE), np.uint8)

    for stroke in raw_strokes:
        n_points = len(stroke[0])
        if step > 0:
            shade = 255 - int(step * (most - n_points))
        else:
            shade = 255

        for k in range(1, n_points):
            start = (stroke[0][k - 1], stroke[1][k - 1])
            end = (stroke[0][k], stroke[1][k])
            cv2.line(canvas, start, end, shade, lw)

    if size != BASE_SIZE:
        canvas = cv2.resize(canvas, (size, size))

    return pad_center(canvas, size) if center else canvas
    
def get_line_length(stroke):
    """Return the polyline length of a stroke, truncated to an int.

    Args:
        stroke: pair-like where ``stroke[0]`` are x coordinates and
            ``stroke[1]`` the matching y coordinates.

    Returns:
        Sum of the Euclidean distances between consecutive points, converted
        with ``int()`` (so truncated towards zero). A stroke with fewer than
        two points has length 0.
    """
    xs = stroke[0]
    total = 0
    for k in range(1, len(xs)):
        dx = xs[k] - xs[k - 1]
        dy = stroke[1][k] - stroke[1][k - 1]
        total += (dx**2 + dy**2) ** (1/2)
    return int(total)

def draw_cv2_linelength(raw_strokes, size=256, lw=2, center = True):
    """Rasterise strokes with intensity encoding each stroke's path length.

    The longest stroke is drawn at 255; shorter strokes get darker, spread
    over a range of 200 intensity levels. If all strokes have the same
    (integer-truncated) length, every stroke is drawn at 255.

    Args:
        raw_strokes: sequence of strokes (``stroke[0]`` = xs, ``stroke[1]`` = ys).
        size: side length of the returned image.
        lw: line thickness passed to ``cv2.line``.
        center: when true, crop and re-centre the result via ``pad_center``.

    Returns:
        The rendered 2-D image.
    """
    lengths = [get_line_length(s) for s in raw_strokes]
    min_length = min(lengths)
    max_length = max(lengths)
    if max_length > min_length:
        color_step = 200 / (max_length - min_length)
    else:
        color_step = 0

    img = np.zeros((BASE_SIZE, BASE_SIZE), np.uint8)

    # Pair each stroke with the length measured above rather than walking
    # every stroke a second time inside the drawing loop.
    for stroke, length in zip(raw_strokes, lengths):
        if color_step > 0:
            color = 255 - int(color_step * (max_length - length))
        else:
            color = 255

        for i in range(len(stroke[0]) - 1):
            cv2.line(img, (stroke[0][i], stroke[1][i]),
                     (stroke[0][i + 1], stroke[1][i + 1]), color, lw)

    if size != BASE_SIZE:
        img = cv2.resize(img, (size, size))

    if center:
        img = pad_center(img, size)

    return img
    
def draw_cv2_whole(raw_strokes, size=256, lw=2, center = True):
    """Build a 3-channel image from three different renderings of a drawing.

    Channel 0 is the stroke-order rendering (``draw_cv2`` with
    ``time_color=True``), channel 1 the point-count rendering and channel 2
    the line-length rendering.

    Returns:
        ``(size, size, 3)`` uint8 array.
    """
    shared = dict(size=size, lw=lw, center=center)
    stacked = np.zeros((size, size, 3), np.uint8)

    stacked[..., 0] = draw_cv2(raw_strokes, time_color=True, **shared)
    stacked[..., 1] = draw_cv2_pointcnts(raw_strokes, **shared)
    stacked[..., 2] = draw_cv2_linelength(raw_strokes, **shared)

    return stacked

def mixup_onedata(data, labels, weight, index, batch_size):
    """Blend each sample with a partner sample (mixup).

    For every row ``i < batch_size`` the output is
    ``weight[i] * row_i + (1 - weight[i]) * row_index[i]``, applied to both
    ``data`` and ``labels``.

    Args:
        data: array of samples, indexed along axis 0.
        labels: array of label vectors aligned with ``data``.
        weight: per-row mixing coefficient.
        index: per-row position of the partner sample.
        batch_size: number of leading rows to mix; rows past it stay zero.

    Returns:
        ``(mixed_data, mixed_labels)`` with the same shapes and dtypes as the
        inputs. Mixed values are cast back to the input dtype on assignment.
    """
    partner_data = data[index]
    partner_labels = labels[index]

    mixed_data = np.zeros_like(data, dtype=data.dtype)
    mixed_labels = np.zeros_like(labels, dtype=labels.dtype)

    for row in range(batch_size):
        keep = weight[row]
        mixed_data[row] = data[row] * keep + partner_data[row] * (1 - keep)
        mixed_labels[row] = labels[row] * keep + partner_labels[row] * (1 - keep)

    return mixed_data, mixed_labels

def mixup_all(data, labels, alpha):
    """Apply mixup to a whole batch with randomly drawn weights and partners.

    Mixing weights are sampled from ``Beta(alpha, alpha)`` (one per sample) and
    partners come from a random permutation of the batch.

    Returns:
        ``(mixed_data, mixed_labels)`` as produced by ``mixup_onedata``.
    """
    n_samples = len(labels)
    # Draw order matters for reproducibility under a fixed seed: weights first,
    # then the permutation.
    mix_weights = np.random.beta(alpha, alpha, n_samples)
    partners = np.random.permutation(n_samples)

    return mixup_onedata(data, labels, weight=mix_weights, index=partners,
                         batch_size=n_samples)
    
def image_generator_xd(size, batchsize, lw=2,
                       df_path = '../input/train_all.csv', time_color=True, preprocess_input = None,
                       channel = 1, mixup = 0, center = True):
    """Endlessly yield batches of rendered drawings read from a CSV file.

    The file is streamed in chunks of ``batchsize`` rows; once exhausted it is
    re-opened and read again, so the generator never terminates.

    Args:
        size: side length of the rendered images.
        batchsize: number of CSV rows per yielded batch (the last chunk of a
            pass may be smaller).
        lw: line thickness used when drawing strokes.
        df_path: CSV with a JSON-encoded ``drawing`` column and, optionally, a
            ``word`` column holding integer class ids.
        time_color: forwarded to ``draw_cv2`` on the single-channel path.
        preprocess_input: optional callable applied to the float32 image batch.
        channel: 1 renders with ``draw_cv2``; any other value renders with
            ``draw_cv2_whole``, which produces 3 channels.
        mixup: when > 0, the Beta parameter for ``mixup_all``; 0 disables mixup.
        center: forwarded to the drawing function.

    Yields:
        ``(x, y)`` when the chunk has a ``word`` column, otherwise just ``x``.
        ``x`` is float32 when ``preprocess_input`` is given, else uint8;
        ``y`` is a one-hot matrix with ``NCATS`` columns.
    """
    while True:
        for df in pd.read_csv(df_path, chunksize=batchsize):
            drawings = df['drawing'].apply(json.loads)
            x = np.zeros((len(df), size, size, channel), dtype=np.uint8)
            for i, raw_strokes in enumerate(drawings.values):
                if channel == 1:
                    # draw_cv2 takes no `channel` argument; pass only what it
                    # accepts and honour the caller's time_color setting.
                    x[i, :, :, 0] = draw_cv2(raw_strokes, size=size, lw=lw,
                                             time_color=time_color, center=center)
                else:
                    x[i, :, :, :] = draw_cv2_whole(raw_strokes, size=size,
                                                   lw=lw, center=center)

            if 'word' in df:
                y = keras.utils.to_categorical(df.word, num_classes=NCATS)

                if mixup > 0:
                    x, y = mixup_all(x, y, mixup)

                if preprocess_input is not None:
                    # Same float32 hand-off as the unlabeled branch below.
                    x = preprocess_input(x.astype(np.float32)).astype(np.float32)

                yield x, y
            else:
                if preprocess_input is not None:
                    x = preprocess_input(x.astype(np.float32)).astype(np.float32)
                yield x
            
def df_to_image_array_xd(df, size, lw=2,
                         time_color=True, preprocess_input = None,
                         channel = 1, center = True):
    """Render every drawing of a DataFrame into one image array.

    Args:
        df: frame with a ``drawing`` column holding JSON-encoded stroke lists
            (already-decoded stroke lists are accepted as well). The frame is
            not modified, so the function can be called repeatedly on it.
        size: side length of the rendered images.
        lw: line thickness used when drawing strokes.
        time_color: forwarded to ``draw_cv2`` on the single-channel path.
        preprocess_input: optional callable applied to the float32 image array.
        channel: 1 renders with ``draw_cv2``; any other value renders with
            ``draw_cv2_whole``, which produces 3 channels.
        center: forwarded to the drawing function.

    Returns:
        Array of shape ``(len(df), size, size, channel)``; float32 when
        ``preprocess_input`` is given, otherwise uint8.
    """
    # Decode into a local list instead of writing back into the caller's frame.
    drawings = [json.loads(d) if isinstance(d, str) else d
                for d in df['drawing'].values]

    x = np.zeros((len(df), size, size, channel), dtype=np.uint8)
    for i, raw_strokes in enumerate(drawings):
        if channel == 1:
            x[i, :, :, 0] = draw_cv2(raw_strokes, size=size, lw=lw,
                                     time_color=time_color, center=center)
        else:
            x[i, :, :, :] = draw_cv2_whole(raw_strokes, size=size,
                                           lw=lw, center=center)

    if preprocess_input is not None:
        print('x shape',x.shape, 'x max', x.max())
        x = preprocess_input(x.astype(np.float32)).astype(np.float32)
    return x

In [8]:
# Load the validation split, render its drawings into an image array of shape
# (rows, size, size, channel) and one-hot encode the `word` column into NCATS
# classes. The whole validation set is held in memory.
valid_df = pd.read_csv('../input/valid.csv')
x_valid = df_to_image_array_xd(valid_df, size, lw=lw, 
                               preprocess_input=preprocess_input, 
                               channel=channel)
y_valid = keras.utils.to_categorical(valid_df.word, num_classes=NCATS)

x shape (34000, 64, 64, 1) x max 255


In [11]:
# Sanity check: array shapes and the memory footprint of the validation images
# (nbytes converted to GiB by dividing by 1024**3).
print(x_valid.shape, y_valid.shape)
print('Validation array memory {:.2f} GB'.format(x_valid.nbytes / 1024.**3 ))

(34000, 64, 64, 1) (34000, 340)
Validation array memory 0.52 GB


In [10]:
# Start from a clean Keras/TensorFlow graph so re-running this cell does not
# pile up layers from earlier runs.
K.clear_session()

alpha = 1.     # MobileNetV2 width multiplier
dropout=0.1    # dropout rate applied before the classifier

# Backbone without its classification head and without pretrained weights
# (weights=None), taking (size, size, channel) inputs.
base_model = MobileNetV2(input_shape=(size, size, channel), alpha=alpha, 
                       weights=None, include_top=False)
x = base_model.output
x = GlobalMaxPooling2D()(x)
# Read the pooled feature width from the tensor itself rather than assuming
# 1280 * alpha, so the head keeps matching the backbone if alpha is changed.
shape = (1, 1, K.int_shape(x)[-1])
x = layers.Reshape(shape, name='reshape_1')(x)
x = layers.Dropout(dropout, name='dropout')(x)
# A 1x1 convolution over the (1, 1, features) tensor acts as the dense
# classifier producing NCATS scores.
x = layers.Conv2D(NCATS, (1, 1),
                          padding='same',
                          name='conv_preds')(x)
x = layers.Activation('softmax', name='act_softmax')(x)
predictions = layers.Reshape((NCATS,), name='reshape_2')(x)
model = Model(inputs=base_model.input, outputs=predictions)

#model = MobileNetV2(input_shape=(size, size, 1), alpha=1., weights=None, classes=NCATS)

model.compile(optimizer=Adam(lr=0.002), loss='categorical_crossentropy',
              metrics=[categorical_crossentropy, categorical_accuracy, top_3_accuracy])
# summary() prints the table itself and returns None; wrapping it in print()
# would only append a stray "None" line.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 64, 64, 1)    0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 65, 65, 1)    0           input_1[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 32, 32, 32)   288         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 32, 32, 32)   128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu

In [12]:
# Endless training batch generator; df_path is left at the generator's default
# ('../input/train_all.csv') and mixup stays disabled (default mixup=0).
train_datagen = image_generator_xd(size=size, batchsize=batchsize, lw=lw, 
                                   preprocess_input=preprocess_input,
                                  channel=channel)

In [None]:
# Run identifier used for the checkpoint file, the TensorBoard log directory
# and (later) the submission file name.
model_prefix = 'mobilenetv2_maxpool_{}_lw{}'.format(size, lw)
fold = 9  # NOTE(review): not referenced by any cell visible here -- confirm it is still needed

# The checkpoint callback writes into ./models/ but does not create it; make
# sure it exists up front so the first save after a full epoch cannot fail on
# a missing directory.
os.makedirs('./models', exist_ok=True)

callbks = [
    # Halve the learning rate when validation accuracy has not improved by at
    # least 0.005 for 5 epochs, then wait 3 epochs before monitoring again.
    ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.5, patience=5,
                      min_delta=0.005, mode='max', cooldown=3, verbose=1),
    # Keep only the best model (highest validation accuracy) on disk.
    callbacks.ModelCheckpoint("./models/{}.model".format(model_prefix),
                                monitor='val_categorical_accuracy', 
                                mode = 'max', save_best_only=True, verbose=1),
    callbacks.TensorBoard(log_dir='./log/{}'.format(model_prefix)),
]

In [None]:
# Train from the streaming generator, validating on the in-memory validation
# arrays after each epoch; the callbacks handle LR decay, checkpointing and
# TensorBoard logging.
# NOTE(review): `hists` is created empty and nothing in the visible cells
# appends `hist` to it -- confirm whether it is still needed.
hists = []
hist = model.fit_generator(
    train_datagen, steps_per_epoch=STEPS, epochs=EPOCHS, verbose=1,
    validation_data=(x_valid, y_valid),
    callbacks = callbks
)

In [None]:
# Restore the weights from the file written by the ModelCheckpoint callback
# (same './models/<model_prefix>.model' path), i.e. the best epoch by
# validation accuracy rather than the last one.
model.load_weights('./models/{}.model'.format(model_prefix))

In [None]:
# Load the test set and render it with the same settings used for validation
# (size, lw, preprocess_input, channel).
test = pd.read_csv('../input/test_simplified.csv')
# NOTE(review): not the last expression of the cell, so under IPython's default
# display settings this preview is not shown -- confirm whether it is wanted.
test.head()
x_test = df_to_image_array_xd(test, size, lw=lw, 
                              preprocess_input=preprocess_input,
                             channel=channel)
print(test.shape, x_test.shape)
print('Test array memory {:.2f} GB'.format(x_test.nbytes / 1024.**3 ))

In [None]:
# Class probabilities for every test image, predicted in batches of 128.
test_predictions = model.predict(x_test, batch_size=128, verbose=1)

# preds2catids is defined outside this notebook's visible cells (presumably in
# keras_helper -- confirm). Its result is used below as a DataFrame with
# columns 'a', 'b' and 'c'.
top3 = preds2catids(test_predictions)
# NOTE(review): only the final expression of a cell is displayed by default, so
# this head() preview is likely not shown.
top3.head()
top3.shape

In [None]:
# Translate predicted class ids into class names. Spaces in the names are
# replaced by underscores, because the submission row built later separates the
# three guesses with spaces.
np_classes = np.load('../input/classes.npy')
id2cat = {k: cat.replace(' ', '_') for k, cat in enumerate(np_classes)}
top3cats = top3.replace(id2cat)
# NOTE(review): only the final expression of a cell is displayed by default, so
# this head() preview is likely not shown.
top3cats.head()
top3cats.shape

In [None]:
# Join the three predicted class names per row into one space-separated string
# (columns 'a', 'b', 'c' in that order) and keep only the key and the guesses.
# Note that this adds a 'word' column to `test` in place.
test['word'] = top3cats['a'] + ' ' + top3cats['b'] + ' ' + top3cats['c']
submission = test[['key_id', 'word']]
submission.head()

In [None]:
# kaggle_util is a project-local helper that is not visible in this notebook.
# NOTE(review): presumably this writes the submission CSV to ../result/ and,
# with send=True, uploads it to the 'quickdraw-doodle-recognition' competition
# -- confirm against kaggle_util before re-running, as an upload may count
# against a submission limit.
import kaggle_util
kaggle_util.save_result(submission, 
                        '../result/{}.csv'.format(model_prefix), 
                        'quickdraw-doodle-recognition', 
                        send=True, index=False)