In [0]:
#@title Dependencies
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import ast
import os
import json
import datetime as dt
from tqdm import tqdm

import cv2
import math
import numpy as np
import pandas as pd
from collections import defaultdict
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [16, 10]
plt.rcParams['font.size'] = 14

import tensorflow as tf
import keras
from keras.models import Model
from keras.models import Sequential
from keras.layers import concatenate, Input, Conv2D, MaxPooling2D
from keras.layers import GlobalAveragePooling2D, LSTM, Bidirectional, Conv1D, BatchNormalization, Dense, Dropout, Flatten, Activation

from keras.metrics import categorical_accuracy, top_k_categorical_accuracy, categorical_crossentropy
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, CSVLogger
from keras.optimizers import Adam

from keras.applications import MobileNet
from keras.applications.mobilenet import preprocess_input
from keras.preprocessing.sequence import pad_sequences

start = dt.datetime.now()

In [0]:
#@title Auth for GDrive

from google.colab import drive
from google.colab import auth
auth.authenticate_user()
from googleapiclient.discovery import build
drive_service = build('drive', 'v3')

In [0]:
#@title Download npy files from GDrive

file_ids = {
}

import numpy as np
import io
from googleapiclient.http import MediaIoBaseDownload

data = {}

for file_name, file_id in file_ids.items():
    request = drive_service.files().get_media(fileId=file_id) 
    downloaded = io.BytesIO()
    downloader = MediaIoBaseDownload(downloaded, request)
    done = False

    while done is False:
        status, done = downloader.next_chunk()
        print('Download %d%%.' % int(status.progress() * 100))
        
    downloaded.seek(0)
    data[file_name] = np.load(downloaded)

In [0]:
#@title Code: Directory Downloader {display-mode: "form"}

# This code will be hidden when the notebook is loaded.

!pip install -U -q PyDrive
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

def download_drive_dir(local_dir, folder_id, whitelist=[]):
    """
    params:
        local_dir: Colaboratory directory
        folder_id: Google Drive folder ID
    """
    local_download_path = os.path.expanduser(local_dir) 
    try:
        os.makedirs(local_download_path)
    except Exception as e:
        print('Error creating path:', e)

    file_list = drive.ListFile(
        {'q': "'{}' in parents".format(folder_id)}).GetList()

    for f in tqdm(file_list):
        if not len(whitelist) or f['title'] in whitelist:
            # print('title: %s, id: %s' % (f['title'], f['id']))
            fname = os.path.join(local_download_path, f['title'])
            # print('downloading to {}'.format(fname))
            f_ = drive.CreateFile({'id': f['id']})
            f_.GetContentFile(fname)

In [0]:
download_drive_dir('./models', '1Fjv7uXEc92yrbg7drIryB4ZdtOp-aLgQ')
download_drive_dir('./pretrained_models', '1swZb9ootRNXeSXR7iaTmVZsccsRK-UW9')

Error creating path: [Errno 17] File exists: './models'


100%|██████████| 23/23 [00:19<00:00,  1.11it/s]


Error creating path: [Errno 17] File exists: './pretrained_models'


100%|██████████| 19/19 [00:15<00:00,  1.17it/s]


In [0]:
download_drive_dir('./input/shuffle-csvs', '1H8ogDcbBGsgAJkxaOxXd-XR3ZH14su3i')

In [0]:
NCATS = 340
image_size = 128
size = 128
time_color = True
stroke_size = 3
DP_DIR = './input/shuffle_csvs/'

In [0]:
#@title Utils
sampler = {image_size:[round(i*(image_size-10)/256)+5 for i in range(512)] for image_size in [128, 224, 299, 331]}
stroke_color = defaultdict(lambda: 125 if time_color else 255,
        {t:255-min(t,10)*13 for t in range(10)} if time_color else {})
def f2cat(filename): return filename.split('.')[0]
def list_all_categories(): return sorted([f2cat(f) for f in os.listdir(os.path.join(INPUT_DIR, 'train_simplified'))], key=str.lower)
def apk(actual, predicted, k=3):
    if len(predicted) > k: predicted = predicted[:k]
    score, num_hits = 0.0, 0.0
    for i, p in enumerate(predicted):
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)
    if len(actual)==0: return 0.0
    return score / min(len(actual), k)
def mapk(actual, predicted, k=3): return np.mean([apk(a, p, k) for a, p in zip(actual, predicted)])
def preds2catids(predictions): return pd.DataFrame(np.argsort(-predictions, axis=1)[:, :3], columns=['a', 'b', 'c'])
def top_3_accuracy(y_true, y_pred): return top_k_categorical_accuracy(y_true, y_pred, k=3)
log_keys = ('loss', 'val_loss', 'categorical_accuracy', 'top_3_accuracy', 'val_categorical_accuracy', 'val_top_3_accuracy')
def print_history(hists): print("\n".join(["\t".join(["%.3f"%hist.history[key][-1] for key in log_keys]) for hist in hists]))

In [0]:
#@title Image processing utils
def _stack_it(stroke_vec):
    """preprocess the string and make a standard Nx3 stroke vector"""
    # unwrap the list
    in_strokes = [(xi,yi,i)
          for i,(x,y) in enumerate(stroke_vec)
          for xi,yi in zip(x,y)]
    c_strokes = np.stack(in_strokes)

    # replace stroke id with 1 for continue, 2 for new
    c_strokes[:,2] = [1]+np.diff(c_strokes[:,2]).tolist()
    c_strokes[:,2] += 1 # since 0 is no stroke
    
    # pad the strokes with zeros
    return pad_sequences(c_strokes.swapaxes(0, 1), 
                         maxlen=70, 
                         padding='post').swapaxes(0, 1)

def draw_cv2(img, raw_strokes, image_size):
    for t in range(len(raw_strokes)-1, -1, -1):
        stroke = raw_strokes[t]
        for i in range(len(stroke[0]) - 1):
            cv2.line(img,
                (int(sampler[image_size][stroke[0][i]]), int(sampler[image_size][stroke[1][i]])),
                (int(sampler[image_size][stroke[0][i+1]]), int(sampler[image_size][stroke[1][i+1]])),
                stroke_color[i], stroke_size)

def sample(strokes, downsize_to=size-2):
    division = 256 / downsize_to
    for i in range(len(strokes)):
        for j in range(len(strokes[i])):
            for k in range(len(strokes[i][j])):
                strokes[i][j][k] = round(strokes[i][j][k]/division)+1
    return strokes

def image_generator_xd(batchsize, ks, image_size):
    while True:
        for k in np.random.permutation(ks):
            filename = os.path.join(DP_DIR, 'train_k{}.csv'.format(k))
            for df in pd.read_csv(filename, chunksize=batchsize):
                df.drawing = df.drawing.apply(json.loads)
                
                x = np.zeros((len(df), image_size, image_size, 1))
                for i, raw_strokes in enumerate(df.drawing.values):
                    draw_cv2(x[i], raw_strokes, image_size)
                x = np.repeat(x, 3, axis=3)
                x = preprocess_input(x).astype(np.float32)
                
                y = keras.utils.to_categorical(df.y, num_classes=NCATS)
                yield x, y
                
def image_generator_test(filename, batchsize, image_size, repeat_vec=True):
    for df in pd.read_csv(filename, chunksize=batchsize):
        df.drawing = df.drawing.apply(json.loads)

        x = np.zeros((len(df), image_size, image_size, 1))
        for i, raw_strokes in enumerate(df.drawing.values):
            draw_cv2(x[i], raw_strokes, image_size)
        if repeat_vec:
            x = np.repeat(x, 3, axis=3)
        x = preprocess_input(x).astype(np.float32)

        yield x

In [0]:
test = pd.read_csv('test_simplified.csv')

In [0]:
#@title MobileNet RNN Model
def mobilenet_rnn():
    base_model = MobileNet(input_shape=(image_size, image_size, 1), alpha=1.,
                      weights=None, dropout=0.2, classes=NCATS)
#     base_model.load_weights("mobilenet.h5")

#     for layer in base_model.layers[:-12]:
#         layer.trainable = False

    base_model = Sequential(base_model.layers)

    inp = Input(shape = (70,3))

    x = BatchNormalization()(inp)

    x = Conv1D(256, (5,), activation = "relu")(x)
    x = Dropout(0.2)(x)

    x = Conv1D(256, (5,), activation = 'relu')(x)
    x = Dropout(0.2)(x)

    x = Conv1D(256, (3,), activation = 'relu')(x)
    x = Dropout(0.2)(x)

    x = Bidirectional(LSTM(128, return_sequences = True))(x)
    x = Dropout(0.2)(x)

    x = Bidirectional(LSTM(128, return_sequences = False))(x)
    x = Dropout(0.2)(x)

    x = Dense(512, activation = 'relu')(x)

    stroke_read_model = Model(inp, x)
    stroke_read_model = Sequential(stroke_read_model.layers)

    inp1 = base_model.input
    out1 = base_model.output

    inp2 = Input(shape = (70, 3))
    out2 = stroke_read_model(inp2)

    x = concatenate([out1, out2])
    x = Dropout(0.2)(x)
    x = Dense(NCATS, activation='softmax')(x)
    model = Model([inp1, inp2], x)

    return model
    

In [0]:
def generate_model(model_generator):
    model = model_generator(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
    out = model.output
    out = GlobalAveragePooling2D()(out)
    out = Dense(1000, activation="relu")(out)
    out = Dense(NCATS, activation="softmax")(out)

    model = Model(model.input, out)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy',
                  metrics=[categorical_accuracy, top_3_accuracy])
    return model

from keras.applications import mobilenet, resnet50, xception, inception_v3

_mobilenet_rnn = mobilenet_rnn()
_mobilenet_rnn.load_weights("./models/MobilenetRNN.128.256.2000.02-022524.30-0.74.h5")

_mobilenet_2 = MobileNet(input_shape=(image_size, image_size, 1), alpha=1.,
                  weights=None, dropout=0.2, classes=NCATS)
_mobilenet_2.load_weights("./models/FastLoader.128.256.2000.10-0.75.h5")

_inception = generate_model(inception_v3.InceptionV3)
_inception.load_weights("./pretrained_models/InceptionV3.02-071834-10-1.08.h5")

_mobilenet_1 = MobileNet(input_shape=(image_size, image_size, 1), alpha=1.,
                  weights=None, dropout=0.2, classes=NCATS)
_mobilenet_1.load_weights("./models/MobileNet.01-0.88.hdf5")

_mobilenet = generate_model(mobilenet.MobileNet)
_mobilenet.load_weights("./pretrained_models/MobileNetv1.02-071834-10-1.01.h5")

_resnet50 = generate_model(resnet50.ResNet50)
_resnet50.load_weights("./pretrained_models/ResNet50.02-071834-10-1.07.h5")

_xception = generate_model(xception.Xception)
_xception.load_weights("./pretrained_models/Xception.02-071834-10-1.02.h5")

In [0]:
batchsize=1024
        
valid_datagen = image_generator_xd(batchsize, range(190, 200), 128)

def predict(model, repeat_vec=True):
    return model.predict_generator(
        image_generator_test('./test_simplified.csv', batchsize, 128, repeat_vec=repeat_vec), 
        steps=int(len(test)/batchsize)+1)

In [0]:
from google.colab import files

npy_mobilenet_rnn = predict(_mobilenet_rnn)
np.save('mobilenet_rnn', npy_mobilenet_rnn); files.download('mobilenet_rnn.npy')
npy_mobilenet_2 = predict(_mobilenet_2, repeat_vec=False)
np.save('mobilenet_probas_2', npy_mobilenet); files.download('mobilenet_probas_2.npy')
npy_inception = predict(_inception)
np.save('inception_probas', npy_inception); files.download('inception_probas.npy')
npy_mobilenet_1 = predict(_mobilenet_1, repeat_vec=False)
np.save('mobilenet_probas_1', npy_mobilenet); files.download('mobilenet_probas_1.npy')
npy_mobilenet = predict(_mobilenet)
np.save('mobilenet_probas', npy_mobilenet); files.download('mobilenet_probas.npy')
npy_xception = predict(_xception)
np.save('xception_probas', npy_xception); files.download('xception_probas.npy')
npy_resnet50 = predict(_resnet50)
np.save('resnet_probas', npy_resnet50); files.download('resnet_probas.npy')

In [0]:
# Create Submission

import numpy as np
import pandas as pd
from google.colab import files

cats = ['airplane', 'alarm clock', 'ambulance', 'angel', 'animal migration', 'ant', 'anvil', 'apple', 'arm', 'asparagus', 'axe', 'backpack', 'banana', 'bandage', 'barn', 'baseball', 'baseball bat', 'basket', 'basketball', 'bat', 'bathtub', 'beach', 'bear', 'beard', 'bed', 'bee', 'belt', 'bench', 'bicycle', 'binoculars', 'bird', 'birthday cake', 'blackberry', 'blueberry', 'book', 'boomerang', 'bottlecap', 'bowtie', 'bracelet', 'brain', 'bread', 'bridge', 'broccoli', 'broom', 'bucket', 'bulldozer', 'bus', 'bush', 'butterfly', 'cactus', 'cake', 'calculator', 'calendar', 'camel', 'camera', 'camouflage', 'campfire', 'candle', 'cannon', 'canoe', 'car', 'carrot', 'castle', 'cat', 'ceiling fan', 'cell phone', 'cello', 'chair', 'chandelier', 'church', 'circle', 'clarinet', 'clock', 'cloud', 'coffee cup', 'compass', 'computer', 'cookie', 'cooler', 'couch', 'cow', 'crab', 'crayon', 'crocodile', 'crown', 'cruise ship', 'cup', 'diamond', 'dishwasher', 'diving board', 'dog', 'dolphin', 'donut', 'door', 'dragon', 'dresser', 'drill', 'drums', 'duck', 'dumbbell', 'ear', 'elbow', 'elephant', 'envelope', 'eraser', 'eye', 'eyeglasses', 'face', 'fan', 'feather', 'fence', 'finger', 'fire hydrant', 'fireplace', 'firetruck', 'fish', 'flamingo', 'flashlight', 'flip flops', 'floor lamp', 'flower', 'flying saucer', 'foot', 'fork', 'frog', 'frying pan', 'garden', 'garden hose', 'giraffe', 'goatee', 'golf club', 'grapes', 'grass', 'guitar', 'hamburger', 'hammer', 'hand', 'harp', 'hat', 'headphones', 'hedgehog', 'helicopter', 'helmet', 'hexagon', 'hockey puck', 'hockey stick', 'horse', 'hospital', 'hot air balloon', 'hot dog', 'hot tub', 'hourglass', 'house', 'house plant', 'hurricane', 'ice cream', 'jacket', 'jail', 'kangaroo', 'key', 'keyboard', 'knee', 'ladder', 'lantern', 'laptop', 'leaf', 'leg', 'light bulb', 'lighthouse', 'lightning', 'line', 'lion', 'lipstick', 'lobster', 'lollipop', 'mailbox', 'map', 'marker', 'matches', 'megaphone', 'mermaid', 'microphone', 'microwave', 'monkey', 'moon', 'mosquito', 'motorbike', 'mountain', 'mouse', 'moustache', 'mouth', 'mug', 'mushroom', 'nail', 'necklace', 'nose', 'ocean', 'octagon', 'octopus', 'onion', 'oven', 'owl', 'paint can', 'paintbrush', 'palm tree', 'panda', 'pants', 'paper clip', 'parachute', 'parrot', 'passport', 'peanut', 'pear', 'peas', 'pencil', 'penguin', 'piano', 'pickup truck', 'picture frame', 'pig', 'pillow', 'pineapple', 'pizza', 'pliers', 'police car', 'pond', 'pool', 'popsicle', 'postcard', 'potato', 'power outlet', 'purse', 'rabbit', 'raccoon', 'radio', 'rain', 'rainbow', 'rake', 'remote control', 'rhinoceros', 'river', 'roller coaster', 'rollerskates', 'sailboat', 'sandwich', 'saw', 'saxophone', 'school bus', 'scissors', 'scorpion', 'screwdriver', 'sea turtle', 'see saw', 'shark', 'sheep', 'shoe', 'shorts', 'shovel', 'sink', 'skateboard', 'skull', 'skyscraper', 'sleeping bag', 'smiley face', 'snail', 'snake', 'snorkel', 'snowflake', 'snowman', 'soccer ball', 'sock', 'speedboat', 'spider', 'spoon', 'spreadsheet', 'square', 'squiggle', 'squirrel', 'stairs', 'star', 'steak', 'stereo', 'stethoscope', 'stitches', 'stop sign', 'stove', 'strawberry', 'streetlight', 'string bean', 'submarine', 'suitcase', 'sun', 'swan', 'sweater', 'swing set', 'sword', 't-shirt', 'table', 'teapot', 'teddy-bear', 'telephone', 'television', 'tennis racquet', 'tent', 'The Eiffel Tower', 'The Great Wall of China', 'The Mona Lisa', 'tiger', 'toaster', 'toe', 'toilet', 'tooth', 'toothbrush', 'toothpaste', 'tornado', 'tractor', 'traffic light', 'train', 'tree', 'triangle', 'trombone', 'truck', 'trumpet', 'umbrella', 'underwear', 'van', 'vase', 'violin', 'washing machine', 'watermelon', 'waterslide', 'whale', 'wheel', 'windmill', 'wine bottle', 'wine glass', 'wristwatch', 'yoga', 'zebra', 'zigzag']

id2cat = {k: cat.replace(' ', '_') for k, cat in enumerate(cats)}

# test length x num categories
m0 = np.load('mobilenet_probas_2.npy')
m1 = np.load('mobilenet_probas_1.npy')
m2 = np.load('xception_probas (1).npy')
m3 = np.load('resnet_probas (1).npy')
m4 = np.load('mobilenet_probas (1).npy')
m5 = np.load('inception_probas.npy')

# test_probas = [m0, m1, m2, m3, m4, m5]
# weights = [92, 88, 74, 73, 74, 71]

test_probas = [m0]
weights = [92]

test_probas = [a*b for a, b in zip(test_probas, weights)]
M, N = test_probas[0].shape

for arr in test_probas:
    assert((M, N) == arr.shape)

# Sum up the probabilities
test_probas = np.array(test_probas)
probas_sum = np.sum(test_probas, axis=0)

predictions = []
for i in range(M):
    top_3 = np.argsort(probas_sum[i, :])[::-1][:3]# np.argpartition(probas_sum[i, :], -3)[:3]
    top_3_cat = ' '.join(map(id2cat.get, top_3))
    predictions.append(top_3_cat)

test = pd.read_csv('./test_simplified.csv')
test['word'] = pd.Series(predictions)
submission = test[['key_id', 'word']]

submission.head()
submission.shape
submission.to_csv('ensemble_submission.csv', index=False)

files.download('ensemble_submission.csv')d
