# Load Packages and Data

In [None]:
import ast
import math

import cv2
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Read the simplified test drawings, report the table size, and preview it.
test_csv = '../input/quickdraw-doodle-recognition/test_simplified.csv'
test = pd.read_csv(test_csv)
print(test.shape)
test.head()

# Convert Test Strings to Arrays

In [None]:
def img_to_np(img_str, ht, wt, lw, pad):
    """Rasterize a stroke-encoded drawing into a single-channel image.

    Parameters
    ----------
    img_str : str
        Literal text for a list of strokes. Each stroke is indexed as
        ``stroke[0]`` (x coordinates) and ``stroke[1]`` (y coordinates).
        Coordinates are rescaled by ``/ 256``, i.e. they are assumed to
        live on a 256-unit canvas.
    ht, wt : int
        Height and width of the returned image, in pixels.
    lw : int
        Line thickness handed to ``cv2.line``.
    pad : int
        Margin, in pixels, added to every scaled coordinate; the drawing
        itself is scaled to ``(ht - 2*pad, wt - 2*pad)``.

    Returns
    -------
    numpy.ndarray
        ``(ht, wt)`` array of dtype ``uint8``; zero where nothing is drawn.
        Strokes consisting of a single point draw nothing.

    Raises
    ------
    ValueError, SyntaxError
        If ``img_str`` is not a plain Python literal.
    """
    # SECURITY: the string comes from an external CSV, so it must not be
    # passed to eval(). literal_eval accepts the same list/number literals
    # but refuses anything executable.
    strokes = ast.literal_eval(img_str)

    inner_ht = ht - 2 * pad
    inner_wt = wt - 2 * pad

    img = np.zeros((ht, wt), np.uint8)

    for stroke in strokes:
        # .tolist() yields plain Python ints, which every OpenCV build
        # accepts as point coordinates (NumPy scalars are not always parsed).
        xs = ((np.array(stroke[0]) * inner_wt / 256).round().astype('int') + pad).tolist()
        ys = ((np.array(stroke[1]) * inner_ht / 256).round().astype('int') + pad).tolist()

        points = list(zip(xs, ys))
        # Connect consecutive points of the stroke with anti-aliased segments.
        for p1, p2 in zip(points, points[1:]):
            img = cv2.line(img, p1, p2, (255, 0, 0), lw, lineType=cv2.LINE_AA)
    return img


In [None]:
# One 64x64 single-channel slot per test drawing.
# float32 halves the footprint of NumPy's float64 default.
# NOTE(review): assumes the model computes in Keras' default float32, in which
# case the predictions are unchanged — confirm if a different floatx is set.
test_imgs = np.zeros(shape=(test.shape[0], 64, 64, 1), dtype=np.float32)

In [None]:
%%time

# Rasterize every drawing string into its slot of test_imgs, scaled to [0, 1].
# enumerate() over the column gives positional indices directly; iterrows()
# yields index labels and builds a Series per row, which is much slower.
img_ht, img_wt = test_imgs.shape[1:3]
for i, drawing in enumerate(test.drawing):
    test_imgs[i, :, :, 0] = img_to_np(drawing, img_ht, img_wt, 1, 2) / 255

In [None]:
# Preview the first 64 rasterized test drawings on an 8x8 grid.
plt.figure(figsize=[12, 12])
for slot in range(1, 65):
    plt.subplot(8, 8, slot)
    plt.imshow(test_imgs[slot - 1, :, :, 0], cmap='binary')
    plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Draw 64 random test rows and rasterize them on the fly for an 8x8 preview.
sample = test.sample(64)
# A bare `sample.shape` in the middle of a cell displays nothing; print it.
print(sample.shape)

plt.figure(figsize=[12,12])
for i, drawing in enumerate(sample.drawing.values):
    plt.subplot(8,8,i+1)
    plt.imshow(img_to_np(drawing, 64, 64, 1, 2), cmap='binary')
    plt.axis('off')
plt.tight_layout()
plt.show()

# Load Model

In [None]:
# Restore the saved CNN from disk and print its layer summary.
model_path = '../input/models-and-submissions/Quick Draw Models/model_v06.h5'
cnn = keras.models.load_model(model_path)
cnn.summary()

# Generate Predictions

In [None]:
# Display the shape of the image array that is about to be fed to the model.
test_imgs.shape

In [None]:
%%time 

# Run the loaded model over every test image; probs holds one row per image.
probs = cnn.predict(test_imgs)

# Report the shape of the prediction array.
print(probs.shape)

# Distribution of Top 3 Probabilities

In [None]:
# Three largest probabilities of every row of probs, highest first.
# Sorting each row ascending and reading its last three columns backwards
# replaces the former per-row Python loop. The float64 cast keeps the dtype
# the loop's np.zeros buffer had.
top_3_probs = np.sort(probs, axis=1)[:, :-4:-1].astype(np.float64)

print(top_3_probs[:10, :].round(2))

print(top_3_probs.shape)

# One histogram per rank (1st, 2nd, 3rd guess), log-scaled on the count axis
# so sparsely populated bins remain visible.
plt.figure(figsize=[10,6])
for rank in range(3):
    plt.subplot(3,1,rank+1)
    plt.hist(top_3_probs[:,rank], color='orchid', edgecolor='k', bins = np.arange(0, 1.01, 0.025))
    plt.yscale('log')
plt.show()

# Determine Predictions

In [None]:
# Turn every probability row into an ordered array of class guesses.
N_train = probs.shape[0]    # row count of probs; later cells read this name
t = 0.35                    # probability a 2nd/3rd guess must exceed to be kept
predictions = []

# NOTE(review): if the target metric rewards any correct guess among the top
# three, dropping low-probability guesses cannot help — confirm the threshold
# is intentional.
for p in probs:
    # argpartition only guarantees that the three largest entries occupy the
    # last three slots, in no particular order, so rank them explicitly.
    candidates = np.argpartition(p, -3)[-3:]
    ranked = candidates[np.argsort(p[candidates])][::-1]    # best class first

    keep = p[ranked] > t
    keep[0] = True          # the best guess is always submitted
    predictions.append(ranked[keep])

print(len(predictions))

# Create Submission

In [None]:
# Preview the first rows of the test table again before building the submission.
test.head()

In [None]:
# Start from the provided sample submission so the output keeps its layout.
sample_csv = '../input/quickdraw-doodle-recognition/sample_submission.csv'
submission = pd.read_csv(sample_csv)
submission.head()

In [None]:
# Map each row index of label_lookup.csv to the label stored in that row.
label_lookup_df = pd.read_csv('../input/models-and-submissions/Quick Draw Models/label_lookup.csv')
label_lookup = dict(zip(label_lookup_df.index.values, label_lookup_df.label.values))
label_lookup[0]

In [None]:
%%time

# Build each row's space-separated guess string and assign the whole column in
# one step; writing submission.loc[i, 'word'] row by row is very slow on large
# frames.
# NOTE(review): guesses are joined with spaces, so this assumes the labels in
# label_lookup contain no spaces themselves — confirm against label_lookup.csv.
submission['word'] = [
    ' '.join(label_lookup[c] for c in classes) for classes in predictions
]

submission.head()

In [None]:
# Write the submission table to submission.csv without the DataFrame index.
submission.to_csv('submission.csv', index=False)

# Images with Predictions

In [None]:
# Pick 64 distinct random rows and pair each drawing with its submitted words.
idx = np.random.choice(range(N_train), 64, replace=False)
test_sample = test.iloc[idx,:]
sub_sample = submission.iloc[idx, :]

plt.figure(figsize=[16,16])

panels = zip(test_sample.drawing.values, sub_sample.word.values)
for slot, (drawing, words) in enumerate(panels, start=1):
    plt.subplot(8, 8, slot)
    plt.imshow(img_to_np(drawing, 64, 64, 1, 2), cmap='binary')
    # One guess per title line so three guesses fit above a small panel.
    plt.title(words.replace(' ', '\n'))
    plt.axis('off')
plt.tight_layout()
plt.show()