In [None]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPool2D, TimeDistributed, BatchNormalization, SimpleRNN, Reshape, LSTM, Permute
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler

## Useful functions

In [None]:
def save_model(model, filename):
    """Persist a model as two files: architecture JSON and HDF5 weights.

    Writes ``filename + ".json"`` (the output of ``model.to_json()``) and
    ``filename + ".h5"`` (via ``model.save_weights``). Any missing parent
    directory of ``filename`` is created first, so a finished training run is
    not lost to a ``FileNotFoundError`` at save time.

    Args:
        model: Object exposing ``to_json()`` and ``save_weights(path)``.
        filename: Target path without extension, e.g. ``"./out/my_model"``.
    """
    target_dir = os.path.dirname(filename)
    if target_dir:
        # A bare file name has no directory part, and makedirs("") would raise.
        os.makedirs(target_dir, exist_ok=True)

    with open(filename + ".json", "w") as json_file:
        json_file.write(model.to_json())
    # serialize weights to HDF5
    model.save_weights(filename + ".h5")
    print("Saved model to disk")

In [None]:
def plot_history(history):
    """Plot train vs. validation accuracy, then loss, as two separate figures.

    Args:
        history: Object whose ``history`` attribute is a dict mapping metric
            names to per-epoch value lists. It must contain ``loss`` and
            ``val_loss``, plus accuracy under either ``acc``/``val_acc`` or
            ``accuracy``/``val_accuracy``.

    Raises:
        KeyError: If a required metric is missing under every accepted name.
    """
    metrics = history.history
    # The accuracy metric is logged as "acc" or "accuracy" depending on the
    # Keras version; accept both so the helper works across releases.
    accuracy_key = 'acc' if 'acc' in metrics else 'accuracy'

    # (key in the history dict, label used for the title and y axis)
    for key, label in ((accuracy_key, 'accuracy'), ('loss', 'loss')):
        plt.plot(metrics[key])
        plt.plot(metrics['val_' + key])
        plt.title('model ' + label)
        plt.ylabel(label)
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

## Load data

In [None]:
# Folder holding the parquet inputs. Keep the trailing slash: file paths are
# built from this value by plain string concatenation.
DATA_FOLDER = "./data/"

In [None]:
# Training split, read from the parquet file under DATA_FOLDER.
training_path = DATA_FOLDER + "training.parquet"
df_training = pd.read_parquet(training_path)

In [None]:
# Public test split, read from the parquet file under DATA_FOLDER.
public_test_path = DATA_FOLDER + "public_test.parquet"
df_public_test = pd.read_parquet(public_test_path)

In [None]:
# Private test split, read from the parquet file under DATA_FOLDER.
private_test_path = DATA_FOLDER + "private_test.parquet"
df_private_test = pd.read_parquet(private_test_path)

## Process data

In [None]:
# Materialize each frame as a 2-D array once, then split it column-wise:
# column 0 becomes the target (y), every remaining column becomes an input (x).
train_values = df_training.values
test_values = df_public_test.values

x_train, y_train = train_values[:, 1:], train_values[:, 0]
x_test, y_test = test_values[:, 1:], test_values[:, 0]

In [None]:
# Unflatten every row into the (48, 48, 1) shape that the first Conv2D layer
# declares as its input_shape; -1 lets numpy infer the sample count.
SAMPLE_SHAPE = (48, 48, 1)
x_train = np.reshape(x_train, (-1,) + SAMPLE_SHAPE)
x_test = np.reshape(x_test, (-1,) + SAMPLE_SHAPE)

In [None]:
# Cast to float32 and divide by 255 (presumably mapping 8-bit pixel values
# into [0, 1] -- TODO confirm the value range of the source data).
# NOTE: this cell is not idempotent; re-running it divides by 255 again.
x_train = np.true_divide(x_train.astype("float32"), 255.)
x_test = np.true_divide(x_test.astype("float32"), 255.)

In [None]:
# One-hot encode the labels with a fixed width. Without num_classes,
# to_categorical infers the width from the largest label present, so a split
# that happens to lack the highest class would produce fewer columns than the
# 7-unit softmax output layer expects.
NUM_CLASSES = 7
y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

## Model

In [None]:
# Convolutional feature extractor followed by an LSTM that reads the feature
# maps as a sequence, ending in a 7-way softmax classifier.
model = Sequential([
    # --- Convolutional block 1 ---
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
           input_shape=(48, 48, 1)),
    BatchNormalization(),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPool2D(strides=(2, 2)),
    Dropout(0.25),

    # --- Convolutional block 2 ---
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPool2D(strides=(2, 2)),
    Dropout(0.25),

    # --- Recurrent head ---
    # Reverse the axis order, then flatten the last two axes so the LSTM
    # receives 64 timesteps of 81 features each (the Reshape target implies
    # the feature maps are 9x9 with 64 channels at this point).
    Permute((3, 2, 1)),
    Reshape((64, 81)),
    LSTM(256, activation='relu'),
    Dropout(0.5),
    # Reshape to (128, 2) followed by Flatten yields a flat 256-vector again.
    Reshape((128, 2)),
    Flatten(),
    Dense(7, activation='softmax'),
])

In [None]:
# Random augmentation settings: rotation, horizontal/vertical shift and zoom.
augmentation_options = {
    "rotation_range": 10,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "zoom_range": 0.1,
}
datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# NOTE(review): training passes a LearningRateScheduler that starts at 1e-3,
# so the 1e-4 set here is presumably overridden from the first epoch onward --
# confirm which value is intended.
optimizer = Adam(lr=1e-4)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=["accuracy"])

In [None]:
def _annealed_learning_rate(epoch):
    """Return the learning rate for ``epoch``: 1e-3 reduced by 10% per epoch."""
    return 1e-3 * 0.9 ** epoch


annealer = LearningRateScheduler(_annealed_learning_rate)

In [None]:
# Augmented mini-batches drawn from the training arrays.
train_batches = datagen.flow(x_train, y_train, batch_size=16)
# Validate on the first 400 test samples only. #For speed
validation_subset = (x_test[:400, :], y_test[:400, :])

hist = model.fit_generator(
    train_batches,
    steps_per_epoch=500,
    epochs=60,  # Increase this when not on Kaggle kernel
    validation_data=validation_subset,
    callbacks=[annealer],
)
# Persist architecture and weights once training has finished.
save_model(model, "./trained_models/conv_rnn_model")

In [None]:
# Show the accuracy and loss curves recorded in `hist` during training.
plot_history(hist)