In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from tensorflow.keras import Input, Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Embedding, Dense, Flatten, Concatenate,
                                     Dot, Reshape, Add, Subtract, BatchNormalization)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
# Read the labelled training sample and the unlabelled test sample.
# Both paths are relative, so they resolve against the kernel's working directory.
train_data, test_data = map(pd.read_csv, ("train_sample.csv", "test_sample.csv"))

In [None]:
# Display the first rows of the training frame to check the loaded columns.
train_data.head()

Unnamed: 0,feature_0,feature_1,feature_2,target
0,11,25,29,0
1,12,48,26,1
2,36,39,26,1
3,38,46,45,2
4,17,32,49,0


In [None]:
# Display the first rows of the test frame to check the loaded columns.
test_data.head()

Unnamed: 0,ID,feature_0,feature_1,feature_2
0,9000,20,37,36
1,9001,37,8,8
2,9002,21,35,8
3,9003,22,48,22
4,9004,21,18,10


In [None]:
# Separate the label column from the model inputs; `train_data` itself is left untouched.
train_target = train_data.loc[:, 'target']
train_features = train_data.drop(columns=['target'])

In [None]:
# Hold out 30% of the labelled rows for validation, stratified on the target so both
# partitions keep the same label proportions.
# The seed is fixed so that a fresh "Restart & Run All" reproduces the same partition;
# without it every run trains and validates on different rows.
SPLIT_SEED = 42
train, val, y_train, y_val = train_test_split(train_features, train_target,
                                              stratify=train_target, test_size=0.3,
                                              random_state=SPLIT_SEED)

In [None]:
# Every non-target column is fed to the model as its own input.
features = train_features.columns

# Vocabulary size per feature = largest index + 1, because an Embedding table of size N
# only accepts indices 0..N-1. The maximum is taken over the full labelled frame AND the
# test frame: sizing from the training split alone would leave any index that occurs only
# in the validation or test rows outside the table.
f_size = [int(max(train_data[f].max(), test_data[f].max())) + 1 for f in features]

# Keras multi-input format: one 1-D array per feature, in the same order as `f_size`.
X_train = [train[f].values for f in features]
X_val = [val[f].values for f in features]
X_test = [test_data[f].values for f in features]

In [None]:
# Width of each feature's latent factor vector (read by build_model).
k_latent = 2
# L2 penalty on embedding / projection weights (read by get_embed).
embedding_reg = 2e-4
# L2 penalty on the kernel of the final Dense layer (read by build_model).
kernel_reg = 1e-1

In [None]:
# Module-level name only: get_embed takes its own `out_dim` argument, which shadows this
# value inside the function.
out_dim = 2


def get_embed(x_input, x_size, out_dim):
    """Map one input tensor to a flat vector of width ``out_dim``.

    Args:
        x_input: Keras tensor for a single feature. On the embedding path it holds
            the index to look up.
        x_size: Length of the vocabulary (number of distinct indices). A value that is
            not greater than zero selects the Dense projection path instead.
        out_dim: Size of the vector produced for this feature.

    Returns:
        The output tensor of either ``Embedding`` followed by ``Flatten`` or of a
        single ``Dense`` layer. Both paths are L2-regularised with the module-level
        ``embedding_reg``.
    """
    penalty = l2(embedding_reg)

    if not x_size > 0:
        # No vocabulary: project the raw input with a linear layer.
        return Dense(out_dim, kernel_regularizer=penalty)(x_input)

    # Categorical input: look the index up, then drop the length-1 sequence axis.
    looked_up = Embedding(x_size, out_dim, input_length=1,
                          embeddings_regularizer=penalty)(x_input)
    return Flatten()(looked_up)

In [None]:
def build_model(f_size):
    """Build and compile the multi-input Keras model.

    Each feature gets its own scalar input, a width-1 "linear" embedding and a
    width-``k_latent`` factor embedding (both via ``get_embed``). For feature *i* the
    interaction term is ``(sum_j v_j - v_i) . v_i``, i.e. the dot product of its factor
    with the sum of all other features' factors. Linear and interaction terms are
    concatenated, batch-normalised and fed to a single ReLU unit.

    Args:
        f_size: Sequence with one vocabulary size per feature, in input order.

    Returns:
        A compiled ``Model`` taking ``len(f_size)`` inputs of shape ``(1,)`` and
        producing one output, trained with mean squared error and Adam
        (learning rate 0.001, gradient norm clipped at 0.25).
    """
    inputs = [Input(shape=(1,)) for _ in f_size]

    # Layers are created in the same order as before (all linear terms, then all
    # factors) so auto-generated layer names stay stable.
    linear_parts = [get_embed(tensor, vocab, 1)
                    for tensor, vocab in zip(inputs, f_size)]
    latent_parts = [get_embed(tensor, vocab, k_latent)
                    for tensor, vocab in zip(inputs, f_size)]

    # Sum of all factor vectors; subtracting a feature's own factor leaves "everyone else".
    factor_sum = Add()(latent_parts)
    others = [Subtract()([factor_sum, own]) for own in latent_parts]
    interactions = [Dot(axes=1)([rest, own]) for rest, own in zip(others, latent_parts)]

    merged = Concatenate()(linear_parts + interactions)
    merged = BatchNormalization()(merged)
    output = Dense(1, activation='relu', kernel_regularizer=l2(kernel_reg))(merged)

    model = Model(inputs=inputs, outputs=[output])
    optimizer = Adam(clipnorm=0.25, learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

In [None]:
# Instantiate the compiled network with one input per entry of `f_size`.
model = build_model(f_size)

In [None]:
# Upper bound on training epochs; early stopping is expected to end training sooner.
n_epochs = 1_000
# Number of rows per gradient step.
batch_size = 128

In [None]:
# Stop once validation loss has gone `patience` epochs without improving, and roll the
# model back to the weights from its best epoch. Without `restore_best_weights` the model
# keeps the weights of the last (worse) epoch, and those would be used for prediction.
earlystopper = EarlyStopping(monitor='val_loss', patience=2,
                             restore_best_weights=True, verbose=0)
history = model.fit(X_train, y_train, epochs=n_epochs, batch_size=batch_size, verbose=0,
                    validation_data=(X_val, y_val), callbacks=[earlystopper], shuffle=True)

# `earlystopper.stopped_epoch` is the epoch training halted on (and stays 0 when training
# never stops early), not the best one. Read the best epoch (0-based) from the recorded
# validation-loss curve instead.
best_epoch = int(np.argmin(history.history['val_loss']))

In [None]:
# Score the test rows; the model has a single Dense(1) output, so this yields one value per row.
pred = model.predict(X_test)



In [None]:
# Pair each test-set ID with its prediction and write the two-column submission file.
sub = pd.read_csv("test_sample.csv", usecols=['ID']).assign(target=pred)
sub.to_csv('submission.csv', index=False)