In [1]:
## import keras models, layers and optimizers
from keras.models import Sequential, Model
from keras.layers import Embedding, Flatten, Dense, Dropout, concatenate, multiply, Input
from keras.optimizers import Adam
from time import time
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats import zscore
from keras import backend




def read_user_id():
    with open('./input.txt', 'r') as f:
        return [l.strip().split(',') for l in  f.readlines()]


def write_output(prediction):
    with open('./output.txt', 'w') as f:
        for pred in prediction:
            f.write(pred+"\n")


def preprocess_by_user(dataframe):
    dataframe['rating']  = dataframe.groupby(['userId']).rating.transform(lambda x : zscore(x, ddof=1))
    return dataframe

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def build_MLP(n_users, n_items):
  
    # build model
    num_epochs = 20
    batch_size = 256
    mf_dim = 8
    layers = eval('[64,32,16,8]')
    reg_mf = 0
    reg_layers = eval('[0,0,0,0]')
    verbose = 1

    # Build model
    dim_embedding_user = 50
    dim_embedding_item = 50

    ## item embedding
    item_input= Input(shape=[1], name='item')
    item_embedding = Embedding(n_items + 1, dim_embedding_item, name='Item-Embedding')(item_input)
    item_vec = Flatten(name='Item-Flatten')(item_embedding)
    item_vec = Dropout(0.2)(item_vec)

    ## user embedding
    user_input = Input(shape=[1], name='User')
    user_embedding = Embedding(n_users + 1, dim_embedding_user, name ='User-Embedding')(user_input)
    user_vec = Flatten(name ='User-Flatten')(user_embedding)
    user_vec = Dropout(0.2)(user_vec)

    ## concatenate flattened values 
    concat = concatenate([item_vec, user_vec])
    concat_dropout = Dropout(0.2)(concat)

    ## add dense layer (can try more)
    dense_1 = Dense(50, name ='Dense1', activation='relu')(concat)
    dropout_1 = Dropout(0.2)(dense_1)
    dense_2 = Dense(20, activation="relu", name = "Dense2")(dropout_1)
    dropout_2 = Dropout(0.2)(dense_2)
    dense_3 = Dense(10, activation="relu", name = "Dense3")(dropout_2)
    dropout_3 = Dropout(0.2)(dense_3)

    ## define output (can try sigmoid instead of relu)
    result = Dense(1, activation ='relu',name ='Activation')(dropout_3)

    ## define model with 2 inputs and 1 output
    return Model(inputs=[user_input, item_input], outputs=result, name="MLP")



def rmse(y_true, y_pred):
    return backend.sqrt(backend.mean(backend.square(y_pred - y_true), axis=-1))


In [16]:


if __name__ == "__main__":
    df_train = pd.read_csv('data/ratings_train.csv', usecols = ['userId', 'movieId', 'rating'])
    df_valid = pd.read_csv('data/ratings_vali.csv', usecols = ['userId', 'movieId', 'rating'])
 
    # prepare train data
    n_users, n_items = max(df_train.userId.unique()), max(df_train.movieId.unique())
    user_train = df_train['userId'].to_numpy()
    item_train = df_train['movieId'].to_numpy()
    rate_train = df_train['rating'].to_numpy()

    ## define model 
    recommender = build_MLP(n_users, n_items)
    # recommender.summary()
    
    # compile model
    opt_adam = Adam(lr = 0.002)
    recommender.compile(optimizer=Adam(lr = 0.002), loss= ['mse'], metrics=['accuracy', rmse ])
                      
    ## fit model
    track_training = recommender.fit([df_train['userId'], df_train['movieId']],
                                    df_train['rating'],
                                    batch_size = 256,
                                    validation_split = 0.005,
                                    epochs = 8,
                                    verbose = 0)
    TRAINED_PARAM = 'param.data'
    recommender.save_weights(TRAINED_PARAM)
#     recommender.load_weights(TRAINED_PARAM)
   

In [18]:
pd.DataFrame(track_training.history)

Unnamed: 0,val_loss,val_accuracy,val_rmse,loss,accuracy,rmse
0,1.486967,0.259843,1.031036,3.329209,0.186402,1.419978
1,1.021059,0.28084,0.838755,1.476454,0.247285,0.971814
2,0.933275,0.335958,0.784753,1.313285,0.263163,0.911691
3,0.818889,0.401575,0.728196,1.182826,0.281565,0.861937
4,0.823676,0.396325,0.729417,1.087712,0.292132,0.824766
5,0.816455,0.396325,0.725264,1.01491,0.305448,0.793925
6,0.782831,0.39895,0.707024,0.949753,0.318287,0.766806
7,0.806876,0.419948,0.7148,0.897927,0.325791,0.744134


In [19]:
inputs = read_user_id()
# result = do(user_ids)
predictions = []
for user, movie in inputs:
    target = [[int(user)],[int(movie)]]
    predict = recommender.predict(target)[0][0]
    predict = round(predict, 8)
    predictions.append('{},{},{}'.format(user, movie, str(predict)))
write_output(predictions)    