In [1]:
## import keras models, layers and optimizers
from keras.models import Sequential, Model
from keras.layers import Embedding, Flatten, Dense, Dropout, concatenate, multiply, Input
from keras.optimizers import Adam
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from keras import backend


def read_user_id():
    with open('./input.txt', 'r') as f:
        return [l.strip().split(',') for l in  f.readlines()]


def write_output(prediction):
    with open('./output.txt', 'w') as f:
        for pred in prediction:
            f.write(pred+"\n")


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def build_MLP(n_users, n_items):
  
    # build model
    num_epochs = 20
    batch_size = 256
    mf_dim = 8
    layers = eval('[64,32,16,8]')
    reg_mf = 0
    reg_layers = eval('[0,0,0,0]')
    verbose = 1

    # Build model
    dim_embedding_user = 50
    dim_embedding_item = 50

    ## item embedding
    item_input= Input(shape=[1], name='item')
    item_embedding = Embedding(n_items + 1, dim_embedding_item, name='Item-Embedding')(item_input)
    item_vec = Flatten(name='Item-Flatten')(item_embedding)
    item_vec = Dropout(0.2)(item_vec)

    ## user embedding
    user_input = Input(shape=[1], name='User')
    user_embedding = Embedding(n_users + 1, dim_embedding_user, name ='User-Embedding')(user_input)
    user_vec = Flatten(name ='User-Flatten')(user_embedding)
    user_vec = Dropout(0.2)(user_vec)

    ## concatenate flattened values 
    concat = concatenate([item_vec, user_vec])
    concat_dropout = Dropout(0.2)(concat)

    ## add dense layer (can try more)
    dense_1 = Dense(50, name ='Dense1', activation='relu')(concat)
    dropout_1 = Dropout(0.2)(dense_1)
    dense_2 = Dense(20, activation="relu", name = "Dense2")(dropout_1)
    dropout_2 = Dropout(0.2)(dense_2)
    dense_3 = Dense(10, activation="relu", name = "Dense3")(dropout_2)
    dropout_3 = Dropout(0.2)(dense_3)

    ## define output (can try sigmoid instead of relu)
    result = Dense(1, activation ='relu',name ='Activation')(dropout_3)

    ## define model with 2 inputs and 1 output
    return Model(inputs=[user_input, item_input], outputs=result, name="MLP")



def rmse(y_true, y_pred):
    return backend.sqrt(backend.mean(backend.square(y_pred - y_true), axis=-1))


In [3]:


if __name__ == "__main__":
    df_train = pd.read_csv('data/ratings_train.csv', usecols = ['userId', 'movieId', 'rating'])
    df_valid = pd.read_csv('data/ratings_vali.csv', usecols = ['userId', 'movieId', 'rating'])
 
    # prepare train data
    n_users, n_items = max(df_train.userId.unique()), max(df_train.movieId.unique())
    user_train = df_train['userId'].to_numpy()
    item_train = df_train['movieId'].to_numpy()
    rate_train = df_train['rating'].to_numpy()

    ## define model 
    recommender = build_MLP(n_users, n_items)
    # recommender.summary()
    
    # compile model
    opt_adam = Adam(lr = 0.002)
    recommender.compile(optimizer=Adam(lr = 0.002), loss= ['mse'], metrics=['accuracy', rmse ])
                      
    ## fit model
    track_training = recommender.fit([df_train['userId'], df_train['movieId']],
                                    df_train['rating'],
                                    batch_size = 256,
                                    validation_split = 0.005,
                                    epochs =8,
                                    verbose = 0)
    # store model weights
    TRAINED_PARAM = 'param.data'
    recommender.save_weights(TRAINED_PARAM)
#     recommender.load_weights(TRAINED_PARAM)

    # predict requests
    inputs = read_user_id()
    predictions = []
    for user, movie in inputs:
        target = [[int(user)],[int(movie)]]
        predict = recommender.predict(target)[0][0]
        predict = round(predict, 8)
        predictions.append('{},{},{}'.format(user, movie, str(predict)))
    write_output(predictions)    




In [4]:
# pd.DataFrame(track_training.history)

Unnamed: 0,val_loss,val_accuracy,val_rmse,loss,accuracy,rmse
0,1.143258,0.286089,0.89103,2.472173,0.2101,1.224775
1,0.936814,0.333333,0.785889,1.442404,0.252265,0.95693
2,0.892556,0.356955,0.763437,1.278712,0.272212,0.897409
3,0.85784,0.370079,0.746392,1.171518,0.280957,0.858485
4,0.817504,0.417323,0.717188,1.057239,0.298169,0.812811
5,0.789778,0.419948,0.701811,0.97766,0.310454,0.780457
6,0.815432,0.422572,0.716418,0.909432,0.322145,0.750318
7,0.789488,0.427822,0.698049,0.855335,0.332224,0.72624


In [5]:
# recommender.summary()

Model: "MLP"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
item (InputLayer)               (None, 1)            0                                            
__________________________________________________________________________________________________
User (InputLayer)               (None, 1)            0                                            
__________________________________________________________________________________________________
Item-Embedding (Embedding)      (None, 1, 50)        4180200     item[0][0]                       
__________________________________________________________________________________________________
User-Embedding (Embedding)      (None, 1, 50)        33600       User[0][0]                       
________________________________________________________________________________________________