<a href="https://colab.research.google.com/github/kumaranu7/Recommendation-System-using-NCF/blob/master/Recommendation_System_using_Neural_Collaborative_Filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [122]:
# Mount Google Drive into the Colab filesystem at /content/drive so files
# stored there can be read by path. `google.colab` is only importable inside
# a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import pandas as pd
import numpy as np

In [0]:
# Load the ratings table from the mounted Drive folder.
# NOTE(review): absolute, account-specific path — adjust it if the CSV lives elsewhere.
rating_df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Projects/RecSys/ratings.csv')

In [125]:
# Order the ratings by timestamp (ascending) so that later positional slicing
# of the rows corresponds to an earlier/later split in time.
rating_df = rating_df.sort_values('timestamp')
# Show the first rows as the cell output.
rating_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
66719,429,595,5.0,828124615
66716,429,588,5.0,828124615
66717,429,590,5.0,828124615
66718,429,592,5.0,828124615
66712,429,432,3.0,828124615


Label-encoding the `userId` and `movieId` columns


In [0]:
from sklearn.preprocessing import LabelEncoder

# One encoder per id column: each maps the raw ids to integer codes that can
# be used directly as embedding indices. The fitted encoders are kept so the
# codes can be mapped back to the original ids.
user_encoder = LabelEncoder()
user_ids = user_encoder.fit_transform(rating_df['userId'])

movie_encoder = LabelEncoder()
movie_ids = movie_encoder.fit_transform(rating_df['movieId'])

Train/test split: the first 80% of the time-sorted ratings are used for training and the remaining 20% for testing


In [0]:
# Positional 80/20 split: the first `num_train` rows go to training and the
# remainder to testing. All three arrays are sliced at the same position so
# user id, movie id and rating stay aligned row by row.
num_train = int(len(user_ids) * 0.8)

train_user_ids, test_user_ids = user_ids[:num_train], user_ids[num_train:]
train_movie_ids, test_movie_ids = movie_ids[:num_train], movie_ids[num_train:]
train_ratings, test_ratings = (
    rating_df.rating.values[:num_train],
    rating_df.rating.values[num_train:],
)

In [0]:
# Sizes of the embedding tables: the largest encoded id plus one, so every
# encoded id is a valid row index.
num_users = np.max(user_ids) + 1
num_movie = np.max(movie_ids) + 1

In [0]:
# Scale the ratings by the top of the rating scale so the regression targets
# are comparable with the sigmoid outputs of the models below.
# NOTE(review): assumes ratings are on a 0-5 scale — confirm against the dataset.
#
# The scaled arrays are rebuilt from `rating_df` instead of dividing in place:
# an in-place `/=` on slices of `rating_df.rating.values` would also rescale
# the DataFrame column itself, would shrink the targets again each time the
# cell is re-run, and fails on read-only arrays (pandas copy-on-write).
MAX_RATING = 5
train_ratings = rating_df.rating.values[:num_train] / MAX_RATING
test_ratings = rating_df.rating.values[num_train:] / MAX_RATING

In [0]:
def dcg_at_k(r, k):
  """Discounted cumulative gain of the first `k` entries of `r`.

  Args:
    r: relevance scores in ranked order (best-ranked item first).
    k: number of leading positions to take into account.

  Returns:
    The sum over the first `k` positions of relevance / log2(position + 1),
    with positions counted from 1.
  """
  top_k = r[:k]
  # Position i (0-based) is discounted by log2(i + 2): 1 for the first item,
  # log2(3) for the second, and so on.
  discounts = np.log2(np.arange(len(top_k)) + 2)
  return np.sum(top_k / discounts)

In [0]:
def ndgc_at_k(r, k, method = 0):
  """Normalised DCG of the ranked relevance scores `r`, cut off at `k`.

  The DCG of `r` is divided by the DCG of the same scores sorted in
  descending order (the best achievable ranking), so a perfect ranking
  scores 1.0.

  Args:
    r: relevance scores in ranked order (best-ranked item first).
    k: number of leading positions to take into account.
    method: unused; kept so existing callers that pass it keep working.

  Returns:
    DCG@k / ideal DCG@k, or 0.0 when the ideal DCG is zero (all-zero
    relevance, empty input or k == 0) instead of a NaN from 0 / 0.
  """
  ideal_dcg = dcg_at_k(sorted(r, reverse = True), k)
  # Nothing relevant to rank: define the score as 0 rather than dividing by 0,
  # which would yield NaN and poison any average taken over many users.
  if ideal_dcg == 0:
    return 0.0
  return dcg_at_k(r, k) / ideal_dcg

In [0]:
def evaluate_prediction(predictions, k=30):
  """Mean per-user NDCG@k of `predictions` on the test split.

  For every user present in the module-level `test_user_ids`, that user's
  test ratings are ordered by descending predicted score and scored with
  `ndgc_at_k`; the per-user scores are then averaged.

  Args:
    predictions: 1-D array of predicted scores, aligned element by element
      with `test_user_ids` / `test_ratings`.
    k: ranking cutoff passed to `ndgc_at_k` (defaults to 30).

  Returns:
    The mean of the per-user NDCG@k values.
  """
  ndgcs = []

  for target_user in np.unique(test_user_ids):
    # Rows of the test split that belong to this user (computed once).
    user_mask = test_user_ids == target_user
    user_ratings = test_ratings[user_mask]
    # True ratings re-ordered by descending predicted score.
    rel = user_ratings[np.argsort(-predictions[user_mask])]
    ndgcs.append(ndgc_at_k(rel, k=k))

  return np.mean(ndgcs)



In [0]:
from keras.models import Model
from keras.layers import Input, Dense, Embedding, concatenate, Flatten, Activation, Add, Dropout, Multiply
from keras.optimizers import Adam

In [0]:
def mf_model():
  """Build and compile the matrix-factorisation-style rating model.

  Users and movies each get a 50-dimensional embedding. The two vectors are
  multiplied element-wise and a single sigmoid unit turns the product into
  the predicted score. Embedding table sizes come from the module-level
  `num_users` and `num_movie`.

  Returns:
    A Keras `Model` with inputs `[user_id, movie_id]`, compiled with MSE loss
    and the Adam optimizer (learning rate 0.005).
  """
  latent_dim = 50

  # User branch: id -> embedding -> flat vector.
  user_in = Input(shape=(1, ))
  user_vec = Embedding(input_dim=num_users, output_dim=latent_dim)(user_in)
  user_vec = Flatten()(user_vec)

  # Movie branch, same structure as the user branch.
  movie_in = Input(shape=(1, ))
  movie_vec = Embedding(input_dim=num_movie, output_dim=latent_dim)(movie_in)
  movie_vec = Flatten()(movie_vec)

  # Element-wise interaction of the two latent vectors, then a learned
  # weighted sum squashed by a sigmoid.
  interaction = Multiply()([user_vec, movie_vec])
  score = Dense(1, activation='sigmoid')(interaction)

  net = Model(inputs=[user_in, movie_in], outputs=score)
  # NOTE(review): `lr` is kept as in the original; newer Keras releases name
  # this argument `learning_rate` — confirm against the installed version.
  net.compile(loss='mse', optimizer=Adam(lr=0.005))
  return net

In [0]:
# Instantiate the matrix-factorisation model; `model` is the name the
# training and prediction cells operate on.
model = mf_model()

In [136]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model_9"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_33 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
input_34 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_29 (Embedding)        (None, 1, 50)        30500       input_33[0][0]                   
__________________________________________________________________________________________________
embedding_30 (Embedding)        (None, 1, 50)        486200      input_34[0][0]                   
____________________________________________________________________________________________

In [157]:
# Train the current `model` for up to 50 epochs with early stopping.
from keras.callbacks import EarlyStopping
# Stop once the monitored quantity (Keras default: val_loss) has not improved
# for 5 consecutive epochs.
callbacks = [EarlyStopping(patience = 5)]
# NOTE(review): the test split is passed as validation data, so early stopping
# is decided on the same data that is later used for evaluation — confirm
# this is intended.
model.fit([train_user_ids, train_movie_ids], train_ratings,
          validation_data=([test_user_ids, test_movie_ids], test_ratings), epochs=50, batch_size=128, callbacks=callbacks)

Train on 80668 samples, validate on 20168 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50


<keras.callbacks.History at 0x7f36403290f0>

As we can see, `val_loss` starts increasing after the 3rd epoch, so we set `patience = 1`.



In [137]:
# Re-train from freshly initialised weights with a stricter early-stopping
# rule. Without rebuilding the model, `fit` would continue from whatever
# weights `model` already holds (e.g. from an earlier training run), so the
# result would depend on which cells were executed before this one.
model = mf_model()

# Stop as soon as the monitored quantity (Keras default: val_loss) fails to
# improve for a single epoch. This `callbacks` list is also the one reused by
# the later training cells.
callbacks = [EarlyStopping(patience = 1)]
# NOTE(review): the test split is used as validation data here — confirm this
# is intended.
model.fit([train_user_ids, train_movie_ids], train_ratings,
          validation_data=([test_user_ids, test_movie_ids], test_ratings), epochs=50, batch_size=128, callbacks=callbacks)

Train on 80668 samples, validate on 20168 samples
Epoch 1/50
Epoch 2/50


<keras.callbacks.History at 0x7f364076d550>

In [138]:
# Score every (user, movie) pair of the test split with the trained model.
predictions = model.predict([test_user_ids, test_movie_ids])
# The model output is 2-D; column 0 holds the predicted score per row.
# The cell output is the mean per-user NDCG on the test split.
evaluate_prediction(predictions[:, 0])

0.845351062491839

In [0]:
def mlp_model():
  """Build and compile the MLP rating model.

  The 64-dimensional user and movie embeddings are concatenated and passed
  through two ReLU layers (128 and 64 units), each followed by dropout of
  0.2, before a single sigmoid output unit. Embedding table sizes come from
  the module-level `num_users` and `num_movie`.

  Returns:
    A Keras `Model` with inputs `[user_id, movie_id]`, compiled with MSE loss
    and the default 'adam' optimizer.
  """
  embed_dim = 64

  # User branch: id -> embedding -> flat vector.
  user_in = Input(shape=(1, ))
  user_vec = Embedding(input_dim=num_users, output_dim=embed_dim)(user_in)
  user_vec = Flatten()(user_vec)

  # Movie branch, same structure as the user branch.
  movie_in = Input(shape=(1, ))
  movie_vec = Embedding(input_dim=num_movie, output_dim=embed_dim)(movie_in)
  movie_vec = Flatten()(movie_vec)

  # Joint feature vector fed to the fully connected tower.
  hidden = concatenate([user_vec, movie_vec])
  for units in (128, 64):
    hidden = Dense(units, activation='relu')(hidden)
    hidden = Dropout(0.2)(hidden)
  score = Dense(1, activation='sigmoid')(hidden)

  net = Model(inputs=[user_in, movie_in], outputs=score)
  net.compile(loss='mse', optimizer='adam')
  return net

In [140]:
# Replace `model` with a freshly initialised MLP model and print its
# architecture (layers, output shapes, parameter counts).
model = mlp_model()
model.summary()

Model: "model_10"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_35 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
input_36 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_31 (Embedding)        (None, 1, 64)        39040       input_35[0][0]                   
__________________________________________________________________________________________________
embedding_32 (Embedding)        (None, 1, 64)        622336      input_36[0][0]                   
___________________________________________________________________________________________

In [141]:
# Train the current `model` for at most 50 epochs in batches of 128, reusing
# the `callbacks` list defined in an earlier cell. The test split is passed
# as validation data.
model.fit(
    [train_user_ids, train_movie_ids],
    train_ratings,
    validation_data=([test_user_ids, test_movie_ids], test_ratings),
    epochs=50,
    batch_size=128,
    callbacks=callbacks,
)

Train on 80668 samples, validate on 20168 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50


<keras.callbacks.History at 0x7f364054eeb8>

In [142]:
# Score every (user, movie) pair of the test split with the trained model.
predictions = model.predict([test_user_ids, test_movie_ids])
# The model output is 2-D; column 0 holds the predicted score per row.
# The cell output is the mean per-user NDCG on the test split.
evaluate_prediction(predictions[:, 0])

0.8743498605773988

In [0]:
def ncf_model():
  """Build and compile the combined (MF + MLP) rating model.

  A single pair of 64-dimensional user/movie embeddings feeds two branches:
  an element-wise product of the two vectors, and an MLP (128 ReLU units,
  dropout 0.2, 64 ReLU units) over their concatenation. The two branch
  outputs are concatenated and mapped to one sigmoid unit. Embedding table
  sizes come from the module-level `num_users` and `num_movie`.

  Returns:
    A Keras `Model` with inputs `[user_id, movie_id]`, compiled with MSE loss
    and the default 'adam' optimizer.
  """
  embed_dim = 64

  # User branch: id -> embedding -> flat vector.
  user_in = Input(shape=(1, ))
  user_vec = Embedding(input_dim=num_users, output_dim=embed_dim)(user_in)
  user_vec = Flatten()(user_vec)

  # Movie branch, same structure as the user branch.
  movie_in = Input(shape=(1, ))
  movie_vec = Embedding(input_dim=num_movie, output_dim=embed_dim)(movie_in)
  movie_vec = Flatten()(movie_vec)

  # Branch 1: element-wise product of the two embeddings.
  mf_branch = Multiply()([user_vec, movie_vec])

  # Branch 2: MLP over the concatenated embeddings.
  mlp_branch = concatenate([user_vec, movie_vec])
  mlp_branch = Dense(128, activation='relu')(mlp_branch)
  mlp_branch = Dropout(0.2)(mlp_branch)
  mlp_branch = Dense(64, activation='relu')(mlp_branch)

  # Fuse both branches and predict the score.
  fused = concatenate([mf_branch, mlp_branch])
  score = Dense(1, activation='sigmoid')(fused)

  net = Model(inputs=[user_in, movie_in], outputs=score)
  net.compile(loss='mse', optimizer='adam')
  return net

In [152]:
# Replace `model` with a freshly initialised combined (MF + MLP) model and
# print its architecture (layers, output shapes, parameter counts).
model = ncf_model()
model.summary()

Model: "model_12"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_45 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
input_46 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_39 (Embedding)        (None, 1, 64)        39040       input_45[0][0]                   
__________________________________________________________________________________________________
embedding_40 (Embedding)        (None, 1, 64)        622336      input_46[0][0]                   
___________________________________________________________________________________________

In [155]:
# Train the current `model` for at most 50 epochs in batches of 128, reusing
# the `callbacks` list defined in an earlier cell. The test split is passed
# as validation data.
model.fit(
    [train_user_ids, train_movie_ids],
    train_ratings,
    validation_data=([test_user_ids, test_movie_ids], test_ratings),
    epochs=50,
    batch_size=128,
    callbacks=callbacks,
)

Train on 80668 samples, validate on 20168 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50


<keras.callbacks.History at 0x7f36404b67b8>

In [156]:
# Score every (user, movie) pair of the test split with the trained model.
predictions = model.predict([test_user_ids, test_movie_ids])
# The model output is 2-D; column 0 holds the predicted score per row.
# The cell output is the mean per-user NDCG on the test split.
evaluate_prediction(predictions[:, 0])

0.8724066344106951