<a href="https://colab.research.google.com/github/ntolayd/Projects/blob/main/Recommendation_Engines/MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
import seaborn as sns
import warnings
import pickle
from sklearn.metrics import mean_absolute_error, mean_squared_error
warnings.filterwarnings('ignore')

In [3]:
#data loading
!unzip "/content/gdrive/My Drive/ml-1m.zip"
masked = pd.read_csv('/content/gdrive/MyDrive/masked_melted.csv')
test_set = pd.read_csv('/content/gdrive/MyDrive/test_set.csv')
with open(r'/content/gdrive/MyDrive/masked_idx.pkl', "rb") as input_file:
  masked_idx = pickle.load(input_file)
movies = pd.read_csv("ml-1m/movies.dat", sep='::', engine='python', header=None)
movies.columns = ['movie_id', 'movie_name', 'genre']
ratings = pd.read_csv("ml-1m/ratings.dat", sep='::', engine='python', header=None)
ratings.columns=['user_id', 'movie_id', 'rating','timestamp']

Archive:  /content/gdrive/My Drive/ml-1m.zip
   creating: ml-1m/
  inflating: ml-1m/movies.dat        
  inflating: ml-1m/ratings.dat       
  inflating: ml-1m/README            
  inflating: ml-1m/users.dat         


In [4]:
masked = masked[~(masked['value'] == 0)]

Base Model

In [5]:
from keras.models import Model
from keras.layers import Input, Flatten, Dense, Concatenate, Dropout, merge
from keras.layers.merge import Multiply
from keras.layers.merge import Dot
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam
from keras.regularizers import l1, l2

In [6]:
unique_movie_numb = masked['movie_id'].max()
unique_user_numb = masked['user_id'].max()

In [7]:
masked['value'] = masked['value'] / 5

In [8]:
embedding_size= 5

#Movie input
input_movies = Input(shape=[1])
embed_movies = Embedding(unique_movie_numb + 1, embedding_size,name = 'movie_embedding')(input_movies)
movies_out = Flatten()(embed_movies)

#user network
input_users = Input(shape=[1])
embed_users = Embedding(unique_user_numb + 1, embedding_size,name = 'user_embedding')(input_users)
users_out = Flatten()(embed_users)

y = Dot(1, normalize=False)([users_out, movies_out])

model = Model(inputs=[input_users, input_movies], outputs=y)

model.compile(loss='mse',
              optimizer=Adam(lr=0.001),
              metrics=['mae']
             )

In [9]:
history = model.fit([masked["user_id"], masked["movie_id"]]
                    , masked["value"] 
                    , batch_size=128, epochs=10
                    , validation_split=0.1
                    , shuffle=True)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
predictions = model.predict([test_set["user_id"], test_set["movie_id"]]) * 5
print(mean_absolute_error(test_set['rating'], predictions))
print(math.sqrt(mean_squared_error(test_set['rating'], predictions)))

0.9760546746745525
1.4320631950946228


In [32]:
movie_ids = masked['movie_id'].unique()


#recommender
def make_recommendation(user_id, k):
  user = np.array([user_id for i in range(len(movie_ids))])
  predictions = model.predict([user, movie_ids])
  predictions = [i[0] for i in predictions]
  df = pd.DataFrame({'user_id':user, 'movie_id':movie_ids, 'Recommendation_Score':predictions}).sort_values(by='Recommendation_Score', ascending=False).head(k)
  return df

In [12]:
def apk(actual, predicted, k=10):

    if len(actual) == 0:
      return 0

    if len(predicted)>k:
        predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i,p in enumerate(predicted):
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i+1.0)


    return score / min(len(actual), k)

In [47]:
def apk_user(user_id,k):
  user_avg = masked[masked['user_id']== user_id]['value'].mean()*5
  user_test = test_set[test_set['user_id'] == user_id][['movie_id','rating']]
  user_test = user_test[user_test['rating'] >= user_avg]['movie_id'].values
  rec = make_recommendation(user_id,k)
  rec = rec['movie_id']
  return apk(user_test, rec, k)

In [48]:
mean_apk_enhanced = np.mean([apk_user(i,30) for i in masked['user_id'].unique()])

In [49]:
mean_apk_enhanced

0.011021581883988761

Deeper model

In [None]:
embedding_size= 10

#Movie input
input_movies = Input(shape=[1])
embed_movies = Embedding(unique_movie_numb + 1,embedding_size,name = 'movie_embedding')(input_movies)
movies_out = Flatten()(embed_movies)

#user input
input_users = Input(shape=[1])
embed_users = Embedding(unique_user_numb + 1,embedding_size,name = 'user_embedding')(input_users)
users_out = Flatten()(embed_users)


input_vecs = Concatenate()([users_out, movies_out])


x = Dense(256, activation='relu')(input_vecs)
x = Dropout(0.3)(x)


y = Dense(1,activation='relu')(x)

model = Model(inputs=[input_users, input_movies], outputs=y)

model.compile(loss='mse',
              optimizer=Adam(lr=0.001),
              metrics=['mae']
             )

In [None]:
history = model.fit([masked["user_id"], masked["movie_id"]]
                    , masked["value"] 
                    , batch_size=128, epochs=10
                    , validation_split=0.1
                    , shuffle=True)


In [None]:
predictions = model.predict([test_set["user_id"], test_set["movie_id"]]) * 5
print(mean_absolute_error(test_set['rating'], predictions))
print(math.sqrt(mean_squared_error(test_set['rating'], predictions)))

In [None]:
mean_apk_enhanced = np.mean([apk_user(i,30) for i in masked.index.unique()])

NeuMF Model

In [None]:
input_movies = Input(shape=[1])
input_users = Input(shape=[1])
latent_dim = 10

mf_user_embedding = Embedding(input_dim=unique_user_numb + 1, output_dim=latent_dim,
                    name='mf_user_embedding', input_length=1)

mf_item_embedding = Embedding(input_dim=unique_movie_numb + 1, output_dim=latent_dim,
                    name='mf_item_embedding', input_length=1)

mlp_user_embedding = Embedding(input_dim=unique_user_numb + 1, output_dim=128,
                      name='mlp_user_embedding',
                      input_length=1)

mlp_item_embedding = Embedding(input_dim=unique_movie_numb + 1, output_dim=128,
                      name='mlp_item_embedding',
                      input_length=1)

# MF latent vector
mf_user_latent = Flatten()(mf_user_embedding(input_users))
mf_item_latent = Flatten()(mf_item_embedding(input_movies))
mf_cat_latent = Multiply()([mf_user_latent, mf_item_latent])

# MLP latent vector
mlp_user_latent = Flatten()(mlp_user_embedding(input_users))
mlp_item_latent = Flatten()(mlp_item_embedding(input_movies))
mlp_cat_latent = Concatenate()([mlp_user_latent, mlp_item_latent])

mlp_vector = mlp_cat_latent


layer = Dense(256, activation='relu')
mlp_vector = layer(mlp_vector)

predict_layer = Concatenate()([mf_cat_latent, mlp_vector])
result = Dense(1, activation='relu')

model = Model(inputs=[input_users,input_movies], outputs=result(predict_layer))

model.compile(loss='mse',
              optimizer=Adam(lr=0.001),
              metrics=['mae']
             )

In [None]:
def get_model(num_users, num_items, mf_dim=10, layers=[256]):

    num_layer = len(layers) #Number of layers in the MLP
    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name = 'user_input')
    item_input = Input(shape=(1,), dtype='int32', name = 'item_input')
    
    # Embedding layer
    MF_Embedding_User = Embedding(input_dim = num_users+1, output_dim = mf_dim, name = 'mf_embedding_user',
                                  input_length=1)
    MF_Embedding_Item = Embedding(input_dim = num_items+1, output_dim = mf_dim, name = 'mf_embedding_item',
                                  input_length=1)   

    MLP_Embedding_User = Embedding(input_dim = num_users+1, output_dim = int(layers[0]/2), name = "mlp_embedding_user",
                                   input_length=1)
    MLP_Embedding_Item = Embedding(input_dim = num_items+1, output_dim = int(layers[0]/2), name = 'mlp_embedding_item',
                                   input_length=1)   
    
    # MF part
    mf_user_latent = Flatten()(MF_Embedding_User(user_input))
    mf_item_latent = Flatten()(MF_Embedding_Item(item_input))
    mf_vector = Multiply()([mf_user_latent, mf_item_latent]) # element-wise multiply

    # MLP part 
    mlp_user_latent = Flatten()(MLP_Embedding_User(user_input))
    mlp_item_latent = Flatten()(MLP_Embedding_Item(item_input))
    mlp_vector = Concatenate()([mlp_user_latent, mlp_item_latent])
    
    for idx in range(1, num_layer):
        layer = Dense(layers[idx], activation='relu', name="layer%d" %idx)
        mlp_vector = layer(mlp_vector)

    predict_vector = Concatenate()([mf_vector, mlp_vector])
    
    # Final prediction layer
    prediction = Dense(1, activation='relu', name = "prediction")(predict_vector)
    
    model = Model(inputs=[user_input, item_input], 
                  outputs=prediction)
    
    return model

In [None]:
model = get_model(unique_user_numb, unique_movie_numb)

In [None]:
model.compile(loss='mse',
              optimizer=Adam(lr=0.001),
              metrics=['mae']
             )

In [None]:
history = model.fit([masked["user_id"], masked["movie_id"]]
                    , masked["value"] 
                    , batch_size=128, epochs=10
                    , validation_split=0.1
                    , shuffle=True)

In [None]:
predictions = model.predict([test_set["user_id"], test_set["movie_id"]]) * 5
print(mean_absolute_error(test_set['rating'], predictions))
print(math.sqrt(mean_squared_error(test_set['rating'], predictions)))

In [None]:
mean_apk_enhanced = np.mean([apk_user(i,30) for i in masked.index.unique()])

Making Recommendations

In [None]:
movie_ids = np.array(list(set(ratings['movie_id'])))
#for user 123
user = np.array([123 for i in range(len(movie_ids))])
predictions = model.predict([user, movie_ids])
predictions = np.array([a[0] for a in predictions])
recommended_movie_ids = (-predictions).argsort()[:10]

In [None]:
#movies already watched by user 123
final_df[final_df['user_id']==123].sort_values(by='rating', ascending=False)[:10][['user_id','rating','movie_name','genre']]

Unnamed: 0,user_id,rating,movie_name,genre
15695,123,5,Rain Man (1988),Drama
15301,123,5,Heat (1995),Action|Crime|Thriller
15711,123,5,Forrest Gump (1994),Comedy|Romance|War
15307,123,5,"Deer Hunter, The (1978)",Drama|War
15310,123,5,"Time to Kill, A (1996)",Drama
15312,123,5,One Flew Over the Cuckoo's Nest (1975),Drama
15324,123,5,"Silence of the Lambs, The (1991)",Drama|Thriller
15524,123,5,Braveheart (1995),Action|Drama|War
15504,123,5,Bottle Rocket (1996),Comedy
15841,123,5,True Romance (1993),Action|Crime|Romance


In [None]:
#movies recommended to user 123
movies[movies['movie_id'].isin(recommended_movie_ids)]

Unnamed: 0,movie_id,movie_name,genre
48,49,When Night Is Falling (1995),Drama|Romance
250,253,Interview with the Vampire (1994),Drama|Horror
306,309,"Red Firecracker, Green Firecracker (1994)",Drama
509,513,Radioland Murders (1994),Comedy|Mystery|Romance
792,802,Phenomenon (1996),Drama|Romance
832,843,Lotto Land (1995),Drama
1052,1066,Shall We Dance? (1937),Comedy|Musical|Romance
1092,1108,Prerokbe Ognja (1995),Documentary
2240,2309,"Inheritors, The (Die Siebtelbauern) (1998)",Drama
2629,2698,Zone 39 (1997),Sci-Fi
