In [None]:
!pip install -U keras-tuner
%matplotlib inline

import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
import sklearn.preprocessing
import kerastuner as kt

from keras.models import Model
from keras import backend as K
from keras.layers import Input, Reshape, Dot
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.layers.normalization import BatchNormalization

from keras.layers import Add, Activation, Lambda
from keras.layers import Concatenate, Dense, Dropout

## Model Exploration

We take in the active_users dataset and the movies dataset. We obtain the number of unique users, unique movies, the minimum rating, as well as the maximum rating as these will be used for the model's embedding layers and output.

In [3]:
# Load the per-user ratings and the movie metadata that is merged onto them later.
active = pd.read_csv('active_users.csv')
movies = pd.read_csv('movies.csv')

# Number of distinct users / movies: these size the embedding tables in the model.
n_users = active['userId'].nunique()
n_movies = active['movieId'].nunique()

# Rating bounds, used to rescale the model's sigmoid output back onto the rating scale.
# The vectorised Series reductions replace the Python builtins min()/max(), which
# iterate the column element by element and give order-dependent results if a NaN
# is present. float() keeps the globals as plain Python scalars.
min_rating = float(active['rating'].min())
max_rating = float(active['rating'].max())

Next, we join the two datasets together and apply a multi-label binarizer to the genres column. The user ID and movie ID columns are label encoded. We return the lists of arrays that serve as the model inputs, the rating targets, and the raw train/test feature arrays, which retain the original user and movie IDs.

In [4]:
def active_preprocessing(active_user_dataset, movies_data):
    """Turn the ratings and movie tables into train/test inputs for the model.

    The two tables are merged, the pipe-separated ``genres`` string is expanded
    into one indicator column per genre, and ``userId`` / ``movieId`` are label
    encoded into the new ``user`` / ``movie`` columns. The rows are then split
    80/20 (``random_state=1``), stratified on the encoded user.

    Args:
        active_user_dataset: ratings DataFrame; must contain ``userId``,
            ``movieId``, ``rating`` and a column shared with ``movies_data``
            to merge on.
        movies_data: movie DataFrame; must contain ``genres``. After the merge
            the columns ``timestamp`` and ``title`` must exist, as they are dropped.

    Returns:
        A tuple ``(X_train_array, X_test_array, y_train, y_test, X_test, X_train)``.
        The ``*_array`` entries are lists ``[user, movie, genre_indicators]``
        sliced from the feature matrix; ``X_test`` / ``X_train`` are the full
        feature matrices (every column except ``rating``).
    """
    frame = active_user_dataset.merge(movies_data).drop(['timestamp', 'title'], axis=1)

    # Multi-hot encode the genres: one new column per genre class.
    binarizer = sklearn.preprocessing.MultiLabelBinarizer()
    genre_lists = frame['genres'].apply(lambda s: s.split("|"))
    frame['genres'] = binarizer.fit_transform(genre_lists).tolist()
    frame[binarizer.classes_.tolist()] = pd.DataFrame(frame.genres.tolist(), index=frame.index)
    frame = frame.drop('genres', axis=1)

    # Contiguous integer ids for the embedding layers; appended as the last two columns.
    frame['user'] = LabelEncoder().fit_transform(frame['userId'].values)
    frame['movie'] = LabelEncoder().fit_transform(frame['movieId'].values)
    frame['rating'] = frame['rating'].values.astype(np.float32)

    feature_columns = [name for name in frame.columns if name != 'rating']
    features = frame[feature_columns].values
    targets = frame['rating'].values

    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.2, random_state=1, stratify=frame.user
    )

    def as_model_inputs(matrix):
        # Positional layout: [..., -2] is 'user', [..., -1] is 'movie', and
        # 2:-2 is the genre block. This assumes the two leading columns are the
        # original userId / movieId -- TODO confirm against the input column order.
        return [matrix[:, -2], matrix[:, -1], matrix[:, 2:-2]]

    return as_model_inputs(X_train), as_model_inputs(X_test), y_train, y_test, X_test, X_train

In [5]:
# Build the model inputs, targets and raw feature matrices for both splits.
(
    X_train_array,
    X_test_array,
    y_train,
    y_test,
    X_test,
    X_train,
) = active_preprocessing(active, movies)

We define an implementation of the wide and deep model below. Using Keras Tuner, we define the hyperparameters we want to tune as well as their search space. Here, we chose to tune the dimensionality of the user and movie embeddings, the number of dense layers in the deep portion of the model, the dropout rate, and the learning rate of the optimizer.

In [None]:
class EmbeddingLayer:
    """Callable that embeds an id input and reshapes the result to a flat vector.

    Args:
        n_items: size of the embedding table (first argument to ``Embedding``).
        n_factors: dimensionality of each embedding vector.
    """

    def __init__(self, n_items, n_factors):
        self.n_items = n_items
        self.n_factors = n_factors

    def __call__(self, x):
        """Apply a fresh, L2-regularised embedding to ``x`` and reshape to ``(n_factors,)``."""
        embedding = Embedding(
            self.n_items,
            self.n_factors,
            embeddings_initializer='he_normal',
            embeddings_regularizer=l2(1e-6),
        )
        embedded = embedding(x)
        # Reshape so each sample is a single vector of length n_factors.
        return Reshape((self.n_factors,))(embedded)

def WideDeepRecommender(hp):
    """Build and compile the wide-and-deep rating model for one tuner trial.

    Deep part: user and movie embeddings are concatenated and passed through a
    stack of BatchNorm -> Dense -> Dropout blocks whose widths halve each block
    (128, 64, 32). Wide part: the genre indicator vector is concatenated directly
    with the deep output before the final single-unit layer. The sigmoid output
    is rescaled to the ``[min_rating, max_rating]`` interval.

    Tuned hyperparameters: ``n_factors_users``, ``n_factors_movies``,
    ``dropout``, ``dense_blocks`` and ``learning_rate``.

    Args:
        hp: hyperparameter container supplied by the tuner (provides ``Int`` / ``Float``).

    Returns:
        A compiled model taking ``[user, movie, genres]`` inputs, trained with
        MSE loss and reporting RMSE.
    """
    user = Input(shape=(1,))
    u = EmbeddingLayer(n_users, hp.Int('n_factors_users', 10, 100, step=10, default=50))(user)

    movie = Input(shape=(1,))
    m = EmbeddingLayer(n_movies, hp.Int('n_factors_movies', 10, 100, step=10, default=50))(movie)

    # Width is hard-coded to 20; it must equal the number of genre indicator
    # columns produced during preprocessing -- verify against the data.
    genres = Input(shape=(20,))

    combinedembedding = Concatenate()([u, m])
    x = Activation('relu')(combinedembedding)

    dropout_rate = hp.Float('dropout', 0, 0.90, step=0.1, default=0.5)

    for i in range(hp.Int('dense_blocks', 1, 3, default=2)):
        x = BatchNormalization()(x)
        # Integer division: `128 / (2**i)` yields a float, and Dense expects an
        # integer unit count (recent Keras versions raise on a float).
        x = Dense(128 // (2 ** i), kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-6), activation='relu')(x)
        x = Dropout(dropout_rate)(x)

    widedeep = Concatenate()([genres, x])

    x = Dense(1, kernel_initializer='he_normal')(widedeep)
    x = Activation('sigmoid')(x)
    # Map the (0, 1) sigmoid output onto the rating scale.
    x = Lambda(lambda x: x * (max_rating - min_rating) + min_rating)(x)

    model = Model(inputs=[user, movie, genres], outputs=x)
    opt = Adam(hp.Float('learning_rate', 1e-4, 1e-2, sampling='log'))

    model.compile(loss='mean_squared_error', optimizer=opt, metrics=[tf.keras.metrics.RootMeanSquaredError()])
    return model

We use Bayesian optimization to search for the hyperparameters that minimize the validation RMSE. Only 5 trials of hyperparameter combinations are run due to the GPU usage limit on Google Colab.

In [None]:
# Bayesian search over the WideDeepRecommender search space, minimising validation RMSE.
rmse_objective = kt.Objective("val_root_mean_squared_error", direction="min")
tuner = kt.BayesianOptimization(
    WideDeepRecommender,
    rmse_objective,
    max_trials=5,
)

In [None]:
# Print the hyperparameters registered by WideDeepRecommender and their ranges.
tuner.search_space_summary()

Search space summary
Default search space size: 5
n_factors_users (Int)
{'default': 50, 'conditions': [], 'min_value': 10, 'max_value': 100, 'step': 10, 'sampling': None}
n_factors_movies (Int)
{'default': 50, 'conditions': [], 'min_value': 10, 'max_value': 100, 'step': 10, 'sampling': None}
dropout (Float)
{'default': 0.5, 'conditions': [], 'min_value': 0.0, 'max_value': 0.9, 'step': 0.1, 'sampling': None}
dense_blocks (Int)
{'default': 2, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': None}
learning_rate (Float)
{'default': 0.0001, 'conditions': [], 'min_value': 0.0001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}


In [None]:
# Stop a trial as soon as validation RMSE fails to improve for one epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_root_mean_squared_error',
    patience=1,
)

# 20% of the training rows are held out as the validation set for each trial.
tuner.search(
    X_train_array,
    y_train,
    validation_split=0.2,
    epochs=100,
    callbacks=[early_stopping],
)

Trial 5 Complete [00h 36m 46s]
val_root_mean_squared_error: 1.0363560914993286

Best val_root_mean_squared_error So Far: 0.8482723832130432
Total elapsed time: 03h 15m 03s
INFO:tensorflow:Oracle triggered exit


Looking at the results below, the best hyperparameter combination was n_factors_users: 100, n_factors_movies: 100, dropout: 0.0, dense_blocks: 1, learning_rate: 0.0001, which obtained an RMSE of 0.8483 on the validation set.

In [None]:
# List the completed trials, best (lowest validation RMSE) first.
tuner.results_summary()

Results summary
Results in ./untitled_project
Showing 10 best trials
Objective(name='val_root_mean_squared_error', direction='min')
Trial summary
Hyperparameters:
n_factors_users: 100
n_factors_movies: 100
dropout: 0.0
dense_blocks: 1
learning_rate: 0.0001
Score: 0.8482723832130432
Trial summary
Hyperparameters:
n_factors_users: 100
n_factors_movies: 100
dropout: 0.0
dense_blocks: 3
learning_rate: 0.0001
Score: 0.8532862663269043
Trial summary
Hyperparameters:
n_factors_users: 60
n_factors_movies: 80
dropout: 0.2
dense_blocks: 2
learning_rate: 0.0029594770617025457
Score: 0.8740978837013245
Trial summary
Hyperparameters:
n_factors_users: 30
n_factors_movies: 20
dropout: 0.7000000000000001
dense_blocks: 2
learning_rate: 0.0007719356053322489
Score: 0.9145234227180481
Trial summary
Hyperparameters:
n_factors_users: 100
n_factors_movies: 100
dropout: 0.9
dense_blocks: 1
learning_rate: 0.01
Score: 1.0363560914993286


In [None]:
# Retrieve the top-ranked trial's model and the hyperparameters that produced it.
best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Persist the best model to disk so the evaluation below can reload it
# without re-running the hyperparameter search.
best_model.save('best_model.h5')

Now, let's see the performance of the best model on the entire training set as well as on the held-out test set.

In [6]:
# Reload the saved best model and score both splits; the predictions feed
# the ranking metrics and RMSE computed below.
uploadbestmodel=tf.keras.models.load_model('best_model.h5')
testpredictions=uploadbestmodel.predict(X_test_array)
trainpredictions=uploadbestmodel.predict(X_train_array)



In [8]:
# One row per (user, movie) pair with the true rating and the model's prediction.
# Columns 0 and 1 of the raw feature matrices are used as the user / movie ids.
metric_columns = ['userId', 'movieId', 'rating', 'prediction']

testmetricmatrix = pd.DataFrame(
    {
        'userId': X_test[:, 0],
        'movieId': X_test[:, 1],
        'rating': y_test,
        'prediction': testpredictions.flatten(),
    },
    columns=metric_columns,
)

trainmetricmatrix = pd.DataFrame(
    {
        'userId': X_train[:, 0],
        'movieId': X_train[:, 1],
        'rating': y_train,
        'prediction': trainpredictions.flatten(),
    },
    columns=metric_columns,
)

In [9]:
from collections import defaultdict

def precision_recall_at_k(predictions, k=10, threshold=3.5):
    """Return precision and recall at k metrics for each user.

    Args:
        predictions: DataFrame with ``userId``, ``prediction`` and ``rating`` columns.
        k: number of top-predicted items considered per user.
        threshold: minimum rating (true or predicted) for an item to count as
            relevant / recommended.

    Returns:
        ``(precisions, recalls)``: two dicts keyed by user id. A metric that is
        undefined for a user (zero denominator) is reported as 0.
    """
    # Map the (estimated, true) rating pairs to each user. Iterating the columns
    # directly avoids DataFrame.iterrows(), which builds a Series per row and
    # upcasts every value to a common dtype.
    user_est_true = defaultdict(list)
    for uid, est, true_r in zip(predictions['userId'], predictions['prediction'], predictions['rating']):
        user_est_true[uid].append((est, true_r))

    precisions = {}
    recalls = {}
    for uid, user_ratings in user_est_true.items():
        # Rank this user's items by estimated rating, best first.
        user_ratings.sort(key=lambda pair: pair[0], reverse=True)
        top_k = user_ratings[:k]

        # Relevant items across everything the user rated.
        n_rel = sum(true_r >= threshold for _, true_r in user_ratings)

        # Recommended items in the top k.
        n_rec_k = sum(est >= threshold for est, _ in top_k)

        # Items in the top k that are both relevant and recommended.
        n_rel_and_rec_k = sum((true_r >= threshold) and (est >= threshold) for est, true_r in top_k)

        # Precision@K: proportion of recommended items that are relevant.
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 0

        # Recall@K: proportion of relevant items that are recommended.
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 0

    return precisions, recalls

def get_top_recs(ratings, n):
    """Return each user's ``n`` highest-predicted items.

    Args:
        ratings: DataFrame with ``userId``, ``movieId``, ``prediction`` and
            ``rating`` columns.
        n: maximum number of items kept per user.

    Returns:
        Dict mapping user id to a list of ``(movieId, prediction, rating)``
        tuples sorted by prediction, highest first, truncated to ``n`` entries.
    """
    # Group rows by user. Column iteration avoids the per-row Series that
    # iterrows() constructs, and setdefault replaces the manual key check.
    recs_by_user = {}
    for uid, movie_id, est, true_r in zip(
        ratings['userId'], ratings['movieId'], ratings['prediction'], ratings['rating']
    ):
        recs_by_user.setdefault(uid, []).append((movie_id, est, true_r))

    # Build a fresh dict rather than reassigning values while iterating, and
    # avoid reusing the name of the `ratings` parameter for the loop variable.
    return {
        uid: sorted(user_recs, key=lambda rec: rec[1], reverse=True)[:n]
        for uid, user_recs in recs_by_user.items()
    }

def user_coverage(top_recs, k, threshold=3.5):
    """Count users with at least ``k`` relevant items among their recommendations.

    Args:
        top_recs: dict mapping user id to a list of
            ``(movieId, prediction, rating)`` tuples.
        k: minimum number of relevant items for a user to count as covered.
        threshold: minimum true rating for an item to be relevant. Defaults to
            3.5, the value that was previously hard-coded.

    Returns:
        The number of covered users (an int, not a fraction).
    """
    covered = 0
    for recs in top_recs.values():
        # Index 2 of each tuple is the user's true rating.
        n_relevant = sum(1 for rec in recs if rec[2] >= threshold)
        if n_relevant >= k:
            covered += 1
    return covered

def item_coverage(top_recs, k, threshold=3.5):
    """Count recommended items that were relevant to at least ``k`` users.

    Args:
        top_recs: dict mapping user id to a list of
            ``(movieId, prediction, rating)`` tuples.
        k: minimum number of relevant recommendations for an item to count.
        threshold: minimum true rating for a recommendation to be relevant.
            Defaults to 3.5, the value that was previously hard-coded.

    Returns:
        ``(tot, item_cov)``: ``tot`` is the number of items meeting the ``k``
        requirement; ``item_cov`` maps every recommended item id to a list with
        one entry (the item id itself) per relevant recommendation.
    """
    item_cov = {}
    for recs in top_recs.values():
        for rec in recs:
            item = rec[0]
            # Register every recommended item, even if it is never relevant,
            # so that len(item_cov) is the number of distinct recommended items.
            hits = item_cov.setdefault(item, [])
            if rec[2] >= threshold:
                hits.append(item)

    tot = sum(1 for hits in item_cov.values() if len(hits) >= k)
    return tot, item_cov

def ndcg_at_k(predictions, k=10):
    """Return the NDCG@k of the predicted ranking for each user.

    Gains are ``2**rating - 1`` and the discount at rank ``i`` (1-based) is
    ``log2(i + 1)``. The ideal ranking orders the same items by true rating.

    Args:
        predictions: DataFrame with ``userId``, ``prediction`` and ``rating`` columns.
        k: ranking cutoff applied to both the DCG and the ideal DCG.

    Returns:
        List with one NDCG value per user, in order of first appearance.
        A user whose ideal DCG is 0 gets 0.0 instead of a NaN.
    """
    def dcg_at_k(user_ratings, ranking_scores, k):
        # Order the true ratings by descending ranking score and keep the top k.
        order = np.argsort(ranking_scores)[::-1]
        top_ratings = np.take(np.asarray(user_ratings, dtype=float), order[:k])
        gains = 2 ** top_ratings - 1
        discounts = np.log2(np.arange(2, gains.size + 2))
        return np.sum(gains / discounts)

    # Map the (estimated, true) rating pairs to each user.
    user_est_true = defaultdict(list)
    for uid, est, true_r in zip(predictions['userId'], predictions['prediction'], predictions['rating']):
        user_est_true[uid].append((est, true_r))

    ndcgs = []
    for scores in user_est_true.values():
        user_preds = [est for est, _ in scores]
        user_ratings = [true_r for _, true_r in scores]
        # Forward k explicitly: previously the helper's own default was always
        # used, so the caller's cutoff was silently ignored.
        dcg = dcg_at_k(user_ratings, user_preds, k)
        idcg = dcg_at_k(user_ratings, user_ratings, k)
        ndcgs.append(dcg / idcg if idcg != 0 else 0.0)

    return ndcgs

## Train Performance

In [12]:
precisions_dl, recalls_dl = precision_recall_at_k(trainmetricmatrix, k=10, threshold=3.5)

# Average the per-user metrics so every user carries equal weight.
avg_dl_precision = sum(precisions_dl.values()) / len(precisions_dl)
avg_dl_recall = sum(recalls_dl.values()) / len(recalls_dl)

print("Average Precision @ 10:", avg_dl_precision)
print("Average Recall @ 10:", avg_dl_recall)

Average Precision @ 10: 0.889979138321986
Average Recall @ 10: 0.126984926967584


In [15]:
toprecs_dl_train = get_top_recs(trainmetricmatrix, 10)

# Share of users with at least one relevant item in their top 10.
user_cov_train_dl = user_coverage(toprecs_dl_train, 1) / len(toprecs_dl_train)
# These metrics are computed on the training split, so label them as such
# (the messages previously said "test set").
print("User Coverage of train set:", user_cov_train_dl)

# Share of recommended items that were relevant to at least one user.
item_cov_train_dl, item_dict_train_dl = item_coverage(toprecs_dl_train, 1)
item_cov_train_dl = item_cov_train_dl / len(item_dict_train_dl)
print("Item Coverage of train set:", item_cov_train_dl)

User Coverage of test set: 0.9997142857142857
Item Coverage of test set: 0.9522096608427544


In [13]:
# Per-user NDCG on the training split (default cutoff), then the mean over users.
ndcgs = ndcg_at_k(trainmetricmatrix)
avg_ndcg = sum(ndcgs)/len(ndcgs)
print("Average NDCG @ 10:", avg_ndcg)

Average NDCG @ 10: 0.7276305173808937


In [16]:
# RMSE over every (user, movie) pair in the training split.
train_mse = mean_squared_error(y_train, trainpredictions.flatten())
print("Train RMSE:", np.sqrt(train_mse))

Train RMSE: 0.80679214


## Test Performance

In [None]:
precisions_dl, recalls_dl = precision_recall_at_k(testmetricmatrix, k=10, threshold=3.5)

# Average the per-user metrics so every user carries equal weight.
avg_dl_precision = sum(precisions_dl.values()) / len(precisions_dl)
avg_dl_recall = sum(recalls_dl.values()) / len(recalls_dl)

print("Average Precision @ 10:", avg_dl_precision)
print("Average Recall @ 10:", avg_dl_recall)

Average Precision @ 10: 0.8074136054421732
Average Recall @ 10: 0.393811255416487


In [None]:
toprecs_dl_test = get_top_recs(testmetricmatrix, 10)

# Share of users with at least one relevant item in their top 10.
user_cov_test_dl = user_coverage(toprecs_dl_test, 1) / len(toprecs_dl_test)

# Share of recommended items that were relevant to at least one user.
item_cov_ts_dl, item_dict_ts_dl = item_coverage(toprecs_dl_test, 1)
item_cov_test_dl = item_cov_ts_dl / len(item_dict_ts_dl)

print("User Coverage of test set:", user_cov_test_dl)
print("Item Coverage of test set:", item_cov_test_dl)

User Coverage of test set: 0.9981428571428571
Item Coverage of test set: 0.8741970021413277


In [None]:
# Per-user NDCG on the held-out test split (default cutoff), then the mean over users.
ndcgs = ndcg_at_k(testmetricmatrix)
avg_ndcg = sum(ndcgs)/len(ndcgs)
print("Average NDCG @ 10:", avg_ndcg)

Average NDCG @ 10: 0.7441638893728827


In [11]:
# RMSE over every (user, movie) pair in the held-out test split.
test_mse = mean_squared_error(y_test, testpredictions.flatten())
print("Test RMSE:", np.sqrt(test_mse))

Test RMSE: 0.84867543
