## Neural Collaborative Filtering with Hyperparameter Tuning

### Importing Libraries

In [2]:
import sys
import os
import shutil

# pandas and NumPy are used for efficient handling of tabular data and arrays.
import pandas as pd
import numpy as np


from recommenders.utils.timer import Timer
from recommenders.datasets.python_splitters import python_chrono_split

# importing the dataset
from recommenders.datasets import movielens
from recommenders.models.ncf.dataset import Dataset as NCFDataset

# Importing the NCF model class from the recommenders library
from recommenders.models.ncf.ncf_singlenode import NCF

# importing the evaluation metrics
from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k,
                                                     recall_at_k, get_top_k_items)
from recommenders.utils.constants import SEED as DEFAULT_SEED


# Report the interpreter and pandas versions so the recorded outputs can be
# matched to the environment that produced them.
print(f"System version: {sys.version}")
print(f"Pandas version: {pd.__version__}")

System version: 3.8.10 (tags/v3.8.10:3d8993a, May  3 2021, 11:48:03) [MSC v.1928 64 bit (AMD64)]
Pandas version: 1.4.4


### Loading the Dataset

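We load a user–song interaction dataset from `cf_final2.csv`. Each row contains the user, the item (a song), the rating value taken from the `label` column, the interaction timestamp, and a human-readable song title.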

In [3]:
# Read the interaction log, rename its columns to the userID / itemID / rating
# names used throughout the notebook, and keep only the columns we need.
df = (
    pd.read_csv("cf_final2.csv")
    .rename(
        columns={
            "user": "userID",
            "item": "itemID",
            "label": "rating",
            "song_by": "song",
        }
    )[["userID", "itemID", "rating", "timestamp", "song"]]
)
df.head(5)

Unnamed: 0,userID,itemID,rating,timestamp,song
0,577,30377,0.172815,1447978000.0,The Safety Dance by Men Without Hats
1,147,15910,0.127811,1441325000.0,Endless Summer by Grizfolk
2,690,40737,0.097224,1413331000.0,Castaway by Zac Brown Band
3,59,40737,0.103762,1404950000.0,Castaway by Zac Brown Band
4,415,49732,0.102779,1402963000.0,Islands In the Stream by Dolly Parton


In [15]:
# Random seed handed to the NCF dataset and model so that runs can be repeated.
SEED = DEFAULT_SEED

# Number of items recommended per user when computing the ranking metrics.
TOP_K = 10

In [5]:
# Split the interactions with python_chrono_split:
# 75% go to the training set and the remaining 25% to the test set.
train, test = python_chrono_split(df, ratio=0.75)


In [6]:
# Keep only the test interactions whose user AND item both occur in the
# training set, so that every test row refers to IDs the model was trained on.
test = test.loc[
    test["userID"].isin(train["userID"].unique())
    & test["itemID"].isin(train["itemID"].unique())
]

# Leave-one-out test set: a single row per user, taken from the end of that
# user's test rows (GroupBy.last() picks the last non-null value per column).
# `train` itself is not modified by this step.
leave_one_out_test = test.groupby("userID").last().reset_index()


In [7]:
# Persist the three splits as CSV files; NCFDataset below is given file paths
# rather than in-memory frames.
train_file = "./train.csv"
test_file = "./test.csv"
leave_one_out_test_file = "./leave_one_out_test.csv"

for _frame, _path in (
    (train, train_file),
    (test, test_file),
    (leave_one_out_test, leave_one_out_test_file),
):
    _frame.to_csv(_path, index=False)

In [8]:
# Build the NCF dataset from the training file and the leave-one-out test file.
# overwrite_test_file_full=True asks for the "full" leave-one-out test file to
# be (re)created; the log below shows it written to ./leave_one_out_test_full.csv.
data = NCFDataset(
    train_file=train_file,
    test_file=leave_one_out_test_file,
    seed=SEED,
    overwrite_test_file_full=True,
)

INFO:recommenders.models.ncf.dataset:Indexing ./train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing ./leave_one_out_test.csv ...
INFO:recommenders.models.ncf.dataset:Creating full leave-one-out test file ./leave_one_out_test_full.csv ...
100%|██████████| 717/717 [00:07<00:00, 97.38it/s] 
INFO:recommenders.models.ncf.dataset:Indexing ./leave_one_out_test_full.csv ...


### Training the NCF Model

In [9]:
import optuna

def _recommendable_predictions(model):
    """Score every training user against every training item, minus seen pairs.

    Args:
        model: A fitted NCF model exposing ``predict(users, items, is_list=True)``.

    Returns:
        pandas.DataFrame with columns ``userID``, ``itemID`` and ``prediction``
        holding one row per (user, item) pair that does NOT occur in ``train``.
    """
    user_ids = train["userID"].unique()
    item_ids = train["itemID"].unique()
    item_list = item_ids.tolist()

    preds = []
    for user_id in user_ids.tolist():
        # One batched call per user, covering all candidate items.
        preds.extend(
            model.predict([user_id] * len(item_list), item_list, is_list=True)
        )

    # Same row order as the loop above: users vary slowest, items fastest.
    all_predictions = pd.DataFrame(
        {
            "userID": np.repeat(user_ids, len(item_ids)),
            "itemID": np.tile(item_ids, len(user_ids)),
            "prediction": preds,
        }
    )

    # Anti-join on the key columns only: pairs already present in train are
    # dropped. Using the merge indicator (instead of testing `rating` for NaN)
    # stays correct even if a training row has a missing rating, and avoids
    # carrying the unused timestamp/song columns through the large frame.
    seen_pairs = train[["userID", "itemID"]].drop_duplicates()
    flagged = all_predictions.merge(
        seen_pairs, on=["userID", "itemID"], how="left", indicator=True
    )
    return flagged.loc[
        flagged["_merge"] == "left_only", ["userID", "itemID", "prediction"]
    ]


def objective(trial):
    """Optuna objective: train one NCF configuration and return precision@TOP_K.

    Args:
        trial: Optuna trial used to sample ``lr``, ``model_type``,
            ``BATCH_SIZE`` and ``EPOCHS``.

    Returns:
        precision@TOP_K of the trained model's unseen-item predictions,
        measured against ``test`` (higher is better).
    """
    # Define hyperparameter search space
    lr = trial.suggest_float("lr", 1e-3, 1e-2)
    model_type = trial.suggest_categorical("model_type", ["NeuMF", "GMF"])
    batch_size = trial.suggest_int("BATCH_SIZE", 128, 256)
    n_epochs = trial.suggest_int("EPOCHS", 10, 30)

    # Create and train the model with the suggested hyperparameters
    model = NCF(
        n_users=data.n_users,
        n_items=data.n_items,
        model_type=model_type,
        n_factors=4,
        layer_sizes=[16, 8, 4],
        n_epochs=n_epochs,
        batch_size=batch_size,
        learning_rate=lr,
        verbose=10,
        seed=SEED,
    )
    model.fit(data)

    # NOTE(review): trials are scored on `test`, the same split used for the
    # final metrics further down, so those metrics are optimistically biased.
    # Consider carving a validation split out of `train` for tuning.
    return precision_at_k(
        test,
        _recommendable_predictions(model),
        col_prediction="prediction",
        k=TOP_K,
    )

# Seed the sampler so the hyperparameter search itself is repeatable; without
# it, each run proposes different trials and may pick different best_params
# even though the model and dataset are seeded. TPESampler is the sampler
# Optuna uses by default, so only the seeding changes.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=10)

# Get the best hyperparameters from the study
best_params = study.best_params

# Re-create the model with the winning configuration; the non-tuned settings
# (n_factors, layer_sizes) match the ones used inside `objective`.
model = NCF(
    n_users=data.n_users,
    n_items=data.n_items,
    model_type=best_params["model_type"],
    n_factors=4,
    layer_sizes=[16, 8, 4],
    n_epochs=best_params["EPOCHS"],
    batch_size=best_params["BATCH_SIZE"],
    learning_rate=best_params["lr"],
    verbose=10,
    seed=SEED,
)

# Fitting the model on the training data.
model.fit(data)

  from .autonotebook import tqdm as notebook_tqdm
[I 2023-11-15 16:52:44,929] A new study created in memory with name: no-name-d91d2f99-4011-46e9-82c7-22facb4a45e7
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [8.94s]: train_loss = 0.305048 
[I 2023-11-15 17:01:17,546] Trial 0 finished with value: 0.004463040446304046 and parameters: {'lr': 0.008305961765046204, 'model_type': 'GMF', 'BATCH_SIZE': 241, 'EPOCHS': 18}. Best is trial 0 with value: 0.004463040446304046.
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [12.25s]: train_loss = 0.090696 
[I 2023-11-15 17:10:30,702] Trial 1 finished with value: 0.009483960948396096 and parameters: {'lr': 0.004549609628628861, 'model_type': 'NeuMF', 'BATCH_SIZE': 198, 'EPOCHS': 16}. Best is trial 1 with value: 0.009483960948396096.
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [12.13s]: train_loss = 0.102895 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 20 [12.40s]: train_loss = 0.046034 
[I 2023-11-15 17:20:24,004] Trial 2

### Prediction and Evaluation

We score every (user, item) pair in the test set with the trained model and collect the results in a pandas DataFrame.

In [10]:
# Score all test (user, item) pairs with a single batched predict call instead
# of one call per row via iterrows(), which is slow on large frames.
# np.asarray(..., dtype=float) keeps the column as float64, matching what the
# per-row float results produced.
predictions = pd.DataFrame(
    {
        "userID": test["userID"].to_numpy(),
        "itemID": test["itemID"].to_numpy(),
        "prediction": np.asarray(
            model.predict(
                test["userID"].tolist(), test["itemID"].tolist(), is_list=True
            ),
            dtype=float,
        ),
    }
)
predictions.head()

Unnamed: 0,userID,itemID,prediction
0,0,26234,0.013188
1,1,50800,0.999457
2,1,16571,0.985448
3,1,17377,0.882039
4,2,8182,0.844812


In this step we score every user–item combination seen in the training data and then remove the items each user has already interacted with, because we do not want to recommend the same item to the user again.

In [11]:
with Timer() as test_time:

    user_ids = train["userID"].unique()
    item_ids = train["itemID"].unique()
    item_list = item_ids.tolist()

    preds = []
    for user_id in user_ids.tolist():
        # One batched call per user, covering all candidate items.
        preds.extend(
            model.predict([user_id] * len(item_list), item_list, is_list=True)
        )

    # Row order matches the loop: users vary slowest, items fastest.
    all_predictions = pd.DataFrame(
        {
            "userID": np.repeat(user_ids, len(item_ids)),
            "itemID": np.tile(item_ids, len(user_ids)),
            "prediction": preds,
        }
    )

    # Anti-join on the key columns only: drop pairs already present in train.
    # The merge indicator (rather than a NaN test on `rating`) stays correct
    # if a training row has a missing rating, and the result carries just
    # userID / itemID / prediction instead of all-NaN timestamp/song columns.
    seen_pairs = train[["userID", "itemID"]].drop_duplicates()
    merged = all_predictions.merge(
        seen_pairs, on=["userID", "itemID"], how="left", indicator=True
    )
    all_predictions = merged.loc[
        merged["_merge"] == "left_only", ["userID", "itemID", "prediction"]
    ]

print("Took {} seconds for prediction.".format(test_time.interval))

Took 73.68985769999927 seconds for prediction.


#### MAP

Mean Average Precision (MAP) - the average precision computed for each user, averaged over all users.

In [16]:
# MAP@TOP_K of the unseen-item predictions against the held-out test set.
eval_map = map_at_k(
    test,
    all_predictions,
    col_prediction="prediction",
    k=TOP_K,
)
print("MAP @ {}: {}".format(TOP_K, eval_map))

MAP @ 10: 0.004766950408786136


#### NDCG

Normalized Discounted Cumulative Gain (NDCG) - evaluates how well the predicted items for a user are ranked based on relevance



In [17]:
# NDCG@TOP_K of the unseen-item predictions against the held-out test set.
eval_ndcg = ndcg_at_k(
    test,
    all_predictions,
    col_prediction="prediction",
    k=TOP_K,
)
print("NDCG @ {}: {}".format(TOP_K, eval_ndcg))

NDCG @ 10: 0.014740506991552552


#### Precision and Recall

Precision - this measures the proportion of recommended items that are relevant

Recall - this measures the proportion of relevant items that are recommended

In [18]:
# Precision@TOP_K and Recall@TOP_K on the same predictions / test set pair.
eval_precision = precision_at_k(
    test,
    all_predictions,
    col_prediction="prediction",
    k=TOP_K,
)
eval_recall = recall_at_k(
    test,
    all_predictions,
    col_prediction="prediction",
    k=TOP_K,
)
print(
    "Precision @ {}: {} \n Recall @ {}: {}".format(
        TOP_K, eval_precision, TOP_K, eval_recall
    )
)

Precision @ 10: 0.01436541143654114 
 Recall @ 10: 0.01370616571450153
