<h1>Neural Collaborative Filtering</h1>

In [1]:
import sys
import pandas as pd
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from recommenders.utils.timer import Timer
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.datasets import movielens
from recommenders.utils.notebook_utils import is_jupyter
from recommenders.datasets.python_splitters import python_chrono_split
from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, 
                                                     recall_at_k, get_top_k_items)

# Report the interpreter and key library versions used for this run.
version_templates = (
    ("System version: {}", sys.version),
    ("Pandas version: {}", pd.__version__),
    ("Tensorflow version: {}", tf.__version__),
)
for template, version in version_templates:
    print(template.format(version))

System version: 3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 08:48:25) 
[Clang 14.0.6 ]
Pandas version: 1.5.3
Tensorflow version: 2.11.0


In [2]:
# Number of threads TensorFlow may use to run independent operations in parallel.
# NOTE(review): 6 presumably reflects the machine this notebook was run on —
# adjust it for your hardware.
INTER_OP_THREADS = 6

# Keep this cell ahead of any model construction: per the TensorFlow docs the
# threading configuration is expected to be set before the runtime initializes.
tf.config.threading.set_inter_op_parallelism_threads(INTER_OP_THREADS)

In [4]:
# --- Experiment configuration ---

# Random seed, passed to both the dataset and the model
SEED = 42

# Training hyperparameters
EPOCHS = 50
BATCH_SIZE = 32

# Cut-off (k) used by the ranking metrics, i.e. top k items to recommend
TOP_K = 100

<h1>Load the MovieLens dataset</h1>

In [5]:
# Locations of the pre-split interaction files, relative to the working directory.
train_file, test_file = "../data/sas/train.csv", "../data/sas/test.csv"

# Load both splits as DataFrames; the same paths are also handed to NCFDataset later.
train, test = (pd.read_csv(csv_path) for csv_path in (train_file, test_file))

In [6]:
# Build the NCF dataset directly from the CSV files on disk. The seed is
# forwarded so that whatever randomness the dataset uses is repeatable.
data = NCFDataset(
    train_file=train_file,
    test_file=test_file,
    seed=SEED,
)

INFO:recommenders.models.ncf.dataset:Indexing ../data/sas/train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing ../data/sas/test.csv ...
INFO:recommenders.models.ncf.dataset:Indexing ../data/sas/test_full.csv ...


<h1>Train the NCF model on the training data, and get the top-k recommendations for our testing data</h1>

- NCF accepts implicit feedback and generates a propensity score, on a scale of 0 to 1, for each item to be recommended to a user
- A recommended item list can then be generated based on the scores

In [7]:
# Instantiate the "NeuMF" model type, sized from the user/item counts
# exposed by the dataset object.
model = NCF(
    # problem size
    n_users=data.n_users,
    n_items=data.n_items,
    # architecture
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16, 8, 4],
    # optimisation
    learning_rate=3e-3,
    batch_size=BATCH_SIZE,
    n_epochs=EPOCHS,
    # reproducibility and logging
    seed=SEED,
    verbose=1,
)

2023-03-18 23:05:30.115973: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:357] MLIR V1 optimization pass is not enabled
2023-03-18 23:05:30.126313: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [8]:
# Fit the model on the NCF dataset, timing the whole training run.
with Timer() as train_time:
    model.fit(data)

training_summary = "Took {} seconds for training.".format(train_time)
print(training_summary)

INFO:recommenders.models.ncf.ncf_singlenode:Epoch 1 [17.67s]: train_loss = 0.400070 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 2 [18.73s]: train_loss = 0.354878 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 3 [20.18s]: train_loss = 0.327123 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 4 [20.51s]: train_loss = 0.306472 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 5 [21.77s]: train_loss = 0.291558 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 6 [20.49s]: train_loss = 0.280598 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 7 [23.40s]: train_loss = 0.271243 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 8 [26.27s]: train_loss = 0.265039 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 9 [22.00s]: train_loss = 0.260578 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [24.28s]: train_loss = 0.255399 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 11 [22.95s]: train_loss = 0.251603 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 12 [24.62s]: 

Took 1025.6395 seconds for training.


In [9]:
with Timer() as test_time:
    # Candidate pool: every distinct item that appears in the training data.
    candidate_items = list(train.itemID.unique())
    n_candidates = len(candidate_items)

    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        # predict() is called with parallel user/item lists (is_list=True),
        # so the current user is repeated once per candidate item.
        user_batch = [user_id] * n_candidates
        users.extend(user_batch)
        items.extend(candidate_items)
        preds.extend(list(model.predict(user_batch, candidate_items, is_list=True)))

    scored_pairs = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # Remove (user, item) pairs already present in the training data with an
    # explicit anti-join on the key columns. Unlike filtering on a null
    # `rating` after an outer merge, this does not depend on `rating` being
    # non-null in `train`, and it keeps any extra `train` columns out of the
    # prediction frame.
    seen_pairs = train[["userID", "itemID"]].drop_duplicates()
    merged = scored_pairs.merge(seen_pairs, on=["userID", "itemID"], how="left", indicator=True)
    all_predictions = merged.loc[merged["_merge"] == "left_only"].drop(columns="_merge")

print("Took {} seconds for prediction.".format(test_time))

Took 37.8891 seconds for prediction.


<h1>Evaluate how well NCF performs</h1>

In [10]:
# Arguments shared by all four ranking metrics.
metric_kwargs = {"col_prediction": 'prediction', "k": TOP_K}

# Score the unseen-item predictions against the held-out test interactions.
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

# One metric per line, label and value separated by a tab.
report_lines = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print("\n".join(report_lines))

MAP:	0.061344
NDCG:	0.120762
Precision@K:	0.008978
Recall@K:	0.207981


In [16]:
if not is_jupyter():
    # Record results with papermill for tests
    # NOTE(review): this guard runs the glue calls only when is_jupyter() is
    # falsy. Scrapbook results are normally recorded during a notebook
    # execution, so the condition may be inverted — confirm it is intentional.
    import papermill as pm
    import scrapbook as sb

    # Metric and timing values to attach, keyed by the name they are glued under.
    recorded_results = {
        "map": eval_map,
        "ndcg": eval_ndcg,
        "precision": eval_precision,
        "recall": eval_recall,
        "train_time": train_time.interval,
        "test_time": test_time.interval,
    }
    for result_name, result_value in recorded_results.items():
        sb.glue(result_name, result_value)