In [4]:
# Download only the `reco_utils` package directory of the microsoft/recommenders
# repository into the working directory, so that the `from reco_utils...` imports
# in the next cell resolve against this local copy.
# NOTE(review): this goes through GitHub's Subversion bridge (the `/trunk/` URL),
# which GitHub has announced as retired, and it follows the default branch without
# a pin — confirm it still works. TODO: replace with a git checkout pinned to a
# release that still ships `reco_utils` and is compatible with TensorFlow 1.x.
!svn checkout https://github.com/microsoft/recommenders/trunk/reco_utils

A    reco_utils/README.md
A    reco_utils/__init__.py
A    reco_utils/azureml
A    reco_utils/azureml/__init__.py
A    reco_utils/azureml/aks_utils.py
A    reco_utils/azureml/azureml_designer_modules
A    reco_utils/azureml/azureml_designer_modules/entries
A    reco_utils/azureml/azureml_designer_modules/entries/map_entry.py
A    reco_utils/azureml/azureml_designer_modules/entries/ndcg_entry.py
A    reco_utils/azureml/azureml_designer_modules/entries/precision_at_k_entry.py
A    reco_utils/azureml/azureml_designer_modules/entries/recall_at_k_entry.py
A    reco_utils/azureml/azureml_designer_modules/entries/score_sar_entry.py
A    reco_utils/azureml/azureml_designer_modules/entries/stratified_splitter_entry.py
A    reco_utils/azureml/azureml_designer_modules/entries/train_sar_entry.py
A    reco_utils/azureml/azureml_designer_modules/module_specs
A    reco_utils/azureml/azureml_designer_modules/module_specs/map.yaml
A    reco_utils/azureml/azureml_designer_modules/module_specs/ndcg.yaml


In [3]:
import sys
import time
import pandas as pd

import tensorflow as tf
# Raise TensorFlow's logging threshold to ERROR so lower-severity messages
# are not emitted while the model is built and trained.
tf.logging.set_verbosity(tf.logging.ERROR)

from reco_utils.recommender.ncf.ncf_singlenode import NCF
from reco_utils.recommender.ncf.dataset import Dataset as NCFDataset
from reco_utils.dataset import movielens
from reco_utils.common.notebook_utils import is_jupyter
from reco_utils.dataset.python_splitters import python_chrono_split
from reco_utils.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, 
                                               recall_at_k, get_top_k_items)

# Record the interpreter and library versions this run was executed with.
version_report = (
    ("System version: {}", sys.version),
    ("Pandas version: {}", pd.__version__),
    ("Tensorflow version: {}", tf.__version__),
)
for template, version in version_report:
    print(template.format(version))

System version: 3.7.6 (default, Dec 19 2019, 23:50:13) 
[GCC 7.4.0]
Pandas version: 0.25.3
Tensorflow version: 1.15.2


In [4]:
# Limit TensorFlow logging to ERROR-level messages (same call as the one issued
# right after `import tensorflow as tf` in the import cell).
tf.logging.set_verbosity(tf.logging.ERROR)

In [5]:
# --- Reproducibility -------------------------------------------------------
# Seed handed to both the dataset wrapper and the model.
SEED = 42

# --- Evaluation ------------------------------------------------------------
# Number of top-ranked items per user considered by the ranking metrics.
TOP_K = 10

# --- Training --------------------------------------------------------------
# Passed to the model as `n_epochs` and `batch_size`.
EPOCHS = 5
BATCH_SIZE = 256

In [6]:
# Column labels are supplied explicitly, so every line of the file is read as data.
# NOTE(review): if ratings.csv carries its own header row it would be loaded as a
# data row — confirm the file is header-less.
ratings_columns = ['userID', 'itemID', 'rating']
df = pd.read_csv('../Data/ratings.csv', names=ratings_columns)

In [7]:
# Work on the leading 50,000 rows only.
df = df.head(50000)

In [8]:
# Distinct item count first, then distinct user count.
for id_column in ('itemID', 'userID'):
    distinct_ids = df[id_column].unique()
    print(len(distinct_ids))

27042
325


In [9]:
# The ratings carry no timestamp column, but the chronological splitter used in
# the next cell orders interactions by one. Use the row position as a synthetic
# timestamp so that "chronological" means "in file order".
# The length is taken from the frame itself rather than hard-coding 50000, so
# this no longer raises a length-mismatch ValueError when fewer rows are
# available or when the truncation size above is changed. `assign` returns a
# new frame instead of writing into the sliced one in place.
df = df.assign(timestamp=range(len(df)))

In [10]:
# Share of the interactions routed to the training set; the remainder becomes
# the test set.
train_ratio = 0.75
train, test = python_chrono_split(df, train_ratio)

In [11]:
# Wrap both splits in the NCF dataset helper; `data.n_users` and `data.n_items`
# are read from it when the model is constructed.
data = NCFDataset(
    train=train,
    test=test,
    seed=SEED,
)

In [12]:
# All model hyperparameters in one place; the user/item counts come from the
# dataset wrapper, the training schedule and seed from the configuration cell.
ncf_params = {
    "n_users": data.n_users,
    "n_items": data.n_items,
    "model_type": "NeuMF",
    "n_factors": 4,
    "layer_sizes": [16, 8, 4],
    "n_epochs": EPOCHS,
    "batch_size": BATCH_SIZE,
    "learning_rate": 1e-3,
    "verbose": 1,
    "seed": SEED,
}
model = NCF(**ncf_params)

In [13]:
# Fit the model and report the wall-clock duration of the fit call.
fit_started = time.time()
model.fit(data)
train_time = time.time() - fit_started

print("Took {} seconds for training.".format(train_time))

Epoch 1 [7.49s]: train_loss = 0.513784 
Epoch 2 [7.59s]: train_loss = 0.448289 
Epoch 3 [7.39s]: train_loss = 0.385378 
Epoch 4 [7.67s]: train_loss = 0.334699 
Epoch 5 [7.30s]: train_loss = 0.298456 
Took 37.43812942504883 seconds for training.


In [14]:
start_time = time.time()

# Score every user seen in training against every item seen in training.
candidate_items = list(train.itemID.unique())
n_candidates = len(candidate_items)

users, items, preds = [], [], []
for user_id in train.userID.unique():
    # One user id repeated once per candidate item, aligned with `candidate_items`.
    repeated_user = [user_id] * n_candidates
    users += repeated_user
    items += candidate_items
    preds += list(model.predict(repeated_user, candidate_items, is_list=True))

all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

# Outer-join with the training interactions. Rows left without a `rating` are
# user/item pairs that do not occur in `train` (assuming `train` itself has no
# missing ratings); only those are kept as recommendation candidates.
merged = train.merge(all_predictions, on=["userID", "itemID"], how="outer")
unseen_mask = merged["rating"].isna()
all_predictions = merged[unseen_mask].drop(columns="rating")

test_time = time.time() - start_time
print("Took {} seconds for prediction.".format(test_time))

Took 9.038759469985962 seconds for prediction.


In [15]:
# Every ranking metric is computed on the same inputs: the held-out test
# interactions, the scored unseen pairs, and the top-TOP_K cut-off.
ranking_kwargs = {"col_prediction": "prediction", "k": TOP_K}

eval_map = map_at_k(test, all_predictions, **ranking_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **ranking_kwargs)
eval_precision = precision_at_k(test, all_predictions, **ranking_kwargs)
eval_recall = recall_at_k(test, all_predictions, **ranking_kwargs)

report_lines = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print("\n".join(report_lines))

MAP:	0.000920
NDCG:	0.006844
Precision@K:	0.007077
Recall@K:	0.004493
