In [1]:
import pandas as pd
import sys
import tensorflow as tf
tf.get_logger().setLevel('ERROR')

from recommenders.utils.timer import Timer
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.datasets import movielens
from recommenders.utils.notebook_utils import is_jupyter
from recommenders.datasets.python_splitters import python_chrono_split
from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, 
                                                     recall_at_k, get_top_k_items)
from sklearn import preprocessing
print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))
print("Tensorflow version: {}".format(tf.__version__))

INFO:numexpr.utils:Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.


System version: 3.8.8 (default, Apr 13 2021, 12:59:45) 
[Clang 10.0.0 ]
Pandas version: 1.2.4
Tensorflow version: 2.12.0


In [2]:
NCF_df = [pd.read_excel("NCF.xlsx", sheet_name=i).drop('Unnamed: 0', axis=1) for i in range(8)]

In [3]:
# top k items to recommend
TOP_K = 5

# Model parameters
EPOCHS = 10
BATCH_SIZE = 128
SEED = 42

In [4]:
train, test = python_chrono_split(
    NCF_df[1], ratio=0.72, filter_by="user",
    col_user='userID', col_item='itemID', col_timestamp='timestamp')

train.to_csv("train.csv", encoding='utf-8')
test.to_csv("test.csv", encoding='utf-8')


In [5]:
train

Unnamed: 0,userID,itemID,timestamp,rating
0,0,36,36,2
1,1,63,63,1
2,2,9,9,1
3,3,53,53,1
4,4,48,48,1
...,...,...,...,...
26386,24180,45,45,1
26387,24181,45,45,1
26388,24182,56,56,1
26389,24183,63,63,1


In [6]:
data = NCFDataset(train_file = 'train.csv', test_file = 'test.csv', seed=SEED)

INFO:recommenders.models.ncf.dataset:Indexing train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test_full.csv ...


In [7]:
model = NCF (
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)



In [8]:
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [279.45s]: train_loss = 0.029267 


Took 2992.6555 seconds for training.


In [9]:
with Timer() as test_time:
    users, items, preds = [], [], []
    item = list(train.itemID.unique())
    for user in train.userID.unique():
        user = [user] * len(item) 
        users.extend(user)
        items.extend(item)
        preds.extend(list(model.predict(user, item, is_list=True)))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID":items, "prediction":preds})

    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    all_predictions = merged[merged.rating.isnull()].drop('rating', axis=1)

print("Took {} seconds for prediction.".format(test_time))

Took 11.2605 seconds for prediction.


In [10]:
eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

MAP:	0.034399
NDCG:	0.044918
Precision@K:	0.015503
Recall@K:	0.077516


In [11]:
all_predictions

Unnamed: 0,userID,itemID,timestamp,prediction
24185,0,63,,1.436304e-04
24186,0,9,,5.378236e-10
24187,0,53,,1.273338e-06
24188,0,48,,2.597639e-12
24189,0,44,,2.544109e-08
...,...,...,...,...
1958980,24184,0,,1.029917e-07
1958981,24184,71,,5.794173e-08
1958982,24184,77,,5.782405e-08
1958983,24184,72,,8.802663e-08


In [12]:
top5 = all_predictions.sort_values(['userID', 'prediction'], ascending=[True, False]).groupby('userID').head(5)

In [13]:
top5[top5.userID == 8]

Unnamed: 0,userID,itemID,timestamp,prediction
24827,8,53,,0.666886
24851,8,22,,0.616729
24852,8,33,,0.548691
24833,8,82,,0.017115
24839,8,51,,0.009447


In [14]:
train, test = python_chrono_split(
    NCF_df[0], ratio=0.72, filter_by="user",
    col_user='userID', col_item='itemID', col_timestamp='timestamp')

train.to_csv("train.csv", encoding='utf-8')
test.to_csv("test.csv", encoding='utf-8')

In [15]:
data = NCFDataset(train_file = 'train.csv', test_file = 'test.csv', seed=SEED)

INFO:recommenders.models.ncf.dataset:Indexing train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test_full.csv ...


In [16]:
model = NCF (
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)



In [17]:
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [68.02s]: train_loss = 0.017688 


Took 690.2720 seconds for training.


In [18]:
with Timer() as test_time:
    users, items, preds = [], [], []
    item = list(train.itemID.unique())
    for user in train.userID.unique():
        user = [user] * len(item) 
        users.extend(user)
        items.extend(item)
        preds.extend(list(model.predict(user, item, is_list=True)))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID":items, "prediction":preds})

    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    all_predictions = merged[merged.rating.isnull()].drop('rating', axis=1)

print("Took {} seconds for prediction.".format(test_time))

Took 5.3882 seconds for prediction.


In [19]:
eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

MAP:	0.324528
NDCG:	0.371159
Precision@K:	0.101734
Recall@K:	0.508671


In [20]:
all_predictions

Unnamed: 0,userID,itemID,timestamp,prediction
12566,0,38,,1.363026e-02
12567,0,50,,2.288455e-01
12568,0,11,,3.328384e-01
12569,0,28,,1.648446e-04
12570,0,29,,6.871377e-04
...,...,...,...,...
540204,12562,17,,4.848926e-13
540205,12562,14,,1.112624e-13
540206,12562,18,,5.195743e-13
540207,12562,48,,6.714768e-13


In [21]:
top5 = all_predictions.sort_values(['userID', 'prediction'], ascending=[True, False]).groupby('userID').head(5)

In [22]:
top5[top5.userID == 1306]

Unnamed: 0,userID,itemID,timestamp,prediction
67420,1306,11,,0.109994
67418,1306,27,,0.006429
67419,1306,38,,0.000655
67427,1306,32,,5e-06
67422,1306,29,,5e-06


In [23]:
train, test = python_chrono_split(
    NCF_df[2], ratio=0.72, filter_by="user",
    col_user='userID', col_item='itemID', col_timestamp='timestamp')

train.to_csv("train.csv", encoding='utf-8')
test.to_csv("test.csv", encoding='utf-8')

In [24]:
data = NCFDataset(train_file = 'train.csv', test_file = 'test.csv', seed=SEED)

INFO:recommenders.models.ncf.dataset:Indexing train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test_full.csv ...


In [25]:
model = NCF (
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)



In [26]:
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [19.68s]: train_loss = 0.042252 


Took 201.6655 seconds for training.


In [27]:
with Timer() as test_time:
    users, items, preds = [], [], []
    item = list(train.itemID.unique())
    for user in train.userID.unique():
        user = [user] * len(item) 
        users.extend(user)
        items.extend(item)
        preds.extend(list(model.predict(user, item, is_list=True)))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID":items, "prediction":preds})

    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    all_predictions = merged[merged.rating.isnull()].drop('rating', axis=1)

print("Took {} seconds for prediction.".format(test_time))

Took 2.4746 seconds for prediction.


In [28]:
eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

MAP:	0.062957
NDCG:	0.084698
Precision@K:	0.030323
Recall@K:	0.151613


In [29]:
all_predictions

Unnamed: 0,userID,itemID,timestamp,prediction
6356,0,37,,2.012565e-01
6357,0,4,,4.547384e-04
6358,0,39,,2.317167e-03
6359,0,35,,1.392983e-01
6360,0,36,,7.737205e-02
...,...,...,...,...
228811,6355,30,,1.669608e-11
228812,6355,29,,4.823942e-11
228813,6355,33,,1.810621e-11
228814,6355,28,,1.771045e-11


In [30]:
top5 = all_predictions.sort_values(['userID', 'prediction'], ascending=[True, False]).groupby('userID').head(5)

In [31]:
top5[top5.userID == 355]

Unnamed: 0,userID,itemID,timestamp,prediction
18781,355,6,,0.348032
18782,355,4,,0.038598
18784,355,35,,0.008042
18783,355,39,,0.006427
18785,355,36,,0.002883


In [32]:
train, test = python_chrono_split(
    NCF_df[3], ratio=0.72, filter_by="user",
    col_user='userID', col_item='itemID', col_timestamp='timestamp')

train.to_csv("train.csv", encoding='utf-8')
test.to_csv("test.csv", encoding='utf-8')

data = NCFDataset(train_file = 'train.csv', test_file = 'test.csv', seed=SEED)

model = NCF (
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)

with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

with Timer() as test_time:
    users, items, preds = [], [], []
    item = list(train.itemID.unique())
    for user in train.userID.unique():
        user = [user] * len(item) 
        users.extend(user)
        items.extend(item)
        preds.extend(list(model.predict(user, item, is_list=True)))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID":items, "prediction":preds})

    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    all_predictions = merged[merged.rating.isnull()].drop('rating', axis=1)

print("Took {} seconds for prediction.".format(test_time))

eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

INFO:recommenders.models.ncf.dataset:Indexing train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test_full.csv ...
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [23.95s]: train_loss = 0.018135 


Took 213.4112 seconds for training.
Took 2.4336 seconds for prediction.
MAP:	0.147240
NDCG:	0.172538
Precision@K:	0.050000
Recall@K:	0.250000


In [33]:
all_predictions

Unnamed: 0,userID,itemID,timestamp,prediction
6333,0,7,,6.556437e-03
6334,0,10,,2.210625e-02
6335,0,12,,1.073632e-03
6336,0,1,,5.952925e-06
6337,0,14,,9.799859e-07
...,...,...,...,...
145608,6330,9,,1.684664e-08
145609,6330,17,,4.239941e-08
145610,6330,20,,1.998342e-08
145611,6330,0,,2.147337e-08


In [34]:
top5 = all_predictions.sort_values(['userID', 'prediction'], ascending=[True, False]).groupby('userID').head(5)

In [35]:
top5[top5.userID == 6478]

Unnamed: 0,userID,itemID,timestamp,prediction


In [36]:
train, test = python_chrono_split(
    NCF_df[4], ratio=0.72, filter_by="user",
    col_user='userID', col_item='itemID', col_timestamp='timestamp')

train.to_csv("train.csv", encoding='utf-8')
test.to_csv("test.csv", encoding='utf-8')

data = NCFDataset(train_file = 'train.csv', test_file = 'test.csv', seed=SEED)

model = NCF (
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)

with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

with Timer() as test_time:
    users, items, preds = [], [], []
    item = list(train.itemID.unique())
    for user in train.userID.unique():
        user = [user] * len(item) 
        users.extend(user)
        items.extend(item)
        preds.extend(list(model.predict(user, item, is_list=True)))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID":items, "prediction":preds})

    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    all_predictions = merged[merged.rating.isnull()].drop('rating', axis=1)

print("Took {} seconds for prediction.".format(test_time))

eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

INFO:recommenders.models.ncf.dataset:Indexing train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test_full.csv ...
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [11.70s]: train_loss = 0.173798 


Took 118.0367 seconds for training.
Took 2.2226 seconds for prediction.
MAP:	0.051326
NDCG:	0.066015
Precision@K:	0.022509
Recall@K:	0.109994


In [37]:
all_predictions

Unnamed: 0,userID,itemID,timestamp,prediction
6399,0,8,,0.003368
6400,0,76,,0.045470
6401,0,78,,0.026213
6402,0,33,,0.003280
6403,0,12,,0.000275
...,...,...,...,...
410811,4610,18,,0.001064
410812,4610,77,,0.000355
410813,4610,83,,0.000473
410814,4610,22,,0.002111


In [38]:
top5 = all_predictions.sort_values(['userID', 'prediction'], ascending=[True, False]).groupby('userID').head(5)

In [39]:
top5[top5.userID == 6724]

Unnamed: 0,userID,itemID,timestamp,prediction


In [40]:
train, test = python_chrono_split(
    NCF_df[5], ratio=0.72, filter_by="user",
    col_user='userID', col_item='itemID', col_timestamp='timestamp')

train.to_csv("train.csv", encoding='utf-8')
test.to_csv("test.csv", encoding='utf-8')

data = NCFDataset(train_file = 'train.csv', test_file = 'test.csv', seed=SEED)

model = NCF (
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)

with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

with Timer() as test_time:
    users, items, preds = [], [], []
    item = list(train.itemID.unique())
    for user in train.userID.unique():
        user = [user] * len(item) 
        users.extend(user)
        items.extend(item)
        preds.extend(list(model.predict(user, item, is_list=True)))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID":items, "prediction":preds})

    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    all_predictions = merged[merged.rating.isnull()].drop('rating', axis=1)

print("Took {} seconds for prediction.".format(test_time))

eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

INFO:recommenders.models.ncf.dataset:Indexing train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test_full.csv ...
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [26.96s]: train_loss = 0.014887 


Took 278.3583 seconds for training.
Took 2.9653 seconds for prediction.
MAP:	0.169959
NDCG:	0.186496
Precision@K:	0.047251
Recall@K:	0.236253


In [41]:
all_predictions

Unnamed: 0,userID,itemID,timestamp,prediction
7720,0,37,,2.944159e-03
7721,0,36,,9.060510e-09
7722,0,40,,8.898050e-02
7723,0,33,,6.909954e-07
7724,0,20,,3.957189e-07
...,...,...,...,...
293355,7719,7,,6.629518e-09
293356,7719,34,,3.089470e-09
293357,7719,29,,2.848238e-09
293358,7719,25,,2.797665e-09


In [42]:
top5 = all_predictions.sort_values(['userID', 'prediction'], ascending=[True, False]).groupby('userID').head(5)

In [43]:
top5[top5.userID == 2084]

Unnamed: 0,userID,itemID,timestamp,prediction
84830,2084,40,,0.04555297
84828,2084,37,,0.01685525
84831,2084,33,,1.627365e-06
84832,2084,20,,1.536404e-06
84839,2084,13,,9.962342e-07


In [44]:
train, test = python_chrono_split(
    NCF_df[6], ratio=0.72, filter_by="user",
    col_user='userID', col_item='itemID', col_timestamp='timestamp')

train.to_csv("train.csv", encoding='utf-8')
test.to_csv("test.csv", encoding='utf-8')

data = NCFDataset(train_file = 'train.csv', test_file = 'test.csv', seed=SEED)

model = NCF (
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)

with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

with Timer() as test_time:
    users, items, preds = [], [], []
    item = list(train.itemID.unique())
    for user in train.userID.unique():
        user = [user] * len(item) 
        users.extend(user)
        items.extend(item)
        preds.extend(list(model.predict(user, item, is_list=True)))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID":items, "prediction":preds})

    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    all_predictions = merged[merged.rating.isnull()].drop('rating', axis=1)

print("Took {} seconds for prediction.".format(test_time))

eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

INFO:recommenders.models.ncf.dataset:Indexing train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test_full.csv ...
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [11.63s]: train_loss = 0.068853 


Took 116.1422 seconds for training.
Took 1.8436 seconds for prediction.
MAP:	0.082842
NDCG:	0.105001
Precision@K:	0.034808
Recall@K:	0.174041


In [45]:
all_predictions

Unnamed: 0,userID,itemID,timestamp,prediction
4531,0,14,,0.082389
4532,0,46,,0.810754
4533,0,39,,0.000471
4534,0,29,,0.013829
4535,0,7,,0.002080
...,...,...,...,...
208375,4529,23,,0.000258
208376,4529,33,,0.000268
208377,4529,17,,0.000265
208378,4529,13,,0.000220


In [46]:
top5 = all_predictions.sort_values(['userID', 'prediction'], ascending=[True, False]).groupby('userID').head(5)

In [47]:
top5[top5.userID == 6908]

Unnamed: 0,userID,itemID,timestamp,prediction


In [48]:
train, test = python_chrono_split(
    NCF_df[7], ratio=0.72, filter_by="user",
    col_user='userID', col_item='itemID', col_timestamp='timestamp')

train.to_csv("train.csv", encoding='utf-8')
test.to_csv("test.csv", encoding='utf-8')

data = NCFDataset(train_file = 'train.csv', test_file = 'test.csv', seed=SEED)

model = NCF (
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)

with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

with Timer() as test_time:
    users, items, preds = [], [], []
    item = list(train.itemID.unique())
    for user in train.userID.unique():
        user = [user] * len(item) 
        users.extend(user)
        items.extend(item)
        preds.extend(list(model.predict(user, item, is_list=True)))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID":items, "prediction":preds})

    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    all_predictions = merged[merged.rating.isnull()].drop('rating', axis=1)

print("Took {} seconds for prediction.".format(test_time))

eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

INFO:recommenders.models.ncf.dataset:Indexing train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test.csv ...
INFO:recommenders.models.ncf.dataset:Indexing test_full.csv ...
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [157.73s]: train_loss = 0.001342 


Took 1614.2818 seconds for training.
Took 6.8170 seconds for prediction.
MAP:	0.277260
NDCG:	0.329661
Precision@K:	0.096271
Recall@K:	0.481356


In [49]:
all_predictions

Unnamed: 0,userID,itemID,timestamp,prediction
18430,0,13,,7.231650e-09
18431,0,15,,6.359312e-04
18432,0,11,,5.850212e-04
18433,0,10,,2.998461e-14
18434,0,18,,1.320626e-09
...,...,...,...,...
313305,18429,5,,3.181315e-16
313306,18429,6,,7.262157e-16
313307,18429,7,,3.400473e-16
313308,18429,3,,9.504721e-16


In [50]:
top5 = all_predictions.sort_values(['userID', 'prediction'], ascending=[True, False]).groupby('userID').head(5)

In [53]:
top5[top5.userID == 12649]

Unnamed: 0,userID,itemID,timestamp,prediction
220819,12649,18,,0.936236
220816,12649,15,,0.041417
220823,12649,1,,0.014854
220822,12649,9,,0.003671
220815,12649,13,,0.002554


In [75]:
train[train.userID == 12649] 

Unnamed: 0,userID,itemID,timestamp,rating
13080,12649,4,4,1
