In [2]:
conda activate reco_base


Note: you may need to restart the kernel to use updated packages.


In [3]:
cd D:\Algorithmic Marketing\Assignment 3\recommenders

D:\Algorithmic Marketing\Assignment 3\recommenders


In [1]:
import sys
sys.path.append("../../")
import os
import papermill as pm
import pandas as pd
import numpy as np
import tensorflow as tf
from reco_utils.common.timer import Timer
from reco_utils.recommender.deeprec.models.graphrec.lightgcn import LightGCN
from reco_utils.recommender.deeprec.DataModel.ImplicitCF import ImplicitCF
from reco_utils.dataset import movielens
from reco_utils.dataset.python_splitters import python_stratified_split
from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from reco_utils.common.constants import SEED as DEFAULT_SEED
from reco_utils.recommender.deeprec.deeprec_utils import prepare_hparams


print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))
print("Tensorflow version: {}".format(tf.__version__))

System version: 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
Pandas version: 0.25.3
Tensorflow version: 2.2.0


In [7]:
# top k items to recommend
TOP_K = 10

# Select MovieLens data size: 100k, 1m, 10m, or 20m
MOVIELENS_DATA_SIZE = '100k'

# Model parameters
EPOCHS = 50
BATCH_SIZE = 1024

SEED = DEFAULT_SEED  # Set None for non-deterministic results

yaml_file = "D:/Algorithmic Marketing/Assignment 3/recommenders/reco_utils/recommender/deeprec/config/lightgcn.yaml"
user_file = "../../tests/resources/deeprec/lightgcn/user_embeddings.csv"
item_file = "../../tests/resources/deeprec/lightgcn/item_embeddings.csv"

In [8]:
df = movielens.load_pandas_df(size=MOVIELENS_DATA_SIZE)

100%|██████████| 4.81k/4.81k [00:00<00:00, 8.39kKB/s]


In [9]:
df.head()

Unnamed: 0,userID,itemID,rating,timestamp
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596


In [10]:
train, test = python_stratified_split(df, ratio=0.75)

In [11]:
data = ImplicitCF(train=train, test=test, seed=SEED)

In [12]:
hparams = prepare_hparams(yaml_file,
                          n_layers=3,
                          batch_size=BATCH_SIZE,
                          epochs=EPOCHS,
                          learning_rate=0.005,
                          eval_epoch=5,
                          top_k=TOP_K,
                         )

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [13]:
model = LightGCN(hparams, data, seed=SEED)


Already create adjacency matrix.
Already normalize adjacency matrix.

Using xavier initialization.




  d_inv = np.power(rowsum, -0.5).flatten()









In [14]:
with Timer() as train_time:
    model.fit()

print("Took {} seconds for training.".format(train_time.interval))

Epoch 1 (train)5.0s: train loss = 0.47059 = (mf)0.47034 + (embed)0.00025
Epoch 2 (train)4.1s: train loss = 0.27253 = (mf)0.27182 + (embed)0.00070
Epoch 3 (train)4.1s: train loss = 0.24397 = (mf)0.24307 + (embed)0.00089
Epoch 4 (train)4.1s: train loss = 0.22951 = (mf)0.22845 + (embed)0.00106
Epoch 5 (train)4.4s + (eval)0.4s: train loss = 0.22529 = (mf)0.22412 + (embed)0.00117, recall = 0.16052, ndcg = 0.34971, precision = 0.30445, map = 0.09283
Epoch 6 (train)4.2s: train loss = 0.21715 = (mf)0.21588 + (embed)0.00127
Epoch 7 (train)4.2s: train loss = 0.20724 = (mf)0.20586 + (embed)0.00138
Epoch 8 (train)4.1s: train loss = 0.19893 = (mf)0.19742 + (embed)0.00152
Epoch 9 (train)4.2s: train loss = 0.18752 = (mf)0.18587 + (embed)0.00165
Epoch 10 (train)4.3s + (eval)0.3s: train loss = 0.18446 = (mf)0.18265 + (embed)0.00181, recall = 0.17741, ndcg = 0.38339, precision = 0.33499, map = 0.10538
Epoch 11 (train)4.5s: train loss = 0.17381 = (mf)0.17186 + (embed)0.00195
Epoch 12 (train)4.2s: train l

In [15]:
topk_scores = model.recommend_k_items(test, top_k=TOP_K, remove_seen=True)

topk_scores.head()

Unnamed: 0,userID,itemID,prediction
0,1,7,5.61661
1,1,89,5.236757
2,1,475,5.16939
3,1,210,5.104498
4,1,127,5.09701


In [16]:
eval_map = map_at_k(test, topk_scores, k=TOP_K)
eval_ndcg = ndcg_at_k(test, topk_scores, k=TOP_K)
eval_precision = precision_at_k(test, topk_scores, k=TOP_K)
eval_recall = recall_at_k(test, topk_scores, k=TOP_K)

print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

MAP:	0.136253
NDCG:	0.454250
Precision@K:	0.398621
Recall@K:	0.213418


In [25]:
import scrapbook as sb
sb.glue("map", eval_map)

AttributeError: module 'scrapbook' has no attribute 'glue'

In [23]:
pm.record("map", eval_map)
pm.record("ndcg", eval_ndcg)
pm.record("precision", eval_precision)
pm.record("recall", eval_recall)

AttributeError: module 'papermill' has no attribute 'record'

In [22]:
import scrapbook as sb

sb.glue("hello", "world")
sb.glue("number", 123)
sb.glue("some_list", [1, 3, 5])
sb.glue("some_dict", {"a": 1, "b": 2})
sb.glue("non_json", df, 'arrow')


AttributeError: module 'scrapbook' has no attribute 'glue'

In [24]:
model.infer_embedding(user_file, item_file)