In [7]:
!pip install scrapbook

Collecting scrapbook
  Downloading scrapbook-0.5.0-py3-none-any.whl (34 kB)
Installing collected packages: scrapbook
Successfully installed scrapbook-0.5.0


In [3]:
!pip install cornac

Collecting cornac
  Downloading cornac-1.14.1-cp37-cp37m-manylinux1_x86_64.whl (12.4 MB)
[K     |████████████████████████████████| 12.4 MB 5.3 MB/s 
[?25hCollecting powerlaw
  Downloading powerlaw-1.5-py3-none-any.whl (24 kB)
Installing collected packages: powerlaw, cornac
Successfully installed cornac-1.14.1 powerlaw-1.5


In [5]:
pip install papermill

Collecting papermill
  Downloading papermill-2.3.4-py3-none-any.whl (37 kB)
Collecting ansiwrap
  Downloading ansiwrap-0.8.4-py2.py3-none-any.whl (8.5 kB)
Collecting jupyter-client>=6.1.5
  Downloading jupyter_client-7.1.2-py3-none-any.whl (130 kB)
[K     |████████████████████████████████| 130 kB 9.2 MB/s 
Collecting textwrap3>=0.9.2
  Downloading textwrap3-0.9.2-py2.py3-none-any.whl (12 kB)
Installing collected packages: textwrap3, jupyter-client, ansiwrap, papermill
  Attempting uninstall: jupyter-client
    Found existing installation: jupyter-client 5.3.5
    Uninstalling jupyter-client-5.3.5:
      Successfully uninstalled jupyter-client-5.3.5
Successfully installed ansiwrap-0.8.4 jupyter-client-7.1.2 papermill-2.3.4 textwrap3-0.9.2


In [11]:
!pip install recommenders


Collecting recommenders
  Downloading recommenders-1.0.0-py3-none-manylinux1_x86_64.whl (318 kB)
[K     |████████████████████████████████| 318 kB 5.3 MB/s 
Collecting pyyaml<6,>=5.4.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 70.2 MB/s 
Collecting pydocumentdb>=2.3.3<3
  Downloading pydocumentdb-2.3.5-py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 2.4 MB/s 
[?25hCollecting category-encoders<2,>=1.3.0
  Downloading category_encoders-1.3.0-py2.py3-none-any.whl (61 kB)
[K     |████████████████████████████████| 61 kB 5.5 MB/s 
[?25hCollecting transformers<5,>=2.5.0
  Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 52.2 MB/s 
Collecting memory-profiler<1,>=0.54.0
  Downloading memory_profiler-0.60.0.tar.gz (38 kB)
Collecting pandera[strategies]>=0.6.5
  Downloading pandera-0.9.0-py3-none-any.whl (197 kB)
[K     |████████████

<i>Copyright (c) Microsoft Corporation. All rights reserved.</i>

<i>Licensed under the MIT License.</i>

## 0 Global Settings and Imports

In [21]:
import sys
import os
import torch
import cornac
import papermill as pm
import scrapbook as sb
import pandas as pd
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED

# Report the interpreter and library versions used for this run, one per line.
print(
    "System version: {}".format(sys.version),
    "PyTorch version: {}".format(torch.__version__),
    "Cornac version: {}".format(cornac.__version__),
    sep="\n",
)

System version: 3.7.12 (default, Jan 15 2022, 18:48:18) 
[GCC 7.5.0]
PyTorch version: 1.10.0+cu111
Cornac version: 1.14.1


In [14]:
# Dataset: which MovieLens variant to load (100k, 1m, 10m, or 20m)
MOVIELENS_DATA_SIZE = "100k"

# Evaluation: number of top items to recommend (the K of the @K metrics)
TOP_K = 10

# BiVAE architecture -- each value is forwarded to cornac.models.BiVAECF
LATENT_DIM = 50        # passed as `k`
ENCODER_DIMS = [100]   # passed as `encoder_structure`
ACT_FUNC = "tanh"      # passed as `act_fn`
LIKELIHOOD = "pois"    # passed as `likelihood`

# BiVAE optimisation settings -- also forwarded to cornac.models.BiVAECF
NUM_EPOCHS = 500       # passed as `n_epochs`
BATCH_SIZE = 128       # passed as `batch_size`
LEARNING_RATE = 1e-3   # passed as `learning_rate`

## 1 Load and split data

In [15]:
# Load the MovieLens ratings, naming only the user, item and rating columns.
data = movielens.load_pandas_df(
    header=["userID", "itemID", "rating"],
    size=MOVIELENS_DATA_SIZE,
)

# Preview the first rows of the loaded frame.
data.head()


100%|██████████| 4.81k/4.81k [00:00<00:00, 27.5kKB/s]


Unnamed: 0,userID,itemID,rating
0,196,242,3.0
1,186,302,3.0
2,22,377,1.0
3,244,51,2.0
4,166,346,1.0


In [16]:
# Reload with the loader's default header. This rebinds `data`; the frame
# previewed below also carries a timestamp column.
data = movielens.load_pandas_df(size=MOVIELENS_DATA_SIZE)

# Preview the first rows of the reloaded frame.
data.head()

100%|██████████| 4.81k/4.81k [00:00<00:00, 27.5kKB/s]


Unnamed: 0,userID,itemID,rating,timestamp
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596


In [22]:
# Randomly split the ratings, 75% for training and the rest for testing.
# The seed is passed explicitly so the split is tied to the same SEED that the
# Cornac dataset and the BiVAE model use, instead of an implicit default.
train, test = python_random_split(data, ratio=0.75, seed=SEED)

In [23]:
# Build the Cornac dataset from explicit (user, item, rating) triplets.
# Selecting the columns by name keeps extra columns (such as the timestamp
# present after the default-header load) out of the tuples given to from_uir,
# so this cell behaves the same whichever load cell produced `data`.
train_set = cornac.data.Dataset.from_uir(
    train[["userID", "itemID", "rating"]].itertuples(index=False),
    seed=SEED,
)

# Report the size of the training interaction set.
print('Number of users: {}'.format(train_set.num_users))
print('Number of items: {}'.format(train_set.num_items))

Number of users: 943
Number of items: 1642


## 2 Training the BiVAE model

In [24]:
# Configure the BiVAE model from the settings defined in the configuration cell.
bivae = cornac.models.BiVAECF(
    # run control
    seed=SEED,
    use_gpu=torch.cuda.is_available(),  # use the GPU only when CUDA is available
    verbose=True,
    # architecture
    k=LATENT_DIM,
    encoder_structure=ENCODER_DIMS,
    act_fn=ACT_FUNC,
    likelihood=LIKELIHOOD,
    # optimisation
    n_epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
)

# Fit on the training set and report the elapsed wall-clock time.
with Timer() as t:
    bivae.fit(train_set)

print("Took {} seconds for training.".format(t))

  0%|          | 0/500 [00:00<?, ?it/s]

Took 132.5962 seconds for training.


## 3 Prediction and Evaluation

In [25]:
# Produce item scores for the users/items found in `train`, timing the call.
# remove_seen=True presumably leaves out items already present in `train`
# (inferred from the flag name -- confirm against predict_ranking's docs).
with Timer() as t:
    all_predictions = predict_ranking(
        bivae,
        train,
        usercol='userID',
        itemcol='itemID',
        remove_seen=True,
    )

print("Took {} seconds for prediction.".format(t))

Took 1.5201 seconds for prediction.


In [26]:
# Spot-check a single user: keep only the prediction rows for user 811 ...
data_811 = all_predictions.loc[all_predictions["userID"] == 811]

# ... and display them ordered from the highest to the lowest predicted score.
data_811.sort_values("prediction", ascending=False)



Unnamed: 0,userID,itemID,prediction
75038,811,300,8.895675e-01
75279,811,313,8.315240e-01
75343,811,328,6.250576e-01
75093,811,288,5.828511e-01
75374,811,259,4.568913e-01
...,...,...,...
76344,811,1108,1.973982e-07
76169,811,1421,1.835117e-07
76458,811,793,1.768580e-07
76323,811,1445,1.622060e-07


In [27]:
# Score the predictions against the held-out test split with four @K metrics,
# all computed at the same cut-off TOP_K.
eval_map = map_at_k(
    test, all_predictions, col_prediction='prediction', k=TOP_K
)
eval_ndcg = ndcg_at_k(
    test, all_predictions, col_prediction='prediction', k=TOP_K
)
eval_precision = precision_at_k(
    test, all_predictions, col_prediction='prediction', k=TOP_K
)
eval_recall = recall_at_k(
    test, all_predictions, col_prediction='prediction', k=TOP_K
)

# Print one "name:<TAB>value" line per metric.
print("\n".join([
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]))

MAP:	0.144506
NDCG:	0.470748
Precision@K:	0.411890
Recall@K:	0.227210


In [28]:
# Record results as scrapbook scraps for tests (the calls below go through
# scrapbook's glue, not papermill).
for scrap_name, scrap_value in (
    ("map", eval_map),
    ("ndcg", eval_ndcg),
    ("precision", eval_precision),
    ("recall", eval_recall),
):
    sb.glue(scrap_name, scrap_value)