In [20]:
import sys
sys.path.append("../")
import os
import torch
import cornac
import papermill as pm
import scrapbook as sb
import pandas as pd
import numpy as np
from reco_utils.dataset import movielens
from reco_utils.dataset.python_splitters import python_random_split
from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from reco_utils.recommender.cornac.cornac_utils import predict_ranking, predict
from reco_utils.common.timer import Timer
from reco_utils.common.constants import SEED

# Report the interpreter and key library versions so the run can be reproduced.
version_report = [
    "System version: {}".format(sys.version),
    "PyTorch version: {}".format(torch.__version__),
    "Cornac version: {}".format(cornac.__version__),
]
print("\n".join(version_report))

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll.base import scope
from hyperopt.pyll.stochastic import sample

System version: 3.6.11 | packaged by conda-forge | (default, Nov 27 2020, 18:57:37) 
[GCC 9.3.0]
PyTorch version: 1.4.0
Cornac version: 1.12.0


### Load data from our saved .parquet files

**To work with models implemented in Cornac, we first need to construct a `cornac.data.Dataset` object from the data.**

In [2]:
# NOTE(review): this rebinds SEED, replacing the value imported from
# reco_utils.common.constants in the import cell. Every later `seed=SEED`
# argument in this notebook therefore uses 1066.
SEED = 1066

In [3]:
# Load the saved train / validation / test splits.
# Each file carries an extra 'index' column, which is dropped here.
# `columns=` is used instead of the positional axis argument (`.drop('index', 1)`),
# which is deprecated and rejected by pandas 2.x.
train = pd.read_parquet('../data/train1m.parquet').drop(columns='index')
val = pd.read_parquet('../data/val1m.parquet').drop(columns='index')
test = pd.read_parquet('../data/test1m.parquet').drop(columns='index')

def to_cornac_Dataset(x):
    """Build a ``cornac.data.Dataset`` from the rows of ``x``.

    Args:
        x: Object exposing ``itertuples(index=False)`` (a pandas DataFrame in
            this notebook). Each row is handed to ``Dataset.from_uir`` as one
            tuple, so the columns are presumably ordered (user, item, rating)
            -- TODO confirm against the saved files.

    Returns:
        The object returned by ``cornac.data.Dataset.from_uir``.
    """
    uir_rows = x.itertuples(index=False)
    return cornac.data.Dataset.from_uir(uir_rows)

# Wrap each split in a Cornac Dataset via the helper above.
# The original cell built c_train a second time with the exact expression the
# helper already runs; that redundant pass over the training data is removed.
# NOTE(review): each call builds its Dataset independently -- confirm nothing
# downstream assumes c_val / c_test share c_train's internal user/item indices.
c_train = to_cornac_Dataset(train)
c_val = to_cornac_Dataset(val)
c_test = to_cornac_Dataset(test)


In [4]:
# Load the frames that are scored by the trained models below.
# Each file carries an extra 'index' column, which is dropped here.
# `columns=` is used instead of the positional axis argument (`.drop('index', 1)`),
# which is deprecated and rejected by pandas 2.x.
# NOTE(review): 'full_indices1mk' looks like it may be a typo for '1m' -- the path
# is left untouched; confirm against the files on disk.
full = pd.read_feather('../data/full_indices1mk.feather').drop(columns='index')
i_100k = pd.read_feather('../data/full_indices100k.feather').drop(columns='index')

### Train using selected hyperparameters

In [5]:
# Pre-selected BiVAE hyperparameters, gathered in one place for readability.
bivae_params = dict(
    k=30,
    encoder_structure=[200],
    act_fn='tanh',
    likelihood='pois',
    n_epochs=400,
    batch_size=64,
    learning_rate=0.00253293728177303,
    seed=SEED,
    # Use the GPU only when PyTorch reports that CUDA is available.
    use_gpu=torch.cuda.is_available(),
    verbose=True,
)
bivae = cornac.models.BiVAECF(**bivae_params)

# Train on the Cornac training set; kept as the cell's last expression so the
# fitted model's repr is still displayed.
bivae.fit(c_train)

  0%|          | 0/400 [00:00<?, ?it/s]

<cornac.models.bivaecf.recom_bivaecf.BiVAECF at 0x7ff6bd033e10>

In [6]:
# Score both index frames with the trained BiVAE model. The same user/item
# column names apply to both calls, so they are declared once.
bivae_pair_columns = {'usercol': 'userID', 'itemcol': 'itemID'}
full_pred_bivae = predict(bivae, full, **bivae_pair_columns)
i_100k_pred_bivae = predict(bivae, i_100k, **bivae_pair_columns)

In [7]:
# Pre-selected BPR hyperparameters, gathered in one place for readability.
bpr_params = dict(
    k=120,
    learning_rate=0.009890968453212743,
    seed=SEED,
    verbose=True,
)
bpr = cornac.models.BPR(**bpr_params)

# Train on the Cornac training set; kept as the cell's last expression so the
# fitted model's repr is still displayed.
bpr.fit(c_train)

  0%|          | 0/100 [00:00<?, ?it/s]

Optimization finished!


<cornac.models.bpr.recom_bpr.BPR at 0x7ff6a2a97e80>

In [8]:
# Score both index frames with the trained BPR model. The same user/item
# column names apply to both calls, so they are declared once.
bpr_pair_columns = {'usercol': 'userID', 'itemcol': 'itemID'}
full_pred_bpr = predict(bpr, full, **bpr_pair_columns)
i_100k_pred_bpr = predict(bpr, i_100k, **bpr_pair_columns)

In [16]:
## fix the resulting datasets and export

def _unwrap_score(value):
    """Return ``value[0]`` for a non-scalar value, otherwise ``value`` unchanged.

    Letting scalars pass through makes this cell safe to re-run: a second pass
    over already-unwrapped predictions is a no-op instead of an indexing error.
    """
    return value[0] if np.ndim(value) else value


# The BiVAE 'prediction' cells arrive wrapped (the score sits at position 0),
# so unwrap them to plain scalars. Both BiVAE frames come from the same
# predict() call and are both exported below, so both get the same treatment --
# the original cell only fixed the 1M frame.
full_pred_bivae['prediction'] = [_unwrap_score(x) for x in full_pred_bivae['prediction']]
i_100k_pred_bivae['prediction'] = [_unwrap_score(x) for x in i_100k_pred_bivae['prediction']]

In [22]:
# Store BPR predictions as float64. Both BPR frames come from the same
# predict() call and are both exported below, so both are cast -- the original
# cell only cast the 1M frame, leaving the 100k export with a different dtype.
full_pred_bpr['prediction'] = full_pred_bpr['prediction'].astype(np.float64)
i_100k_pred_bpr['prediction'] = i_100k_pred_bpr['prediction'].astype(np.float64)

In [24]:
# Sanity check: print the column dtypes of both 1M prediction frames.
for pred_frame in (full_pred_bivae, full_pred_bpr):
    print(pred_frame.dtypes)

userID          int64
itemID          int64
prediction    float64
dtype: object
userID          int64
itemID          int64
prediction    float64
dtype: object


In [25]:
# Write every prediction frame to Feather. reset_index() is applied before
# writing, which also stores the previous index as an 'index' column (the
# loaders earlier in this notebook drop a column of that name after reading).
feather_exports = [
    (full_pred_bivae, '../data/full_pred_bivae_1m.feather'),
    (full_pred_bpr, '../data/full_pred_bpr_1m.feather'),
    (i_100k_pred_bivae, '../data/full_pred_bivae_100k.feather'),
    (i_100k_pred_bpr, '../data/full_pred_bpr_100k.feather'),
]
for export_frame, feather_path in feather_exports:
    export_frame.reset_index().to_feather(feather_path)
