In [8]:
import numpy as np
import cornac
from cornac.datasets import movielens, amazon_digital_music
from cornac.eval_methods import RatioSplit
from cornac.hyperopt import Discrete, Continuous
from cornac.hyperopt import GridSearch, RandomSearch

from cornac.models import ItemKNN, MF, BPR, BiVAECF, NeuMF
from cornac.metrics import MAE, RMSE, Precision, Recall, NDCG, AUC, MAP

In [9]:
SEED = 1066
RS_TRAILS = 10

### Prepare datasets with train / test split


In [10]:
ml_1mm = movielens.load_feedback(variant="1m")

ml_rs = RatioSplit(
    data=ml_1mm, 
    test_size=0.15, 
    val_size=0.1, 
    seed=SEED, 
    rating_threshold = 4.0
)

amzn = amazon_digital_music.load_feedback()

amzn_rs = RatioSplit(
    data=amzn,
    test_size=0.15, 
    val_size=0.1, 
    seed=SEED, 
    rating_threshold = 4.0
)

## CHANGE THIS TO CHANGE THE DATASET
rs = ml_rs

### Instantiate models

In [11]:
itemknn = ItemKNN(
    k=20, 
    similarity="cosine", 
    seed=SEED
)

mf = MF(
    k=20, 
    learning_rate=0.01, 
    early_stop=True, 
    seed=SEED
)

bpr = BPR(
    k=20, 
    learning_rate=0.01, 
    seed=SEED
)

bivae = BiVAECF(
    k=10, 
    encoder_structure=[200], 
    n_epochs=400, 
    batch_size=100, 
    learning_rate=0.001,
    use_gpu=True,
    seed=SEED
)

### Define hyperparameter search spaces

In [12]:
gs_itemknn = GridSearch(
    model=itemknn,
    space=[
        Discrete('k', np.arange(6, 21, 2))
    ],
    metric=RMSE(),
    eval_method=rs
)

rs_mf = RandomSearch(
    model=mf,
    space=[
        Discrete('k', np.arange(50, 151, 10)),
        Continuous("learning_rate", low = 0.0001, high = 0.01)
    ],
    metric=RMSE(),
    eval_method=rs,
    n_trails=RS_TRAILS
)

rs_bpr = RandomSearch(
    model=bpr,
    space=[
        Discrete('k', np.arange(50, 151, 10)),
        Continuous("learning_rate", low = 0.0001, high = 0.01)
    ],
    metric=AUC(),
    eval_method=rs,
    n_trails=RS_TRAILS
)

rs_bivae = RandomSearch(
    model=bivae,
    space=[
        Discrete('k', np.array([10, 20, 30])),
        Discrete('batch_size', np.array([16, 32, 64, 128])),
        Continuous('learning_rate', low = 0.0001, high = 0.01)
    ],
    metric=RMSE(),
    eval_method=rs,
    n_trails=RS_TRAILS
)

### Run all experiments

In [13]:
experiments_all = cornac.Experiment(
    eval_method=rs,
    models=[
        gs_itemknn,
        rs_mf,
        rs_bpr,
        rs_bivae
    ],
    metrics = [MAE(), RMSE(), Precision(k=10), Recall(k=10), NDCG(k=10), AUC(), MAP()],
    verbose = True
)

In [14]:
experiments_all.run()

Evaluating: {'k': 6}


  0%|          | 0/3673 [00:00<?, ?it/s]

Evaluating: {'k': 8}


  0%|          | 0/3673 [00:00<?, ?it/s]

Evaluating: {'k': 10}


  0%|          | 0/3673 [00:00<?, ?it/s]

Evaluating: {'k': 12}


  0%|          | 0/3673 [00:00<?, ?it/s]

Evaluating: {'k': 14}


  0%|          | 0/3673 [00:00<?, ?it/s]

Evaluating: {'k': 16}


  0%|          | 0/3673 [00:00<?, ?it/s]

Evaluating: {'k': 18}


  0%|          | 0/3673 [00:00<?, ?it/s]

Evaluating: {'k': 20}


  0%|          | 0/3673 [00:00<?, ?it/s]

Best parameter settings: {'k': 20}
RMSE = 1.0123

VALIDATION:
...
                     |    MAE |   RMSE |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Time (s)
-------------------- + ------ + ------ + ------ + ------ + ------- + ------------ + --------- + --------
GridSearch_ItemKNN   | 0.8012 | 0.9647 | 0.7436 | 0.0129 |  0.0059 |       0.0068 |    0.0046 | 286.3399
RandomSearch_MF      | 0.6969 | 0.8343 | 0.7882 | 0.0291 |  0.0383 |       0.0293 |    0.0382 |  32.3292
RandomSearch_BPR     | 1.7668 | 1.9728 | 0.9355 | 0.0681 |  0.0791 |       0.0533 |    0.0915 |  32.6402
RandomSearch_BiVAECF | 1.6557 | 1.9022 | 0.9326 | 0.0641 |  0.0737 |       0.0497 |    0.0869 |  24.6502

TEST:
...
                     |    MAE |   RMSE |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
-------------------- + ------ + ------ + ------ + ------ + ------- + ------------ + --------- + --------- + --------
GridSearch_ItemKNN   | 0.7914 | 0.9673 | 0.7401 | 0.0166 | 

When done: print out what the best hyperparameters were, predict on full matrix for lassonet

In [16]:
print('BiVAECF:', rs_bivae.best_params)
print('MF:', rs_mf.best_params)
print('BPR:', rs_bpr.best_params)
print('ItemKNN:', gs_itemknn.best_params)

BiVAECF: {'batch_size': 64, 'k': 30, 'learning_rate': 0.00253293728177303}
MF: {'k': 150, 'learning_rate': 0.008371656128287192}
BPR: {'k': 120, 'learning_rate': 0.009890968453212743}
ItemKNN: {'k': 20}
