## Comparison of models on the MovieLens 1M dataset


In [1]:
from buffalo.algo.als import ALS
from buffalo.algo.bpr import BPRMF
from buffalo.algo.cfr import CFR
from buffalo.algo.options import ALSOption
from buffalo.algo.options import BPRMFOption
from buffalo.algo.options import CFROption
import buffalo.data
from buffalo.data.mm import MatrixMarketOptions
from buffalo.data.stream import StreamOptions
from buffalo.misc import aux
from buffalo.misc import log
log.set_log_level(1)  # keep logging terse; set the level to 3 or higher to see more information

In [2]:
# ALS: start from the library defaults and override only what this comparison needs.
als_opt = ALSOption().get_default_option()
# With topk = 10 the validation metrics are computed at cutoff 10 (NDCG@10, MAP@10).
als_opt.validation = aux.Option({'topk': 10})
als_opt.num_workers = 4
# Same regularization strength for reg_u and reg_i.
als_opt.reg_u = als_opt.reg_i = 0.06

# BPRMF: library defaults with use_bias switched off.
bpr_opt = BPRMFOption().get_default_option()
bpr_opt.validation = aux.Option({'topk': 10})
bpr_opt.num_workers = 4
bpr_opt.use_bias = False
# One shared regularization strength for reg_u, reg_j and reg_i.
bpr_opt.reg_u = 0.02
bpr_opt.reg_j = 0.02
bpr_opt.reg_i = 0.02
# NOTE(review): reg_b is set even though use_bias is False; presumably it has
# no effect in that configuration. Confirm against the library.
bpr_opt.reg_b = 0.1

# CFR: library defaults plus the same worker count and validation cutoff as the other models.
cfr_opt = CFROption().get_default_option()
cfr_opt.validation = aux.Option({'topk': 10})
cfr_opt.num_workers = 4
# reg_u and reg_i share one value; reg_c is set separately.
cfr_opt.reg_u = cfr_opt.reg_i = 0.06
cfr_opt.reg_c = 0.05

In [3]:
# Data options for the MatrixMarket-format MovieLens 1M files.
data_opt = MatrixMarketOptions().get_default_option()

# Path stored in data.path (relative to the working directory).
data_opt.data.path = '.4_mm.h5py'

# Input files: the main matrix plus the uid and iid files.
data_opt.input.uid = 'data/ml-1m/uid'
data_opt.input.iid = 'data/ml-1m/iid'
data_opt.input.main = 'data/ml-1m/main.mtx'

# Validation split: a fraction p of the data, capped at max_samples.
data_opt.data.validation.max_samples = 10000
data_opt.data.validation.p = 0.1

To enable validation, set `validation` on the algorithm option and configure the validation split on the data option, like this:
```python
    option.validation = aux.Option({'topk': 10}) # which metrics will be used
                                                 # if topk is set to 10, NDCG@10 and MAP@10 are calculated
    data_option.data.validation.p # fraction of the data used for validation
    data_option.data.validation.max_samples # maximum number of validation samples
```
The total number of validation samples is $\min(\text{nnz} \times p, \text{max_samples})$, where $\text{nnz}$ is the number of non-zero entries in the input data.

In [4]:
# Build the data object described by the MatrixMarket options.
mm_data = buffalo.data.load(data_opt)
# NOTE(review): presumably create() builds the data at data_opt.data.path; confirm against the library.
mm_data.create()

In [5]:
# Data options for the stream-format data used by CFR.
# NOTE: this rebinds data_opt; from here on it holds the stream options,
# not the MatrixMarket options configured earlier.
data_opt = StreamOptions().get_default_option()

# Input files: the stream file plus the same uid and iid files.
data_opt.input.main = 'data/ml-1m/stream'
data_opt.input.uid = 'data/ml-1m/uid'
data_opt.input.iid = 'data/ml-1m/iid'

# Storage path and preprocessing settings.
data_opt.data.path = '.4_stesam.h5py'
data_opt.data.internal_data_type = "matrix"
data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})
data_opt.data.sppmi = {"windows": 5, "k": 10}

# Validation split: same fraction and cap as the MatrixMarket data.
data_opt.data.validation.name = "sample"
data_opt.data.validation.p = 0.1
data_opt.data.validation.max_samples = 10000

In [6]:
# Build the data object described by the stream options (data_opt was rebound in the previous cell).
stream_data = buffalo.data.load(data_opt)
# NOTE(review): presumably create() builds the data at data_opt.data.path; confirm against the library.
stream_data.create()

In [7]:
# ALS model configured by als_opt, backed by the MatrixMarket data.
als = ALS(als_opt, data=mm_data)
als.initialize()

In [8]:
# BPRMF model configured by bpr_opt, backed by the same MatrixMarket data as ALS.
bpr = BPRMF(bpr_opt, data=mm_data)
bpr.initialize()

In [9]:
# CFR model configured by cfr_opt; unlike ALS and BPRMF it is given the stream data.
cfr = CFR(cfr_opt, data=stream_data)
cfr.initialize()

In [10]:
# Train ALS and keep the returned result dict (training loss and validation metrics) for comparison below.
als_res = als.train()

In [11]:
# Train BPRMF and keep the returned result dict (training loss and validation metrics) for comparison below.
bpr_res = bpr.train()

In [12]:
# Train CFR and keep the returned result dict (training loss and validation metrics) for comparison below.
cfr_res = cfr.train()

In [13]:
# Show the CFR results (training loss and validation metrics).
cfr_res

{'train_loss': 0.0,
 'vali_ndcg': 0.10699514576320421,
 'vali_map': 0.06417434804366856,
 'vali_accuracy': 0.16763943065105189,
 'vali_rmse': 0.3810662058383934,
 'vali_error': 0.2963493851780891}

In [14]:
# Show the ALS results (training loss and validation metrics).
als_res

{'train_loss': 0.2806028804154141,
 'val_ndcg': 0.08137682343853958,
 'val_map': 0.04811094610256136,
 'val_accuracy': 0.13730048306770012,
 'val_rmse': 2.9210035418392146,
 'val_error': 2.7068358163237574}

In [15]:
# Show the BPRMF results (training loss and validation metrics).
bpr_res

{'train_loss': 0.0,
 'val_ndcg': 0.05350434864011935,
 'val_map': 0.032103983029096554,
 'val_accuracy': 0.07715742633439586,
 'val_rmse': 2.788766983419126,
 'val_error': 2.520328412902355}