## Comparison of models on the MovieLens 1M dataset


In [21]:
import buffalo
from buffalo import ALS, BPRMF, CFR, ALSOption, BPRMFOption, CFROption 
from buffalo import MatrixMarketOptions, StreamOptions
from buffalo import aux, log
log.set_log_level(1)  # raise the level to 3 or higher to see more detailed log output

In [22]:
# Start each model from the defaults shipped with the library.
als_opt = ALSOption().get_default_option()
bpr_opt = BPRMFOption().get_default_option()
cfr_opt = CFROption().get_default_option()

# Settings shared by all three models: four workers and top-10 validation.
# A fresh Option is created per model so no validation object is shared.
for _model_opt in (als_opt, bpr_opt, cfr_opt):
    _model_opt.num_workers = 4
    _model_opt.validation = aux.Option({'topk': 10})

# ALS-specific values (reg_u / reg_i are presumably the user / item
# regularization strengths -- confirm against the buffalo docs).
als_opt.reg_u = als_opt.reg_i = 0.06

# BPRMF-specific values.
# NOTE(review): use_bias is switched off while reg_b is still set; reg_b
# presumably regularizes the bias term -- confirm whether it has any effect.
bpr_opt.use_bias = False
bpr_opt.reg_u = 0.02
bpr_opt.reg_i = 0.02
bpr_opt.reg_j = 0.02
bpr_opt.reg_b = 0.1

# CFR-specific values.
cfr_opt.reg_u = cfr_opt.reg_i = 0.06
cfr_opt.reg_c = 0.05

In [23]:
# Options for the MatrixMarket-format copy of the ml-1m data.
data_opt = MatrixMarketOptions().get_default_option()

# Validation split: p = 0.1, capped at 10000 samples.
data_opt.data.validation.max_samples = 10000
data_opt.data.validation.p = 0.1

# Path handed to the data layer (presumably where the converted database
# is stored -- confirm against the buffalo docs).
data_opt.data.path = '.4_mm.h5py'

# Input files: the matrix plus the uid / iid files next to it.
data_opt.input.uid = 'data/ml-1m/uid'
data_opt.input.iid = 'data/ml-1m/iid'
data_opt.input.main = 'data/ml-1m/main.mtx'

To use validation, set the model option's `validation` field and the data option's `data.validation` fields as follows:
```python
    option.validation = aux.Option({'topk': 10}) # which metrics will be used:
                                                 # if topk is set to 10, NDCG@10 and MAP@10 are calculated
    data_option.data.validation.p # fraction of the data used for validation
    data_option.data.validation.max_samples # maximum number of validation samples
```
The total number of validation samples is $\min(\text{nnz} \times p, \text{max_samples})$, where $\text{nnz}$ is the number of non-zero entries in the input data.

In [24]:
# Load the dataset described by the current data_opt (the MatrixMarket
# options at this point in the notebook) and call create() on it.
# NOTE(review): create() presumably builds the database at data_opt.data.path
# -- confirm against the buffalo docs.
mm_data = buffalo.data.load(data_opt)
mm_data.create()

In [25]:
# Options for the stream-format copy of the ml-1m data.
# This rebinds data_opt: the MatrixMarket options built earlier are no longer
# reachable under this name, so mm_data must already have been loaded from
# them before this cell runs.
data_opt = StreamOptions().get_default_option()

# Input files: the stream file plus the same uid / iid files.
data_opt.input.main = 'data/ml-1m/stream'
data_opt.input.iid = 'data/ml-1m/iid'
data_opt.input.uid = 'data/ml-1m/uid'

# Storage and preprocessing settings.
data_opt.data.path = '.4_stesam.h5py'
data_opt.data.internal_data_type = "matrix"
data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})
data_opt.data.sppmi = {"windows": 5, "k": 10}

# Validation split: "sample" strategy, p = 0.1, capped at 10000 samples.
data_opt.data.validation.name = "sample"
data_opt.data.validation.max_samples = 10000
data_opt.data.validation.p = 0.1

In [None]:
# Load the dataset described by the current data_opt (the stream options at
# this point in the notebook) and call create() on it.
stream_data = buffalo.data.load(data_opt)
stream_data.create()

In [None]:
# ALS model configured by als_opt, backed by the MatrixMarket dataset.
als = ALS(als_opt, data=mm_data)
als.initialize()

In [None]:
# BPRMF model configured by bpr_opt, backed by the MatrixMarket dataset.
bpr = BPRMF(bpr_opt, data=mm_data)
bpr.initialize()

In [None]:
# CFR model configured by cfr_opt; unlike ALS and BPRMF in this notebook it
# is backed by the stream dataset rather than the MatrixMarket one.
cfr = CFR(cfr_opt, data=stream_data)
cfr.initialize()

In [None]:
# Train ALS and keep the value returned by train() for inspection.
als_res = als.train()

In [None]:
# Train BPRMF and keep the value returned by train() for inspection.
bpr_res = bpr.train()

In [None]:
# Train CFR and keep the value returned by train() for inspection.
cfr_res = cfr.train()

In [None]:
# Display the value returned by cfr.train().
cfr_res

In [None]:
# Display the value returned by als.train().
als_res

In [None]:
# Display the value returned by bpr.train().
bpr_res