In [2]:
"""USAGE: %(program)s PATH_TO_MOVIELENS_1M_DIR
"""

from datasets_mv.movielens import fetch_movielens
from flurs.recommender.fm import FMRecommender
from flurs.evaluator import Evaluator

import logging
import os
import sys
import pickle

data = fetch_movielens(data_home='datasets/ml-latest', size='latest')

n_batch_train = int(data.n_sample * 0.2)  # 20% for pre-training to avoid cold-start
n_batch_test = int(data.n_sample * 0.1)  # 10% for evaluation of pre-training
batch_tail = n_batch_train + n_batch_test
try:

    f = open('evaluator.pkl', 'rb')
    evaluator = pickle.load(f)
    f.close()
except FileNotFoundError:
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)

    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info('running %s' % ' '.join(sys.argv))

    logging.info('converting data into FluRS input object')


    logging.info('initialize recommendation model and evaluation module')
    rec = FMRecommender(p=sum(data.contexts.values()),  # number of dimensions of input vector
                        k=40,
                        l2_reg_w0=2.,
                        l2_reg_w=8.,
                        l2_reg_V=16.,
                        learn_rate=.004)
    rec.initialize()
    evaluator = Evaluator(rec, data.can_repeat)


    # pre-train
    # 20% for batch training | 10% for batch evaluate
    # after the batch training, 10% samples are used for incremental updating
    logging.info('batch pre-training before streaming input')
    evaluator.fit(
        data.samples[:n_batch_train],
        data.samples[n_batch_train:batch_tail],
        n_epoch=1  # single pass even for batch training
    )

    f = open('evaluator.pkl', 'wb')
    pickle.dump(evaluator, f)
    f.close()

Loading ratings.
Loading movies.
creating dataset
100000
3206
200000
3912
300000
4318
400000
4782
500000
5218
600000
5559
700000
5724
800000
5856
900000
6046
1000000
6199
1100000
6397
1200000
6612
1300000
6844
1400000
7075
1500000
7298
1600000
7587
1700000
8026
1800000
8576
1900000
8989
2000000
9361
2100000
9737
2200000
10135
2300000
10518
2400000
10922
2500000
11451
2600000
12018
2700000
12594
2800000
13013
2900000
13314
3000000
13817
3100000
14246
3200000
14680
3300000
15098
3400000
15442
3500000
15717
3600000
16108
3700000
16283
3800000
16288
3900000
16295
4000000
16305
4100000
16312
4200000
16319
4300000
16322
4400000
16325
4500000
16329
4600000
16334
4700000
16339
4800000
16341
4900000
16345


2018-05-26 23:16:16,912 : INFO : initialize recommendation model and evaluation module
2018-05-26 23:16:16,913 : INFO : batch pre-training before streaming input


In [3]:
# 70% incremental evaluation and updating
logging.info('incrementally predict, evaluate and update the recommender')
res = evaluator.evaluate(data.samples[batch_tail:])

In [4]:
i = 0
for r in res:
    if i > 100:
        break
    print(r)
    i = i + 1

(0.5546993380574596, 4191, 0.27080899999999986, 0.0843540000000047)
(0.5249181914665371, 2327, 0.2674260000000004, 0.0005330000000043356)
(0.6509093120101586, 3625, 0.1328189999999978, 0.0003180000000071459)
(0.4941958935790237, 395, 0.28833099999999945, 0.0005179999999995744)
(0.5474549723055692, 2672, 0.27903599999999074, 0.0004910000000108994)
(0.4953899942730341, 753, 0.3050959999999918, 0.0006350000000026057)
(0.6527158688856121, 3633, 0.13320700000001295, 0.00025800000000231194)
(0.5527675476742692, 969, 0.24676600000000803, 0.0004809999999935144)
(0.6495800320697502, 3447, 0.1355530000000016, 0.00040799999999308056)
(0.6452069899381694, 3884, 0.1366340000000008, 0.00024500000000671207)
(0.4600389552719856, 5662, 0.40829399999999794, 0.0010309999999975616)
(0.5698084684200708, 1003, 0.28228700000001083, 0.0005110000000030368)
(0.5094462172664997, 6074, 0.28418599999999117, 0.0007950000000107593)
(0.4806501056055422, 420, 0.4167580000000015, 0.0007429999999999382)
(0.4914736908018

In [49]:
from flurs.data.entity import User, Item, Event
import numpy as np
user = User(20, np.zeros(0))
evaluator.rec.recommend(user, np.array(range(7287)), [0 for x in range(0, 63)])
movies
    

(array([2933, 2728, 6592, ..., 7075, 6099, 4975]),
 array([0.67423447, 0.69953174, 0.69977516, ..., 0.8849211 , 0.88502663,
        0.88666584]))

In [22]:
data

{'can_repeat': False,
 'contexts': {'item': 18, 'others': 32, 'user': 0},
 'n_item': 3232,
 'n_sample': 226310,
 'n_user': 6014,
 'samples': [<flurs.data.entity.Event at 0x1512efacc0>,
  <flurs.data.entity.Event at 0x1512efad68>,
  <flurs.data.entity.Event at 0x1512efae10>,
  <flurs.data.entity.Event at 0x1512efaeb8>,
  <flurs.data.entity.Event at 0x1512efaf60>,
  <flurs.data.entity.Event at 0x1512efaa20>,
  <flurs.data.entity.Event at 0x1512efaba8>,
  <flurs.data.entity.Event at 0x110a309e8>,
  <flurs.data.entity.Event at 0x1512e634a8>,
  <flurs.data.entity.Event at 0x1512e65dd8>,
  <flurs.data.entity.Event at 0x1512e65e48>,
  <flurs.data.entity.Event at 0x1512e65b38>,
  <flurs.data.entity.Event at 0x1512e65e10>,
  <flurs.data.entity.Event at 0x1512e65d68>,
  <flurs.data.entity.Event at 0x1512e65ac8>,
  <flurs.data.entity.Event at 0x1512e65e80>,
  <flurs.data.entity.Event at 0x1517ef50b8>,
  <flurs.data.entity.Event at 0x1517ef5160>,
  <flurs.data.entity.Event at 0x1517ef5208>,
  <flu