In [2]:
"""USAGE: %(program)s PATH_TO_MOVIELENS_1M_DIR
"""

from datasets_mv.movielens import fetch_movielens
from flurs.recommender.fm import FMRecommender
from flurs.evaluator import Evaluator

import logging
import os
import sys
import pickle

# Load the MovieLens dataset and derive the sample-index boundaries of the
# pre-training split.
data = fetch_movielens(size='latest', data_home='datasets/ml-latest')

# The first 20% of the samples pre-train the model (to avoid cold-start) and
# the following 10% evaluate that pre-training; `batch_tail` is the index of
# the first sample after both slices.
total_samples = data.n_sample
n_batch_train = int(total_samples * 0.2)
n_batch_test = int(total_samples * 0.1)
batch_tail = n_batch_test + n_batch_train
# Configure logging before touching the cache so that log messages are emitted
# on both paths (cache hit and cache miss); later cells also call logging.info.
program = os.path.basename(sys.argv[0])
logger = logging.getLogger(program)

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')
logging.root.setLevel(level=logging.INFO)
logger.info('running %s', ' '.join(sys.argv))

try:
    # Reuse a previously pre-trained evaluator if one has been cached on disk.
    # NOTE(review): pickle.load can execute arbitrary code from the file; only
    # load 'evaluator.pkl' when it was written by this notebook.
    with open('evaluator.pkl', 'rb') as f:
        evaluator = pickle.load(f)
except FileNotFoundError:
    # No cache yet: build the recommender, pre-train it and cache the result.
    logging.info('converting data into FluRS input object')

    logging.info('initialize recommendation model and evaluation module')
    rec = FMRecommender(p=sum(data.contexts.values()),  # number of dimensions of input vector
                        k=40,
                        l2_reg_w0=2.,
                        l2_reg_w=8.,
                        l2_reg_V=16.,
                        learn_rate=.004)
    rec.initialize()
    evaluator = Evaluator(rec, data.can_repeat)

    # pre-train
    # 20% for batch training | 10% for batch evaluate
    # after the batch training, 10% samples are used for incremental updating
    logging.info('batch pre-training before streaming input')
    evaluator.fit(
        data.samples[:n_batch_train],
        data.samples[n_batch_train:batch_tail],
        n_epoch=1  # single pass even for batch training
    )

    # `with` guarantees the handle is closed even if pickling fails.
    with open('evaluator.pkl', 'wb') as f:
        pickle.dump(evaluator, f)

Loading ratings.
Loading movies.
creating dataset
100000
3206
200000
3912
300000
4318
400000
4782
500000
5218
600000
5559
700000
5724
800000
5856
900000
6046
1000000
6199
1100000
6397
1200000
6612
1300000
6844
1400000
7075
1500000
7298
1600000
7587
1700000
8026
1800000
8576
1900000
8989
2000000
9361
2100000
9737
2200000
10135
2300000
10518
2400000
10922
2500000
11451
2600000
12018
2700000
12594
2800000
13013
2900000
13314
3000000
13817
3100000
14246
3200000
14680
3300000
15098
3400000
15442
3500000
15717
3600000
16108
3700000
16283
3800000
16288
3900000
16295
4000000
16305
4100000
16312
4200000
16319
4300000
16322
4400000
16325
4500000
16329
4600000
16334
4700000
16339
4800000
16341
4900000
16345


2018-05-26 23:16:16,912 : INFO : initialize recommendation model and evaluation module
2018-05-26 23:16:16,913 : INFO : batch pre-training before streaming input


In [3]:
# Stream the remaining 70% of the samples (everything after the pre-training
# split) through the evaluator: predict, evaluate and update incrementally.
logging.info('incrementally predict, evaluate and update the recommender')
streaming_samples = data.samples[batch_tail:]
res = evaluator.evaluate(streaming_samples)

In [81]:
from itertools import islice

# Preview the first 101 evaluation results (the previous counter loop printed
# the rows for i = 0..100).  islice stops right after the last printed row, so
# no additional element is fetched from `res` only to be discarded.
# NOTE(review): `res` appears to be a lazy iterator whose elements are computed
# on demand -- confirm against Evaluator.evaluate.
N_PREVIEW = 101
for r in islice(res, N_PREVIEW):
    print(r)

(0.5768150621903636, 2375, 0.24264900000000011, 0.0005480000000090968)
(0.552731038649128, 5459, 0.2200019999999938, 0.0006070000000022446)
(0.4816653893306819, 3642, 0.3761570000000063, 0.0005380000000059226)
(0.5231092597839542, 1226, 0.2630789999999763, 0.0006790000000194141)
(0.46077896772388827, 1889, 0.3452270000000226, 0.0006309999999984939)
(0.6488771728684928, 51, 0.1391789999999844, 0.0004529999999931533)


KeyboardInterrupt: 

In [67]:
from flurs.data.entity import User, Item, Event
import numpy as np
# Ask the recommender to score every distinct item in the evaluator's item
# buffer for user index 20, using an all-zero context of length 63.
user = User(20, np.zeros(0))
candidate_items = np.array(list(set(evaluator.item_buffer)))
zero_context = [0] * 63
evaluator.rec.recommend(user, candidate_items, zero_context)

(array([2933, 2728, 6592, ..., 7075, 6099, 4975]),
 array([0.67423447, 0.69953174, 0.69977516, ..., 0.8849211 , 0.88502663,
        0.88666584]))

In [61]:
# Display the 'known_items' entry the recommender keeps for user index 20.
evaluator.rec.users[20]['known_items']

{5,
 12,
 14,
 15,
 17,
 18,
 19,
 26,
 36,
 39,
 47,
 55,
 62,
 71,
 90,
 97,
 101,
 104,
 105,
 111,
 120,
 123,
 124,
 128,
 130,
 139,
 149,
 162,
 169,
 181,
 185,
 187,
 202,
 207,
 213,
 221,
 224,
 227,
 291,
 304,
 312,
 318,
 340,
 352,
 356,
 379,
 386,
 396,
 397,
 480,
 483,
 509,
 536,
 555,
 557,
 600,
 640,
 681,
 696,
 713,
 728,
 756,
 762,
 770,
 793,
 812,
 824,
 827,
 977,
 982,
 1018,
 1044,
 1196,
 1202,
 1216,
 1247,
 1336,
 1368,
 1371,
 1412,
 1424,
 1740,
 2120,
 2142,
 2275,
 2284,
 2494,
 2583,
 2710,
 2745,
 2798,
 3003,
 3230,
 3328,
 3483,
 3496,
 3596,
 3734,
 3741,
 3757,
 3758,
 3780,
 3809,
 3812,
 3821,
 3895,
 3909,
 3912,
 3945,
 3951,
 3961,
 4085,
 4207,
 4242,
 4249,
 4268,
 4271,
 4344,
 4352,
 4456,
 4502,
 4503,
 4572,
 4575,
 4636,
 4757,
 4763,
 4764,
 4879,
 4938,
 4947,
 5092,
 5155,
 5297,
 5306,
 5353}

In [65]:
# Display the distinct entries currently held in the evaluator's item buffer.
set(evaluator.item_buffer)

{0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [114]:
# Number of entries in the recommender's `items` collection.
len(evaluator.rec.items)

7287

In [112]:
from datetime import datetime, timedelta
import random
# Simulate a new user who picks the movies listed in `chosen`: register the
# user and items with the recommender, wrap each pick in an Event and hand the
# events to the evaluator.
user_id = random.randint(1000000, 2000000)

# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# 'last.pckl' and 'movies.pkl' from a trusted source.
with open('last.pckl', 'rb') as f:
    last = pickle.load(f)
chosen = ['1', '2']
with open('movies.pkl', 'rb') as f:
    tfidfs = pickle.load(f)

logging.info('incrementally predict, evaluate and update the recommender')

items = []
# The new user's index is the current number of users in the recommender.
user = User(len(evaluator.rec.users), np.zeros(0))

if evaluator.rec.is_new_user(user.index):
    evaluator.rec.register_user(user)

# Materialise the iteration order of `tfidfs` once instead of rebuilding the
# list for every chosen movie; an item's index is its position in that order.
movie_ids = list(tfidfs)
for item_id in chosen:
    index = movie_ids.index(int(item_id))
    item = Item(index, tfidfs[int(item_id)])
    if evaluator.rec.is_new_item(item.index):
        evaluator.rec.register_item(item)
    items.append(item)

events = []

# Calculate time of the week (one-hot vector over the 7 weekdays)
date = datetime.now()
weekday_vec = np.zeros(7)
weekday_vec[date.weekday()] = 1

# NOTE(review): `user_id` is an int here, while another cell reads `last` with
# a string key ('2') -- confirm which key type `last` actually uses.
if user_id in last:
    last_item_vec = last[user_id]['item']
    last_weekday_vec = last[user_id]['weekday']
else:
    # No history for this user: fall back to all-zero "previous" vectors.
    last_item_vec = np.zeros(49)
    last_weekday_vec = np.zeros(7)

for item in items:
    # Context = current weekday + previous item features + previous weekday.
    others = np.concatenate((weekday_vec, last_item_vec, last_weekday_vec))
    events.append(Event(user, item, 1, others))
    # NOTE(review): `last` is updated here, but last_item_vec/last_weekday_vec
    # are not refreshed, so every event in this loop gets the same context --
    # confirm this is intended.
    last[user_id] = {'item': item.feature, 'weekday': weekday_vec}

# NOTE(review): `res` is not consumed in this cell; if evaluate() is lazy,
# nothing is evaluated until it is iterated -- confirm.
res = evaluator.evaluate(events)
candidates = list(set(evaluator.item_buffer))

In [105]:
# Sanity check: length of the weekday vector stored in `last` under key '2'.
len(last['2']['weekday'])

7

In [106]:
# Sanity check: length of the weekday vector built for the current date.
len(weekday_vec)

7

In [113]:
# Pass every event built for the simulated user to the recommender's update().
apply_update = evaluator.rec.update
for event in events:
    apply_update(event)

In [115]:
# Score every distinct item in the evaluator's item buffer for the current
# `user`, using an all-zero context of length 63.
buffered_items = np.array(list(set(evaluator.item_buffer)))
blank_context = [0] * 63
evaluator.rec.recommend(user, buffered_items, blank_context)

(array([2933, 6592, 5678, ..., 7075, 4975, 6099]),
 array([0.67191387, 0.69494213, 0.69516691, ..., 0.88462505, 0.88507318,
        0.88601459]))