In [None]:
import numpy as np
import pandas as pd

from polara import get_movielens_data
from polara.preprocessing.dataframes import leave_one_out, reindex

from dataprep import transform_indices
from evaluation import topn_recommendations, model_evaluate, downvote_seen_items

from polara.lib.tensor import hooi
from polara.lib.sparse import tensor_outer_at

# Data preparation

In [None]:
# Load the ratings via polara's MovieLens helper. include_time=True is requested
# because the 'timestamp' column serves as the target of the leave-one-out split
# performed later in this notebook.
data = get_movielens_data(include_time=True)

In [None]:
# Split the data with polara's leave_one_out helper using 'timestamp' as the
# target column (sample_top=True presumably picks the latest interaction of
# every user for the holdout -- TODO confirm against the polara docs).
training_, holdout_ = leave_one_out(
    data, target='timestamp', sample_top=True, random_state=0
)

# Sanity check: each holdout timestamp must be >= the latest training timestamp
# of the same user. `.ge` aligns both series on 'userid', so a user that is
# present on only one side is compared against NaN and fails the assertion.
assert holdout_.set_index('userid')['timestamp'].ge(
    training_
    .groupby('userid')
    ['timestamp'].max()
).all()

In [None]:
# Re-encode user and item ids with the project helper; `data_index` is later
# accessed through its 'users' and 'items' entries.
training, data_index = transform_indices(training_, 'userid', 'movieid')
# Apply the same id mapping to the holdout. filter_invalid=True presumably drops
# rows whose ids are missing from the training index -- TODO confirm.
holdout = reindex(holdout_, data_index.values(), filter_invalid=True)
# Keep the holdout ordered by user id.
holdout = holdout.sort_values('userid')

In [None]:
# Shared description of the dataset: column names, tensor dimensions and the
# users that have a holdout interaction. It is passed to model building,
# scoring and evaluation.
data_description = dict(
    users = data_index['users'].name,
    items = data_index['items'].name,
    feedback = 'rating',
    n_users = len(data_index['users']),
    n_items = len(data_index['items']),
    # Ratings are mapped to positions `rating - min_rating`, so the rating mode
    # must span the whole min..max range. Counting distinct values would
    # undersize it whenever some rating level is absent from the training data.
    n_ratings = int(training['rating'].max() - training['rating'].min()) + 1,
    min_rating = training['rating'].min(),
    test_users = holdout[data_index['users'].name].drop_duplicates().values
)

In [None]:
def tf_model_build(config, data, data_description):
    """Factorize the (user, item, rating) interaction tensor with HOOI.

    Parameters
    ----------
    config : dict
        Must provide 'mlrank' (forwarded to `hooi` as the core shape) and
        'num_iters' (forwarded as the number of iterations).
    data : pandas.DataFrame
        Interactions holding the user, item and feedback columns named in
        `data_description`. The frame is left untouched.
    data_description : dict
        Must provide 'users', 'items', 'feedback' (column names), 'n_users',
        'n_items', 'n_ratings' (tensor dimensions) and 'min_rating'.

    Returns
    -------
    tuple
        The first three values returned by `hooi`, i.e. the factors for the
        user, item and rating modes (in the order of the tensor shape).
    """
    userid = data_description["users"]
    itemid = data_description["items"]
    feedback = data_description["feedback"]

    # Take an explicit writable copy of the coordinates. The array behind
    # `.values` may be a view of the frame, and it is read-only when pandas
    # copy-on-write is active, so writing into it can fail or alias `data`.
    idx = data[[userid, itemid, feedback]].to_numpy(copy=True)
    # Shift ratings so the smallest one lands at position 0 of the third mode.
    idx[:, -1] = idx[:, -1] - data_description['min_rating'] # works only for integer ratings!
    # Every observed (user, item, rating) triplet gets a unit weight.
    val = np.ones(idx.shape[0], dtype='f8')

    shape = (
        data_description["n_users"],
        data_description["n_items"],
        data_description["n_ratings"],
    )
    core_shape = config['mlrank']
    num_iters = config["num_iters"]

    # The fourth return value (the core) is not requested and is discarded.
    user_factors, item_factors, feedback_factors, _ = hooi(
        idx, val, shape, core_shape,
        return_core=False, num_iters=num_iters,
        parallel_ttm=False, growth_tol=0.01,
    )
    return user_factors, item_factors, feedback_factors
        

In [None]:
# Hyper-parameters consumed by tf_model_build.
config = {
    'mlrank': (10, 10, 2),  # forwarded to hooi as the core shape (presumably one rank per mode: users, items, ratings)
    "num_iters": 5,  # forwarded to hooi as the number of iterations
}

In [None]:
# Fit the tensor model on the training data; tf_params holds the three values
# returned by tf_model_build (user, item and rating-mode factors).
tf_params = tf_model_build(config, training, data_description)

In [None]:
# Display the last returned factor, i.e. the one for the rating mode.
tf_params[-1]

In [None]:
# Keep only the training interactions of users that appear in the holdout;
# these serve as the known history of the test users.
userid = data_description['users']
seen_data = training.loc[training[userid].isin(data_description['test_users'])]

In [None]:
def tf_scoring(params, data, data_description):
    """Score all items for every user found in `data`.

    Parameters
    ----------
    params : tuple
        Three factors (user, item, feedback); the user factors are not used.
    data : pandas.DataFrame
        Known interactions of the users to score. The input frame is not
        reordered; a sorted copy is used internally.
    data_description : dict
        Must provide the 'users', 'items' and 'feedback' column names and
        'min_rating'.

    Returns
    -------
    numpy.ndarray
        One row per distinct user (in ascending user-id order) and one column
        per row of the item factors.
    """
    _, item_factors, feedback_factors = params
    user_col = data_description["users"]
    item_col = data_description["items"]
    feedback_col = data_description["feedback"]

    # Sorting by user makes each user's interactions a contiguous run.
    ordered = data.sort_values(user_col)
    users = ordered[user_col].values
    items = ordered[item_col].values
    # Position of each rating along the feedback mode.
    # works only for integer ratings!
    rating_pos = ordered[feedback_col].values - data_description['min_rating']

    # One outer product of item and feedback factor rows per interaction.
    outer_at = tensor_outer_at('cpu')
    per_interaction = outer_at(
        1.0, item_factors, feedback_factors, items, rating_pos
    )

    # A new run starts right after every change in the sorted user ids;
    # summing within the runs aggregates the interactions of each user.
    run_starts = np.concatenate(([0], np.flatnonzero(np.diff(users)) + 1))
    per_user = np.add.reduceat(per_interaction, run_starts)

    # Contract the feedback axis with the factor row of the last rating
    # position, then map the result onto all items.
    last_rating_row = feedback_factors[-1, :]
    contracted = np.tensordot(per_user, last_rating_row, axes=(2, 0))
    return contracted.dot(item_factors.T)

In [None]:
# Score every item for the test users from their known training interactions.
tf_scores = tf_scoring(tf_params, seen_data, data_description)

In [None]:
# Suppress items the test users have already interacted with. The result is not
# assigned, so the helper presumably edits tf_scores in place -- TODO confirm
# in the project's evaluation module.
downvote_seen_items(tf_scores, seen_data, data_description)

In [None]:
# Build top-10 recommendation lists from the scores and evaluate them against
# the holdout. The identical cell used to be pasted three times in a row; one
# copy is enough because its inputs do not change between the repeats.
tf_recs = topn_recommendations(tf_scores, topn=10)
model_evaluate(tf_recs, holdout, data_description)