In [1]:
%%capture
!pip install --no-cache-dir --upgrade git+https://github.com/evfro/polara.git@develop#egg=polara

In [1]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

import polara
from polara import get_movielens_data
from polara.preprocessing.dataframes import leave_one_out, reindex

from dataprep import transform_indices
from evaluation import topn_recommendations, model_evaluate, downvote_seen_items

from polara.lib.tensor import hooi
from polara.lib.sparse import tensor_outer_at

from sa_hooi import sa_hooi, form_attention_matrix, get_scaling_weights, generate_position_projector

from scipy.sparse import csr_matrix

In [2]:
def full_preproccessing():
    data = get_movielens_data(include_time=True)
    test_timepoint = data['timestamp'].quantile(
    q=0.9, interpolation='nearest'
    )

    test_data_ = data.query('timestamp >= @test_timepoint')
    train_data_ = data.query(
    'userid not in @test_data_.userid.unique() and timestamp < @test_timepoint'
    )
    
    training, data_index = transform_indices(train_data_.copy(), 'userid', 'movieid')
    test_data = reindex(test_data_, data_index['items'])

    testset_, holdout_ = leave_one_out(
    test_data, target='timestamp', sample_top=True, random_state=0
    )
    testset_valid_, holdout_valid_ = leave_one_out(
        testset_, target='timestamp', sample_top=True, random_state=0
    )

    test_users_val = np.intersect1d(testset_valid_.userid.unique(), holdout_valid_.userid.unique())
    testset_valid = testset_valid_.query('userid in @test_users_val').sort_values('userid')
    holdout_valid = holdout_valid_.query('userid in @test_users_val').sort_values('userid')

    test_users = np.intersect1d(testset_valid_.userid.unique(), holdout_valid_.userid.unique())
    testset = testset_.query('userid in @test_users').sort_values('userid')
    holdout = holdout_.query('userid in @test_users').sort_values('userid')


    assert holdout_valid.set_index('userid')['timestamp'].ge(
        testset_valid
        .groupby('userid')
        ['timestamp'].max()
    ).all()

    data_description = dict(
        users = data_index['users'].name,
        items = data_index['items'].name,
        feedback = 'rating',
        n_users = len(data_index['users']),
        n_items = len(data_index['items']),
        n_ratings = training['rating'].nunique(),
        min_rating = training['rating'].min()
        #test_users_val = holdout_valid[data_index['users'].name].drop_duplicates().values,
        #test_users_test = holdout[data_index['users'].name].drop_duplicates().values
    )

    return training, testset_valid, holdout_valid, testset, holdout, data_description

In [3]:
training, testset_valid, holdout_valid, testset, holdout, data_description = full_preproccessing()

Filtered 113 invalid observations.


In [4]:
from IPython.utils import io

def tf_model_build(config, data, data_description, attention_matrix=np.array([]), exp_decay = False, decay=1):
    userid = data_description["users"]
    itemid = data_description["items"]
    feedback = data_description["feedback"]

    idx = data[[userid, itemid, feedback]].values
    idx[:, -1] = idx[:, -1] - data_description['min_rating'] # works only for integer ratings!
    val = np.ones(idx.shape[0], dtype='f8')
    
    n_users = data_description["n_users"]
    n_items = data_description["n_items"]
    n_ratings = data_description["n_ratings"]
    shape = (n_users, n_items, n_ratings)
    core_shape = config['mlrank']
    num_iters = config["num_iters"]
    
    if (len(attention_matrix) == 0):
        attention_matrix = form_attention_matrix(
            data_description['n_ratings'],
            #config["attention_decay"],
            format = 'csr',
            decay_factor=decay,
            exponential_decay=exp_decay
        )

    item_popularity = (
        data[itemid]
        .value_counts(sort=False)
        .reindex(range(n_items))
        .fillna(1)
        .values
    )
    scaling_weights = get_scaling_weights(item_popularity, scaling=config["scaling"])

    with io.capture_output() as captured:
        u0, u1, u2 = sa_hooi(
            idx, val, shape, config["mlrank"],
            attention_matrix = attention_matrix,
            scaling_weights = scaling_weights,
            max_iters = config["num_iters"],
            parallel_ttm = False,
            randomized = config["randomized"],
            growth_tol = config["growth_tol"],
            seed = config["seed"],
            iter_callback = None,
        )
    
    return u0, u1, u2, attention_matrix
        

config = {
    "scaling": 1,
    "mlrank": (30, 30, 5),
    "n_ratings": data_description['n_ratings'],
    "num_iters": 5,
    "attention_decay": 1,
    "randomized": True,
    "growth_tol": 1e-4,
    "seed": 42
}


def tf_scoring(params, data, data_description, context=["3+4+5"]):
    user_factors, item_factors, feedback_factors, attention_matrix = params
    userid = data_description["users"]
    itemid = data_description["items"]
    feedback = data_description["feedback"]

    data = data.sort_values(userid)
    useridx = data[userid]
    
    n_users = useridx.nunique()
    n_items = data_description['n_items']
    n_ratings = data_description['n_ratings']
    
    scores = np.zeros((n_users, n_items))
    inv_attention = np.linalg.inv(attention_matrix.A)
    for i, u in tqdm(enumerate(np.unique(useridx))):
        data_u = data[data.userid==u]
        P = csr_matrix((np.ones(data_u.shape[0]), (data_u[itemid].values, data_u[feedback].values - data_description['min_rating'])), (n_items, n_ratings))
        res = item_factors @ (item_factors.T @ (P @ (attention_matrix @ (feedback_factors @ (inv_attention.T @ feedback_factors).T))))
        if (context == "5"):
            scores[i] = np.sum(res[:, -1:], axis=1)
        elif (context == "4+5"):
            scores[i] = np.sum(res[:, -2:], axis=1)
        elif (context == "3+4+5"):
            scores[i] = np.sum(res[:, -3:], axis=1)
        elif (context == "2+3+4+5"):
            scores[i] = np.sum(res[:, -4:], axis=1)
        elif (context == "3+4+5-2-1"):
            scores[i] = np.sum(res[:, 2:], axis=1) - np.sum(res[:, :2], axis=1)
        
    return scores

def make_prediction(tf_scores, holdout, data_description, mode, context=""):
    if (mode):
        print(f"for context {context} evaluation:")
    for n in [5, 10, 20]:
        tf_recs = topn_recommendations(tf_scores, n)
        hr, mrr, cov = model_evaluate(tf_recs, holdout, data_description, topn=n)
        print(f"{mode} : HR@{n} = {hr:.4f}, MRR@{n} == {mrr:.4f}, Coverage@{n} = {cov:.4f}")
        if (n == 10):
            mrr10 = mrr
    return mrr10

def valid_mlrank(mlrank):
    '''
    Only allow ranks that are suitable for truncated SVD computations
    on unfolded compressed tensor (the result of ttm product in HOOI).
    '''
    r1, r2, r3 = mlrank
    return r1*r2 > r3 and r1*r3 > r2 and r2*r3 > r1

In [9]:
from tqdm import tqdm 
from polara.evaluation.pipelines import random_grid

def full_pipeline(config, training, data_description, testset_valid, holdout_valid, testset, holdout, attention_matrix, exp_decay, decay):
    
    config["mlrank"] = (30, 30, 5)
    
    print("Starting pipeline...")
    print("Training with different context in progress...")
    print("------------------------------------------------------")
    best_mrr_context = "3+4+5"
    best_mrr = 0.0
    for context in ["5", "4+5", "3+4+5", "2+3+4+5", "3+4+5-2-1"]:
        tf_params = tf_model_build(config, training, data_description, attention_matrix=attention_matrix, exp_decay = exp_decay, decay=decay)
        seen_data = testset_valid
        tf_scores = tf_scoring(tf_params, seen_data, data_description, context)
        downvote_seen_items(tf_scores, seen_data, data_description)
        cur_mrr = make_prediction(tf_scores, holdout_valid, data_description, "Validation", context)
        print("------------------------------------------------------")
        if (cur_mrr > best_mrr):
            best_mrr = cur_mrr
            best_mrr_context = context

    print(f"Tuning model with the best context influence on MRR@10 (context is {best_mrr_context})...")

    tf_hyper = {
    'r1': range(30, 56, 5),
    'r2': range(30, 56, 5),
    'r3': range(5, 6, 5),
    }

    grid, param_names = random_grid(tf_hyper, n=0)
    tf_grid = [tuple(mlrank) for mlrank in grid if valid_mlrank(mlrank)]

    hr_tf = {}
    mrr_tf = {}
    cov_tf = {}
    for mlrank in tqdm(tf_grid):
        with io.capture_output() as captured:
            config['mlrank'] = mlrank
            tf_params = tf_model_build(config, training, data_description, attention_matrix=attention_matrix, exp_decay = exp_decay, decay=decay)
            tf_scores = tf_scoring(tf_params, seen_data, data_description, best_mrr_context)
            downvote_seen_items(tf_scores, seen_data, data_description)
            tf_recs = topn_recommendations(tf_scores, topn=10)
            hr, mrr, cov = model_evaluate(tf_recs, holdout_valid, data_description, topn=10)
            hr_tf[mlrank] = hr
            mrr_tf[mlrank] = mrr
            cov_tf[mlrank] = cov

    print(f'Best HR={pd.Series(hr_tf).max():.4f} achieved with mlrank={pd.Series(hr_tf).idxmax()}')
    print(f'Best MRR={pd.Series(mrr_tf).max():.4f} achieved with mlrank={pd.Series(mrr_tf).idxmax()}')
    print(f'COV={pd.Series(cov_tf)[pd.Series(mrr_tf).idxmax()]:.4f} (based on best HR value)')
    print("---------------------------------------------------------")
    print("Evaluation of the best model on test holdout in progress...")
    
    config["mlrank"] = pd.Series(mrr_tf).idxmax()
    tf_params = tf_model_build(config, training, data_description, attention_matrix=attention_matrix, exp_decay = exp_decay, decay=decay)

    seen_data = testset
    tf_scores = tf_scoring(tf_params, seen_data, data_description, best_mrr_context)
    downvote_seen_items(tf_scores, seen_data, data_description)
    cur_mrr = make_prediction(tf_scores, holdout, data_description, "Test", best_mrr_context)
    print("Pipeline ended.")

## Linear attention, decay factor = 1

In [10]:
full_pipeline(config, training, data_description, testset_valid, holdout_valid, testset, holdout, attention_matrix=np.array([]), exp_decay=False, decay=1)

Starting pipeline...
Training with different context in progress...
------------------------------------------------------


1137it [00:00, 1323.06it/s]


for context 5 evaluation:
Validation : HR@5 = 0.0281, MRR@5 == 0.0170, Coverage@5 = 0.1073
Validation : HR@10 = 0.0484, MRR@10 == 0.0197, Coverage@10 = 0.1513
Validation : HR@20 = 0.0906, MRR@20 == 0.0226, Coverage@20 = 0.2132
------------------------------------------------------


1137it [00:00, 1217.18it/s]


for context 4+5 evaluation:
Validation : HR@5 = 0.0281, MRR@5 == 0.0181, Coverage@5 = 0.1062
Validation : HR@10 = 0.0598, MRR@10 == 0.0220, Coverage@10 = 0.1488
Validation : HR@20 = 0.1108, MRR@20 == 0.0257, Coverage@20 = 0.2036
------------------------------------------------------


1137it [00:01, 1115.75it/s]


for context 3+4+5 evaluation:
Validation : HR@5 = 0.0317, MRR@5 == 0.0184, Coverage@5 = 0.1136
Validation : HR@10 = 0.0633, MRR@10 == 0.0225, Coverage@10 = 0.1543
Validation : HR@20 = 0.1170, MRR@20 == 0.0261, Coverage@20 = 0.2094
------------------------------------------------------


1137it [00:01, 1128.29it/s]


for context 2+3+4+5 evaluation:
Validation : HR@5 = 0.0281, MRR@5 == 0.0180, Coverage@5 = 0.1172
Validation : HR@10 = 0.0633, MRR@10 == 0.0227, Coverage@10 = 0.1574
Validation : HR@20 = 0.1161, MRR@20 == 0.0263, Coverage@20 = 0.2151
------------------------------------------------------


1137it [00:01, 1069.89it/s]


for context 3+4+5-2-1 evaluation:
Validation : HR@5 = 0.0264, MRR@5 == 0.0166, Coverage@5 = 0.1128
Validation : HR@10 = 0.0554, MRR@10 == 0.0204, Coverage@10 = 0.1527
Validation : HR@20 = 0.1020, MRR@20 == 0.0236, Coverage@20 = 0.2105
------------------------------------------------------
Tuning model with the best context influence on MRR@10 (context is 2+3+4+5)...


100%|██████████| 36/36 [03:30<00:00,  5.86s/it]


Best HR=0.0792 achieved with mlrank=(55, 50, 5)
Best MRR=0.0288 achieved with mlrank=(55, 45, 5)
COV=0.1763 (based on best HR value)
---------------------------------------------------------
Evaluation of the best model on test holdout in progress...


1137it [00:01, 1013.29it/s]


for context 2+3+4+5 evaluation:
Test : HR@5 = 0.0299, MRR@5 == 0.0134, Coverage@5 = 0.1307
Test : HR@10 = 0.0563, MRR@10 == 0.0167, Coverage@10 = 0.1755
Test : HR@20 = 0.0967, MRR@20 == 0.0196, Coverage@20 = 0.2426
Pipeline ended.


## Exponential attention, decay factor = 1

In [11]:
config

{'scaling': 1,
 'mlrank': (55, 45, 5),
 'n_ratings': 5,
 'num_iters': 5,
 'attention_decay': 1,
 'randomized': True,
 'growth_tol': 0.0001,
 'seed': 42}

In [12]:
full_pipeline(config, training, data_description, testset_valid, holdout_valid, testset, holdout, attention_matrix=np.array([]), exp_decay=True, decay=1)

Starting pipeline...
Training with different context in progress...
------------------------------------------------------


1137it [00:01, 1088.54it/s]


for context 5 evaluation:
Validation : HR@5 = 0.0325, MRR@5 == 0.0180, Coverage@5 = 0.1191
Validation : HR@10 = 0.0510, MRR@10 == 0.0203, Coverage@10 = 0.1739
Validation : HR@20 = 0.0932, MRR@20 == 0.0231, Coverage@20 = 0.2297
------------------------------------------------------


1137it [00:01, 1099.97it/s]


for context 4+5 evaluation:
Validation : HR@5 = 0.0325, MRR@5 == 0.0194, Coverage@5 = 0.1150
Validation : HR@10 = 0.0642, MRR@10 == 0.0236, Coverage@10 = 0.1659
Validation : HR@20 = 0.1152, MRR@20 == 0.0271, Coverage@20 = 0.2237
------------------------------------------------------


1137it [00:01, 1136.42it/s]


for context 3+4+5 evaluation:
Validation : HR@5 = 0.0361, MRR@5 == 0.0215, Coverage@5 = 0.1230
Validation : HR@10 = 0.0712, MRR@10 == 0.0260, Coverage@10 = 0.1678
Validation : HR@20 = 0.1196, MRR@20 == 0.0292, Coverage@20 = 0.2278
------------------------------------------------------


1137it [00:01, 1049.69it/s]


for context 2+3+4+5 evaluation:
Validation : HR@5 = 0.0343, MRR@5 == 0.0215, Coverage@5 = 0.1285
Validation : HR@10 = 0.0739, MRR@10 == 0.0265, Coverage@10 = 0.1733
Validation : HR@20 = 0.1223, MRR@20 == 0.0298, Coverage@20 = 0.2380
------------------------------------------------------


1137it [00:01, 1037.08it/s]


for context 3+4+5-2-1 evaluation:
Validation : HR@5 = 0.0317, MRR@5 == 0.0188, Coverage@5 = 0.1180
Validation : HR@10 = 0.0624, MRR@10 == 0.0228, Coverage@10 = 0.1662
Validation : HR@20 = 0.1073, MRR@20 == 0.0258, Coverage@20 = 0.2239
------------------------------------------------------
Tuning model with the best context influence on MRR@10 (context is 2+3+4+5)...


100%|██████████| 36/36 [03:43<00:00,  6.21s/it]


Best HR=0.0739 achieved with mlrank=(55, 45, 5)
Best MRR=0.0265 achieved with mlrank=(55, 45, 5)
COV=0.1733 (based on best HR value)
---------------------------------------------------------
Evaluation of the best model on test holdout in progress...


1137it [00:01, 1087.02it/s]


for context 2+3+4+5 evaluation:
Test : HR@5 = 0.0237, MRR@5 == 0.0121, Coverage@5 = 0.1298
Test : HR@10 = 0.0528, MRR@10 == 0.0160, Coverage@10 = 0.1700
Test : HR@20 = 0.0959, MRR@20 == 0.0189, Coverage@20 = 0.2369
Pipeline ended.


## Eucledian distance attention

In [None]:
eucl_matrix = np.zeros((5, 5))

for i in range(5):
    for j in range(5):
        eucl_matrix[i, j] = abs(i - j) / np.exp(abs(i - j)) if i != j else 5 + 1e-2
        
a = np.linalg.cholesky(eucl_matrix)

for i in range(5):
    a[i, i] = 1e-5

attention_matrix = csr_matrix(a)

In [None]:
full_pipeline(config, training, data_description, testset_valid, holdout_valid, testset, holdout, attention_matrix=np.array([]), exp_decay=True, decay=1)

## Rating distribution attention