In [None]:
import numpy as np
from scipy.sparse import coo_matrix, diags
from scipy.sparse.linalg import norm as spnorm

from polara import get_movielens_data
from polara.preprocessing.dataframes import leave_one_out, reindex

from dataprep import transform_indices
from evaluation import topn_recommendations, model_evaluate, downvote_seen_items

# Preparing data

In [None]:
# Load the MovieLens data; include_time=True is requested because the
# holdout split performed later targets the 'timestamp' column.
data = get_movielens_data(include_time=True)

In [None]:
# Split the data into a training part and a holdout part, targeting 'timestamp'.
training_, holdout_ = leave_one_out(
    data, target='timestamp', sample_top=True, random_state=0
)

# Sanity check: for every user, the holdout timestamp must not be earlier
# than the latest timestamp that user has in the training part.
latest_training_time = training_.groupby('userid')['timestamp'].max()
holdout_time = holdout_.set_index('userid')['timestamp']
assert holdout_time.ge(latest_training_time).all()

In [None]:
# Re-index users and movies in the training data and keep the resulting
# index (data_index) so the same mapping can be applied to the holdout.
training, data_index = transform_indices(training_, 'userid', 'movieid')
# The holdout is sorted by 'userid'; the test-user list built later is read
# from the holdout in this order.
holdout = (
    reindex(holdout_, data_index.values(), filter_invalid=True)
    .sort_values('userid')
)

# User-based KNN

In [None]:
def build_uknn_model(config, data, data_description):
    """Build the components of a user-based KNN model.

    Parameters
    ----------
    config : dict
        Not used in the function body.
    data : pandas.DataFrame
        Observed interactions. The columns named by
        ``data_description['users']``, ``['items']`` and ``['feedback']``
        supply row indices, column indices and values of the rating matrix.
    data_description : dict
        Must provide the column names under 'users', 'items', 'feedback'
        and the matrix dimensions under 'n_users' and 'n_items'.

    Returns
    -------
    tuple
        ``(user_item_mtx, user_similarity, normalizer)`` where
        ``user_item_mtx`` is the CSR rating matrix, ``user_similarity`` is the
        CSR user-to-user cosine similarity matrix, and ``normalizer`` is a 1-D
        array holding, per user, the reciprocal of the summed absolute
        similarities (0 for users with no similar users).
    """
    # get indices of observed data
    user_idx = data[data_description['users']].values
    item_idx = data[data_description['items']].values
    relscore = data[data_description['feedback']].values
    # construct rating matrix
    shape = (data_description['n_users'], data_description['n_items'])
    user_item_mtx = coo_matrix((relscore, (user_idx, item_idx)), shape=shape)
    # compute similarity matrix and normalization coefficients
    user_similarity = cosine_similarity(user_item_mtx)
    # R = K D A, with K the similarity matrix, D = diag(normalizer) and
    # A the rating matrix; here we compute the diagonal of D.
    # abs() is the public way to take element-wise absolute values of a sparse
    # matrix; asarray + ravel yields a 1-D array even for a single user.
    sim_weights = np.asarray(abs(user_similarity).sum(axis=1)).ravel()
    # `where=` without `out=` leaves the skipped slots uninitialized, so the
    # output is pre-filled with zeros for users that have no neighbours.
    normalizer = np.divide(
        1.,
        sim_weights,
        out=np.zeros_like(sim_weights, dtype=float),
        where=sim_weights > 0,
    )
    return user_item_mtx.tocsr(), user_similarity, normalizer

def cosine_similarity(matrix):
    """Compute pairwise cosine similarity between the rows of a sparse matrix.

    Each row is scaled to unit Euclidean norm and the scaled matrix is
    multiplied by its own transpose. Self-similarities on the diagonal are
    set to zero and explicit zeros are removed from the result.

    Parameters
    ----------
    matrix : scipy.sparse matrix
        2-D sparse matrix whose rows are the vectors to compare.

    Returns
    -------
    scipy.sparse.csr_matrix
        Square row-to-row similarity matrix with an empty diagonal. Rows of
        zero norm have zero similarity to every other row.
    """
    # ravel (rather than squeeze) keeps the norms 1-D for a single-row input
    row_norm = np.ravel(spnorm(matrix, axis=1))
    # `where=` without `out=` leaves the skipped slots uninitialized; pre-fill
    # with zeros so zero-norm rows get a well-defined scaling factor of 0.
    inv_norm = np.divide(
        1.,
        row_norm,
        out=np.zeros_like(row_norm, dtype=float),
        where=row_norm > 0,
    )
    matrix_normed = diags(inv_norm).dot(matrix)
    similarity = matrix_normed.dot(matrix_normed.T)
    # a row is always maximally similar to itself; exclude that from the model
    similarity.setdiag(0)
    similarity.eliminate_zeros()
    return similarity.tocsr()

def uknn_model_scoring(params, testset, testset_description):
    """Score all items for the test users with the user-based KNN model.

    Parameters
    ----------
    params : tuple
        ``(user_item_mtx, user_similarity, normalizer)`` as returned by
        ``build_uknn_model``.
    testset : any
        Not used in the function body.
    testset_description : dict
        Must provide 'test_users': indices of the users to score.

    Returns
    -------
    numpy.ndarray
        Dense score matrix with one row per test user and one column per
        column of ``user_item_mtx``.
    """
    user_item_mtx, user_similarity, normalizer = params
    test_users = testset_description['test_users']
    # K D: scale each column of the test users' similarity rows by the
    # normalization coefficient of the corresponding neighbour
    test_similarity = user_similarity[test_users, :].dot(diags(normalizer))
    # toarray() is the supported way to densify; the `.A` shortcut is gone
    # from sparse matrices in recent SciPy releases
    scores = test_similarity.dot(user_item_mtx).toarray()
    return scores


In [None]:
# Field names and dimensions consumed by the model builders and scorers.
data_description = {
    'users': data_index['users'].name,
    'items': data_index['items'].name,
    'feedback': 'rating',
    'n_users': len(data_index['users']),
    'n_items': len(data_index['items']),
    # test users are read from the holdout, in holdout row order
    'test_users': holdout[data_index['users'].name].values,
}
data_description

In [None]:
# build_uknn_model does not read its config argument, so an empty dict is passed.
uknn_params = build_uknn_model({}, training, data_description)

In [None]:
# The testset argument is not used by uknn_model_scoring (hence None);
# the users to score come from data_description['test_users'].
uknn_scores = uknn_model_scoring(uknn_params, None, data_description)

 ## Evaluation

In [None]:
# Training interactions of the test users only.
seen_data = training.query('userid in @data_description["test_users"]')
# The return value is not captured — presumably uknn_scores is modified
# in place; confirm against the evaluation module.
downvote_seen_items(uknn_scores, seen_data, data_description)

In [None]:
# Recommendation lists from the score matrix; only the scores are passed, so
# any list-size setting is left at the helper's default (see evaluation module).
uknn_recs = topn_recommendations(uknn_scores)

In [None]:
# The values returned by model_evaluate are unpacked, in order, into the
# HR, MRR and COV slots of the format string.
print('HR={:.3}, MRR={:.3}, COV={:.3}'.format(*model_evaluate(uknn_recs, holdout, data_description)))

<font color=green>

- In your opinion, how will the evaluation scores change if we sample holdout items randomly?

</font>

# Item-based KNN

In [None]:
def build_iknn_model(config, data, data_description):
    """Build the components of an item-based KNN model.

    Parameters
    ----------
    config : dict
        Not used in the function body.
    data : pandas.DataFrame
        Observed interactions. The columns named by
        ``data_description['users']``, ``['items']`` and ``['feedback']``
        supply row indices, column indices and values of the rating matrix.
    data_description : dict
        Must provide the column names under 'users', 'items', 'feedback'
        and the matrix dimensions under 'n_users' and 'n_items'.

    Returns
    -------
    tuple
        ``(user_item_mtx, item_similarity, normalizer)`` where
        ``user_item_mtx`` is the CSR rating matrix, ``item_similarity`` is the
        CSR item-to-item cosine similarity matrix, and ``normalizer`` is a 1-D
        array holding, per item, the reciprocal of the summed absolute
        similarities (0 for items with no similar items).
    """
    # get indices of observed data
    user_idx = data[data_description['users']].values
    item_idx = data[data_description['items']].values
    relscore = data[data_description['feedback']].values
    # construct rating matrix
    shape = (data_description['n_users'], data_description['n_items'])
    user_item_mtx = coo_matrix((relscore, (user_idx, item_idx)), shape=shape)
    # compute similarity matrix and normalization coefficients;
    # items are the columns of the rating matrix, hence the transpose
    item_similarity = cosine_similarity(user_item_mtx.T)
    # total absolute similarity of every item to all other items
    sim_weights = np.asarray(abs(item_similarity).sum(axis=1)).ravel()
    # `where=` without `out=` leaves the skipped slots uninitialized, so the
    # output is pre-filled with zeros for items that have no neighbours.
    normalizer = np.divide(
        1.,
        sim_weights,
        out=np.zeros_like(sim_weights, dtype=float),
        where=sim_weights > 0,
    )
    return user_item_mtx.tocsr(), item_similarity, normalizer


def iknn_model_scoring(params, testset, testset_description):
    """Score all items for the test users with the item-based KNN model.

    Scores are computed as ``A_test · S · D``: the test users' rating rows
    are multiplied by the item similarity matrix whose columns are scaled by
    the per-item normalization coefficients.

    Parameters
    ----------
    params : tuple
        ``(user_item_mtx, item_similarity, normalizer)``: the CSR rating
        matrix, the item-to-item similarity matrix, and a 1-D array with one
        normalization coefficient per item.
    testset : any
        Not used in the function body.
    testset_description : dict
        Must provide 'test_users': row indices of the users to score.

    Returns
    -------
    numpy.ndarray
        Dense score matrix with one row per test user and one column per item.
    """
    user_item_mtx, item_similarity, normalizer = params
    test_users = testset_description['test_users']
    # S D: scale every target item's column by its normalization coefficient
    test_similarity = item_similarity.dot(diags(normalizer))
    # aggregate the similarities of the items each test user has rated,
    # weighted by the user's ratings
    scores = user_item_mtx[test_users, :].dot(test_similarity).toarray()
    return scores


In [None]:
# Same call pattern as for the user-based model: empty config, training data.
iknn_params = build_iknn_model({}, training, data_description)

In [None]:
# None is passed for the testset argument; the users to score are listed
# in data_description['test_users'].
iknn_scores = iknn_model_scoring(iknn_params, None, data_description)

 ## Evaluation

In [None]:
# Reuses seen_data computed in the user-based evaluation section, so that
# cell must have been executed first.
downvote_seen_items(iknn_scores, seen_data, data_description)

In [None]:
# Recommendation lists for the item-based model, built the same way as
# for the user-based one.
iknn_recs = topn_recommendations(iknn_scores)

In [None]:
# The values returned by model_evaluate are unpacked, in order, into the
# HR, MRR and COV slots of the format string.
print('HR={:.3}, MRR={:.3}, COV={:.3}'.format(*model_evaluate(iknn_recs, holdout, data_description)))