In [108]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path
import pandas as pd
import pickle as pkl

sys.path.append('..')
from recommenders.matrix_based.recommenders import recommend_bpr_user_index, recommend_transe_user_index, recommend_rotate_user_index
import numpy as np
from evaluation_metrics import ndcg, calculate_precision_at_k, calculate_mrr, calculate_custom_precision_at_k, calculate_average_ndcg_at_k, calculate_average_precision_at_k, calculate_average_mrr, calculate_average_custom_precision_at_k

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Directory holding every GCF dataset artefact loaded by the cells below.
base_data_path = Path('../../datasets/GCF')
# Offset subtracted from item entity ids further down; presumably the number of
# user entities in entity2id -- TODO confirm against the dataset.
n_users = 6863

In [3]:
# Load the test interactions from the feather file in the dataset directory.
test_df = pd.read_feather(base_data_path / 'test.feather')

In [4]:
# Preview the first rows to check the column layout (user_id, item_id, rating).
test_df.head()

Unnamed: 0,user_id,item_id,rating
0,7273650.161191158,powerpoint2013,1
1,38700632.16119125,wordxp,1
2,23529408.161176164,googlespreadsheets,1
3,23529408.161176164,googleaccount,1
4,23529408.161176164,powerpoint2016,1


In [5]:
# Collapse to one row per user_id; each remaining column becomes a Python list
# of that user's values.
test_df_courses = test_df.groupby('user_id', as_index=False).agg(list)

In [6]:
# Lookup used below to translate raw user/item identifiers into numeric entity ids.
# NOTE(review): pickle.load can execute arbitrary code while deserialising --
# only load this file from a trusted source.
with open(base_data_path / 'entity2id.pkl', 'rb') as f:
    entity2id = pkl.load(f)

In [7]:
# BPR user and item matrices consumed by get_recommendations_bpr.
# The "128" in the file names is presumably the embedding size -- TODO confirm.
bpr_user_matrix = np.load(base_data_path / 'bpr_user_matrix_128.npy')
bpr_item_matrix = np.load(base_data_path / 'bpr_item_matrix_128.npy')

In [30]:
# TransE node and relation embeddings consumed by get_recommendations_transe.
transe_node_embeddings = np.load(base_data_path / 'node_embeddings_gcf_128_transe.npy')
transe_relation_embeddings = np.load(base_data_path / 'relation_embeddings_gcf_128_transe.npy')

In [31]:
# RotatE node and relation embeddings consumed by get_recommendations_rotate.
rotate_node_embeddings = np.load(base_data_path / 'node_embeddings_gcf_128_rotate.npy')
rotate_relation_embeddings = np.load(base_data_path / 'relation_embeddings_gcf_128_rotate.npy')

In [8]:
# Replace raw user ids with their entity2id values, overwriting the column in place.
# NOTE(review): Series.map yields NaN for values that are not keys of the mapping,
# so re-running this cell on already-mapped ids would most likely blank the column.
test_df_courses['user_id'] = test_df_courses['user_id'].map(entity2id)

In [9]:
# Translate every item in each user's list to its entity id and shift it down by
# n_users -- presumably because items are numbered after users in entity2id, so the
# result indexes the item matrices from 0 (TODO confirm).
# NOTE(review): overwrites the column in place, so this cell is not safe to re-run.
test_df_courses['item_id'] = test_df_courses['item_id'].apply(lambda x: [entity2id[i] - n_users for i in x])

In [25]:
def get_recommendations_bpr(x):
    """Return the BPR recommender's output for user index ``x``.

    Single-argument adapter so the function can be handed to ``Series.apply``:
    it forwards ``x`` to ``recommend_bpr_user_index`` together with the
    notebook-level BPR user/item matrices and the fixed third argument ``20``
    (presumably the number of items to recommend -- TODO confirm).
    """
    recommendations = recommend_bpr_user_index(
        bpr_user_matrix,
        bpr_item_matrix,
        20,
        x,
    )
    return recommendations

In [32]:
def get_recommendations_transe(x):
    """Return the TransE recommender's output for user index ``x``.

    Forwards ``x`` to ``recommend_transe_user_index`` with the notebook-level
    TransE node embeddings, the last row of the TransE relation embeddings
    (presumably the user-item interaction relation -- TODO confirm) and the
    fixed third argument ``20`` (presumably the number of items to recommend).
    """
    return recommend_transe_user_index(
        transe_node_embeddings,
        transe_relation_embeddings[-1, :],
        20,
        x,
    )

In [56]:
def get_recommendations_rotate(x):
    """Return the RotatE recommender's output for user index ``x``.

    Forwards ``x`` to ``recommend_rotate_user_index`` with the notebook-level
    RotatE node embeddings, the last row of the RotatE relation embeddings
    (presumably the user-item interaction relation -- TODO confirm) and the
    fixed third argument ``20`` (presumably the number of items to recommend).
    """
    return recommend_rotate_user_index(
        rotate_node_embeddings,
        rotate_relation_embeddings[-1, :],
        20,
        x,
    )

In [12]:
def calculate_average_ndcg_df(x, k=5):
    """Row-wise NDCG helper for ``DataFrame.apply(..., axis=1)``.

    Reads the ``'predicted'`` and ``'item_id'`` entries of row ``x`` and returns
    whatever ``ndcg`` yields for them at cutoff ``k``. Despite the name, no
    averaging happens here; the notebook averages the resulting column later.

    NOTE(review): ``ndcg`` receives the predictions first, whereas the sibling
    wrappers pass ``item_id`` first -- confirm this matches ``ndcg``'s signature.
    """
    predicted_items = x['predicted']
    relevant_items = x['item_id']
    return ndcg(predicted_items, relevant_items, k=k)

In [13]:
def calculate_average_mrr_df(x):
    """Row-wise MRR helper for ``DataFrame.apply(..., axis=1)``.

    Passes the row's ``'item_id'`` and ``'predicted'`` entries, in that order,
    to ``calculate_mrr`` and returns its result. Despite the name, no averaging
    happens here; the notebook averages the resulting column later.
    """
    relevant_items = x['item_id']
    predicted_items = x['predicted']
    return calculate_mrr(relevant_items, predicted_items)

In [14]:
def calculate_precision_df(x, k=5):
    """Row-wise precision@k helper for ``DataFrame.apply(..., axis=1)``.

    Passes the row's ``'item_id'`` and ``'predicted'`` entries, in that order,
    to ``calculate_precision_at_k`` with cutoff ``k`` and returns its result.
    """
    relevant_items = x['item_id']
    predicted_items = x['predicted']
    return calculate_precision_at_k(relevant_items, predicted_items, k=k)

In [68]:
def calculate_custom_precision_df(x, k=5):
    """Row-wise custom precision@k helper for ``DataFrame.apply(..., axis=1)``.

    Passes the row's ``'item_id'`` and ``'predicted'`` entries, in that order,
    to ``calculate_custom_precision_at_k`` with cutoff ``k`` and returns its
    result.
    """
    relevant_items = x['item_id']
    predicted_items = x['predicted']
    return calculate_custom_precision_at_k(relevant_items, predicted_items, k=k)

# BPR Measurement

In [104]:
# Score every test user with BPR. This overwrites the shared 'predicted' column,
# so the metric cells below reflect whichever model's prediction cell ran last.
test_df_courses['predicted'] = test_df_courses['user_id'].apply(get_recommendations_bpr)

In [105]:
# One list of held-out item ids per user, in DataFrame row order.
ground_truth = test_df_courses['item_id'].tolist()

In [106]:
# One recommendation result per user, aligned row-for-row with ground_truth.
predicted = test_df_courses['predicted'].tolist()

In [107]:
# Aggregate NDCG for BPR; the third argument (5) is presumably the cutoff k.
calculate_average_ndcg_at_k(ground_truth, predicted, 5)

0.20124410257829725

In [113]:
# Aggregate MRR for BPR over all test users.
calculate_average_mrr(ground_truth, predicted)

0.14719306878852864

In [110]:
# NOTE(review): identical to the NDCG call two cells up. calculate_average_precision_at_k
# is imported but never called, so this was possibly meant to be precision@5 -- confirm.
calculate_average_ndcg_at_k(ground_truth, predicted, 5)

0.20124410257829725

In [114]:
# Aggregate custom precision for BPR; the third argument (5) is presumably the cutoff k.
calculate_average_custom_precision_at_k(ground_truth, predicted, 5)

0.6071858759531785

In [75]:
# Per-user NDCG (default k=5) from the current 'predicted' column.
# NOTE(review): this cell's saved execution count is lower than the BPR prediction
# cell's, so the stored results predate the latest predictions -- re-run top to bottom.
test_df_courses['ndcg_at_5'] = test_df_courses.apply(calculate_average_ndcg_df, axis=1)

In [76]:
# Per-user MRR from the current 'predicted' column.
test_df_courses['mrr'] = test_df_courses.apply(calculate_average_mrr_df, axis=1)

In [77]:
# Per-user precision (default k=5) from the current 'predicted' column.
test_df_courses['precision_at_5'] = test_df_courses.apply(calculate_precision_df, axis=1)

In [78]:
# Per-user custom precision (default k=5) from the current 'predicted' column.
test_df_courses['custom_precision_at_5'] = test_df_courses.apply(calculate_custom_precision_df, axis=1)

In [79]:
# Mean of the per-user NDCG column; should agree with the aggregate NDCG up to float rounding.
test_df_courses['ndcg_at_5'].mean()

0.2012441025782957

In [80]:
# Mean of the per-user MRR column; should agree with the aggregate MRR up to float rounding.
test_df_courses['mrr'].mean()

0.14719306878852798

In [81]:
# Mean per-user precision@5 for BPR (no aggregate counterpart is computed in this section).
test_df_courses['precision_at_5'].mean()

0.19833891884015742

In [82]:
# Mean of the per-user custom precision column; should agree with the aggregate value up to float rounding.
test_df_courses['custom_precision_at_5'].mean()

0.6071858759531789

# TransE Measurement

In [115]:
# Score every test user with TransE. This overwrites the shared 'predicted' column,
# so the metric cells below reflect whichever model's prediction cell ran last.
test_df_courses['predicted'] = test_df_courses['user_id'].apply(get_recommendations_transe)

In [116]:
# One list of held-out item ids per user, in DataFrame row order
# (the 'item_id' column is not modified between model sections).
ground_truth = test_df_courses['item_id'].tolist()

In [117]:
# One TransE recommendation result per user, aligned row-for-row with ground_truth.
predicted = test_df_courses['predicted'].tolist()

In [118]:
# Aggregate NDCG for TransE; the third argument (5) is presumably the cutoff k.
calculate_average_ndcg_at_k(ground_truth, predicted, 5)

0.01587847033377773

In [119]:
# Aggregate MRR for TransE over all test users.
calculate_average_mrr(ground_truth, predicted)

0.026193128266863316

In [120]:
# NOTE(review): identical to the NDCG call two cells up. calculate_average_precision_at_k
# is imported but never called, so this was possibly meant to be precision@5 -- confirm.
calculate_average_ndcg_at_k(ground_truth, predicted, 5)

0.01587847033377773

In [121]:
# Aggregate custom precision for TransE; the third argument (5) is presumably the cutoff k.
calculate_average_custom_precision_at_k(ground_truth, predicted, 5)

0.14254941959298703

In [84]:
# Per-user NDCG (default k=5) from the current 'predicted' column; overwrites the previous model's values.
test_df_courses['ndcg_at_5'] = test_df_courses.apply(calculate_average_ndcg_df, axis=1)

In [85]:
# Per-user MRR from the current 'predicted' column; overwrites the previous model's values.
test_df_courses['mrr'] = test_df_courses.apply(calculate_average_mrr_df, axis=1)

In [86]:
# Per-user precision (default k=5) from the current 'predicted' column; overwrites the previous model's values.
test_df_courses['precision_at_5'] = test_df_courses.apply(calculate_precision_df, axis=1)

In [87]:
# Per-user custom precision (default k=5) from the current 'predicted' column; overwrites the previous model's values.
test_df_courses['custom_precision_at_5'] = test_df_courses.apply(calculate_custom_precision_df, axis=1)

In [52]:
# Mean of the per-user NDCG column.
# NOTE(review): this cell's saved execution count is lower than that of the cell
# that fills 'ndcg_at_5' in this section, so the stored output may be stale -- re-run.
test_df_courses['ndcg_at_5'].mean()

0.015878470333777747

In [53]:
# Mean of the per-user MRR column.
# NOTE(review): this cell's saved execution count is lower than that of the cell
# that fills 'mrr' in this section, so the stored output may be stale -- re-run.
test_df_courses['mrr'].mean()

0.026193128266863274

In [54]:
# Mean per-user precision@5 for TransE.
# NOTE(review): this cell's saved execution count is lower than that of the cell
# that fills 'precision_at_5' in this section, so the stored output may be stale -- re-run.
test_df_courses['precision_at_5'].mean()

0.04720967506921171

In [88]:
# Mean of the per-user custom precision column; should agree with the aggregate value up to float rounding.
test_df_courses['custom_precision_at_5'].mean()

0.14254941959298653

# RotatE Measurement

In [122]:
# Score every test user with RotatE. This overwrites the shared 'predicted' column,
# so the metric cells below reflect whichever model's prediction cell ran last.
test_df_courses['predicted'] = test_df_courses['user_id'].apply(get_recommendations_rotate)

In [123]:
# One list of held-out item ids per user, in DataFrame row order
# (the 'item_id' column is not modified between model sections).
ground_truth = test_df_courses['item_id'].tolist()

In [124]:
# One RotatE recommendation result per user, aligned row-for-row with ground_truth.
predicted = test_df_courses['predicted'].tolist()

In [125]:
# Aggregate NDCG for RotatE; the third argument (5) is presumably the cutoff k.
calculate_average_ndcg_at_k(ground_truth, predicted, 5)

0.029289252585624145

In [126]:
# Aggregate MRR for RotatE over all test users.
calculate_average_mrr(ground_truth, predicted)

0.031859107622635656

In [127]:
# NOTE(review): identical to the NDCG call two cells up. calculate_average_precision_at_k
# is imported but never called, so this was possibly meant to be precision@5 -- confirm.
calculate_average_ndcg_at_k(ground_truth, predicted, 5)

0.029289252585624145

In [128]:
# Aggregate custom precision for RotatE; the third argument (5) is presumably the cutoff k.
calculate_average_custom_precision_at_k(ground_truth, predicted, 5)

0.11166399533731634

In [90]:
# Per-user NDCG (default k=5) from the current 'predicted' column; overwrites the previous model's values.
# NOTE(review): this cell's saved execution count is lower than the RotatE prediction
# cell's, so the stored results predate the latest predictions -- re-run top to bottom.
test_df_courses['ndcg_at_5'] = test_df_courses.apply(calculate_average_ndcg_df, axis=1)

In [91]:
# Per-user MRR from the current 'predicted' column; overwrites the previous model's values.
test_df_courses['mrr'] = test_df_courses.apply(calculate_average_mrr_df, axis=1)

In [92]:
# Per-user precision (default k=5) from the current 'predicted' column; overwrites the previous model's values.
test_df_courses['precision_at_5'] = test_df_courses.apply(calculate_precision_df, axis=1)

In [93]:
# Per-user custom precision (default k=5) from the current 'predicted' column; overwrites the previous model's values.
test_df_courses['custom_precision_at_5'] = test_df_courses.apply(calculate_custom_precision_df, axis=1)

In [94]:
# Mean of the per-user NDCG column; should agree with the aggregate NDCG up to float rounding.
test_df_courses['ndcg_at_5'].mean()

0.02928925258562421

In [95]:
# Mean of the per-user MRR column; should agree with the aggregate MRR up to float rounding.
test_df_courses['mrr'].mean()

0.031859107622635656

In [96]:
# Mean per-user precision@5 for RotatE (no aggregate counterpart is computed in this section).
test_df_courses['precision_at_5'].mean()

0.03794259070377386

In [97]:
# Mean of the per-user custom precision column; should agree with the aggregate value up to float rounding.
test_df_courses['custom_precision_at_5'].mean()

0.11166399533731602