In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path
import pandas as pd
import pickle as pkl

sys.path.append('..')
from recommenders.matrix_based.recommenders import recommend_bpr_user_index, recommend_transe_user_index, recommend_rotate_user_index
import numpy as np
from evaluation_metrics import ndcg, calculate_precision_at_k, calculate_mrr

In [2]:
# Directory holding the GCF dataset artifacts read by the cells below.
base_data_path = Path('../../datasets/GCF')
# Offset subtracted from item entity ids when the `item_id` lists are converted below.
# NOTE(review): presumably the number of user entities in entity2id -- confirm;
# it is hard-coded for this dataset.
n_users = 6863

In [3]:
# Load the test split from its feather file.
test_df = pd.read_feather(base_data_path / 'test.feather')

In [4]:
# Preview the first rows of the test interactions.
test_df.head()

Unnamed: 0,user_id,item_id,rating
0,7273650.161191158,powerpoint2013,1
1,38700632.16119125,wordxp,1
2,23529408.161176164,googlespreadsheets,1
3,23529408.161176164,googleaccount,1
4,23529408.161176164,powerpoint2016,1


In [5]:
# Collapse to one row per user_id; every other column becomes a Python list
# of that user's values.
test_df_courses = test_df.groupby('user_id', as_index=False).agg(list)

In [6]:
# Read the pickled entity -> id lookup stored next to the dataset.
# NOTE(review): unpickling can execute arbitrary code; only load this file
# from a trusted source.
with (base_data_path / 'entity2id.pkl').open('rb') as f:
    entity2id = pkl.load(f)

In [7]:
# Load the BPR user and item matrices from their .npy files.
# NOTE(review): the "128" in the file names is presumably the embedding size -- confirm.
bpr_user_matrix = np.load(base_data_path / 'bpr_user_matrix_128.npy')
bpr_item_matrix = np.load(base_data_path / 'bpr_item_matrix_128.npy')

In [30]:
# Load the TransE node and relation embeddings from their .npy files.
transe_node_embeddings = np.load(base_data_path / 'node_embeddings_gcf_128_transe.npy')
transe_relation_embeddings = np.load(base_data_path / 'relation_embeddings_gcf_128_transe.npy')

In [31]:
# Load the RotatE node and relation embeddings from their .npy files.
rotate_node_embeddings = np.load(base_data_path / 'node_embeddings_gcf_128_rotate.npy')
rotate_relation_embeddings = np.load(base_data_path / 'relation_embeddings_gcf_128_rotate.npy')

In [8]:
# Replace raw user identifiers with their entity ids.
# Series.map with a dict leaves NaN wherever a key is absent, which would silently
# turn the column into floats. Map into a temporary first and fail loudly before
# overwriting the column. This also guards against an accidental re-run of the
# cell, where the already-mapped ids are not expected to be keys of entity2id.
mapped_user_ids = test_df_courses['user_id'].map(entity2id)
n_unmapped = int(mapped_user_ids.isna().sum())
assert n_unmapped == 0, f'{n_unmapped} user_id values have no entry in entity2id'
test_df_courses['user_id'] = mapped_user_ids

In [9]:
# Translate every entry of each user's item list through entity2id, then subtract n_users.
# NOTE(review): presumably this turns global entity ids into 0-based item indices
# (items numbered after users) -- confirm.
# Not idempotent: re-running this cell feeds the already-converted integers back
# into entity2id.
test_df_courses['item_id'] = test_df_courses['item_id'].apply(lambda x: [entity2id[i] - n_users for i in x])

In [25]:
def get_recommendations_bpr(x, k=20):
    """Return the BPR recommender's output for one user.

    Thin adapter for ``Series.apply``: binds the notebook-level
    ``bpr_user_matrix`` and ``bpr_item_matrix`` and forwards the per-row value.

    Args:
        x: Value from the ``user_id`` column; passed unchanged as the last
            positional argument of ``recommend_bpr_user_index``.
        k: Passed as the third positional argument of
            ``recommend_bpr_user_index``. Defaults to 20, the value that was
            previously hard-coded. Presumably the number of items to
            recommend -- confirm against the recommender's signature.

    Returns:
        Whatever ``recommend_bpr_user_index`` returns for these arguments.
    """
    return recommend_bpr_user_index(bpr_user_matrix, bpr_item_matrix, k, x)

In [32]:
def get_recommendations_transe(x, k=20):
    """Return the TransE recommender's output for one user.

    Thin adapter for ``Series.apply``: binds the notebook-level TransE
    embeddings and forwards the per-row value.

    Args:
        x: Value from the ``user_id`` column; passed unchanged as the last
            positional argument of ``recommend_transe_user_index``.
        k: Passed as the third positional argument of
            ``recommend_transe_user_index``. Defaults to 20, the value that
            was previously hard-coded. Presumably the number of items to
            recommend -- confirm against the recommender's signature.

    Returns:
        Whatever ``recommend_transe_user_index`` returns for these arguments.
    """
    # Only the last row of the relation embeddings is used.
    # NOTE(review): presumably the user-item interaction relation -- confirm.
    relation_vector = transe_relation_embeddings[-1, :]
    return recommend_transe_user_index(transe_node_embeddings, relation_vector, k, x)

In [56]:
def get_recommendations_rotate(x, k=20):
    """Return the RotatE recommender's output for one user.

    Thin adapter for ``Series.apply``: binds the notebook-level RotatE
    embeddings and forwards the per-row value.

    Args:
        x: Value from the ``user_id`` column; passed unchanged as the last
            positional argument of ``recommend_rotate_user_index``.
        k: Passed as the third positional argument of
            ``recommend_rotate_user_index``. Defaults to 20, the value that
            was previously hard-coded. Presumably the number of items to
            recommend -- confirm against the recommender's signature.

    Returns:
        Whatever ``recommend_rotate_user_index`` returns for these arguments.
    """
    # Only the last row of the relation embeddings is used.
    # NOTE(review): presumably the user-item interaction relation -- confirm.
    relation_vector = rotate_relation_embeddings[-1, :]
    return recommend_rotate_user_index(rotate_node_embeddings, relation_vector, k, x)

In [12]:
def calculate_average_ndcg_df(x, k=5):
    """Row-wise NDCG helper intended for ``DataFrame.apply(..., axis=1)``.

    Args:
        x: A row exposing ``'predicted'`` and ``'item_id'`` entries.
        k: Forwarded to ``ndcg`` as its ``k`` keyword (default 5).

    Returns:
        The value produced by ``ndcg`` for this row.

    NOTE(review): ``ndcg`` receives (predicted, item_id), whereas the sibling
    helpers pass (item_id, predicted) to their metrics -- confirm this matches
    the signatures in ``evaluation_metrics``.
    """
    recommended = x['predicted']
    relevant = x['item_id']
    return ndcg(recommended, relevant, k=k)

In [13]:
def calculate_average_mrr_df(x):
    """Row-wise MRR helper intended for ``DataFrame.apply(..., axis=1)``.

    Args:
        x: A row exposing ``'item_id'`` and ``'predicted'`` entries.

    Returns:
        The value produced by ``calculate_mrr`` for this row.
    """
    relevant = x['item_id']
    recommended = x['predicted']
    return calculate_mrr(relevant, recommended)

In [14]:
def calculate_precision_df(x, k=5):
    """Row-wise precision helper intended for ``DataFrame.apply(..., axis=1)``.

    Args:
        x: A row exposing ``'item_id'`` and ``'predicted'`` entries.
        k: Forwarded to ``calculate_precision_at_k`` as its ``k`` keyword
            (default 5).

    Returns:
        The value produced by ``calculate_precision_at_k`` for this row.
    """
    relevant = x['item_id']
    recommended = x['predicted']
    return calculate_precision_at_k(relevant, recommended, k=k)

# BPR Measurement

In [26]:
# Run the BPR recommender for every test user and store the result per row.
# NOTE(review): this cell's execution count (26) is higher than those of the BPR
# metric cells below (15-24), so the metric values shown may come from an earlier
# prediction run -- confirm with Restart & Run All.
test_df_courses['predicted'] = test_df_courses['user_id'].apply(get_recommendations_bpr)

In [15]:
# Per-user NDCG using the helper's default k=5.
test_df_courses['ndcg_at_5'] = test_df_courses.apply(calculate_average_ndcg_df, axis=1)

In [21]:
# Per-user MRR as returned by calculate_mrr.
test_df_courses['mrr'] = test_df_courses.apply(calculate_average_mrr_df, axis=1)

In [22]:
# Per-user precision using the helper's default k=5.
test_df_courses['precision_at_5'] = test_df_courses.apply(calculate_precision_df, axis=1)

In [19]:
# BPR: mean of the per-user NDCG@5 values.
test_df_courses['ndcg_at_5'].mean()

0.2012441025782957

In [23]:
# BPR: mean of the per-user MRR values.
test_df_courses['mrr'].mean()

0.14719306878852798

In [24]:
# BPR: mean of the per-user precision@5 values.
test_df_courses['precision_at_5'].mean()

0.19833891884015742

# TransE

In [48]:
# Run the TransE recommender for every test user.
# This overwrites the 'predicted' column written in the BPR section.
test_df_courses['predicted'] = test_df_courses['user_id'].apply(get_recommendations_transe)

In [49]:
# Per-user NDCG (default k=5) for the TransE predictions; replaces the previous column values.
test_df_courses['ndcg_at_5'] = test_df_courses.apply(calculate_average_ndcg_df, axis=1)

In [50]:
# Per-user MRR for the TransE predictions; replaces the previous column values.
test_df_courses['mrr'] = test_df_courses.apply(calculate_average_mrr_df, axis=1)

In [51]:
# Per-user precision (default k=5) for the TransE predictions; replaces the previous column values.
test_df_courses['precision_at_5'] = test_df_courses.apply(calculate_precision_df, axis=1)

In [52]:
# TransE: mean of the per-user NDCG@5 values.
test_df_courses['ndcg_at_5'].mean()

0.015878470333777747

In [53]:
# TransE: mean of the per-user MRR values.
test_df_courses['mrr'].mean()

0.026193128266863274

In [54]:
# TransE: mean of the per-user precision@5 values.
test_df_courses['precision_at_5'].mean()

0.04720967506921171

# RotatE

In [60]:
# Run the RotatE recommender for every test user.
# This overwrites the 'predicted' column written in the TransE section.
test_df_courses['predicted'] = test_df_courses['user_id'].apply(get_recommendations_rotate)

In [61]:
# Per-user NDCG (default k=5) for the RotatE predictions; replaces the previous column values.
test_df_courses['ndcg_at_5'] = test_df_courses.apply(calculate_average_ndcg_df, axis=1)

In [62]:
# Per-user MRR for the RotatE predictions; replaces the previous column values.
test_df_courses['mrr'] = test_df_courses.apply(calculate_average_mrr_df, axis=1)

In [63]:
# Per-user precision (default k=5) for the RotatE predictions; replaces the previous column values.
test_df_courses['precision_at_5'] = test_df_courses.apply(calculate_precision_df, axis=1)

In [64]:
# RotatE: mean of the per-user NDCG@5 values.
test_df_courses['ndcg_at_5'].mean()

0.02928925258562421

In [65]:
# RotatE: mean of the per-user MRR values.
test_df_courses['mrr'].mean()

0.031859107622635656

In [66]:
# RotatE: mean of the per-user precision@5 values.
test_df_courses['precision_at_5'].mean()

0.03794259070377386