In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path
import pandas as pd
import pickle as pkl

sys.path.append('..')
from recommenders.matrix_based.recommenders import recommend_bpr_user_index, recommend_transe_user_index, recommend_rotate_user_index
import numpy as np
from evaluation_metrics import ndcg, calculate_precision_at_k, calculate_mrr, calculate_custom_precision_at_k, calculate_average_ndcg_at_k, calculate_average_precision_at_k, calculate_average_mrr, calculate_average_custom_precision_at_k

In [2]:
# Directory holding the GCF dataset files read below (test split, entity2id mapping, embeddings).
base_data_path = Path('../../datasets/GCF')
# Offset subtracted from item entity ids further down to turn them into item indices.
# NOTE(review): presumably the number of user entities in entity2id — confirm against the mapping.
n_users = 6863

In [3]:
# Load the test-split interactions (columns: user_id, item_id, rating).
test_df = pd.read_feather(base_data_path / 'test.feather')

In [4]:
# Preview the first raw test interactions (one row per user/course pair).
test_df.head()

Unnamed: 0,user_id,item_id,rating
0,7273650.161191158,powerpoint2013,1
1,38700632.16119125,wordxp,1
2,23529408.161176164,googlespreadsheets,1
3,23529408.161176164,googleaccount,1
4,23529408.161176164,powerpoint2016,1


In [5]:
# Collapse to one row per user: item_id and rating become per-user lists.
test_df_courses = test_df.groupby('user_id', as_index=False).agg(list)

In [6]:
# Load the mapping used below to translate raw user ids and course names into entity ids.
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
with (base_data_path / 'entity2id.pkl').open('rb') as f:
    entity2id = pkl.load(f)

In [7]:
# BPR user and item factor matrices, passed to recommend_bpr_user_index below.
# NOTE(review): the "128" in the file names is presumably the embedding dimension — confirm.
bpr_user_matrix = np.load(base_data_path / 'embeddings/bpr_user_matrix_128.npy')
bpr_item_matrix = np.load(base_data_path / 'embeddings/bpr_item_matrix_128.npy')

In [8]:
# TransE node and relation embeddings, passed to recommend_transe_user_index below.
transe_node_embeddings = np.load(base_data_path / 'embeddings/node_embeddings_gcf_128_transe.npy')
transe_relation_embeddings = np.load(base_data_path / 'embeddings/relation_embeddings_gcf_128_transe.npy')

In [9]:
# RotatE node and relation embeddings, passed to recommend_rotate_user_index below.
rotate_node_embeddings = np.load(base_data_path / 'embeddings/node_embeddings_gcf_128_rotate.npy')
rotate_relation_embeddings = np.load(base_data_path / 'embeddings/relation_embeddings_gcf_128_rotate.npy')

In [10]:
# Translate raw user ids into entity ids. Series.map leaves NaN for any id that is
# missing from entity2id, so fail fast here instead of handing NaN to the recommenders.
# NOTE: this overwrites user_id in place; re-running the cell without rebuilding
# test_df_courses maps already-mapped ids again and will most likely trip the assertion.
test_df_courses['user_id'] = test_df_courses['user_id'].map(entity2id)
assert test_df_courses['user_id'].notna().all(), 'some test user ids are missing from entity2id'

In [11]:
# Inspect the grouped frame after the user-id mapping; item_id still holds course names here.
test_df_courses

Unnamed: 0,user_id,item_id,rating
0,3850,"[mobile-device-tips, typing]","[1, 1]"
1,4552,[computerbasics],[1]
2,4022,[digital-media-literacy],[1]
3,1349,"[gcfteacherguides, email101]","[1, 1]"
4,710,[googledocuments],[1]
...,...,...,...
6858,508,"[word, word2016]","[1, 1]"
6859,5971,"[buying-computers, computer-science]","[1, 1]"
6860,5890,[excel2016],[1]
6861,370,"[useinformationcorrectly, internetsafetyforkids]","[1, 1]"


In [12]:
# Convert each user's course names to item indices: look up the entity id, then
# subtract n_users. A course missing from entity2id raises KeyError.
# NOTE(review): this assumes item entity ids start right after the user ids — confirm.
test_df_courses['item_id'] = test_df_courses['item_id'].apply(
    lambda course_names: [entity2id[name] - n_users for name in course_names]
)

In [13]:
def get_recommendations_bpr(x):
    """Return the BPR recommender's output for a single user index.

    Thin adapter for ``Series.apply``: binds the notebook-level BPR user and
    item matrices plus the constant ``20`` and forwards ``x`` as the last
    argument of ``recommend_bpr_user_index``.

    NOTE(review): ``20`` is presumably the number of items to recommend —
    confirm against ``recommend_bpr_user_index``.
    """
    recommendations = recommend_bpr_user_index(
        bpr_user_matrix,
        bpr_item_matrix,
        20,
        x,
    )
    return recommendations

In [14]:
def get_recommendations_transe(x):
    """Return the TransE recommender's output for a single user index.

    Binds the notebook-level TransE node embeddings, the ``[-1, :]`` slice of
    the relation embeddings (the last row, assuming a 2-D array) and the
    constant ``20``, then forwards ``x`` as the last argument of
    ``recommend_transe_user_index``.

    NOTE(review): the last relation is presumably the user-item interaction
    relation, and ``20`` the number of items to recommend — confirm both.
    """
    last_relation = transe_relation_embeddings[-1, :]
    recommendations = recommend_transe_user_index(
        transe_node_embeddings,
        last_relation,
        20,
        x,
    )
    return recommendations

In [15]:
def get_recommendations_rotate(x):
    """Return the RotatE recommender's output for a single user index.

    Binds the notebook-level RotatE node embeddings, the ``[-1, :]`` slice of
    the relation embeddings (the last row, assuming a 2-D array) and the
    constant ``20``, then forwards ``x`` as the last argument of
    ``recommend_rotate_user_index``.

    NOTE(review): the last relation is presumably the user-item interaction
    relation, and ``20`` the number of items to recommend — confirm both.
    """
    last_relation = rotate_relation_embeddings[-1, :]
    recommendations = recommend_rotate_user_index(
        rotate_node_embeddings,
        last_relation,
        20,
        x,
    )
    return recommendations

In [16]:
def calculate_average_ndcg_df(x, k=5):
    """Compute NDCG@k for one row of the evaluation frame.

    Despite "average" in the name, this scores a single row; the notebook
    averages the resulting column separately.

    Args:
        x: Row (or any mapping) with ``'predicted'`` and ``'item_id'`` entries.
        k: Cut-off forwarded to ``ndcg`` as a keyword argument.

    Returns:
        Whatever ``ndcg`` returns for this row.

    NOTE(review): ``ndcg`` receives the predictions first and the ground truth
    second, the reverse of the other metric wrappers in this notebook. The
    stored outputs agree with ``calculate_average_ndcg_at_k``, so this
    presumably matches ``ndcg``'s signature — confirm.
    """
    ranked_items = x['predicted']
    relevant_items = x['item_id']
    return ndcg(ranked_items, relevant_items, k=k)

In [17]:
def calculate_average_mrr_df(x):
    """Compute the reciprocal-rank score for one row of the evaluation frame.

    Despite "average" in the name, this scores a single row; the notebook
    averages the resulting column separately.

    Args:
        x: Row (or any mapping) with ``'item_id'`` and ``'predicted'`` entries.

    Returns:
        Whatever ``calculate_mrr`` returns for (ground truth, predictions).
    """
    relevant_items = x['item_id']
    ranked_items = x['predicted']
    return calculate_mrr(relevant_items, ranked_items)

In [18]:
def calculate_precision_df(x, k=5):
    """Compute precision@k for one row of the evaluation frame.

    Args:
        x: Row (or any mapping) with ``'item_id'`` and ``'predicted'`` entries.
        k: Cut-off forwarded to ``calculate_precision_at_k`` as a keyword argument.

    Returns:
        Whatever ``calculate_precision_at_k`` returns for
        (ground truth, predictions).
    """
    relevant_items = x['item_id']
    ranked_items = x['predicted']
    return calculate_precision_at_k(relevant_items, ranked_items, k=k)

In [19]:
def calculate_custom_precision_df(x, k=5):
    """Compute the project's custom precision@k for one row of the evaluation frame.

    Args:
        x: Row (or any mapping) with ``'item_id'`` and ``'predicted'`` entries.
        k: Cut-off forwarded to ``calculate_custom_precision_at_k`` as a
            keyword argument.

    Returns:
        Whatever ``calculate_custom_precision_at_k`` returns for
        (ground truth, predictions).
    """
    relevant_items = x['item_id']
    ranked_items = x['predicted']
    return calculate_custom_precision_at_k(relevant_items, ranked_items, k=k)

# BPR Measurement

In [20]:
# Run the BPR recommender once per test user and store its output as a list-valued column.
test_df_courses['predicted'] = test_df_courses['user_id'].apply(get_recommendations_bpr)

In [21]:
# Inspect the frame: item_id now holds item indices and predicted holds the BPR recommendations.
test_df_courses

Unnamed: 0,user_id,item_id,rating,predicted
0,3850,"[45, 33]","[1, 1]","[37, 33, 2, 9, 39, 36, 43, 30, 84, 67, 44, 35,..."
1,4552,[9],[1],"[9, 37, 39, 2, 7, 36, 43, 8, 12, 31, 33, 13, 3..."
2,4022,[22],[1],"[8, 13, 7, 32, 12, 31, 9, 1, 3, 23, 0, 39, 54,..."
3,1349,"[38, 30]","[1, 1]","[104, 187, 113, 150, 189, 178, 184, 144, 188, ..."
4,710,[54],[1],"[13, 7, 8, 12, 9, 1, 54, 32, 31, 2, 39, 0, 74,..."
...,...,...,...,...
6858,508,"[7, 8]","[1, 1]","[35, 128, 116, 68, 21, 84, 22, 101, 19, 30, 45..."
6859,5971,"[140, 62]","[1, 1]","[9, 7, 8, 33, 12, 2, 13, 30, 43, 39, 36, 31, 5..."
6860,5890,[13],[1],"[7, 8, 13, 9, 12, 32, 31, 39, 2, 1, 43, 54, 23..."
6861,370,"[53, 20]","[1, 1]","[21, 84, 30, 35, 33, 77, 36, 2, 54, 40, 128, 3..."


In [22]:
# Per-user lists of held-out item indices, in frame row order.
ground_truth = test_df_courses['item_id'].tolist()

In [23]:
# Per-user BPR recommendation lists, aligned row-for-row with ground_truth.
predicted = test_df_courses['predicted'].tolist()

In [24]:
# Aggregate NDCG@5 for BPR over all test users.
calculate_average_ndcg_at_k(ground_truth, predicted, 5)

0.20124410257829725

In [25]:
# Aggregate MRR for BPR over all test users.
calculate_average_mrr(ground_truth, predicted)

0.14719306878852864

In [26]:
# Aggregate custom precision@5 for BPR (project-specific variant defined in evaluation_metrics).
calculate_average_custom_precision_at_k(ground_truth, predicted, 5)

0.2418184467434066

In [27]:
# Aggregate precision@5 for BPR over all test users.
calculate_average_precision_at_k(ground_truth, predicted, 5)

0.19833891884017188

In [28]:
# Per-user NDCG (wrapper default k=5) for the BPR predictions, stored as a column.
test_df_courses['ndcg_at_5'] = test_df_courses.apply(calculate_average_ndcg_df, axis=1)

In [29]:
# Per-user reciprocal-rank score for the BPR predictions, stored as a column.
test_df_courses['mrr'] = test_df_courses.apply(calculate_average_mrr_df, axis=1)

In [30]:
# Per-user precision (wrapper default k=5) for the BPR predictions, stored as a column.
test_df_courses['precision_at_5'] = test_df_courses.apply(calculate_precision_df, axis=1)

In [31]:
# Per-user custom precision (wrapper default k=5) for the BPR predictions, stored as a column.
test_df_courses['custom_precision_at_5'] = test_df_courses.apply(calculate_custom_precision_df, axis=1)

In [32]:
# Mean of the per-user NDCG@5 column; cross-checks the aggregate NDCG@5 value for BPR.
test_df_courses['ndcg_at_5'].mean()

np.float64(0.2012441025782957)

In [33]:
# Mean of the per-user MRR column; cross-checks the aggregate MRR value for BPR.
test_df_courses['mrr'].mean()

np.float64(0.14719306878852798)

In [34]:
# Mean of the per-user precision@5 column; cross-checks the aggregate precision@5 value for BPR.
test_df_courses['precision_at_5'].mean()

np.float64(0.19833891884015742)

In [35]:
# Mean of the per-user custom precision@5 column; cross-checks the aggregate value for BPR.
test_df_courses['custom_precision_at_5'].mean()

np.float64(0.24181844674340666)

# TransE Measurement

In [36]:
# Run the TransE recommender per test user; this overwrites the BPR predictions in the same column.
test_df_courses['predicted'] = test_df_courses['user_id'].apply(get_recommendations_transe)

In [37]:
# Per-user lists of held-out item indices; item_id is not modified by the evaluation cells,
# so this rebuilds the same list and keeps the section runnable on its own.
ground_truth = test_df_courses['item_id'].tolist()

In [38]:
# Per-user TransE recommendation lists, aligned row-for-row with ground_truth.
predicted = test_df_courses['predicted'].tolist()

In [39]:
# Aggregate NDCG@5 for TransE over all test users.
calculate_average_ndcg_at_k(ground_truth, predicted, 5)

0.01587847033377773

In [40]:
# Aggregate MRR for TransE over all test users.
calculate_average_mrr(ground_truth, predicted)

0.026193128266863316

In [41]:
# Aggregate precision@5 for TransE over all test users.
# This cell previously repeated the NDCG@5 call from In [39], so the TransE section never
# reported calculate_average_precision_at_k even though the BPR section does.
# The stored output belonged to the duplicated NDCG call and was cleared — re-run to populate.
calculate_average_precision_at_k(ground_truth, predicted, 5)

In [42]:
# Aggregate custom precision@5 for TransE (project-specific variant defined in evaluation_metrics).
calculate_average_custom_precision_at_k(ground_truth, predicted, 5)

0.01942056437903735

In [43]:
# Recompute per-user NDCG (k=5) from the TransE predictions, replacing the BPR values in this column.
test_df_courses['ndcg_at_5'] = test_df_courses.apply(calculate_average_ndcg_df, axis=1)

In [44]:
# Recompute the per-user reciprocal-rank score from the TransE predictions, replacing the BPR values.
test_df_courses['mrr'] = test_df_courses.apply(calculate_average_mrr_df, axis=1)

In [45]:
# Recompute per-user precision (k=5) from the TransE predictions, replacing the BPR values.
test_df_courses['precision_at_5'] = test_df_courses.apply(calculate_precision_df, axis=1)

In [46]:
# Recompute per-user custom precision (k=5) from the TransE predictions, replacing the BPR values.
test_df_courses['custom_precision_at_5'] = test_df_courses.apply(calculate_custom_precision_df, axis=1)

In [47]:
# Mean of the per-user NDCG@5 column for TransE; cross-checks the aggregate NDCG@5 value.
test_df_courses['ndcg_at_5'].mean()

np.float64(0.015878470333777747)

In [48]:
# Mean of the per-user MRR column for TransE; cross-checks the aggregate MRR value.
test_df_courses['mrr'].mean()

np.float64(0.026193128266863274)

In [49]:
# Mean of the per-user precision@5 column for TransE.
test_df_courses['precision_at_5'].mean()

np.float64(0.04720967506921171)

In [50]:
# Mean of the per-user custom precision@5 column for TransE; cross-checks the aggregate value.
test_df_courses['custom_precision_at_5'].mean()

np.float64(0.01942056437903735)

# RotatE Measurement

In [51]:
# Run the RotatE recommender per test user; this overwrites the TransE predictions in the same column.
test_df_courses['predicted'] = test_df_courses['user_id'].apply(get_recommendations_rotate)

In [52]:
# Per-user lists of held-out item indices; item_id is not modified by the evaluation cells,
# so this rebuilds the same list and keeps the section runnable on its own.
ground_truth = test_df_courses['item_id'].tolist()

In [53]:
# Per-user RotatE recommendation lists, aligned row-for-row with ground_truth.
predicted = test_df_courses['predicted'].tolist()

In [54]:
# Aggregate NDCG@5 for RotatE over all test users.
calculate_average_ndcg_at_k(ground_truth, predicted, 5)

0.029289252585624145

In [55]:
# Aggregate MRR for RotatE over all test users.
calculate_average_mrr(ground_truth, predicted)

0.031859107622635656

In [56]:
# Aggregate precision@5 for RotatE over all test users.
# This cell previously repeated the NDCG@5 call from In [54], so the RotatE section never
# reported calculate_average_precision_at_k even though the BPR section does.
# The stored output belonged to the duplicated NDCG call and was cleared — re-run to populate.
calculate_average_precision_at_k(ground_truth, predicted, 5)

In [57]:
# Aggregate custom precision@5 for RotatE (project-specific variant defined in evaluation_metrics).
calculate_average_custom_precision_at_k(ground_truth, predicted, 5)

0.02895478167953763

In [58]:
# Recompute per-user NDCG (k=5) from the RotatE predictions, replacing the TransE values in this column.
test_df_courses['ndcg_at_5'] = test_df_courses.apply(calculate_average_ndcg_df, axis=1)

In [59]:
# Recompute the per-user reciprocal-rank score from the RotatE predictions, replacing the TransE values.
test_df_courses['mrr'] = test_df_courses.apply(calculate_average_mrr_df, axis=1)

In [60]:
# Recompute per-user precision (k=5) from the RotatE predictions, replacing the TransE values.
test_df_courses['precision_at_5'] = test_df_courses.apply(calculate_precision_df, axis=1)

In [61]:
# Recompute per-user custom precision (k=5) from the RotatE predictions, replacing the TransE values.
test_df_courses['custom_precision_at_5'] = test_df_courses.apply(calculate_custom_precision_df, axis=1)

In [62]:
# Mean of the per-user NDCG@5 column for RotatE; cross-checks the aggregate NDCG@5 value.
test_df_courses['ndcg_at_5'].mean()

np.float64(0.02928925258562421)

In [63]:
# Mean of the per-user MRR column for RotatE; cross-checks the aggregate MRR value.
test_df_courses['mrr'].mean()

np.float64(0.031859107622635656)

In [64]:
# Mean of the per-user precision@5 column for RotatE.
test_df_courses['precision_at_5'].mean()

np.float64(0.03794259070377386)

In [65]:
# Mean of the per-user custom precision@5 column for RotatE; cross-checks the aggregate value.
test_df_courses['custom_precision_at_5'].mean()

np.float64(0.028954781679537616)