In [None]:
import pandas as pd                     
import pickle
import seaborn as sns
import matplotlib.pyplot as plt

from constants import *
from author_ranker import AuthorRanker_1, AuthorRanker_2, AuthorRanker_3
from relevance import save_query_result, run_relevance_tests

In [None]:
def _load_pickle(path):
    """Open ``path`` in binary mode and return the object unpickled from it.

    SECURITY NOTE: ``pickle.load`` can execute arbitrary code embedded in the
    file. Only point this at artifacts produced by this project's own
    pipeline, never at files from an untrusted source.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)


# Pre-computed artifacts; every *_PATH constant comes from `constants`.
docid_to_authorid = _load_pickle(DOCID_TO_AUTHORID_PATH)
author_data = _load_pickle(AUTHOR_COLLECTION_PATH)
docid_to_citation = _load_pickle(DOCID_TO_CITATION_PATH)
encoded_title_array = _load_pickle(ENCODED_TITLE_ARRAY_PATH)
docid_to_rowidx = _load_pickle(DOCID_TO_TITLE_VEC_ROWIDX_PATH)
authorid_to_author_name = _load_pickle(AUTHORID_TO_AUTHOR_NAME_PATH)

# Previously fitted ranker object, restored from disk rather than retrained.
l2rRanker = _load_pickle(L2R_RANKER_FITTED_PATH)

In [None]:
# Instantiate the three author-ranking variants under comparison.
# Arguments are positional, so their order must match each class's constructor.

# Variant 1: built from the fitted L2R ranker, the doc->author lookup and
# the author collection.
authorRanker_1 = AuthorRanker_1(
    l2rRanker,
    docid_to_authorid,
    author_data,
    PAPER_L2R_RANK_RESULT_PATH,
)

# Variant 2: additionally receives the citation lookup and author names.
authorRanker_2 = AuthorRanker_2(
    l2rRanker,
    docid_to_authorid,
    docid_to_citation,
    authorid_to_author_name,
    PAPER_L2R_RANK_RESULT_PATH,
)

# Variant 3: receives the bi-encoder model name plus the encoded title array
# and its doc-id -> row-index lookup, alongside the L2R ranker.
authorRanker_3 = AuthorRanker_3(
    BIENCODER_MODEL_NAME,
    encoded_title_array,
    docid_to_rowidx,
    docid_to_authorid,
    authorid_to_author_name,
    l2rRanker,
    PAPER_L2R_RANK_RESULT_PATH,
)

In [None]:
# Run every ranker over the same author query set and store each ranker's
# output at its own result path (processed in order: ranker 1, 2, 3).
ranker_outputs = (
    (authorRanker_1, AUTHOR_1_RANK_RESULT_PATH),
    (authorRanker_2, AUTHOR_2_RANK_RESULT_PATH),
    (authorRanker_3, AUTHOR_3_RANK_RESULT_PATH),
)
for ranker, result_path in ranker_outputs:
    save_query_result(TRAIN_TEST_AUTHOR_DATA_PATH, ranker, result_path)

In [None]:
# Score each ranker's saved results against the author query data.
# The code below reads the 'map' and 'ndcg' entries of each returned object
# and treats them as sized sequences of scores.
authorRanker_1_eval = run_relevance_tests(TRAIN_TEST_AUTHOR_DATA_PATH, AUTHOR_1_RANK_RESULT_PATH, id_col='author_name')
authorRanker_2_eval = run_relevance_tests(TRAIN_TEST_AUTHOR_DATA_PATH, AUTHOR_2_RANK_RESULT_PATH, id_col='author_name')
authorRanker_3_eval = run_relevance_tests(TRAIN_TEST_AUTHOR_DATA_PATH, AUTHOR_3_RANK_RESULT_PATH, id_col='author_name')

# Insertion order (ranker 1, 2, 3) is relied on below for the model labels.
eval_result = {
    'authorRanker_1_eval': authorRanker_1_eval,
    'authorRanker_2_eval': authorRanker_2_eval,
    'authorRanker_3_eval': authorRanker_3_eval,
}

# Persist the raw evaluation output so the plot can be rebuilt without
# re-running the relevance tests.
with open(AUTHOR_EVAL_RESULT_PATH, 'wb') as f:
    pickle.dump(eval_result, f, protocol=pickle.HIGHEST_PROTOCOL)

# Build a long-format table with one row per individual score.
# Rows are ordered MAP (model 1, 2, 3) then NDCG (model 1, 2, 3).
# Labels are sized from each score list's own length, so the three columns
# stay aligned even if the rankers return different numbers of scores.
methods = []
scores = []
model_flags = []
for metric_key, metric_label in (('map', 'MAP'), ('ndcg', 'NDCG')):
    for model_number, eval_key in enumerate(eval_result, start=1):
        metric_scores = list(eval_result[eval_key][metric_key])
        scores += metric_scores
        methods += [metric_label] * len(metric_scores)
        model_flags += ['Model %d' % model_number] * len(metric_scores)

eval_df = pd.DataFrame({"methods" : methods, "scores" : scores, "model_flags" : model_flags})

# Box plot of score distributions: one group per metric, one box per model.
barplot = sns.boxplot(x="methods", y="scores", hue="model_flags", data=eval_df)
plt.xlabel('Evaluation Method')
plt.ylabel('Score')
# The title must be set before saving, otherwise the PNG is written without it.
plt.title('Model Evaluation')
plt.savefig('author_eval.png')