# Load previous results

In [None]:
import pickle
# Restore the previously computed evaluation results.
# NOTE: unpickling can execute arbitrary code, so only load a results.pkl
# that was produced by this project.
with open("results.pkl", "rb") as results_file:
    final_results = pickle.load(results_file)

# Numbers of extracted keyphrases (1 through 20) used as the x-axis below.
kw_threshholds = range(1, 21)

In [None]:
# Write final_results back to results.pkl; mode "wb" overwrites any
# existing file of that name.
with open("results.pkl", "wb") as results_file:
    pickle.dump(final_results, results_file)

In [None]:
from collections import namedtuple

# Result container for get_data: p/r/f hold the mean precision, recall and F1
# per keyphrase threshold, a holds the algorithm name. It is created once at
# module level (not inside get_data) so all results share a single type and
# instances can be pickled.
Retrieval_scores = namedtuple("Retrieval_scores", "p r f a".split())


def get_data(algorithm, corpus):
    """Collect the mean precision, recall and F1 of one algorithm on a corpus.

    Reads the module-level ``final_results`` and ``kw_threshholds``.

    Args:
        algorithm: Key selecting the algorithm inside each threshold's entry
            of ``final_results[corpus]``.
        corpus: Key selecting the corpus inside ``final_results``.

    Returns:
        A ``Retrieval_scores`` tuple ``(p, r, f, a)``: ``p``, ``r`` and ``f``
        are lists with one mean value per entry of ``kw_threshholds`` (in
        that order), and ``a`` is ``algorithm`` unchanged.

    Raises:
        KeyError: Presumably, if ``corpus``, a threshold or ``algorithm`` is
            missing from ``final_results`` (assumes dict-like containers --
            TODO confirm).
    """
    scores = final_results[corpus]
    # Look up each threshold's score table once instead of once per metric.
    per_threshold = [scores[i][algorithm] for i in kw_threshholds]

    precision = [table["precision"].mean() for table in per_threshold]
    recall = [table["recall"].mean() for table in per_threshold]
    f1 = [table["f1"].mean() for table in per_threshold]
    return Retrieval_scores(precision, recall, f1, algorithm)

# Corpus to evaluate and the two algorithms compared in the plot below:
# X is the first series, Y the second.
corpus = "semeval"
X = get_data(algorithm="tfidf", corpus=corpus)
Y = get_data(algorithm="tfidfed_textrank", corpus=corpus)

### Relative enhancement

In [None]:
def publication_name(n):
    """Translate an internal algorithm key into its label for the figures.

    Args:
        n: Internal algorithm name, e.g. ``"rake"`` or ``"tfidfed_textrank"``.

    Returns:
        The matching mathtext label (a string wrapped in ``$...$``).

    Raises:
        Exception: If ``n`` has no known substitution.
    """
    substitutions = (
        ("tfidfed_rake", "$Rake_s$"),
        ("rake", "$Rake$"),
        ("tfidfed_textrank", "$Textrank_s$"),
        ("textrank", "$Textrank$"),
        ("tfidf", "$tf-idf$"),
        ("frankenrake", "$Ensemble$"),
    )
    # Compare with == (rather than a dict lookup) so that any argument type,
    # hashable or not, is handled the same way.
    for internal_name, label in substitutions:
        if n == internal_name:
            return label
    raise Exception(f"No proper name substitution available for {n}")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

# Compare precision, recall and F1 of the two selected algorithms (X and Y)
# over the number of extracted keyphrases, then save the figure as a PDF.
plt.rcParams["font.family"] = 'serif'

colors = sns.color_palette("Set1", 6)
fig, ax = plt.subplots(figsize=(8, 6))

# X: dotted lines at full opacity. One colour per metric, shared with Y.
ax.plot(kw_threshholds, X.p, ':v', c=colors[0], label=f"$\\pi$ {publication_name(X.a)}")
ax.plot(kw_threshholds, X.r, ':D', c=colors[1], label=f"$\\rho$ {publication_name(X.a)}")
ax.plot(kw_threshholds, X.f, ':d', c=colors[2], label=f"F1 {publication_name(X.a)}")

# Y: solid, semi-transparent lines. The backslash in "\\pi" is escaped like
# in the other labels; a bare "\p" is an invalid escape sequence in a
# non-raw string literal.
ax.plot(kw_threshholds, Y.p, '-v', c=colors[0], alpha=.4, label=f"$\\pi$ {publication_name(Y.a)}")
ax.plot(kw_threshholds, Y.r, '-D', c=colors[1], alpha=.4, label=f"$\\rho$ {publication_name(Y.a)}")
ax.plot(kw_threshholds, Y.f, '-d', c=colors[2], alpha=.4, label=f"F1 {publication_name(Y.a)}")

ax.set_ylim(0.0, .6)
ax.set_xlabel('Number of Keyphrases', fontsize=16)
ax.set_ylabel('Score', fontsize=16)

ax.legend(fontsize=14, frameon=False)
ax.tick_params(axis='x', labelsize=14)
ax.tick_params(axis='y', labelsize=14)

ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
#ax.set_facecolor("white")

# Draw all four borders of the plot area (ax is the current axes here).
for spine in ax.spines.values():
    spine.set_visible(True)

#plt.title(f"{corpus_name} without Fuzzy Matching + KW Removal", fontsize=18)

# One tick per keyphrase threshold.
plt.xticks(kw_threshholds)
fig.savefig(f"result_plots/{corpus}/{publication_name(X.a)}_vs_{publication_name(Y.a)}.pdf", 
    format="pdf", transparent=True, bbox_inches="tight")
plt.show()

# Plotting

In [None]:
def plot_ranking_stats(num_kwds, metric, corpus, algorithm_a, algorithm_b, algorithm_c, algorithm_d):
    """Plot the sorted scores of four algorithms against their rank.

    For each algorithm, the values of ``metric`` are sorted in descending
    order and drawn as one line (x = rank position, y = score). The figure
    is saved as a PDF under ``result_plots/{corpus}/`` and then shown.

    Reads the module-level ``final_results``.

    Args:
        num_kwds: Keyphrase threshold selecting the entry of
            ``final_results[corpus]``.
        metric: Name of the score column to rank by, e.g. ``"f1"``.
        corpus: Key selecting the corpus inside ``final_results``; also used
            in the output path.
        algorithm_a: Internal name of the first algorithm to plot.
        algorithm_b: Internal name of the second algorithm to plot.
        algorithm_c: Internal name of the third algorithm to plot.
        algorithm_d: Internal name of the fourth algorithm to plot.

    Returns:
        None.
    """
    # Assumes each scores_at_k[algorithm] is a pandas DataFrame with one
    # column per metric -- TODO confirm against the code that builds
    # final_results.
    scores_at_k = final_results[corpus][num_kwds]

    # Rank every algorithm before creating the figure, so a missing key
    # fails before any plotting starts.
    ranked = [
        (algorithm, scores_at_k[algorithm].sort_values(by=metric)[::-1][metric].values)
        for algorithm in (algorithm_a, algorithm_b, algorithm_c, algorithm_d)
    ]

    fig, ax = plt.subplots(figsize=(8, 6))
    for algorithm, values in ranked:
        ax.plot(range(values.shape[0]), values, label=f"{publication_name(algorithm)}")

    ax.set_xlabel("Rank", fontsize=16)
    ax.set_ylabel("Score", fontsize=16)
    # Start slightly below zero so lines lying at 0 stay visible.
    ax.set_ylim(-0.02, 1)

    ax.legend(fontsize=14, frameon=False)
    ax.tick_params(axis='x', labelsize=14)
    ax.tick_params(axis='y', labelsize=14)

    fig.savefig(f"result_plots/{corpus}/rank_plots_{metric}@{num_kwds}_Keywords.pdf", 
                format="pdf", transparent=True, bbox_inches="tight")
    plt.show()

In [None]:
# Rank plot of F1 at 5 keyphrases on SemEval for both RAKE and both
# TextRank variants.
plot_ranking_stats(
    num_kwds=5,
    metric="f1",
    corpus="semeval",
    algorithm_a="rake",
    algorithm_b="tfidfed_rake",
    algorithm_c="textrank",
    algorithm_d="tfidfed_textrank",
)