In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import os
import itertools
from matplotlib.pyplot import cm 
#from matplotlib import rc
#rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
#rc('text', usetex=True)

# Output directory for the generated figures; the plotting functions in this
# notebook prepend it to the name of every PDF they save.
store_path = "../../plots/results/"

In [None]:
def load_eval(path):
    """Load every ``.csv`` file found directly in ``path``.

    Each file is parsed as tab-separated text. The result maps a cleaned
    file name to its DataFrame: the extension is dropped and the substrings
    ``test_14d_``, ``test_d14_`` and ``r_no_0.0_`` are removed. If two files
    clean to the same name, the one listed later wins.

    Parameters
    ----------
    path : str
        Directory to scan (not recursive).

    Returns
    -------
    dict
        ``{cleaned_name: DataFrame}`` in directory-listing order.
    """
    def short_name(csv_name):
        # Strip the extension first, then the run-specific markers, in order.
        stem, _ = os.path.splitext(csv_name)
        for noise in ('test_14d_', 'test_d14_', "r_no_0.0_"):
            stem = stem.replace(noise, '')
        return stem

    return {
        short_name(entry): pd.read_csv(os.path.join(path, entry), sep='\t')
        for entry in os.listdir(path)
        if entry.endswith(".csv")
    }
        
# Root folder of the evaluation CSVs; each sub-folder is read with load_eval.
path = "../../data/cb12/processed/eval/"

# Results stored under base/.
base_all_dfs = load_eval(f"{path}base/all/")
base_next_dfs = load_eval(f"{path}base/next/")
print(f"Loaded {len(base_all_dfs)} eval cases")

# Results stored directly under all/ and next/.
all_dfs = load_eval(f"{path}all/")
next_dfs = load_eval(f"{path}next/")
print(f"Loaded {len(all_dfs)} eval cases")

In [None]:
# Show which result sets were loaded from base/all/.
for key in base_all_dfs:
    print(key)

In [None]:
# Result-set names drawn as baselines by plot_k_results.
baselines = ["bayes_test", "iknn", "cknn", "scknn", "vcknn", "gru", "pop", "bpr", "prnn"]

# Result-set names copied from all_dfs / next_dfs into the base_* dictionaries.
vaes = ["content_vae_topks_60", "vae_topks_60"]

top_ks = np.arange(10, 151, 10)   # 10, 20, ..., 150
epochs = np.arange(1, 31)         # 1, 2, ..., 30

# Add the selected VAE results to the baseline dictionaries.
base_next_dfs.update({name: frame for name, frame in next_dfs.items() if name in vaes})
base_all_dfs.update({name: frame for name, frame in all_dfs.items() if name in vaes})

for name in base_all_dfs:
    print(name)

base_all_dfs["pop"].head(1)

In [None]:
def _format_metric(value):
    """Round ``value`` to 4 decimals and drop only a leading zero (0.1234 -> .1234)."""
    text = str(round(value, 4))
    # Only the leading "0." is shortened; a blanket replace("0.", ".") would
    # also turn e.g. "10.5" into "1.5".
    if text.startswith("0."):
        return text[1:]
    if text.startswith("-0."):
        return "-" + text[2:]
    return text


def _latex_metric_cells(frame, k, mrr_color):
    """Build the LaTeX table cells (nDCG, MRR, NovEPC, SerEPD) for the row where ``frame["k"] == k``."""
    cells = (
        (r" & \cellcolor{green!5} ", "nDCG"),
        (r" & \cellcolor{" + mrr_color + "}", "MRR"),
        (r" & \cellcolor{cyan!5}", "NovEPC"),
        (r" & \cellcolor{yellow!5}", "SerEPD"),
    )
    at_k = frame[frame["k"] == k]
    body = "".join(prefix + _format_metric(at_k[column].iloc[0]) for prefix, column in cells)
    return body + r" \\ \hline"


def _baseline_label(name):
    """Translate a baseline result-set name into its legend label."""
    # Order matters: 'scknn' and 'vcknn' must be handled before 'cknn'.
    replacements = (
        ('gru', 'GRU4Rec'), ('iknn', 'iKNN'), ("prnn", "pRNN"),
        ('pop', 'POP'), ("bpr", "BPR-MF"), ('bayes_test', "Bayes"),
        ('scknn', 'S-sKNN'), ('vcknn', 'V-sKNN'), ('cknn', 'sKNN'),
    )
    label = name
    for old, new in replacements:
        label = label.replace(old, new)
    return label


def plot_k_results(metric, title, df, file_name, colormap, report_k=20):
    """Plot ``metric`` against ``k`` for the VAE models and the baselines.

    One line is drawn per entry of the module-level ``vaes`` list, followed by
    one line per entry of ``baselines`` that is present in ``df``. A table with
    the LaTeX cells (nDCG, MRR, NovEPC, SerEPD at ``report_k``) of every
    plotted algorithm is printed, and the figure is saved as
    ``store_path + "cb12_<file_name>_<metric>.pdf"``.

    Parameters
    ----------
    metric : str
        Column to plot on the y axis.
    title : str
        Accepted for interface compatibility; the figure is drawn with an
        empty title.
    df : dict
        Maps result-set names to DataFrames. Each frame needs a ``k`` column
        plus ``metric``, ``nDCG``, ``MRR``, ``NovEPC`` and ``SerEPD``.
    file_name : str
        Middle part of the output file name.
    colormap : str
        Matplotlib colormap name; ``len(df)`` evenly spaced colours are taken
        from it.
    report_k : int, optional
        Value of ``k`` whose metrics go into the printed table (default 20).

    Raises
    ------
    KeyError
        If a name in ``vaes`` is missing from ``df`` or a required column is
        missing.
    IndexError
        If a plotted frame has no row with ``k == report_k``.
    """
    fig, ax = plt.subplots(1, 1, figsize=(15, 7))

    markers = itertools.cycle(('s', '^', '>', 'p', 'P', '8', 'D', 'h', 'X'))
    lines = itertools.cycle(('--', '-', '-.', ':'))
    n_colors = len(df)
    cmap = plt.get_cmap(colormap)
    colors = iter([cmap(1. * i / n_colors) for i in range(n_colors)])
    lw = 3
    ms = 12

    rows_list = []

    def draw(frame, label):
        frame.plot(ax=ax, kind="line", x="k", y=metric, label=label, markersize=ms,
                   marker=next(markers), linestyle=next(lines), c=next(colors), linewidth=lw)

    for vae in vaes:
        label = vae.replace("content_vae_topks_60", "$VAE_{Comb}$")
        label = label.replace("vae_topks_60", "$VAE_{Int}$")

        frame = df[vae]
        # NOTE(review): VAE rows colour the MRR cell green while baseline rows
        # use teal; kept as found - confirm which one the table should use.
        rows_list.append({"algo": label, "tex": _latex_metric_cells(frame, report_k, "green!5")})

        if metric == "Diversity":
            # The first row is left out of the Diversity curve.
            frame = frame.iloc[1:]
        draw(frame, label)

    for base in baselines:
        if base not in df:
            continue
        frame = df[base]
        label = _baseline_label(base)
        rows_list.append({"algo": label, "tex": _latex_metric_cells(frame, report_k, "teal!5")})
        draw(frame, label)

    md_df = pd.DataFrame(rows_list, columns=["algo", "tex"])
    # None means "do not truncate"; the former -1 is rejected by pandas 2.x.
    pd.set_option('display.max_colwidth', None)
    pd.set_option('display.expand_frame_repr', False)
    print(md_df)

    font = 40
    font_tick = 30

    # tick_params replaces per-tick `tick.label`, which current Matplotlib no
    # longer provides, and also covers ticks created by set_xticks below.
    ax.tick_params(axis='both', which='major', labelsize=font_tick)

    ax.legend(loc='upper center', ncol=6, fontsize=20, bbox_to_anchor=(0.43, 1.23))

    ax.set_title("", fontsize=font)
    ax.set_xlabel('k', fontsize=font)
    ax.set_ylabel(metric.replace("Nov", "").replace("Ser", ""), fontsize=font)
    ax.xaxis.grid(True, which='major', linestyle='-', linewidth=0.35)
    ax.yaxis.grid(True, which='major', linestyle='-', linewidth=0.35)
    ax.set_ylim(0)
    ax.set_xlim(0.7, 20.3)
    ax.set_xticks(np.arange(20) + 1)

    out_path = store_path + "cb12_" + file_name + "_" + metric + ".pdf"
    # Make sure the target folder exists so a fresh checkout can run the cell.
    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
    fig.savefig(out_path, dpi=300, bbox_inches='tight')

In [None]:
# nDCG-vs-k figure for the results in base_all_dfs, saved with the "all" file tag.
colormap = 'tab10'
plot_k_results("nDCG", "Prediction experiment: REMAINING ITEMS", base_all_dfs, "all", colormap)

In [None]:
# One figure per metric for the results in base_next_dfs, saved with the "next" file tag.
# NOTE(review): the NovEPC and SerEPD calls pass a "REMAINING ITEMS" title while
# using base_next_dfs; plot_k_results draws an empty title, so the saved figures
# are unaffected - confirm which data set was intended.
colormap = 'tab20b'
plot_k_results("nDCG", "Prediction experiment: NEXT ITEM", base_next_dfs, "next", colormap)
plot_k_results("MRR", "Prediction experiment: NEXT ITEM", base_next_dfs, "next", colormap)
plot_k_results("NovEPC", "Prediction experiment: REMAINING ITEMS", base_next_dfs, "next", colormap)
plot_k_results("SerEPD", "Prediction experiment: REMAINING ITEMS", base_next_dfs, "next", colormap)

In [None]:
def plot_aes(metric, title, eval_df, top_ks, epochs, k_to_report):
    """Compare the ``ae``, ``dae`` and ``vae`` result sets across their ``topks`` setting.

    For every algorithm two lines are drawn: one from the frames named
    ``<algo>_topks_<k>`` (legend suffix ``Int``) and one from
    ``content_<algo>_topks_<k>`` (legend suffix ``Comb``). Each point is the
    value of ``metric`` in the row where the frame's ``k`` column equals
    ``k_to_report``. The figure is saved as
    ``store_path + "cb12_<title>_<metric>_ae_compare_full_all_users.pdf"``.

    Parameters
    ----------
    metric : str
        Column to read from every frame.
    title : str
        Figure title; also part of the output file name.
    eval_df : dict
        Maps result-set names to DataFrames with a ``k`` column and ``metric``.
    top_ks, epochs
        Currently ignored, see the note in the body.
    k_to_report : int
        Value of the ``k`` column at which ``metric`` is read.

    Raises
    ------
    KeyError
        If an expected result set or column is missing from ``eval_df``.
    IndexError
        If a frame has no row with ``k == k_to_report``.
    """
    fig, ax = plt.subplots(1, 1, figsize=(15, 7))

    markers = itertools.cycle(('s', '^', '>', 'p', 'P', '8', 'D', 'h', 'X'))
    lines = itertools.cycle(('--', '-', '-.', ':'))

    # NOTE(review): the caller's top_ks is replaced by a fixed 10..100 grid and
    # epochs is never used. Kept as found so existing calls keep looking up the
    # same result sets - confirm whether the arguments should be honoured.
    top_ks = range(10, 101, 10)

    algos = ["ae", "dae", "vae"]

    # Two lines (Int and Comb) per algorithm, one colour each.
    n_colors = 2 * len(algos)
    cmap = plt.get_cmap("tab20b")
    colors = iter([cmap(1. * i / n_colors) for i in range(n_colors)])

    lw = 2
    ms = 12

    def value_at_report_k(frame):
        return frame[frame["k"] == k_to_report][metric].iloc[0]

    def draw(frame, label):
        frame.plot(ax=ax, kind="line", x="k", y="val", label=label, markersize=ms,
                   marker=next(markers), linestyle=next(lines), c=next(colors), linewidth=lw)

    for algo in algos:
        interact_rows = []
        content_rows = []

        for k in top_ks:
            suffix = algo + "_topks_" + str(k)
            interact_rows.append({"k": k, "val": value_at_report_k(eval_df[suffix])})
            content_rows.append({"k": k, "val": value_at_report_k(eval_df["content_" + suffix])})

        label_txt = algo.replace("_", "").upper()
        draw(pd.DataFrame(interact_rows, columns=["k", "val"]), label_txt + "$_{Int}$")
        draw(pd.DataFrame(content_rows, columns=["k", "val"]), label_txt + "$_{Comb}$")

    font = 40
    font_tick = 30

    # tick_params replaces per-tick `tick.label`, which current Matplotlib no
    # longer provides.
    ax.tick_params(axis='both', which='major', labelsize=font_tick)

    ax.legend(loc='lower right', fontsize=25, ncol=3)

    ax.set_title(title, fontsize=font)
    ax.set_xlabel('k', fontsize=font)
    ax.set_ylabel(metric + "@" + str(k_to_report), fontsize=font)
    ax.xaxis.grid(True, which='major', linestyle='-', linewidth=0.25)
    ax.yaxis.grid(True, which='major', linestyle='-', linewidth=0.25)

    fig.tight_layout()
    out_path = store_path + "cb12_" + title + "_" + metric + "_ae_compare_full_all_users.pdf"
    # Make sure the target folder exists so a fresh checkout can run the cell.
    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
    fig.savefig(out_path, dpi=300, bbox_inches='tight')

# nDCG at k = 20 for the ae/dae/vae result sets in all_dfs, with an empty title.
# NOTE(review): plot_aes sets its own top_ks grid and never uses epochs, so the
# two values passed here have no effect.
plot_aes("nDCG", "", all_dfs, top_ks, epochs, 20)