In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import os
import itertools
from matplotlib.pyplot import cm 
from matplotlib import rc
#rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
#rc('text', usetex=True)

# Directory prefix for the PDF figures written by the plotting functions below;
# it is concatenated directly with the file name, so it must end with a slash.
store_path = "../../plots/results/"

In [None]:
def load_eval(path):
    """Read every tab-separated ``.csv`` file in `path` into a DataFrame.

    Args:
        path: directory to scan (non-recursive).

    Returns:
        dict mapping a shortened file name to its DataFrame. The name is the
        file name without extension, with every occurrence of the tokens
        ``test_14d_``, ``test_d14_`` and ``r_no_0.0_`` removed.
    """
    # Tokens stripped from the file stem, applied in this order.
    stripped_tokens = ('test_14d_', 'test_d14_', 'r_no_0.0_')

    eval_dfs = {}
    for entry in os.listdir(path):
        if not entry.endswith(".csv"):
            continue
        case_name = os.path.splitext(entry)[0]
        for token in stripped_tokens:
            case_name = case_name.replace(token, '')
        eval_dfs[case_name] = pd.read_csv(os.path.join(path, entry), sep='\t')
    return eval_dfs
    
# Dataset whose hyper-parameter evaluation results are analysed; switch by
# swapping which of the two assignments is commented out.
dataset = "recsys17"
#dataset = "cb12"

# Root directory holding the "all/" and "next/" evaluation result folders.
path = "../../data/{}/processed/eval/hyperparam/".format(dataset)

full_all_dfs = load_eval(path + "all/")
full_next_dfs = load_eval(path + "next/")
print("Loaded {} eval cases".format(len(full_all_dfs)))

In [None]:
# Algorithm identifiers. The functions below select an algorithm's eval cases by
# testing whether the identifier occurs as a substring of the eval-case name,
# and draw one subplot per entry (the subplot counts are sized for 7 entries).
baselines = ["SKNN","S-sKNN", "SVMKNN", "IKNN", "BPR", "GRU4Rec", "prnn"]

# Code to plot metric for different values of k

In [None]:
def _k_plot_label(key, baseline):
    """Turn an eval-case key into the legend label used in the line plots."""
    label = key.replace(baseline + "_", "").replace("Smpl","Sampling")
    label = label.replace("random", "$_{RANDOM}$").replace("recent","$_{RECENT}$")
    label = label.replace("sim", " sim").replace("sim", "Sim").replace("cosine","$_{COSINE}$").replace("jaccard","$_{JACCARD}$")
    label = label.replace("Pop_boost0", "").replace("Pop_boost1", "PopBoost")
    if "BPR" in baseline:
        label = label.replace("ls", r'$\lambda_{SESSION}$=').replace("li", " li").replace("li", r'$\lambda_{ITEM}$=')
    if "IKNN" in baseline:
        label = label.replace("lmbd", r'$\lambda$=').replace("alpha", " alpha").replace("alpha", r'$\alpha$=')
    if "GRU" in baseline:
        label = label.replace("loss", "").replace("_layers", " [").replace("_", ",")
        label += "]"
    if "prnn" in baseline:
        label = label.replace("a_", '').replace("_ls_", " [").replace("_lr_", r'] $\alpha$=')
    return label


def plot_k_results(metric, title, dfs, file_name, colormap):
    """Plot `metric` against k for every configuration of every baseline.

    One subplot is drawn per entry of the module-level ``baselines`` list. A
    subplot shows one line per eval case in `dfs` whose key contains the
    baseline name, using the first 10 rows of that case's frame. The finished
    figure is saved as
    ``store_path + dataset + "_" + file_name + "_" + metric + ".pdf"``.

    Args:
        metric: name of the DataFrame column to plot on the y axis.
        title: accepted for interface compatibility; it is not drawn (each
            subplot is titled with its baseline name instead).
        dfs: dict mapping eval-case name to a DataFrame that has a ``k``
            column and a `metric` column.
        file_name: middle part of the output file name.
        colormap: matplotlib colormap name the line colors are sampled from.
    """
    n_colors = 9  # number of distinct colors sampled from the colormap
    palette = [plt.get_cmap(colormap)(1. * i / n_colors) for i in range(n_colors)]

    lw = 3
    ms = 12
    k = 10  # number of leading rows (cut-off values) shown per line
    font = 35
    font_tick = 30

    # One row per baseline; squeeze=False keeps `axes` 2-D even for one row.
    # The height keeps the 30-inch total used for the default 7 baselines.
    f, axes = plt.subplots(len(baselines), figsize=(15, len(baselines) * 30.0 / 7),
                           squeeze=False)

    for ax, baseline in zip(axes[:, 0], baselines):
        matching_keys = sorted(key for key in dfs if baseline in key)

        # Cycled so that more configurations than palette entries reuse colors
        # instead of exhausting the iterator.
        colors = itertools.cycle(palette)
        markers = itertools.cycle(('s', '^', '>', 'p', 'P', '8', 'D', 'h', 'X'))
        lines = itertools.cycle(('--', '-',  '-.', ':'))

        for key in matching_keys:
            dfs[key].head(k).plot(ax = ax, kind="line", x="k", y=metric,
                                  label=_k_plot_label(key, baseline), markersize=ms,
                                  marker=next(markers), linestyle=next(lines),
                                  c=next(colors), linewidth=lw)

        # Tick.label is not available in recent matplotlib releases;
        # tick_params also covers ticks created later by set_xticks.
        ax.tick_params(axis='both', which='major', labelsize=font_tick)

        ncol = 3
        if "sknn" in baseline.lower() or "SVM" in baseline or "GRU" in baseline:
            ncol = 2
        # The legend position is always "lower right".
        ax.legend(loc="lower right", ncol=ncol, fontsize=16)

        panel_title = baseline.replace("SKNN","sKNN")
        panel_title = panel_title.replace("SVMKNN","V-sKNN").replace("IKNN","iKNN").replace("prnn", "pRNN")
        ax.set_title(panel_title, fontsize = 40)

        ax.set_xlabel('k', fontsize = font)

        metric_label = metric.replace("NovEPC","EPC").replace("SerEPD","EPD")
        ax.set_ylabel(metric_label, fontsize = font)

        ax.xaxis.grid(True, which='major', linestyle='-', linewidth=0.35)
        ax.yaxis.grid(True, which='major', linestyle='-', linewidth=0.35)
        ax.set_xlim(0.8,k + 0.2)
        ax.set_xticks(np.arange(k) + 1)

    # Lay out and write the figure once, after all panels are drawn.
    f.tight_layout()
    f.savefig(store_path + dataset + "_" + file_name + "_" + metric + ".pdf", dpi=300, bbox_inches='tight')

In [None]:
colormap = 'tab10'
# Same line plot for each accuracy / novelty / serendipity metric.
for metric_name in ("nDCG", "MRR", "NovEPC", "SerEPD"):
    plot_k_results(metric_name, "Prediction experiment: REMAINING ITEMS", full_all_dfs, "hyp_opt_all", colormap)


# Code to give best configuration for each algorithm

In [None]:
def best_config(metric, dfs):
    """Print, for each baseline, the configuration with the highest `metric`.

    Configurations are the keys of `dfs` that contain the baseline name (taken
    from the module-level ``baselines`` list), visited in sorted order. They
    are compared on the value in the 20th row of their frame; on ties the
    first one wins. ``None`` is printed for a baseline with no winner.

    Args:
        metric: name of the DataFrame column to compare.
        dfs: dict mapping eval-case name to a DataFrame with at least 20 rows.
    """
    print("Best config for: " + metric)
    # which k to use for comparison
    k = 20
    row = k - 1

    for baseline in baselines:
        candidates = sorted(name for name in dfs if baseline in name)

        top_score = -1
        top_name = None
        for name in candidates:
            score = dfs[name].iloc[row][metric]
            if score > top_score:
                top_score = score
                # Store the human-readable form of the winning key.
                readable = name.replace("_", " ").replace("Smpl","")
                top_name = readable.replace("Sim", " ").replace("Pop", " Pop")
        print(top_name)

In [None]:
# Print the best nDCG configuration per algorithm for the "all" evaluation results.
best_config("nDCG", full_all_dfs)

In [None]:
import seaborn as sns
# Switch seaborn to its "whitegrid" style for the bar charts drawn below.
# NOTE(review): this presumably changes global matplotlib styling for all
# figures created afterwards - confirm that is intended.
sns.set(style="whitegrid")

def change_width(ax, new_value) :
    """Set every patch of `ax` to width `new_value`, keeping it centred.

    Args:
        ax: object exposing a ``patches`` iterable (e.g. a bar-chart Axes).
        new_value: width assigned to each patch.
    """
    for bar in ax.patches :
        # How much narrower (positive) or wider (negative) the bar becomes.
        shrink = bar.get_width() - new_value
        bar.set_width(new_value)
        # Shift by half the change so the bar centre stays where it was.
        bar.set_x(bar.get_x() + shrink * .5)

def _bar_plot_label(key, baseline):
    """Turn an eval-case key into the short label used in the bar charts."""
    label = key.replace(baseline + "_", "").replace("Smpl","")
    label = label.replace("random", "rand ").replace("recent","recent ")
    label = label.replace("Sim", " ").replace("sim", "").replace("cosine","cos ").replace("jaccard","jac ")
    label = label.replace("Pop_boost0", "").replace("Pop_boost1", "boost")
    if "BPR" in baseline:
        label = label.replace("ls", r'$\lambda_{SESSION}$=').replace("li", " li").replace("li", r'$\lambda_{ITEM}$=')
    if "IKNN" in baseline:
        label = label.replace("lmbd", r'$\lambda$=').replace("alpha", " alpha").replace("alpha", r'$\alpha$=')
    if "GRU" in baseline:
        label = label.replace("loss", "").replace("_layers", " [").replace("_", ",")
        label += "]"
    if "prnn" in baseline:
        label = label.replace("a_", '').replace("_ls_", " [").replace("_lr_", r'] $\alpha$=')
    return label


def plot_bar_results(metric, title, dfs, file_name, colormap):
    """Draw one bar chart per baseline comparing `metric` across configurations.

    Panels are laid out two per row, one per entry of the module-level
    ``baselines`` list. Each panel shows one bar per eval case in `dfs` whose
    key contains the baseline name; the bar height is `metric` in the last of
    the first 20 rows of that case's frame. The figure is saved as
    ``store_path + dataset + "_" + file_name + "_" + metric + ".pdf"``.

    The frames in `dfs` are not modified.

    Args:
        metric: name of the DataFrame column to plot.
        title: accepted for interface compatibility; it is not drawn (each
            panel is titled with its baseline name instead).
        dfs: dict mapping eval-case name to a DataFrame with a `metric` column.
        file_name: middle part of the output file name.
        colormap: accepted for interface compatibility; bar colors come from
            seaborn's default palette.
    """
    k = 20  # cut-off whose row is compared
    font = 35
    font_tick = 30
    legend_cols = 3

    grid_cols = 2
    grid_rows = max(1, -(-len(baselines) // grid_cols))  # ceiling division
    # 5 inches per row reproduces the 30x20 figure used for 7 baselines.
    f, axes = plt.subplots(grid_rows, grid_cols, figsize=(30, grid_rows * 5.0),
                           squeeze=False)
    panels = axes.flatten()  # row-major: left panel first, then right

    for ax, baseline in zip(panels, baselines):
        panel_title = baseline.replace("SKNN","sKNN")
        panel_title = panel_title.replace("SVMKNN","V-sKNN").replace("IKNN","iKNN").replace("prnn", "pRNN")
        ax.set_title(panel_title, fontsize = 40)

        matching_keys = sorted(key for key in dfs if baseline in key)

        # assign() returns a copy, so the caller's frames do not gain a
        # "name" column as a side effect of plotting.
        frames = [
            dfs[key].head(k).tail(1).assign(name=_bar_plot_label(key, baseline))
            for key in matching_keys
        ]
        if not frames:
            # Nothing to compare for this baseline; leave the panel empty
            # rather than failing on an empty concat.
            continue

        frame_pd = pd.concat(frames)

        sns.barplot(ax = ax, data=frame_pd, y=metric, x="name", hue="name", dodge=False)

        # Tick.label is not available in recent matplotlib releases.
        ax.tick_params(axis='y', which='major', labelsize=font_tick)
        # Configurations are identified by the legend, so the x tick labels
        # are hidden (a one-element label list is rejected by recent
        # matplotlib when there are several ticks).
        ax.tick_params(axis='x', which='major', labelbottom=False)

        ax.legend(loc="lower right", ncol=legend_cols, fontsize=18)

        ax.set_xlabel('', fontsize = font)

        metric_label = metric.replace("NovEPC","EPC").replace("SerEPD","EPD")
        ax.set_ylabel(metric_label + "@" + str(k), fontsize = font)

        ax.xaxis.grid(True, which='major', linestyle='-', linewidth=0.35)
        ax.yaxis.grid(True, which='major', linestyle='-', linewidth=0.35)
        # Zoom the y axis onto the value range so small differences are visible.
        ymin = frame_pd[metric].min() * 0.98
        ymax = frame_pd[metric].max() * 1.01
        ax.set_ylim(ymin, ymax)

    # Remove grid cells that have no baseline (the last one for 7 baselines).
    for unused_ax in panels[len(baselines):]:
        f.delaxes(unused_ax)
    f.tight_layout()
    f.savefig(store_path + dataset + "_" + file_name + "_" + metric + ".pdf", dpi=300, bbox_inches='tight')

In [None]:
colormap = 'tab10'
# Bar-chart comparison of nDCG across configurations; the "_b" suffix keeps the
# output file name distinct from the line plots saved as "hyp_opt_all".
plot_bar_results("nDCG", "Prediction experiment: REMAINING ITEMS", full_all_dfs, "hyp_opt_all_b", colormap)