In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import math

from ast import literal_eval
from collections import defaultdict
import statistics

In [47]:
# Data loading and saving
def save_dataframe_csv(df, path, name):
    """Write ``df`` as CSV to the location ``path + name``.

    The two strings are concatenated as-is (no separator is inserted), and
    the DataFrame index is not written to the file.
    """
    destination = path + name
    df.to_csv(destination, index=False)

def load_dataframe_csv(path, name, index_col=None):
    """Read the CSV file located at ``path + name`` into a DataFrame.

    ``path`` and ``name`` are concatenated as-is (no separator is inserted);
    ``index_col`` is forwarded unchanged to ``pd.read_csv``.
    """
    source = path + name
    frame = pd.read_csv(source, index_col=index_col)
    return frame

# Get average length and success rate
def get_average_length(df, n):
    """Average per-user ``iteration`` value at target rank ``n``.

    Only rows whose ``result`` is 'successful' or 'fail' and whose
    ``target_rank`` equals ``n`` are used. ``iteration`` is first averaged
    per ``user_id``; the function then returns a tuple
    ``(mean of the per-user means, 1.96 * std / sqrt(number of users))``,
    where ``std`` is ``np.std`` with its default settings.
    """
    finished = df[df['result'].isin(['successful', 'fail'])]
    at_rank = finished[finished['target_rank'] == n]
    per_user = at_rank.groupby('user_id')['iteration'].mean().to_numpy()
    half_width = 1.96 * np.std(per_user) / np.sqrt(len(per_user))
    return (np.average(per_user), half_width)

def get_success_num(df, n):
    """Count the rows with ``result == 'successful'`` and ``target_rank == n``."""
    at_rank = df['target_rank'] == n
    succeeded = df['result'] == 'successful'
    return len(df.loc[succeeded & at_rank])

def get_fail_num(df, n):
    """Count the rows with ``result == 'fail'`` and ``target_rank == n``."""
    at_rank = df['target_rank'] == n
    failed = df['result'] == 'fail'
    return len(df.loc[failed & at_rank])

def get_success_rate(df, n):
    """Average per-user success rate at target rank ``n``.

    Only rows whose ``result`` is 'successful' or 'fail' and whose
    ``target_rank`` equals ``n`` are used. For every ``user_id`` the rate is
    the share of that user's rows marked 'successful'. Returns a tuple
    ``(mean of the per-user rates, 1.96 * std / sqrt(number of users))``,
    where ``std`` is ``np.std`` with its default settings.
    """
    def _share_successful(results):
        return results.count("successful")/len(results)

    finished = df[df['result'].isin(['successful', 'fail'])]
    at_rank = finished[finished['target_rank'] == n]
    per_user = at_rank.groupby('user_id')['result'].apply(list).reset_index(name='result')
    rates = per_user['result'].apply(_share_successful).to_numpy()
    half_width = 1.96*np.std(rates)/np.sqrt(len(rates))
    return (np.average(rates), half_width)

# For creating latex table
def get_2_metric(df, topk = 1, print_result = True):
    """Compute average length and success rate of ``df`` at cutoff ``topk``.

    For any ``topk`` other than 1 the frame is first passed through
    ``change_target_rank`` with that cutoff. With ``print_result`` true the
    two metrics are printed and ``None`` is returned; otherwise the tuple
    ``(get_average_length(...), get_success_rate(...))`` is returned.
    """
    evaluated = df if topk == 1 else change_target_rank(df,topk=topk)
    if not print_result:
        return get_average_length(evaluated,topk), get_success_rate(evaluated,topk)
    print ('avg_length: ',get_average_length(evaluated,topk))
    print ('suc_rate: ',get_success_rate(evaluated,topk))
    return


def latex_row(df, table_name = "success_rate", value_precision = 3, uncertainty_precision = 4, topks = None):
    """Build one table row of ``value$\\pm$uncertainty`` strings, keyed by top-k.

    Parameters
    ----------
    df : DataFrame handed to ``get_2_metric`` for every cutoff.
    table_name : "success_rate" selects the success-rate metric; any other
        value selects the average-length metric.
    value_precision, uncertainty_precision : number of decimals passed to
        ``round`` for the value and for its uncertainty.
    topks : iterable of cutoffs to evaluate. When omitted, the notebook-level
        ``columns`` list is used, which keeps existing calls working.

    Returns
    -------
    dict mapping each cutoff to its formatted string.
    """
    if topks is None:
        # Backward-compatible fallback to the notebook-level list.
        topks = columns
    wants_success_rate = table_name == "success_rate"
    row = {}
    for k in topks:
        # get_2_metric returns (average length, success rate), in that order.
        avg_length, suc_rate = get_2_metric(df, topk = k, print_result = False)
        metric = suc_rate if wants_success_rate else avg_length
        row[k] = str(round(metric[0],value_precision)) + r'$\pm$' + str(round(metric[1],uncertainty_precision))
    return row


def latex_table_row(df, table_name = "success_rate", value_precision = 3, uncertainty_precision = 4):
    """Print a one-row LaTeX table of the chosen metric at top-k 1/5/10/20/50.

    The same cutoff list defines both the table columns and the values
    computed by ``latex_row``, so the two can never disagree.
    """
    columns = [1,5,10,20,50]
    row = latex_row(df,
                    table_name = table_name,
                    value_precision = value_precision,
                    uncertainty_precision = uncertainty_precision,
                    topks = columns)
    # DataFrame.append no longer exists in pandas >= 2.0; build the frame directly.
    table = pd.DataFrame([row], columns=columns)
    print (table.to_latex(escape=False))

# Get tuning result 
def get_best_metric_with_single_lambda(data_path, lambs, dataset_name = "yelp", keyphrase_selection_method = "random",topk = 20, return_all = False, top_affected = None):
    """Evaluate the tuning tables of several lambda values and pick the best.

    For each value in ``lambs`` the matching tuning CSV is loaded through
    ``load_dataframe_csv(data_path, table_name)`` and scored with
    ``get_2_metric`` at cutoff ``topk``. The CSV is looked up under
    ``tune/`` by default, or under ``tune_new_objective/topk_<top_affected>/``
    when ``top_affected`` is given.

    Returns
    -------
    ``(avg_lengths, suc_rates)`` for every lambda when ``return_all`` is
    true; otherwise ``(lambda, avg_length, suc_rate)`` for the lambda whose
    success-rate value (first tuple element) is largest.
    """
    def _table_for(lamb):
        if top_affected is None:
            return '../tables/'  + dataset_name +   '/tune/tuning_at_lamb_'+ str(lamb)+'_with_'+ keyphrase_selection_method+'.csv'
        return '../tables/'  + dataset_name +   '/tune_new_objective/topk_' + str(top_affected) + '/tuning_at_lamb_'+ str(lamb)+'_with_'+ keyphrase_selection_method+'.csv'

    per_lambda = [
        get_2_metric(load_dataframe_csv(data_path, _table_for(lamb)), topk = topk, print_result = False)
        for lamb in lambs
    ]
    avg_lengths = [pair[0] for pair in per_lambda]
    suc_rates = [pair[1] for pair in per_lambda]

    if return_all:
        return avg_lengths, suc_rates

    # Rank lambdas by the success-rate value only (its uncertainty is ignored).
    best = np.argmax([rate[0] for rate in suc_rates])
    return lambs[best], avg_lengths[best], suc_rates[best]

# Module-level list of top-k cutoffs that latex_row looks up by name;
# latex_table_row declares the same values locally for its table columns.
columns = [1,5,10,20,50]

In [10]:
def get_drop_index(df, topk = 20):
    """Return the index labels of rows that follow the first top-k hit.

    Rows are scanned in order. A row with ``iteration == 0`` resets the scan
    state (presumably the first turn of a new session -- confirm against the
    data). The first row after a reset whose ``item_rank`` is ``<= topk`` is
    kept; every later row up to the next reset is reported for removal.

    Rows are read positionally and reported by their index label, so the
    result can be passed straight to ``df.drop`` whatever index ``df``
    carries. For a default 0..n-1 index the labels equal the row positions.

    Parameters
    ----------
    df : DataFrame with ``iteration`` and ``item_rank`` columns.
    topk : rank cutoff that counts as a hit.

    Returns
    -------
    list of index labels to drop.
    """
    drop_index = []
    target_reached = False
    rows = zip(df.index.tolist(), df["iteration"].to_numpy(), df["item_rank"].to_numpy())
    for label, iteration, item_rank in rows:
        if iteration == 0:
            target_reached = False
        if target_reached:
            # A hit was already seen since the last reset: this row is surplus.
            drop_index.append(label)
        elif item_rank <= topk:
            # First hit since the last reset: keep this row, drop what follows.
            target_reached = True
    return drop_index

def change_target_rank(df,topk = 20):
    """Return a copy of ``df`` re-evaluated against the rank cutoff ``topk``.

    Rows listed by ``get_drop_index`` are removed and the index is rebuilt,
    ``target_rank`` is set to ``topk`` on every remaining row, and ``result``
    becomes "successful" wherever ``item_rank`` is at or below the target.
    Other ``result`` values are left untouched.
    """
    rows_to_remove = get_drop_index(df,topk = topk)
    modified_df = df.drop(rows_to_remove).reset_index(drop=True)
    modified_df['target_rank'] = topk
    reached = modified_df['item_rank'] <= modified_df['target_rank']
    modified_df.loc[reached, "result"] = "successful"
    return modified_df

In [11]:
def get_df_freq(df, precision = 1, theta_col = "theta"):
    """
    Count how often each value occurs across a whole column of list literals.

    Every cell of ``df[theta_col]`` is parsed with ``ast.literal_eval`` and
    is expected to evaluate to a sequence of numbers. Each number is rounded
    to ``precision`` decimals (or used unchanged when ``precision`` is None)
    and tallied. Cells are read in row order regardless of the frame's index.

    Returns a ``defaultdict(int)`` mapping value -> number of occurrences.
    """
    theta_freq = defaultdict(int)
    for raw_line in df[theta_col]:
        for value in literal_eval(raw_line):
            key = value if precision is None else round(value, precision)
            theta_freq[key] += 1
    return theta_freq

# Beer

In [49]:
# uac random
df = load_dataframe_csv("../tables/reproduce/beer/uac_random.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &               20 &                50 \\
\midrule
0 &  0.0171$\pm$0.023 &  0.0274$\pm$0.025 &  0.0686$\pm$0.037 &  0.106$\pm$0.041 &  0.2074$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &              1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  6.44$\pm$0.492 &  6.396$\pm$0.495 &  6.301$\pm$0.502 &  6.208$\pm$0.511 &  5.879$\pm$0.521 \\
\bottomrule
\end{tabular}



In [51]:
# uac diff
df = load_dataframe_csv("../tables/reproduce/beer/uac_diff.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0286$\pm$0.032 &  0.0528$\pm$0.035 &  0.0792$\pm$0.039 &  0.1093$\pm$0.042 &  0.2142$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  9.806$\pm$0.216 &  9.622$\pm$0.263 &  9.349$\pm$0.331 &  9.075$\pm$0.359 &  8.082$\pm$0.484 \\
\bottomrule
\end{tabular}



In [65]:
# uac diff with no duplicate keyphrase selection
df = load_dataframe_csv("../tables/reproduce/beer/uac_diff2.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &               5  &                10 &                20 &                50 \\
\midrule
0 &  0.0193$\pm$0.023 &  0.057$\pm$0.036 &  0.0794$\pm$0.039 &  0.1114$\pm$0.042 &  0.2152$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &             1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  9.836$\pm$0.2 &  9.585$\pm$0.279 &  9.344$\pm$0.331 &  9.059$\pm$0.358 &  8.079$\pm$0.484 \\
\bottomrule
\end{tabular}



In [61]:
# llc score pop
df = load_dataframe_csv("../tables/reproduce/beer/lprating_pop.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0126$\pm$0.013 &  0.0448$\pm$0.024 &  0.0875$\pm$0.039 &  0.1253$\pm$0.047 &  0.2325$\pm$0.053 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &              1  &              5  &               10 &               20 &               50 \\
\midrule
0 &  6.441$\pm$0.49 &  6.349$\pm$0.49 &  6.278$\pm$0.503 &  6.165$\pm$0.516 &  5.755$\pm$0.523 \\
\bottomrule
\end{tabular}



In [60]:
# llc_rank Random
df = load_dataframe_csv("../tables/reproduce/beer/llc_rank_lamb30_top50_random.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &               20 &              50 \\
\midrule
0 &  0.0123$\pm$0.013 &  0.0452$\pm$0.024 &  0.0578$\pm$0.026 &  0.1446$\pm$0.05 &  0.21$\pm$0.055 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &              10 &               20 &               50 \\
\midrule
0 &  6.455$\pm$0.487 &  6.393$\pm$0.488 &  6.365$\pm$0.49 &  6.168$\pm$0.509 &  5.939$\pm$0.514 \\
\bottomrule
\end{tabular}



# UAC

In [139]:
# random
df = load_dataframe_csv("../tables/beer/reproduce/uac_random.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &               20 &                50 \\
\midrule
0 &  0.0171$\pm$0.023 &  0.0274$\pm$0.025 &  0.0686$\pm$0.037 &  0.106$\pm$0.041 &  0.2074$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &              1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  6.44$\pm$0.492 &  6.396$\pm$0.495 &  6.301$\pm$0.502 &  6.208$\pm$0.511 &  5.879$\pm$0.521 \\
\bottomrule
\end{tabular}



In [140]:
# Diff
df = load_dataframe_csv("../tables/beer/reproduce/uac_diff.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0286$\pm$0.032 &  0.0528$\pm$0.035 &  0.0792$\pm$0.039 &  0.1093$\pm$0.042 &  0.2142$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  9.806$\pm$0.216 &  9.622$\pm$0.263 &  9.349$\pm$0.331 &  9.075$\pm$0.359 &  8.082$\pm$0.484 \\
\bottomrule
\end{tabular}



In [141]:
# Pop
df = load_dataframe_csv("../tables/beer/reproduce/uac.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0171$\pm$0.023 &  0.0281$\pm$0.025 &  0.0669$\pm$0.037 &  0.1036$\pm$0.041 &  0.2071$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &              1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  6.44$\pm$0.492 &  6.395$\pm$0.495 &  6.307$\pm$0.501 &  6.215$\pm$0.511 &  5.882$\pm$0.521 \\
\bottomrule
\end{tabular}



# BAC

In [142]:
# random
df = load_dataframe_csv("../tables/beer/reproduce/bac_random.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &               10 &                20 &                50 \\
\midrule
0 &  0.0114$\pm$0.013 &  0.0261$\pm$0.025 &  0.0556$\pm$0.03 &  0.0936$\pm$0.039 &  0.2008$\pm$0.053 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &              1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  6.446$\pm$0.49 &  6.396$\pm$0.495 &  6.315$\pm$0.498 &  6.217$\pm$0.511 &  5.895$\pm$0.521 \\
\bottomrule
\end{tabular}



In [143]:
# diff
df = load_dataframe_csv("../tables/beer/reproduce/bac_diff.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &               20 &                50 \\
\midrule
0 &  0.0114$\pm$0.013 &  0.0268$\pm$0.025 &  0.0543$\pm$0.031 &  0.0927$\pm$0.04 &  0.2089$\pm$0.053 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  9.897$\pm$0.116 &  9.758$\pm$0.224 &  9.513$\pm$0.275 &  9.184$\pm$0.343 &  8.122$\pm$0.481 \\
\bottomrule
\end{tabular}



In [144]:
# Pop
df = load_dataframe_csv("../tables/beer/reproduce/bac.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &               10 &               20 &                50 \\
\midrule
0 &  0.0114$\pm$0.013 &  0.0268$\pm$0.025 &  0.0556$\pm$0.03 &  0.0927$\pm$0.04 &  0.2022$\pm$0.053 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &              1  &               5  &               10 &               20 &              50 \\
\midrule
0 &  6.446$\pm$0.49 &  6.395$\pm$0.495 &  6.317$\pm$0.498 &  6.221$\pm$0.511 &  5.89$\pm$0.522 \\
\bottomrule
\end{tabular}



# LP-rating

In [145]:
# Random
df = load_dataframe_csv("../tables/beer/reproduce/lprating_random.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0114$\pm$0.013 &  0.0416$\pm$0.023 &  0.0656$\pm$0.033 &  0.1174$\pm$0.043 &  0.2152$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &              1  &               5  &               10 &               20 &              50 \\
\midrule
0 &  6.446$\pm$0.49 &  6.366$\pm$0.491 &  6.303$\pm$0.498 &  6.149$\pm$0.517 &  5.82$\pm$0.528 \\
\bottomrule
\end{tabular}



In [146]:
# diff
df = load_dataframe_csv("../tables/beer/reproduce/lprating_diff.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &               50 \\
\midrule
0 &  0.0114$\pm$0.013 &  0.0396$\pm$0.023 &  0.0634$\pm$0.033 &  0.1073$\pm$0.041 &  0.1967$\pm$0.05 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &              5  &               10 &               20 &               50 \\
\midrule
0 &  9.897$\pm$0.116 &  9.648$\pm$0.21 &  9.431$\pm$0.293 &  9.041$\pm$0.365 &  8.237$\pm$0.451 \\
\bottomrule
\end{tabular}



In [147]:
# Pop
df = load_dataframe_csv("../tables/beer/reproduce/lprating.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &               10 &               20 &                50 \\
\midrule
0 &  0.0114$\pm$0.013 &  0.0416$\pm$0.023 &  0.063$\pm$0.032 &  0.112$\pm$0.042 &  0.2171$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &              1  &              5  &               10 &             20 &               50 \\
\midrule
0 &  6.446$\pm$0.49 &  6.359$\pm$0.49 &  6.308$\pm$0.498 &  6.16$\pm$0.52 &  5.795$\pm$0.526 \\
\bottomrule
\end{tabular}



# RankSVM

## Tuning 

In [184]:
# optimal lamb = 30 (first element of the tuple returned below)
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "random",
                                   topk = 20, 
                                   return_all = False,
                                   top_affected=10)

(30,
 (6.235310786723426, 0.5349920881973466),
 (0.13514846309799314, 0.04706231352345508))

In [193]:
# Random
df = load_dataframe_csv("../tables/beer/test/rank_lamb30_top10_random.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &               5  &                10 &                20 &                50 \\
\midrule
0 &  0.0183$\pm$0.023 &  0.037$\pm$0.027 &  0.0713$\pm$0.037 &  0.1211$\pm$0.044 &  0.2247$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  6.431$\pm$0.492 &  6.381$\pm$0.498 &  6.295$\pm$0.501 &  6.139$\pm$0.517 &  5.794$\pm$0.518 \\
\bottomrule
\end{tabular}



In [185]:
# optimal lamb = 70 (first element of the tuple returned below)
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "diff",
                                   topk = 20, 
                                   return_all = False,
                                   top_affected=10)

(70,
 (8.649177236065063, 0.4376352030858543),
 (0.15368070972254602, 0.04899896741604101))

In [194]:
# Diff
df = load_dataframe_csv("../tables/beer/test/rank_lamb70_top10_diff.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &              5  &                10 &                20 &                50 \\
\midrule
0 &  0.0171$\pm$0.023 &  0.06$\pm$0.037 &  0.0784$\pm$0.039 &  0.1383$\pm$0.047 &  0.2323$\pm$0.053 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &              5  &               10 &               20 &               50 \\
\midrule
0 &  9.857$\pm$0.188 &  9.568$\pm$0.28 &  9.367$\pm$0.327 &  8.861$\pm$0.401 &  7.987$\pm$0.465 \\
\bottomrule
\end{tabular}



In [186]:
# optimal lamb = 30 (first element of the tuple returned below)
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "pop",
                                   topk = 20, 
                                   return_all = False,
                                   top_affected=10)

(30,
 (6.214540495054776, 0.5372531328711424),
 (0.13423350355549937, 0.04705994565288307))

In [195]:
# Pop
df = load_dataframe_csv("../tables/beer/test/rank_lamb30_top10_pop.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0181$\pm$0.023 &  0.0302$\pm$0.025 &  0.0687$\pm$0.037 &  0.1167$\pm$0.044 &  0.2245$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &              10 &               20 &               50 \\
\midrule
0 &  6.436$\pm$0.492 &  6.387$\pm$0.494 &  6.29$\pm$0.507 &  6.166$\pm$0.516 &  5.782$\pm$0.516 \\
\bottomrule
\end{tabular}



In [154]:
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "random",
                                   topk = 20, 
                                   return_all = False,
                                   top_affected=20)

(50,
 (6.255544750257476, 0.5367261415457383),
 (0.13447205899583065, 0.04707613186149233))

In [158]:
# Random
df = load_dataframe_csv("../tables/beer/test/rank_lamb50_top20_random.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &               50 \\
\midrule
0 &  0.0183$\pm$0.023 &  0.0366$\pm$0.026 &  0.0721$\pm$0.037 &  0.1224$\pm$0.044 &  0.227$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  6.431$\pm$0.492 &  6.399$\pm$0.497 &  6.298$\pm$0.501 &  6.136$\pm$0.518 &  5.796$\pm$0.518 \\
\bottomrule
\end{tabular}



In [156]:
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "diff",
                                   topk = 20, 
                                   return_all = False,
                                   top_affected=20)

(10,
 (8.644969780980881, 0.4376186993909842),
 (0.15368070972254602, 0.04899896741604101))

In [176]:
# Diff
df = load_dataframe_csv("../tables/beer/test/rank_lamb10_top20_diff.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &              5  &                10 &                20 &                50 \\
\midrule
0 &  0.0286$\pm$0.032 &  0.06$\pm$0.037 &  0.0773$\pm$0.039 &  0.1355$\pm$0.047 &  0.2306$\pm$0.053 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &              5  &               10 &               20 &              50 \\
\midrule
0 &  9.811$\pm$0.207 &  9.568$\pm$0.28 &  9.373$\pm$0.327 &  8.882$\pm$0.399 &  7.986$\pm$0.47 \\
\bottomrule
\end{tabular}



In [160]:
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "pop",
                                   topk = 20, 
                                   return_all = False,
                                   top_affected=20)

(30,
 (6.217617627294421, 0.5366630386985729),
 (0.13443290535011554, 0.047045578751891616))

In [169]:
# Pop
df = load_dataframe_csv("../tables/beer/test/rank_lamb30_top20_pop.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0181$\pm$0.023 &  0.0287$\pm$0.024 &  0.0687$\pm$0.037 &  0.1194$\pm$0.044 &  0.2252$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  6.436$\pm$0.492 &  6.408$\pm$0.494 &  6.293$\pm$0.507 &  6.163$\pm$0.516 &  5.795$\pm$0.516 \\
\bottomrule
\end{tabular}



In [187]:
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "random",
                                   topk = 20, 
                                   return_all = False,
                                   top_affected=50)

(30,
 (6.256079509615764, 0.5366992707073918),
 (0.13447205899583065, 0.04707613186149233))

In [59]:
# Random
df = load_dataframe_csv("../../RecSys20PaperBackups/Back_up_for_Recsys_Paper/ExperimentResults/beer/test_new_objective/rank_lamb30_top50_random.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0183$\pm$0.023 &  0.0375$\pm$0.027 &  0.0704$\pm$0.037 &  0.1204$\pm$0.044 &  0.2241$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &             5  &               10 &              20 &               50 \\
\midrule
0 &  6.431$\pm$0.492 &  6.4$\pm$0.497 &  6.304$\pm$0.501 &  6.151$\pm$0.52 &  5.819$\pm$0.519 \\
\bottomrule
\end{tabular}



In [188]:
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "diff",
                                   topk = 20, 
                                   return_all = False,
                                   top_affected=50)

(10,
 (8.65079111238482, 0.43558784480291385),
 (0.15413319841032883, 0.04897485747013321))

In [177]:
# Diff
df = load_dataframe_csv("../tables/beer/test/rank_lamb10_top50_diff.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0286$\pm$0.032 &  0.0577$\pm$0.037 &  0.0781$\pm$0.039 &  0.1414$\pm$0.047 &  0.2332$\pm$0.053 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &              20 &               50 \\
\midrule
0 &  9.811$\pm$0.207 &  9.588$\pm$0.273 &  9.365$\pm$0.326 &  8.83$\pm$0.402 &  7.975$\pm$0.464 \\
\bottomrule
\end{tabular}



In [189]:
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "pop",
                                   topk = 20, 
                                   return_all = False,
                                   top_affected=50)

(30,
 (6.218152386652709, 0.5366399199667495),
 (0.13443290535011554, 0.047045578751891616))

In [168]:
# Pop
df = load_dataframe_csv("../tables/beer/test/rank_lamb30_top50_pop.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0181$\pm$0.023 &  0.0308$\pm$0.024 &  0.0687$\pm$0.037 &  0.1174$\pm$0.044 &  0.2259$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &              50 \\
\midrule
0 &  6.436$\pm$0.492 &  6.408$\pm$0.494 &  6.294$\pm$0.507 &  6.171$\pm$0.515 &  5.79$\pm$0.516 \\
\bottomrule
\end{tabular}



In [190]:
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "random",
                                   topk = 20, 
                                   return_all = False,
                                   top_affected=100)

(5,
 (6.251623181630024, 0.5364245407792595),
 (0.1337367648781836, 0.047104610106343814))

In [197]:
# Random
df = load_dataframe_csv("../tables/beer/test/rank_lamb5_top100_random.csv","")
latex_table_row(df, table_name = "success_rate", value_precision = 4, uncertainty_precision = 3)
latex_table_row(df, table_name = "avg_length", value_precision = 3, uncertainty_precision = 3)

\begin{tabular}{llllll}
\toprule
{} &                1  &               5  &                10 &                20 &                50 \\
\midrule
0 &  0.0183$\pm$0.023 &  0.031$\pm$0.024 &  0.0689$\pm$0.037 &  0.1212$\pm$0.044 &  0.2255$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  6.431$\pm$0.492 &  6.414$\pm$0.495 &  6.306$\pm$0.501 &  6.153$\pm$0.519 &  5.809$\pm$0.517 \\
\bottomrule
\end{tabular}



In [191]:
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "diff",
                                   topk = 20, 
                                   return_all = False,
                                   top_affected=100)

(5,
 (8.65179718507584, 0.43544228898240894),
 (0.15413319841032883, 0.04897485747013321))

In [198]:
# Diff keyphrase selection: render the LaTeX rows for both metrics.
# The file name suggests lamb=5 / top 100 affected — confirm.
df = load_dataframe_csv(
    path="../tables/beer/test/rank_lamb5_top100_diff.csv",
    name="",
)
latex_table_row(
    df,
    table_name="success_rate",
    value_precision=4,
    uncertainty_precision=3,
)
latex_table_row(
    df,
    table_name="avg_length",
    value_precision=3,
    uncertainty_precision=3,
)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0286$\pm$0.032 &  0.0577$\pm$0.037 &  0.0781$\pm$0.039 &  0.1372$\pm$0.047 &  0.2291$\pm$0.053 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  9.811$\pm$0.207 &  9.588$\pm$0.273 &  9.378$\pm$0.316 &  8.874$\pm$0.393 &  8.012$\pm$0.466 \\
\bottomrule
\end{tabular}



In [192]:
# Lambda grid search for the "pop" keyphrase selection method on the beer
# dataset (topk=20, top_affected=100). The recorded output starts with 5,
# presumably the selected lambda — confirm against the function definition.
get_best_metric_with_single_lambda(
    data_path="",
    lambs=[
        0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30,
        50, 70, 90, 100, 200, 500, 1000, 10000, 100000,
    ],
    dataset_name="beer",
    keyphrase_selection_method="pop",
    topk=20,
    return_all=False,
    top_affected=100,
)

(5,
 (6.217817029089037, 0.5366865635289347),
 (0.13369761123246848, 0.04702990538792136))

In [175]:
# Pop keyphrase selection: render the LaTeX rows for both metrics.
# The file name suggests lamb=5 / top 100 affected — confirm.
df = load_dataframe_csv(
    path="../tables/beer/test/rank_lamb5_top100_pop.csv",
    name="",
)
latex_table_row(
    df,
    table_name="success_rate",
    value_precision=4,
    uncertainty_precision=3,
)
latex_table_row(
    df,
    table_name="avg_length",
    value_precision=3,
    uncertainty_precision=3,
)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0181$\pm$0.023 &  0.0315$\pm$0.024 &  0.0687$\pm$0.037 &  0.1174$\pm$0.044 &  0.2261$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  6.436$\pm$0.492 &  6.408$\pm$0.494 &  6.294$\pm$0.507 &  6.171$\pm$0.515 &  5.791$\pm$0.516 \\
\bottomrule
\end{tabular}



# New user keyphrase critiquing 

In [None]:
# Proposed change: use max(keyphrase_freq[user][q], keyphrase_freq[user].mean()).

In [200]:
# Diff keyphrase selection, results stored under tables/beer/test.
# The file name suggests lamb=10 / top 20 — confirm.
df = load_dataframe_csv(
    path="../tables/beer/test/rank_lamb10_top20_diff.csv",
    name="",
)
latex_table_row(
    df,
    table_name="success_rate",
    value_precision=4,
    uncertainty_precision=3,
)
latex_table_row(
    df,
    table_name="avg_length",
    value_precision=3,
    uncertainty_precision=3,
)

\begin{tabular}{llllll}
\toprule
{} &                1  &              5  &                10 &                20 &                50 \\
\midrule
0 &  0.0286$\pm$0.032 &  0.06$\pm$0.037 &  0.0773$\pm$0.039 &  0.1355$\pm$0.047 &  0.2306$\pm$0.053 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &              5  &               10 &               20 &              50 \\
\midrule
0 &  9.811$\pm$0.207 &  9.568$\pm$0.28 &  9.373$\pm$0.327 &  8.882$\pm$0.399 &  7.986$\pm$0.47 \\
\bottomrule
\end{tabular}



In [202]:
# Diff keyphrase selection.
# Variant: Max(user keyphrase freq, 1).
df = load_dataframe_csv(
    path="../tables/beer/debug/rank_lamb10_top20_diff_newkeyphraserepresentation.csv",
    name="",
)
latex_table_row(
    df,
    table_name="success_rate",
    value_precision=4,
    uncertainty_precision=3,
)
latex_table_row(
    df,
    table_name="avg_length",
    value_precision=3,
    uncertainty_precision=3,
)

\begin{tabular}{llllll}
\toprule
{} &                1  &              5  &                10 &                20 &                50 \\
\midrule
0 &  0.0286$\pm$0.032 &  0.06$\pm$0.037 &  0.0773$\pm$0.039 &  0.1355$\pm$0.047 &  0.2306$\pm$0.053 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &              5  &               10 &               20 &              50 \\
\midrule
0 &  9.811$\pm$0.207 &  9.568$\pm$0.28 &  9.375$\pm$0.327 &  8.882$\pm$0.399 &  7.986$\pm$0.47 \\
\bottomrule
\end{tabular}



In [203]:
# Diff keyphrase selection.
# Variant: Max(user keyphrase freq, mean freq).
# NOTE(review): this is the same CSV path that the
# "Max(user keyphrase freq, 1)" cell loads, yet the recorded numbers differ,
# so the file was presumably regenerated between the two runs. Consider
# writing each variant to its own file so both cells stay reproducible.
df = load_dataframe_csv(
    path="../tables/beer/debug/rank_lamb10_top20_diff_newkeyphraserepresentation.csv",
    name="",
)
latex_table_row(
    df,
    table_name="success_rate",
    value_precision=4,
    uncertainty_precision=3,
)
latex_table_row(
    df,
    table_name="avg_length",
    value_precision=3,
    uncertainty_precision=3,
)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &              20 &                50 \\
\midrule
0 &  0.0305$\pm$0.032 &  0.0608$\pm$0.037 &  0.0773$\pm$0.039 &  0.14$\pm$0.047 &  0.2357$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  9.806$\pm$0.207 &  9.563$\pm$0.281 &  9.375$\pm$0.327 &  8.846$\pm$0.399 &  7.966$\pm$0.474 \\
\bottomrule
\end{tabular}



In [212]:
# Diff keyphrase selection.
# Variant: Max(user keyphrase freq, mean freq), LPRank#2.
df = load_dataframe_csv(
    path="../tables/beer/debug/rank_lamb10_top20_diff_rank2.csv",
    name="",
)
latex_table_row(
    df,
    table_name="success_rate",
    value_precision=4,
    uncertainty_precision=3,
)
latex_table_row(
    df,
    table_name="avg_length",
    value_precision=3,
    uncertainty_precision=3,
)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &               20 &                50 \\
\midrule
0 &  0.0291$\pm$0.021 &  0.0557$\pm$0.027 &  0.0923$\pm$0.036 &  0.1718$\pm$0.05 &  0.2835$\pm$0.057 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  9.832$\pm$0.136 &  9.585$\pm$0.213 &  9.404$\pm$0.239 &  8.764$\pm$0.384 &  8.072$\pm$0.434 \\
\bottomrule
\end{tabular}



In [206]:
# Diff keyphrase selection, "lprating" results from the debug folder.
# Variant: Max(user keyphrase freq, mean freq).
df = load_dataframe_csv(
    path="../tables/beer/debug/lprating_diff.csv",
    name="",
)
latex_table_row(
    df,
    table_name="success_rate",
    value_precision=4,
    uncertainty_precision=3,
)
latex_table_row(
    df,
    table_name="avg_length",
    value_precision=3,
    uncertainty_precision=3,
)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &               10 &                20 &               50 \\
\midrule
0 &  0.0114$\pm$0.013 &  0.0396$\pm$0.023 &  0.065$\pm$0.033 &  0.1108$\pm$0.041 &  0.1967$\pm$0.05 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &              5  &               10 &               20 &               50 \\
\midrule
0 &  9.897$\pm$0.116 &  9.648$\pm$0.21 &  9.416$\pm$0.293 &  9.014$\pm$0.366 &  8.242$\pm$0.449 \\
\bottomrule
\end{tabular}



In [204]:
# Diff keyphrase selection, "lprating" results from the reproduce folder.
df = load_dataframe_csv(
    path="../tables/beer/reproduce/lprating_diff.csv",
    name="",
)
latex_table_row(
    df,
    table_name="success_rate",
    value_precision=4,
    uncertainty_precision=3,
)
latex_table_row(
    df,
    table_name="avg_length",
    value_precision=3,
    uncertainty_precision=3,
)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &               50 \\
\midrule
0 &  0.0114$\pm$0.013 &  0.0396$\pm$0.023 &  0.0634$\pm$0.033 &  0.1073$\pm$0.041 &  0.1967$\pm$0.05 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &              5  &               10 &               20 &               50 \\
\midrule
0 &  9.897$\pm$0.116 &  9.648$\pm$0.21 &  9.431$\pm$0.293 &  9.041$\pm$0.365 &  8.237$\pm$0.451 \\
\bottomrule
\end{tabular}



In [207]:
# Pop keyphrase selection, results stored under tables/beer/test.
# The file name suggests lamb=30 / top 20 — confirm.
df = load_dataframe_csv(
    path="../tables/beer/test/rank_lamb30_top20_pop.csv",
    name="",
)
latex_table_row(
    df,
    table_name="success_rate",
    value_precision=4,
    uncertainty_precision=3,
)
latex_table_row(
    df,
    table_name="avg_length",
    value_precision=3,
    uncertainty_precision=3,
)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0181$\pm$0.023 &  0.0287$\pm$0.024 &  0.0687$\pm$0.037 &  0.1194$\pm$0.044 &  0.2252$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &               10 &               20 &               50 \\
\midrule
0 &  6.436$\pm$0.492 &  6.408$\pm$0.494 &  6.293$\pm$0.507 &  6.163$\pm$0.516 &  5.795$\pm$0.516 \\
\bottomrule
\end{tabular}



In [208]:
# Pop keyphrase selection, results stored under tables/beer/debug.
# Variant: Max(user keyphrase freq, mean freq).
df = load_dataframe_csv(
    path="../tables/beer/debug/rank_lamb30_top20_pop.csv",
    name="",
)
latex_table_row(
    df,
    table_name="success_rate",
    value_precision=4,
    uncertainty_precision=3,
)
latex_table_row(
    df,
    table_name="avg_length",
    value_precision=3,
    uncertainty_precision=3,
)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0177$\pm$0.023 &  0.0589$\pm$0.035 &  0.0707$\pm$0.036 &  0.1375$\pm$0.046 &  0.2375$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &               1  &               5  &              10 &               20 &               50 \\
\midrule
0 &  6.449$\pm$0.489 &  6.362$\pm$0.502 &  6.27$\pm$0.506 &  6.134$\pm$0.507 &  5.758$\pm$0.514 \\
\bottomrule
\end{tabular}



In [209]:
# Pop keyphrase selection, "lprating" results from the reproduce folder.
# NOTE(review): unlike the other result files, this name carries no "_pop"
# suffix — confirm it really holds the pop-selection run.
df = load_dataframe_csv(
    path="../tables/beer/reproduce/lprating.csv",
    name="",
)
latex_table_row(
    df,
    table_name="success_rate",
    value_precision=4,
    uncertainty_precision=3,
)
latex_table_row(
    df,
    table_name="avg_length",
    value_precision=3,
    uncertainty_precision=3,
)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &               10 &               20 &                50 \\
\midrule
0 &  0.0114$\pm$0.013 &  0.0416$\pm$0.023 &  0.063$\pm$0.032 &  0.112$\pm$0.042 &  0.2171$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &              1  &              5  &               10 &             20 &               50 \\
\midrule
0 &  6.446$\pm$0.49 &  6.359$\pm$0.49 &  6.308$\pm$0.498 &  6.16$\pm$0.52 &  5.795$\pm$0.526 \\
\bottomrule
\end{tabular}



In [211]:
# Pop keyphrase selection, "lprating" results from the debug folder.
# Variant: Max(user keyphrase freq, mean freq).
df = load_dataframe_csv(
    path="../tables/beer/debug/lprating_pop.csv",
    name="",
)
latex_table_row(
    df,
    table_name="success_rate",
    value_precision=4,
    uncertainty_precision=3,
)
latex_table_row(
    df,
    table_name="avg_length",
    value_precision=3,
    uncertainty_precision=3,
)

\begin{tabular}{llllll}
\toprule
{} &                1  &                5  &                10 &                20 &                50 \\
\midrule
0 &  0.0126$\pm$0.013 &  0.0441$\pm$0.024 &  0.0714$\pm$0.033 &  0.1172$\pm$0.043 &  0.2327$\pm$0.054 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
{} &              1  &              5  &               10 &               20 &               50 \\
\midrule
0 &  6.445$\pm$0.49 &  6.354$\pm$0.49 &  6.299$\pm$0.499 &  6.182$\pm$0.512 &  5.735$\pm$0.526 \\
\bottomrule
\end{tabular}

