In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

In [2]:
def save_dataframe_csv(df, path, name):
    """Write ``df`` as CSV to the location ``path + name``.

    The two string pieces are concatenated verbatim (no separator is
    inserted), so ``path`` must already end with a path separator when
    ``name`` is a bare file name.  The row index is not written.
    """
    destination = path + name
    df.to_csv(destination, index=False)


def load_dataframe_csv(path, name, index_col=None):
    """Read the CSV located at ``path + name`` into a DataFrame.

    ``path`` and ``name`` are concatenated verbatim (no separator is
    inserted).  ``index_col`` is forwarded unchanged to ``pd.read_csv``.
    """
    source = path + name
    frame = pd.read_csv(source, index_col=index_col)
    return frame

In [3]:
def get_average_length(df, n):
    """Mean per-user ``iteration`` for rows with ``target_rank == n``.

    Only rows whose ``result`` is ``'successful'`` or ``'fail'`` are used.
    ``iteration`` is first averaged per ``user_id``; the return value is the
    tuple ``(mean over users, 1.96 * std / sqrt(number of users))`` where
    ``std`` is numpy's default (population) standard deviation.
    """
    outcome_mask = df['result'].isin(['successful', 'fail'])
    rank_mask = df['target_rank'] == n
    per_user = df[outcome_mask & rank_mask].groupby('user_id')['iteration'].mean()
    iteration = per_user.to_numpy()
    half_width = 1.96*np.std(iteration)/np.sqrt(len(iteration))
    return (np.average(iteration), half_width)

def get_success_num(df, n):
    """Count the rows with ``result == 'successful'`` and ``target_rank == n``."""
    is_success = df['result'] == 'successful'
    at_rank = df['target_rank'] == n
    return int((is_success & at_rank).sum())

def get_fail_num(df, n):
    """Count the rows with ``result == 'fail'`` and ``target_rank == n``."""
    is_fail = df['result'] == 'fail'
    at_rank = df['target_rank'] == n
    return int((is_fail & at_rank).sum())

def get_success_rate(df, n):
    """Mean per-user success rate for rows with ``target_rank == n``.

    Only rows whose ``result`` is ``'successful'`` or ``'fail'`` are used.
    For each ``user_id`` the rate is the fraction of that user's rows whose
    ``result`` is ``'successful'``.

    Returns:
        tuple: ``(mean rate over users, 1.96 * std / sqrt(number of users))``
        where ``std`` is numpy's default (population) standard deviation.
    """
    finished = df[df['result'].isin(['successful', 'fail']) & (df['target_rank'] == n)]
    # The mean of a boolean Series per group is the fraction of True values,
    # i.e. successes / attempts for each user.  This avoids the previous
    # groupby(as_index=False)...apply(list).reset_index(name=...) chain, whose
    # return type depends on the pandas version (on versions where apply
    # returns a DataFrame, reset_index rejects the `name` keyword).
    is_success = finished['result'] == 'successful'
    successful_rate = is_success.groupby(finished['user_id']).mean().to_numpy()
    return (np.average(successful_rate), 1.96*np.std(successful_rate)/np.sqrt(len(successful_rate)))


In [28]:
def get_2_metric(df, topk = 20, print_result = True):
    """Report the average-length and success-rate metrics at rank ``topk``.

    When ``print_result`` is true, both metrics are printed and ``None`` is
    returned.  Otherwise nothing is printed and the pair
    ``(get_average_length(df, topk), get_success_rate(df, topk))`` is
    returned.
    """
    if not print_result:
        length_stats = get_average_length(df, topk)
        rate_stats = get_success_rate(df, topk)
        return length_stats, rate_stats

    print('avg_length: ', get_average_length(df, topk))
    print('suc_rate: ', get_success_rate(df, topk))
    return None

In [31]:
def get_best_metric_with_single_lambda(data_path, lambs, topk = 20, return_all = False):
    """Evaluate one tuning table per lambda and pick the best by success rate.

    For every value in ``lambs`` the file
    ``data_path + 'tuning_lamb_<lamb>.csv'`` is loaded and both metrics are
    computed at rank ``topk`` via ``get_2_metric``.

    Returns:
        If ``return_all`` is true: ``(avg_lengths, suc_rates)``, two lists
        aligned with ``lambs``.
        Otherwise: ``(best_lambda, avg_length, suc_rate)`` for the lambda
        whose success-rate mean (first element of its tuple) is largest;
        ``np.argmax`` keeps the first one on ties.
    """
    metrics = [
        get_2_metric(
            load_dataframe_csv(data_path, 'tuning_lamb_' + str(lamb) + '.csv'),
            topk=topk,
            print_result=False,
        )
        for lamb in lambs
    ]
    avg_lengths = [length for length, _ in metrics]
    suc_rates = [rate for _, rate in metrics]

    if return_all:
        return avg_lengths, suc_rates

    # Rank lambdas by the mean success rate only (element 0 of each tuple).
    best = np.argmax([rate[0] for rate in suc_rates])
    return lambs[best], avg_lengths[best], suc_rates[best]

In [32]:
# Load the table stored at table_path and print both metrics for target rank 50.
# table_path already contains the full file path, so the name argument is left
# empty (load_dataframe_csv simply concatenates path + name).
table_path = '../tables/critiquing/multi_step_critiquing/beer/avg/avg_25user.csv'
df = load_dataframe_csv(table_path,"")     
get_2_metric(df,topk = 50)

avg_length:  (15.271784016636957, 2.284562766322932)
suc_rate:  (0.30891902215431627, 0.1422322315874033)


In [33]:
# Among the listed lambdas, pick the one whose tuning table gives the highest
# mean success rate at target rank 50. The cell output is the tuple
# (best lambda, avg_length tuple, suc_rate tuple).
get_best_metric_with_single_lambda(data_path='../tables/critiquing/multi_step_critiquing/beer/ranksvm3/',
                                   lambs = [0.001,0.01,0.1,0.5,1,10,30,50,70,90,100,1000],
                                   topk=50,
                                   return_all=False)

(1,
 (15.140380697733638, 2.2842677996023304),
 (0.294980264833206, 0.13289815128618654))