In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import math

In [2]:
def save_dataframe_csv(df, path, name):
    """Write ``df`` as CSV to ``path + name``, leaving out the index column.

    ``path`` and ``name`` are joined by plain string concatenation, so no
    separator is inserted between them.
    """
    destination = path + name
    df.to_csv(destination, index=False)

def load_dataframe_csv(path, name, index_col=None):
    """Read the CSV at ``path + name`` and return it as a DataFrame.

    ``path`` and ``name`` are concatenated as-is, so a caller may pass the
    complete file path as ``path`` together with an empty ``name``.
    ``index_col`` is forwarded unchanged to ``pd.read_csv``.
    """
    source = path + name
    return pd.read_csv(source, index_col=index_col)

In [207]:
def get_average_length(df, n):
    """Mean per-user ``iteration`` at ``target_rank == n`` with a CI half-width.

    Only rows whose ``result`` is ``'successful'`` or ``'fail'`` are used.
    The ``iteration`` values of those rows are averaged per ``user_id``
    first, then across users.

    Returns a tuple ``(mean, half_width)`` where ``half_width`` is
    ``1.96 * std / sqrt(number_of_users)`` and ``std`` is ``np.std`` with its
    default settings (population standard deviation).
    """
    is_terminal = df['result'].isin(['successful', 'fail'])
    finished = df[is_terminal]
    at_rank = finished[finished['target_rank'] == n]

    # One value per user, so every user carries the same weight.
    per_user = at_rank.groupby('user_id')['iteration'].mean().to_numpy()

    mean_length = np.average(per_user)
    half_width = 1.96 * np.std(per_user) / np.sqrt(len(per_user))
    return (mean_length, half_width)

def get_success_num(df, n):
    """Count the rows with ``result == 'successful'`` and ``target_rank == n``."""
    hits = (df['target_rank'] == n) & (df['result'] == 'successful')
    return int(hits.sum())

def get_fail_num(df, n):
    """Count the rows with ``result == 'fail'`` and ``target_rank == n``."""
    misses = (df['target_rank'] == n) & (df['result'] == 'fail')
    return int(misses.sum())

def get_success_rate(df, n):
    """Mean per-user success rate at ``target_rank == n`` with a CI half-width.

    Only rows whose ``result`` is ``'successful'`` or ``'fail'`` are counted.
    For every ``user_id`` the rate is (#successful rows) / (#counted rows);
    the per-user rates are then averaged across users.

    The per-user rate is computed as a group mean over a 0/1 column rather
    than ``groupby(..., as_index=False)[...].apply(list).reset_index(name=...)``.
    That chain only works when ``apply`` hands back a Series; when it hands
    back a DataFrame, ``reset_index`` rejects the ``name`` keyword.

    Returns a tuple ``(mean_rate, half_width)`` where ``half_width`` is
    ``1.96 * std / sqrt(number_of_users)`` and ``std`` is ``np.std`` with its
    default settings (population standard deviation). Returns
    ``(nan, nan)`` when no row matches.
    """
    terminal = df[df['result'].isin(['successful', 'fail'])]
    at_rank = terminal[terminal['target_rank'] == n]

    # 1.0 for a success, 0.0 for a failure; the group mean is the user's rate.
    succeeded = (at_rank['result'] == 'successful').astype(float)
    successful_rate = succeeded.groupby(at_rank['user_id']).mean().to_numpy()

    if len(successful_rate) == 0:
        # Nothing to average; avoid numpy's empty-mean / divide-by-zero warnings.
        return (np.nan, np.nan)

    return (np.average(successful_rate), 1.96*np.std(successful_rate)/np.sqrt(len(successful_rate)))



In [216]:
def get_2_metric(df, topk = 1, print_result = True):
    """Compute average length and success rate for ``df`` at cut-off ``topk``.

    When ``topk`` is not 1 the frame is first passed through
    ``change_target_rank``; for ``topk == 1`` it is used unchanged.

    With ``print_result`` true, both metrics are printed and ``None`` is
    returned. Otherwise the tuple
    ``(get_average_length(...), get_success_rate(...))`` is returned.
    """
    scored = df if topk == 1 else change_target_rank(df, topk=topk)

    if not print_result:
        return get_average_length(scored, topk), get_success_rate(scored, topk)

    print ('avg_length: ', get_average_length(scored, topk))
    print ('suc_rate: ', get_success_rate(scored, topk))
    return

In [160]:
def get_best_metric_with_single_lambda(data_path, lambs, dataset_name = "yelp", keyphrase_selection_method = "random",topk = 20, return_all = False):
    """Evaluate one tuning table per lambda and pick the best success rate.

    For each value in ``lambs`` the table
    ``../tables/tuning_lambda/<dataset>/tuning_<dataset>_at_lamb_<lambda>_with_<method>.csv``
    (prefixed with ``data_path``) is loaded and scored with
    ``get_2_metric(..., topk=topk, print_result=False)``.

    Returns ``(avg_lengths, suc_rates)``, two lists aligned with ``lambs``,
    when ``return_all`` is true. Otherwise returns
    ``(best_lambda, avg_length, suc_rate)`` for the lambda whose success-rate
    mean (first tuple element) is largest; ``np.argmax`` keeps the first
    one on ties.
    """
    per_lambda = []
    for lamb in lambs:
        table_name = (
            '../tables/tuning_lambda/' + dataset_name
            + '/tuning_' + dataset_name
            + '_at_lamb_' + str(lamb)
            + '_with_' + keyphrase_selection_method + '.csv'
        )
        frame = load_dataframe_csv(data_path, table_name)
        per_lambda.append(get_2_metric(frame, topk = topk, print_result = False))

    avg_lengths = [length for length, _ in per_lambda]
    suc_rates = [rate for _, rate in per_lambda]

    if return_all:
        return avg_lengths, suc_rates

    # Rank the lambdas by mean success rate only; the CI half-width is ignored.
    best = np.argmax([rate[0] for rate in suc_rates])
    return lambs[best], avg_lengths[best], suc_rates[best]

In [231]:
def get_drop_index(df, topk = 20):
    """Return the index labels of rows that follow the first top-k hit.

    The frame is scanned in row order. A row with ``iteration == 0`` starts
    a new run. Within a run, the first row with ``item_rank < topk`` is
    kept, and every later row of that run is reported for dropping.

    Rows are walked positionally and their actual index labels are
    returned, so the result can be passed to ``df.drop`` even when ``df``
    does not carry a default 0..n-1 index (for example after filtering).
    For a default index the labels are the same integers as the positions.
    """
    iterations = df["iteration"].to_numpy()
    item_ranks = df["item_rank"].to_numpy()

    drop_index = []
    target_reached = False
    for label, iteration, item_rank in zip(df.index.tolist(), iterations, item_ranks):
        if iteration == 0:
            # New run: forget any hit seen in the previous one.
            target_reached = False
        if item_rank < topk and not target_reached:
            # First hit of this run; this row itself is kept.
            target_reached = True
        elif target_reached:
            drop_index.append(label)
    return drop_index
def change_target_rank(df,topk = 20):
    """Return a copy of ``df`` re-scored for a top-``topk`` target.

    Rows listed by ``get_drop_index`` are removed and the index is reset.
    ``target_rank`` is then set to ``topk`` on every remaining row, and
    every row with ``item_rank < topk`` gets ``result = "successful"``.
    The input frame is not modified.
    """
    rows_to_drop = get_drop_index(df, topk = topk)
    modified_df = df.drop(rows_to_drop).reset_index(drop=True)
    modified_df['target_rank'] = topk

    # Mark all hits in one masked assignment.
    reached = modified_df['item_rank'] < modified_df['target_rank']
    modified_df.loc[reached, "result"] = "successful"
    return modified_df

In [135]:
# Load the beer tuning table for lambda = 1 with "diff" keyphrase selection
# into the notebook-level `df`.
dataset_name = "beer"
lamb = 1
keyphrase_selection_method = "diff"
table_path = '../tables/tuning_lambda/'+dataset_name+'/tuning_'+dataset_name+'_at_lamb_'+ str(lamb)+'_with_'+ keyphrase_selection_method+'.csv'
# The complete path is passed as `path`; `name` is left empty.
df = load_dataframe_csv(table_path,"")     
# get_2_metric(df,topk = 20)

In [146]:
# Replace the notebook-level `df` with the beer 25-user sample table.
table_path = "../tables/reproducing/beer/lp1simplified_top100_sample_25users.csv"
df = load_dataframe_csv(table_path,"")    

In [187]:
# Replace the notebook-level `df` with the beer 50-user "random" table.
table_path = "../tables/reproducing/beer/lp1simplified_top100_50users_random.csv"
df = load_dataframe_csv(table_path,"")    

In [151]:
# "new rating / random" run: top-1 and top-20 metrics for the current `df`.
print ("top1 result:")
get_2_metric(df, topk = 1, print_result= True)
print ('top20 result:')
# `df_20` becomes a notebook-level name here. get_2_metric applies
# change_target_rank again whenever topk != 1, so the frame is re-targeted
# a second time inside the call below.
df_20 = change_target_rank(df,topk=20)
get_2_metric(df_20, topk = 20, print_result= True)

top1 result:
avg_length:  (19.74962962962963, 0.29107414958393807)
suc_rate:  (0.022623456790123456, 0.020989635923557414)
top20 result:
avg_length:  (17.746574074074076, 1.1476013615327938)
suc_rate:  (0.1270356803690137, 0.0616629059833558)


In [167]:
# average / random: replace the notebook-level `df` with the beer 25-user
# average table.
table_path = "../tables/reproducing/beer/avg_top100_sample_25users.csv"
df = load_dataframe_csv(table_path,"")  

In [168]:
 print ("top1 result:")
get_2_metric(df, topk = 1, print_result= True)
print ('top20 result:')
get_2_metric(df_20, topk = 20, print_result= True)

top1 result:
avg_length:  (19.958333333333332, 0.07994717584709247)
suc_rate:  (0.005208333333333333, 0.009993396980886561)
top20 result:
avg_length:  (17.770470787382553, 1.0594595506005924)
suc_rate:  (0.17312225719088464, 0.07581358661361602)


In [169]:
# RankSVM3 / random: load the beer tuning table for lambda = 0.01 with
# "random" keyphrase selection into the notebook-level `df`.
dataset_name = "beer"
lamb = 0.01
keyphrase_selection_method = "random"
table_path = '../tables/tuning_lambda/'+dataset_name+'/tuning_'+dataset_name+'_at_lamb_'+ str(lamb)+'_with_'+ keyphrase_selection_method+'.csv'
# The complete path is passed as `path`; `name` is left empty.
df = load_dataframe_csv(table_path,"")     

In [170]:
 print ("top1 result:")
get_2_metric(df, topk = 1, print_result= True)
print ('top20 result:')
get_2_metric(df_20, topk = 20, print_result= True)

top1 result:
avg_length:  (19.92509259259259, 0.09356016847696566)
suc_rate:  (0.00558641975308642, 0.006657888556071205)
top20 result:
avg_length:  (17.770470787382553, 1.0594595506005924)
suc_rate:  (0.17312225719088464, 0.07581358661361602)


In [137]:
# Print the top-1 metrics for the current notebook-level `df`.
# NOTE(review): this cell carries execution count 137, lower than the cells
# placed above it, so the `df` it ran against is presumably not the one a
# top-to-bottom run would provide here. Confirm which table is intended.
get_2_metric(df, topk = 1, print_result= True)

avg_length:  (19.308145135547093, 0.510198578509671)
suc_rate:  (0.04709948366811111, 0.033635644108119644)


In [140]:
# Re-target the current `df` to top-20 and print its metrics.
# get_2_metric applies change_target_rank again whenever topk != 1, so `df_20`
# is re-targeted a second time inside the call.
df_20 = change_target_rank(df,topk=20)
get_2_metric(df_20, topk = 20, print_result= True)

avg_length:  (16.003874304707637, 1.4707664579856916)
suc_rate:  (0.22928394408786568, 0.08431886915092991)


In [141]:
# Load the beer tuning table for lambda = 1 with "random" keyphrase selection
# into the notebook-level `df`.
dataset_name = "beer"
lamb = 1
keyphrase_selection_method = "random"
table_path = '../tables/tuning_lambda/'+dataset_name+'/tuning_'+dataset_name+'_at_lamb_'+ str(lamb)+'_with_'+ keyphrase_selection_method+'.csv'
# The complete path is passed as `path`; `name` is left empty.
df = load_dataframe_csv(table_path,"")

In [142]:
# Print the top-1 metrics for the current notebook-level `df`.
# NOTE(review): execution count 142 is lower than that of cells placed
# earlier in the file, so the recorded output depends on run order.
get_2_metric(df, topk = 1, print_result= True)

avg_length:  (19.870587437254105, 0.14267671270303392)
suc_rate:  (0.018618572785239453, 0.02253631479002173)


In [143]:
# Re-target the current `df` to top-20 and print its metrics.
# get_2_metric applies change_target_rank again whenever topk != 1, so `df_20`
# is re-targeted a second time inside the call.
df_20 = change_target_rank(df,topk=20)
get_2_metric(df_20, topk = 20, print_result= True)

avg_length:  (18.15803243053243, 1.0057396691448985)
suc_rate:  (0.10936757686757687, 0.05489371263025239)


# Yelp

## Random

### Avg

In [236]:
# average / random: replace the notebook-level `df` with the yelp 50-user
# average table.
table_path = "../tables/reproducing/yelp/average_50_users_random.csv"
df = load_dataframe_csv(table_path,"")   

In [237]:
# avg / random: print both metrics for the current `df` at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (19.03559877104511, 0.3572120449980587)
suc_rate:  (0.015840222299200627, 0.009591226262474677)
top5 result:
avg_length:  (18.60254403037814, 0.37723308447177045)
suc_rate:  (0.0551611808861551, 0.01883349549045509)
top10 result:
avg_length:  (18.15496984411071, 0.465279928173111)
suc_rate:  (0.0845436633942826, 0.024319431068433355)
top20 result:
avg_length:  (17.453566172962457, 0.5241739324116091)
suc_rate:  (0.13157701856489268, 0.028787120650399345)
top50 result:
avg_length:  (16.14465360276119, 0.6586009718201147)
suc_rate:  (0.21532475636964804, 0.03851832752271652)


### Rating 

In [234]:
# Replace the notebook-level `df` with the yelp 50-user "random" table.
table_path = "../tables/reproducing/yelp/lp1simplified_top100_50users_random.csv"
df = load_dataframe_csv(table_path,"")   

In [235]:
# new rating / random: print both metrics for the current `df` at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (19.028766511600622, 0.3384511962364016)
suc_rate:  (0.011059850643183978, 0.008876222642763183)
top5 result:
avg_length:  (18.880373392374167, 0.3470727930073836)
suc_rate:  (0.022017180350513685, 0.012945332297826436)
top10 result:
avg_length:  (18.624498653078376, 0.3991413461197096)
suc_rate:  (0.03837325118026873, 0.019772108658030165)
top20 result:
avg_length:  (18.13328310793094, 0.5137145355516685)
suc_rate:  (0.06651036776165775, 0.023884815401298304)
top50 result:
avg_length:  (17.177852856780877, 0.6105745073551999)
suc_rate:  (0.12241955522786277, 0.034405594316651876)


### RankSVM3

In [208]:
# Score the yelp / "random" tuning table of every lambda listed below at
# topk = 20. With return_all = False the call yields
# (lambda, avg_length, suc_rate) for the highest mean success rate.
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "yelp", 
                                   keyphrase_selection_method = "random",
                                   topk = 20, 
                                   return_all = False)

(100000,
 (17.389133431354796, 0.499215381635217),
 (0.16223344098473091, 0.04137193257843606))

In [227]:
# Replace the notebook-level `df` with the yelp / "random" tuning table for
# lambda = 30.
# NOTE(review): the lambda search recorded just above this cell reports
# 100000 as the best lambda for yelp / random at topk = 20, yet this loads
# the lambda = 30 table. Confirm which one is intended.
table_path = "../tables/tuning_lambda/yelp/tuning_yelp_at_lamb_30_with_random.csv"
df = load_dataframe_csv(table_path,"")   

In [233]:
# ranksvm3 / random: print both metrics for the current `df` at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (19.034609094943203, 0.33549197179709683)
suc_rate:  (0.022188999272332605, 0.012952897978773284)
top5 result:
avg_length:  (18.52658623330192, 0.38175761171278694)
suc_rate:  (0.06590186454221543, 0.0256616527244365)
top10 result:
avg_length:  (18.103375154257506, 0.44793462922060046)
suc_rate:  (0.090069070792755, 0.031621431885388136)
top20 result:
avg_length:  (17.36721675455035, 0.5360957700454426)
suc_rate:  (0.14679590824456665, 0.035741131818330704)
top50 result:
avg_length:  (15.821626805474844, 0.7154176801839158)
suc_rate:  (0.25878101731429903, 0.052692963315011165)


## Diff

### Avg

In [247]:
# average / diff: load the yelp 50-user "diff" average table into `df`.
table_path = "../tables/reproducing/yelp/average_50_users_diff.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (18.812405262781745, 0.3302589191939282)
suc_rate:  (0.10025091722451729, 0.028769890927680498)
top5 result:
avg_length:  (17.469156477253545, 0.5618742525336571)
suc_rate:  (0.1725893382055046, 0.04168937445645123)
top10 result:
avg_length:  (16.641559414060755, 0.6625164541548163)
suc_rate:  (0.22978683575626296, 0.04578745671961498)
top20 result:
avg_length:  (15.549882405055126, 0.7904222340790882)
suc_rate:  (0.29514030155729915, 0.04986112295320664)
top50 result:
avg_length:  (13.676527731851461, 0.9619422832228972)
suc_rate:  (0.39613395524100187, 0.05741273752075427)


### Rating

In [261]:
# rating / diff: load the yelp 50-user "diff" table into `df`.
table_path = "../tables/reproducing/yelp/lp1simplified_top100_50users_diff.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (19.69089649072865, 0.18087670240336048)
suc_rate:  (0.023072894065372863, 0.013403120863550031)
top5 result:
avg_length:  (19.326414132720082, 0.3118213441873929)
suc_rate:  (0.04504713057765285, 0.020165883247737514)
top10 result:
avg_length:  (18.869447096133477, 0.4238667770155522)
suc_rate:  (0.08097138144755589, 0.02678793192479579)
top20 result:
avg_length:  (18.145935761808524, 0.5495414647468356)
suc_rate:  (0.11822160990622423, 0.034817509020406645)
top50 result:
avg_length:  (17.053157619589847, 0.687032093938541)
suc_rate:  (0.1754022981737008, 0.03968158044397756)


### RankSVM3

# Beer

## Random

### Avg

In [259]:
# average / random: load the beer 50-user "random" average table into `df`.
table_path = "../tables/reproducing/beer/avg_50users_random.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (19.757354497354495, 0.2779927766784297)
suc_rate:  (0.019944755680049796, 0.02258021880252023)
top5 result:
avg_length:  (19.5425, 0.3305248261097751)
suc_rate:  (0.04490948231144309, 0.032302378695209445)
top10 result:
avg_length:  (19.12864197530864, 0.49555452117671855)
suc_rate:  (0.0627489884842826, 0.03415633080480409)
top20 result:
avg_length:  (18.462072491778372, 0.5792931262511614)
suc_rate:  (0.12167948065006888, 0.04664426913400822)
top50 result:
avg_length:  (17.54504050234442, 0.7749913409029133)
suc_rate:  (0.17959713670497984, 0.059711756932172046)


### Rating

In [238]:
# rating / random: load the beer 50-user "random" table into `df`.
table_path = "../tables/reproducing/beer/lp1simplified_top100_50users_random.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (19.74962962962963, 0.29107414958393807)
suc_rate:  (0.022623456790123456, 0.020989635923557414)
top5 result:
avg_length:  (18.816529982363313, 0.9036814563363051)
suc_rate:  (0.06635361552028218, 0.05151346941071546)
top10 result:
avg_length:  (18.414528218694883, 1.0432722209167709)
suc_rate:  (0.08388447971781306, 0.054973102609672884)
top20 result:
avg_length:  (17.746574074074076, 1.1476013615327938)
suc_rate:  (0.1270356803690137, 0.0616629059833558)
top50 result:
avg_length:  (16.975411968206085, 1.1800766837586478)
suc_rate:  (0.1723705180077729, 0.0629333496347026)


### RankSVM3

In [242]:
# Score the beer / "random" tuning table of every lambda listed below at
# topk = 20. With return_all = False the call yields
# (lambda, avg_length, suc_rate) for the highest mean success rate.
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "random",
                                   topk = 20, 
                                   return_all = False)

(0.01,
 (17.770470787382553, 1.0594595506005924),
 (0.17312225719088464, 0.07581358661361602))

In [240]:
# ranksvm3 / random: load the beer tuning table for lambda = 0.01 into `df`.
table_path = "../tables/tuning_lambda/beer/tuning_beer_at_lamb_0.01_with_random.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (19.92509259259259, 0.09356016847696566)
suc_rate:  (0.00558641975308642, 0.006657888556071205)
top5 result:
avg_length:  (19.250091451758117, 0.7464982051921484)
suc_rate:  (0.06044437044437044, 0.04870071177690609)
top10 result:
avg_length:  (18.499294686794688, 1.007070361355241)
suc_rate:  (0.10716259049592382, 0.06418012749394501)
top20 result:
avg_length:  (17.770470787382553, 1.0594595506005924)
suc_rate:  (0.17312225719088464, 0.07581358661361602)
top50 result:
avg_length:  (15.926921571529418, 1.3104969760739227)
suc_rate:  (0.27958704512626076, 0.08501716689840598)


## Diff

### Avg

In [260]:
# average / diff: load the beer 50-user "diff" average table into `df`.
table_path = "../tables/reproducing/beer/avg_50users_diff.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (18.981453330619996, 0.7190037785221393)
suc_rate:  (0.07821292904626237, 0.053722202011929204)
top5 result:
avg_length:  (17.312430926009355, 1.3441390819310814)
suc_rate:  (0.17424553478475044, 0.0864047434366746)
top10 result:
avg_length:  (16.426035957506546, 1.5095761942759542)
suc_rate:  (0.22156556043810943, 0.08479871488892018)
top20 result:
avg_length:  (15.37963706953903, 1.6271043925616488)
suc_rate:  (0.3014591853317343, 0.09681698783849414)
top50 result:
avg_length:  (13.23296066315674, 1.626929072483989)
suc_rate:  (0.4003343497461144, 0.09134749284494897)


### Rating

In [262]:
# rating / diff: load the beer 50-user "diff" table into `df`.
table_path = "../tables/reproducing/beer/lp1simplified_top100_50users_diff.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (19.784197530864194, 0.2891664582151433)
suc_rate:  (0.011358024691358024, 0.01521928727448122)
top5 result:
avg_length:  (18.845101410934745, 0.6689630742150717)
suc_rate:  (0.08013227513227514, 0.05432908711783081)
top10 result:
avg_length:  (18.30531746031746, 1.0395149608590142)
suc_rate:  (0.09081128747795415, 0.0548829961707191)
top20 result:
avg_length:  (17.59619488536155, 1.1588406332651495)
suc_rate:  (0.1290299823633157, 0.0614884697126393)
top50 result:
avg_length:  (16.925220717916797, 1.1975878514748912)
suc_rate:  (0.16381600788463535, 0.06354448635932486)


### RankSVM3

In [248]:
# Score the beer / "diff" tuning table of every lambda listed below at
# topk = 20. With return_all = False the call yields
# (lambda, avg_length, suc_rate) for the highest mean success rate.
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "beer", 
                                   keyphrase_selection_method = "diff",
                                   topk = 20, 
                                   return_all = False)

(0.0001,
 (15.759583361446104, 1.5417415098045595),
 (0.23833228137149703, 0.08512265198286892))

In [246]:
# ranksvm3 / diff: load the beer tuning table for lambda = 0.0001 into `df`.
table_path = "../tables/tuning_lambda/beer/tuning_beer_at_lamb_0.0001_with_diff.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (19.00496204412871, 0.7102924877807976)
suc_rate:  (0.08116280596672754, 0.06397039354138734)
top5 result:
avg_length:  (17.597803461970127, 1.1630273528838047)
suc_rate:  (0.15395452949374516, 0.07644532943557877)
top10 result:
avg_length:  (16.913637504470838, 1.3778430179926433)
suc_rate:  (0.1794888408613899, 0.07794923950722438)
top20 result:
avg_length:  (15.759583361446104, 1.5417415098045595)
suc_rate:  (0.23833228137149703, 0.08512265198286892)
top50 result:
avg_length:  (13.755153258143455, 1.6641026938409798)
suc_rate:  (0.35802075012859325, 0.09329036252246106)


# CD 

## Random

### Average

In [263]:
# average / random: load the cd 50-user "random" average table into `df`.
table_path = "../tables/reproducing/cd/avg_50users_random.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (15.330252525252524, 0.9154090905344333)
suc_rate:  (0.02, 0.02715855666268)
top5 result:
avg_length:  (14.565560217560218, 1.0099158700747741)
suc_rate:  (0.06098290598290598, 0.04143737957985024)
top10 result:
avg_length:  (13.179090132090131, 1.0861944539214539)
suc_rate:  (0.1436153846153846, 0.05652049994804877)
top20 result:
avg_length:  (11.815252525252527, 1.2129030520469177)
suc_rate:  (0.2342820512820513, 0.076645945997584)
top50 result:
avg_length:  (9.189067599067599, 1.2666736042577258)
suc_rate:  (0.43297824397824397, 0.09498761451865143)


### Rating

In [265]:
# rating / random: load the cd "random" table into `df`.
table_path = "../tables/reproducing/cd/lp1simplified_top100_random.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (14.728030303030302, 0.9805020354671313)
suc_rate:  (0.04488888888888889, 0.032916648249033766)
top5 result:
avg_length:  (13.20812432012432, 1.124248430608588)
suc_rate:  (0.15496581196581197, 0.06096709454739042)
top10 result:
avg_length:  (12.257836829836831, 1.2004344612045517)
suc_rate:  (0.19645687645687646, 0.06751396921668301)
top20 result:
avg_length:  (11.25860606060606, 1.1764097751062257)
suc_rate:  (0.26455089355089356, 0.07639058068222888)
top50 result:
avg_length:  (9.500571872571873, 1.263131624214617)
suc_rate:  (0.379055167055167, 0.0906789451734067)


### RankSVM3

In [249]:
# Score the cd / "random" tuning table of every lambda listed below at
# topk = 20. With return_all = False the call yields
# (lambda, avg_length, suc_rate) for the highest mean success rate.
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "cd", 
                                   keyphrase_selection_method = "random",
                                   topk = 20, 
                                   return_all = False)

(1,
 (11.445679875679877, 1.1777676820860519),
 (0.25874358974358974, 0.0814482211780933))

In [251]:
# ranksvm3 / random: load the cd tuning table for lambda = 1 into `df`.
table_path = "../tables/tuning_lambda/cd/tuning_cd_at_lamb_1_with_random.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (15.51269696969697, 0.8872857281923647)
suc_rate:  (0.00044444444444444447, 0.0008623560033670605)
top5 result:
avg_length:  (14.507654234654234, 1.0675023525914091)
suc_rate:  (0.06887179487179487, 0.04190257852605433)
top10 result:
avg_length:  (12.904295260295262, 1.0185065526439159)
suc_rate:  (0.17385470085470087, 0.06176702370735052)
top20 result:
avg_length:  (11.445679875679877, 1.1777676820860519)
suc_rate:  (0.25874358974358974, 0.0814482211780933)
top50 result:
avg_length:  (9.476691530691532, 1.095483144627841)
suc_rate:  (0.4123628593628594, 0.07778894528541312)


## Diff

### Average

In [266]:
# average / diff: load the cd 50-user "diff" average table into `df`.
table_path = "../tables/reproducing/cd/avg_50users_diff.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (19.80688888888889, 0.36858341191401006)
suc_rate:  (0.010444444444444445, 0.019404574377332576)
top5 result:
avg_length:  (18.88533333333333, 0.7358294917565255)
suc_rate:  (0.0782222222222222, 0.0461964521495956)
top10 result:
avg_length:  (17.133923076923075, 1.1366983255576752)
suc_rate:  (0.1738717948717949, 0.06721606111703819)
top20 result:
avg_length:  (15.454273504273504, 1.4980372058706233)
suc_rate:  (0.24950427350427348, 0.08179478911531693)
top50 result:
avg_length:  (11.965618492618495, 1.6157171906561767)
suc_rate:  (0.44497824397824404, 0.08505796618004202)


### Rating

In [267]:
# rating / diff: load the cd "diff" table into `df`.
table_path = "../tables/reproducing/cd/lp1simplified_top100_diff.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (19.108, 0.626048286455783)
suc_rate:  (0.05244444444444443, 0.03473969923038027)
top5 result:
avg_length:  (17.29931623931624, 1.0772688452562702)
suc_rate:  (0.15252136752136752, 0.05982914064609926)
top10 result:
avg_length:  (16.317216783216782, 1.2821504540765372)
suc_rate:  (0.20125174825174824, 0.06886181784401578)
top20 result:
avg_length:  (15.20365268065268, 1.3950310563510468)
suc_rate:  (0.2574568764568765, 0.07410571071925763)
top50 result:
avg_length:  (12.858071484071486, 1.5746680337512364)
suc_rate:  (0.39329448329448324, 0.08459444984284296)


### RankSVM3

In [255]:
# Score the cd / "diff" tuning table of every lambda listed below at
# topk = 20. With return_all = False the call yields
# (lambda, avg_length, suc_rate) for the highest mean success rate.
get_best_metric_with_single_lambda(data_path = "", 
                                   lambs = [0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000],
                                   dataset_name = "cd", 
                                   keyphrase_selection_method = "diff",
                                   topk = 20, 
                                   return_all = False)

(0.5,
 (15.701196581196582, 1.3644102903578286),
 (0.24994871794871792, 0.07898130125943605))

In [256]:
# ranksvm3 / diff: load the cd tuning table for lambda = 0.5 into `df`.
table_path = "../tables/tuning_lambda/cd/tuning_cd_at_lamb_0.5_with_diff.csv"
df = load_dataframe_csv(table_path, "")

# Print both metrics at each cut-off.
for k in (1, 5, 10, 20, 50):
    print(f"top{k} result:")
    get_2_metric(df, topk=k, print_result=True)

top1 result:
avg_length:  (19.80377777777778, 0.36860848708114)
suc_rate:  (0.010888888888888887, 0.019444421546653188)
top5 result:
avg_length:  (18.973444444444443, 0.6850892426333598)
suc_rate:  (0.06744444444444443, 0.044688520031628744)
top10 result:
avg_length:  (17.21054700854701, 1.112399545918299)
suc_rate:  (0.1645384615384615, 0.06600664793263698)
top20 result:
avg_length:  (15.701196581196582, 1.3644102903578286)
suc_rate:  (0.24994871794871792, 0.07898130125943605)
top50 result:
avg_length:  (12.702292152292152, 1.5786776267931855)
suc_rate:  (0.40538228438228435, 0.0864953934289889)
