In [None]:
import glob
import warnings
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import rbo


# Hex colour palette shared by all plotting helpers, keyed 1-12.
# The helpers index it directly (e.g. colors[1] / colors[2] for the two
# primary averaged series, colors[9] / [11] / [12] for the RBO / F1 / Jaccard
# lines), so changing a key changes every figure that uses it.
colors = {1 : '#8f2011', 2: '#2B4C85', 3: '#91CDB7', 4: '#E7D995', 5: '#D68A91', 
          6: '#E89762', 7: '#50AFC7', 8: '#CDB29E', 9: '#C7CBCC', 10: '#A6C5D6',
          11: '#6B7B8E', 12: '#957DAD'}

In [None]:
def start_evaluation():
    """Evaluate one (category, nr_rev) run and record its results.

    Works entirely on module-level state prepared by the driver loop: it reads
    ``category``, ``nr_rev``, ``path`` and the open log file ``f``, and appends
    to the ``*_cat`` accumulator lists.

    Two runs are hard-coded as having no data. For those, NaN placeholders are
    appended to every accumulator so the category still contributes one entry
    per review count. Otherwise ``{path}/results.txt`` is loaded as a
    one-column DataFrame (column ``0``, one row per non-blank line) and passed
    on to ``evaluate_clustering_and_time`` and ``evaluate_ranking``.
    """
    runs_without_data = {("Home_and_Kitchen", 4130), ("Video_Games", 2593)}

    if (category, nr_rev) in runs_without_data:
        print(f'-----No data available for {category} {nr_rev}-----',file=f)

        # One NaN per run-level accumulator.
        for per_run in (clust_cat, run_cat, len_ranking_cat,
                        perc_ranking_cat, perc_step3_cat,
                        rbo_nr_rev_cat, jacc_nr_rev_cat, f1_nr_rev_cat):
            per_run.append(np.nan)

        # 48 placeholders for each @r metric list and 10 for the @p list
        # (p = 0.1 .. 1.0), the same counts the original loops produced.
        for per_r in (f1_r_cat, jacc_r_cat, rbo_r_cat):
            per_r.extend([np.nan] * 48)
        rbo_p_cat.extend([np.nan] * 10)

    else:
        # pandas >= 1.3 rejects sep='\n' in read_csv with a ValueError, so the
        # log is read line by line instead. Blank lines are skipped, matching
        # read_csv's default skip_blank_lines behaviour.
        with open(f'{path}/results.txt', encoding='utf-8') as results_file:
            lines = [line.rstrip('\r\n') for line in results_file if line.strip()]
        results = pd.DataFrame({0: pd.Series(lines, dtype=object)})

        step3_len = evaluate_clustering_and_time(results)

        evaluate_ranking(step3_len)

In [None]:
def evaluate_clustering_and_time(results):
    """Extract run metadata, run time and clustering figure from a results log.

    ``results`` is a DataFrame whose column ``0`` holds one log line per row.
    Lines are located by substring and split on single spaces:

    * ``"Category:"``       -> third token
    * ``"Found asin_id:"``  -> third token
    * ``"Chosen product:"`` -> fifth token
    * ``"seconds"``         -> second token (the run time)
    * ``"mean:"``           -> last token

    Appends ``100 - <"mean:" value>`` to the module-level ``clust_cat`` and the
    run time to ``run_cat``, writes a header line to the log file ``f``, and
    returns the number of rows containing two tabs followed by ``step 3``.
    """

    def first_match_token(marker, position):
        # Tokenise every matching line and keep the first line's token.
        hits = results[results[0].str.contains(marker)][0]
        picked = [parts[position] for parts in hits.str.split(" ")]
        return picked[0]

    category_name = first_match_token("Category:", 2)
    asin_id = first_match_token("Found asin_id:", 2)
    product_size = first_match_token("Chosen product:", 4)

    seconds = first_match_token("seconds", 1)
    mean_value = first_match_token("mean:", -1)

    clust_cat.append(100-float(mean_value))
    run_cat.append(float(seconds))

    step3_rows = results[results[0].str.contains("\t\tstep 3")]

    print(f'-----{category_name} {asin_id} {product_size}-----', file=f)

    return len(step3_rows)


In [None]:
def evaluate_ranking(step3_len):
    """Load the ranking CSVs of the current run and score the model ranking.

    The CSV files under the module-level ``path`` are sorted by name; the first
    is read as the model ranking (de-duplicated on ``'review'``) and the second
    as the ground truth. ``'Model ranking'`` is shifted up by one.

    Records, in the module-level accumulators, the length of the ranking, that
    length as a percentage of ``nr_rev``, and as a percentage of ``step3_len``
    (0 when ``step3_len`` is not positive). Then runs the F1, Jaccard and RBO
    evaluations.
    """
    csv_paths = sorted(glob.glob(f'{path}/*.csv'))

    ranking = pd.read_csv(csv_paths[0], sep=';').drop_duplicates('review')
    ground_truth = pd.read_csv(csv_paths[1], sep=';', index_col=0)

    ranking['Model ranking'] = ranking['Model ranking'].add(1)

    print(f"\nLength of final ranking:\n{len(ranking)}\n", file=f)

    final_len = len(ranking)
    len_ranking_cat.append(final_len)
    perc_ranking_cat.append(100*final_len/nr_rev)
    perc_step3_cat.append(100*final_len/step3_len if step3_len > 0 else 0)

    # Each scorer appends to its own module-level accumulators.
    f1_score(ranking, ground_truth)
    jaccard_similarity(ranking)
    rbo_score(ranking)


In [None]:
def f1_score(ranking, ground_truth):
    """Log confusion counts and record the F1 score at every cut-off r.

    For each r from 1 to ``len(ranking)`` the first r rows of ``ranking`` are
    the predicted top r, and a ``'ranking'`` value counts as relevant when it
    lies in 1..r:

    * TP / FP: predicted rows whose value is / is not relevant.
    * FN / TN: ``ground_truth`` rows whose value is not among the predicted
      ones and is / is not relevant.

    The counts and accuracy, precision, recall and F1 are written to the
    module-level log file ``f``. F1 is appended to ``f1_r_cat`` for every r,
    and the F1 of the last r is appended to ``f1_nr_rev_cat``.

    F1 is 0.0 whenever precision + recall is 0 (it equals
    2TP / (2TP + FP + FN), which is 0 when TP is 0), and recall is 0.0 when
    there are no relevant items at all. Only an empty ranking leaves the value
    appended to ``f1_nr_rev_cat`` as NaN.
    """
    F1_score = np.nan  # only survives when the ranking is empty

    for r in range(1, len(ranking) + 1):
        relevant = list(range(1, r + 1))
        top_r = ranking['ranking'][:r]
        not_predicted = ground_truth[~ground_truth['ranking'].isin(list(top_r))]['ranking']

        TP = int(top_r.isin(relevant).sum())
        FP = len(top_r) - TP
        FN = int(not_predicted.isin(relevant).sum())
        TN = len(not_predicted) - FN

        print(f"For r={r}\n\tTP: {TP}, TN: {TN}, FP: {FP}, FN: {FN}", file=f)

        accuracy = (TP+TN)/(TP+TN+FP+FN)
        precision = TP/(TP+FP)  # TP + FP == r >= 1, never zero
        recall = TP/(TP+FN) if (TP+FN) > 0 else 0.0

        if precision+recall > 0:
            F1_score = 2*(precision*recall)/(precision+recall)
        else:
            # Reset explicitly so a value from an earlier r (or the initial
            # NaN) is never reported for this cut-off.
            F1_score = 0.0

        print(f"\tacc: {accuracy}, pre: {precision}, rec: {recall}, F1: {F1_score}\n", file=f)
        f1_r_cat.append(F1_score)

    f1_nr_rev_cat.append(F1_score)


In [None]:
def jaccard_similarity(ranking):
    """Record the Jaccard similarity of the two rankings at every cut-off r.

    For each r from 1 to ``len(ranking)`` the set of the first r ``'ranking'``
    values is compared with the set of the first r ``'Model ranking'`` values:
    |intersection| / |union|. Each value is appended to the module-level
    ``jacc_r_cat``; the value of the last r (0 for an empty ranking) is
    appended to ``jacc_nr_rev_cat``.
    """
    latest = 0

    for depth in range(1, len(ranking) + 1):
        truth_top = set(ranking['ranking'][:depth])
        model_top = set(ranking['Model ranking'][:depth])

        shared = truth_top & model_top
        combined = truth_top | model_top

        # An empty union keeps the previous value.
        if combined:
            latest = len(shared) / len(combined)

        jacc_r_cat.append(latest)

    jacc_nr_rev_cat.append(latest)
    

In [None]:
def rbo_score(ranking):
    """Record rank-biased overlap between the model and reference rankings.

    Compares ``ranking['Model ranking']`` with ``ranking['ranking']`` in two
    sweeps and appends to module-level accumulators:

    * ``rbo_p_cat``: RBO of the full lists for p = 0.1, 0.2, ..., 1.0.
    * ``rbo_r_cat``: RBO with the library's default p of the lists truncated
      to their first r entries, for r = 1 .. ``len(ranking)``.
    * ``rbo_nr_rev_cat``: the value at the largest r, i.e. the full-list RBO
      (NaN for an empty ranking).
    """
    model_order = list(ranking['Model ranking'])
    reference_order = list(ranking['ranking'])

    # rbo with fluctuating p
    for p in range(1, 11):
        similarity = rbo.RankingSimilarity(list(model_order), list(reference_order))
        rbo_p_cat.append(similarity.rbo(p=p/10))

    score = np.nan

    # rbo with fluctuating r, p=1 (no weight)
    # Truncate both lists to depth r; previously r was ignored and every entry
    # was the full-list score.
    for r in range(1, len(ranking) + 1):
        score = rbo.RankingSimilarity(model_order[:r], reference_order[:r]).rbo()
        rbo_r_cat.append(score)

    rbo_nr_rev_cat.append(score)

In [None]:
def plot_clust_run(clust, run, to_plot):
    """Plot clustered-review percentages and run times against review count.

    Parameters
    ----------
    clust, run : pandas.DataFrame
        One column per entry of the module-level ``cols``. In ``"all"`` mode
        the rows are read with ``.loc[i, :]`` for i in 0..len-1 and labelled
        with ``categories[i]``.
    to_plot : str
        ``"category"`` draws one twin-axis figure of the column means, titled
        with the module-level ``category``. ``"all"`` draws six figures:
        twin-axis means, the same with standard-deviation bands, and for each
        of the two quantities one figure with per-category lines and one with
        a deviation band. Any other value draws nothing.

    Reads the module-level ``cols``, ``colors``, ``question``, ``category``,
    ``categories`` and ``save``. When ``save`` is truthy each figure is written
    to ``Evaluation/{question}/``.
    """

    def _save(filename):
        # Write the current figure only when saving is switched on.
        if save:
            plt.savefig(f'Evaluation/{question}/{filename}')

    def _twin_axis_figure(title, filename, with_std=False):
        # %cluster on the left axis, run time on the right axis.
        y1 = clust.mean(axis=0)
        y2 = run.mean(axis=0)
        fig, ax1 = plt.subplots(figsize=(15, 8))

        ax2 = ax1.twinx()
        line1, = ax1.plot(cols, y1, color=colors[1], marker='o', label="%cluster")
        line2, = ax2.plot(cols, y2, color=colors[2], marker='x', label="run time")

        if with_std:
            ax1.fill_between(cols, y1+clust.std(axis=0), y1-clust.std(axis=0), color=colors[8], alpha=0.3)
            ax2.fill_between(cols, y2+run.std(axis=0), y2-run.std(axis=0), color=colors[3], alpha=0.3)

        ax1.set_xlabel('Number of reviews', fontsize=15)
        ax1.set_ylabel('% of clustered reviews', color=colors[1], fontsize=15)
        ax2.set_ylabel('Runtime in seconds', color=colors[2], fontsize=15)
        ax1.set_xticks(cols)
        ax2.set_xticks(cols)
        ax1.legend(handles=[line1, line2], loc=2)
        ax2.set_title(title, fontsize=20)
        _save(filename)

    def _single_axis_figure(data, ylabel, title, filename, avg_color, avg_marker,
                            per_category=False, band_color=None):
        # Average line, plus either one dashed line per category or a band.
        plt.figure(figsize=(15, 8))
        plt.rc('font', size=12)
        plt.xlabel('Number of reviews', fontsize=15)
        plt.ylabel(ylabel, fontsize=15)
        plt.title(title, fontsize=20)
        plt.xticks(range(len(cols)), labels=cols)
        plt.plot(cols, data.mean(axis=0), color=avg_color, marker=avg_marker, label="avg")
        if per_category:
            for i in range(len(data)):
                # Start at colors[3], as the other plot functions do, so a
                # category keeps its colour across figures and never shares
                # colors[2] with the run-time average.
                plt.plot(cols, list(data.loc[i,:]), linestyle='--', color=colors[i+3], label=categories[i])
        if band_color is not None:
            plt.fill_between(cols, data.mean(axis=0)+data.std(axis=0), data.mean(axis=0)-data.std(axis=0), color=band_color, alpha=0.3)
        plt.legend(loc=2)
        _save(filename)

    if to_plot == "category":
        # individual categories
        _twin_axis_figure(f"{question}: Cluster percentage & run time for {category}",
                          f'individual_clust_run_{category}.pdf')

    elif to_plot == "all":
        # clust + run average
        _twin_axis_figure(f"{question}: Average cluster percentage & run time",
                          'clust_run_all.pdf')

        # clust + run stdev
        _twin_axis_figure(f"{question}: Average cluster percentage & run time with their standard deviations",
                          'clust_run_all_stdev.pdf', with_std=True)

        # clust + category lines
        _single_axis_figure(clust, '% of clustered reviews', f"{question}: Cluster percentages",
                            'clust_categories_all.pdf', colors[1], 'o', per_category=True)

        # run + category lines
        _single_axis_figure(run, 'Runtime in seconds', f"{question}: Runtimes",
                            'run_categories_all.pdf', colors[2], 'x', per_category=True)

        # clust + stdev
        _single_axis_figure(clust, '% of clustered reviews',
                            f"{question}: Cluster percentage with its standard deviation",
                            'clust_stdev_all.pdf', colors[1], 'o', band_color=colors[8])

        # run + stdev
        _single_axis_figure(run, 'Runtime in seconds',
                            f"{question}: Runtime with its standard deviation",
                            'run_stdev_all.pdf', colors[2], 'x', band_color=colors[3])
        

In [None]:
def plot_ranking(data_len, data_perc, data_step3):
    """Plot size and share of the final ranking, and the step-3 success rate.

    Each argument is a DataFrame with one column per entry of the module-level
    ``cols``; rows are read with ``.loc[i, :]`` for i in 0..len-1 and labelled
    with ``categories[i]``. Five figures are drawn:

    1. ranking length: average plus one dashed line per category
    2. ranking percentage: average plus one dashed line per category
    3. twin-axis length and percentage with standard-deviation bands
    4. step-3 percentage: average plus one dashed line per category
    5. step-3 percentage: average with a standard-deviation band

    Reads the module-level ``cols``, ``colors``, ``question``, ``categories``
    and ``save``. When ``save`` is truthy each figure is written to
    ``Evaluation/{question}/``.
    """

    def _save(filename):
        # Write the current figure only when saving is switched on.
        if save:
            plt.savefig(f'Evaluation/{question}/{filename}')

    def _single_axis_figure(data, ylabel, title, filename, avg_color, avg_marker,
                            per_category=False, band_color=None):
        # Average line, plus either one dashed line per category or a band.
        plt.figure(figsize=(15, 8))
        plt.rc('font', size=12)
        plt.xlabel('Number of reviews', fontsize=15)
        plt.ylabel(ylabel, fontsize=15)
        plt.title(title, fontsize=20)
        plt.xticks(range(len(cols)), labels=cols)
        plt.plot(cols, data.mean(axis=0), color=avg_color, marker=avg_marker, label="avg")
        if per_category:
            for i in range(len(data)):
                plt.plot(cols, list(data.loc[i,:]), linestyle='--', color=colors[i+3], label=categories[i])
        if band_color is not None:
            plt.fill_between(cols, data.mean(axis=0)+data.std(axis=0), data.mean(axis=0)-data.std(axis=0), color=band_color, alpha=0.3)
        # Every figure gets a legend; the step3 + stdev figure previously
        # labelled its line but never showed one.
        plt.legend(loc=2)
        _save(filename)

    # len + categories
    _single_axis_figure(data_len, 'Number of reviews in final ranking',
                        f"{question}: Number of reviews",
                        'len_categories.pdf', colors[1], 'o', per_category=True)

    # perc + categories
    _single_axis_figure(data_perc, 'Percentage of reviews in final ranking',
                        f"{question}: Percentage of reviews",
                        'perc_categories.pdf', colors[2], 'x', per_category=True)

    # len + perc + stdev
    y1 = data_len.mean(axis=0)
    y2 = data_perc.mean(axis=0)
    fig, ax1 = plt.subplots(figsize=(15, 8))

    ax2 = ax1.twinx()
    line1, = ax1.plot(cols, y1, color=colors[1], marker='o', label="size")
    line2, = ax2.plot(cols, y2, color=colors[2], marker='x', label="%")

    ax1.fill_between(cols, y1+data_len.std(axis=0), y1-data_len.std(axis=0), color=colors[8], alpha=0.3)
    ax2.fill_between(cols, y2+data_perc.std(axis=0), y2-data_perc.std(axis=0), color=colors[3], alpha=0.3)

    ax1.set_xlabel('Number of reviews', fontsize=15)
    ax1.set_ylabel('Number of reviews in final ranking', color=colors[1], fontsize=15)
    ax2.set_ylabel('Percentage of reviews in final ranking', color=colors[2], fontsize=15)
    ax1.set_xticks(cols)
    ax2.set_xticks(cols)
    ax1.legend(handles=[line1, line2], loc=2)
    ax2.set_title(f"{question}: Number & percentage of reviews with their standard deviations", fontsize=20)
    _save('len_perc_stdev.pdf')

    # step3 + categories
    _single_axis_figure(data_step3, 'Percentage of successful step 3s',
                        f"{question}: Percentage of successful step 3s",
                        'step3_categories.pdf', colors[1], 'o', per_category=True)

    # step3 + stdev
    _single_axis_figure(data_step3, 'Percentage of successful step 3s',
                        f"{question}: Percentage of successful step 3s with its standard deviation",
                        'step3_stdev.pdf', colors[1], 'o', band_color=colors[8])


In [None]:
def plot_metrics(data_f1, data_jacc, data_rbo, to_plot):
    """Plot F1, Jaccard and RBO scores against the number of reviews.

    Each ``data_*`` argument is a DataFrame with one column per entry of the
    module-level ``cols``.

    ``to_plot == "category"`` draws one figure with the three column means,
    titled with the module-level ``category``. ``to_plot == "all"`` draws, in
    order: one figure per metric (average plus a dashed line per row, labelled
    with ``categories[i]``), one figure with all three averages, and one with
    all three averages and their standard-deviation bands. Any other value
    draws nothing.

    Reads the module-level ``cols``, ``colors``, ``question``, ``category``,
    ``categories`` and ``save``. When ``save`` is truthy each figure is written
    to ``Evaluation/{question}/``.
    """
    # (data, colour key, marker, legend label, band alpha) per metric.
    metrics = (
        (data_f1, 11, 'o', "F1", 0.1),
        (data_jacc, 12, 'x', "Jaccard", 0.2),
        (data_rbo, 9, 's', "RBO", 0.2),
    )

    def _begin(ylabel, title):
        # New figure with the shared axis set-up.
        plt.figure(figsize=(15, 8))
        plt.rc('font', size=12)
        plt.xlabel('Number of reviews', fontsize=15)
        plt.ylabel(ylabel, fontsize=15)
        plt.title(title, fontsize=20)
        plt.xticks(range(len(cols)), labels=cols)

    def _end(filename):
        # Legend, then optional save of the current figure.
        plt.legend(loc=2)
        if save:
            plt.savefig(f'Evaluation/{question}/{filename}')

    def _metric_lines():
        # The three averaged metric lines, in F1 / Jaccard / RBO order.
        for data, key, marker, label, _ in metrics:
            plt.plot(cols, data.mean(axis=0), color=colors[key], marker=marker, label=label)

    if to_plot == "category":
        # individual categories with all metrics
        _begin('Metric scores', f"{question}: Number of reviews with their performances for {category}")
        _metric_lines()
        _end(f'individual_metrics_{category}.pdf')

    elif to_plot == "all":
        # one figure per metric: average + categories
        per_metric = (
            (data_f1, 11, 'o', 'F1-score',
             f"{question}: Number of reviews versus average F1 score", 'F1_categories.pdf'),
            (data_jacc, 12, 'x', 'Jaccard similarity',
             f"{question}: Number of reviews versus average Jaccard similarities", 'Jacc_categories.pdf'),
            (data_rbo, 9, 's', 'RBO score',
             f"{question}: Number of reviews versus average RBO score", 'RBO_categories.pdf'),
        )
        for data, key, marker, ylabel, title, filename in per_metric:
            _begin(ylabel, title)
            plt.plot(cols, data.mean(axis=0), color=colors[key], marker=marker, label="avg")
            for i in range(len(data)):
                plt.plot(cols, list(data.loc[i,:]), linestyle='--', color=colors[i+3], label=categories[i])
            _end(filename)

        # all metrics
        _begin('Metric scores', f"{question}: Number of reviews with their performances")
        _metric_lines()
        _end('all_metrics.pdf')

        # all metrics + stdev
        _begin('Metric scores', f"{question}: Number of reviews with their performances with their standard deviations")
        _metric_lines()
        for data, key, _, _, alpha in metrics:
            plt.fill_between(cols, data.mean(axis=0)+data.std(axis=0), data.mean(axis=0)-data.std(axis=0), color=colors[key], alpha=alpha)
        _end('all_metrics_stdev.pdf')

        

In [None]:
def plot_r_metrics(data_f1, data_jacc, data_rbo, to_plot):
    """Plot F1, Jaccard and RBO scores against the cut-off r.

    Each ``data_*`` argument is a DataFrame whose columns are 0-based cut-off
    indices (the x axis is ``columns + 1``). Rows are consumed in consecutive
    groups of ``len(cols)``, one group per entry of the module-level
    ``categories``.

    ``to_plot == "category"`` draws one figure per category with the three
    group means. ``to_plot == "all"`` draws the three overall means, the same
    with standard-deviation bands, and then one figure per metric showing the
    overall mean plus a dashed line per category. Any other value draws
    nothing.

    Reads the module-level ``cols``, ``colors``, ``question``, ``categories``
    and ``save``. When ``save`` is truthy each figure is written to
    ``Evaluation/{question}/``.
    """
    # (data, colour key, marker, legend label, band alpha) per metric.
    metrics = (
        (data_f1, 11, 'o', "F1", 0.1),
        (data_jacc, 12, 'x', "Jaccard", 0.2),
        (data_rbo, 9, 's', "RBO", 0.2),
    )

    def _begin(ylabel, title, tick_source):
        # New figure; ticks come from the given frame's columns.
        plt.figure(figsize=(15, 8))
        plt.rc('font', size=12)
        plt.xlabel('@r', fontsize=15)
        plt.ylabel(ylabel, fontsize=15)
        plt.title(title, fontsize=20)
        plt.xticks(tick_source.columns+1)

    def _end(filename):
        # Legend, then optional save of the current figure.
        plt.legend(loc=2)
        if save:
            plt.savefig(f'Evaluation/{question}/{filename}')

    def _category_mean(data, idx):
        # Column means over the idx-th block of len(cols) rows.
        return list(data.iloc[len(cols)*idx:len(cols)*(idx+1)].mean(axis=0))

    if to_plot == "category":
        # individual categories with avg metrics
        for idx, name in enumerate(categories):
            _begin('Metric scores', f"{question}: Average metric performances @r for {name}", data_f1)
            for data, key, marker, label, _ in metrics:
                plt.plot(data.columns+1, _category_mean(data, idx), color=colors[key], marker=marker, label=label)
            _end(f'individual_@r_{name}.pdf')

    elif to_plot == "all":
        # whole RQ1, first without and then with stdev bands
        variants = (
            (False, f"{question}: Average metric performances @r", '@r_all_metrics.pdf'),
            (True, f"{question}: Average metric performances @r with their standard deviations", '@r_all_metrics_stdev.pdf'),
        )
        for with_bands, title, filename in variants:
            _begin('Metric scores', title, data_f1)
            for data, key, marker, label, _ in metrics:
                plt.plot(data.columns+1, data.mean(axis=0), color=colors[key], marker=marker, label=label)
            if with_bands:
                for data, key, _, _, alpha in metrics:
                    plt.fill_between(data.columns+1, data.mean(axis=0)+data.std(axis=0), data.mean(axis=0)-data.std(axis=0), color=colors[key], alpha=alpha)
            _end(filename)

        # one figure per metric: average + categories
        per_metric = (
            (data_f1, 11, 'o', 'F1-scores', f"{question}: F1-scores @r", '@r_F1_categories.pdf'),
            (data_jacc, 12, 'x', 'Jaccard similarity', f"{question}: Jaccard similarities @r", '@r_jacc_categories.pdf'),
            (data_rbo, 9, 's', 'RBO', f"{question}: RBO @r", '@r_rbo_categories.pdf'),
        )
        for data, key, marker, ylabel, title, filename in per_metric:
            _begin(ylabel, title, data)
            plt.plot(data.columns+1, data.mean(axis=0), color=colors[key], marker=marker, label="avg")
            for idx, name in enumerate(categories):
                plt.plot(data.columns+1, _category_mean(data, idx), linestyle='--', color=colors[idx+3], label=name)
            _end(filename)

In [None]:
def plot_p_metrics(data_rbo, to_plot):
    """Plot RBO similarity against the persistence parameter p.

    ``data_rbo`` is a DataFrame whose columns are 0-based indices; the x axis
    is ``(columns + 1) / 10``. Rows are consumed in consecutive groups of
    ``len(cols)``, one group per entry of the module-level ``categories``.

    ``to_plot == "category"`` draws one figure per category with that group's
    mean. ``to_plot == "all"`` draws the overall mean, the overall mean with a
    standard-deviation band, and the overall mean plus a dashed line per
    category. Any other value draws nothing.

    Reads the module-level ``cols``, ``colors``, ``question``, ``categories``
    and ``save``. When ``save`` is truthy each figure is written to
    ``Evaluation/{question}/``.
    """

    def _p_values():
        # x positions: column index k maps to p = (k + 1) / 10.
        return (data_rbo.columns+1)/10

    def _begin(ylabel, title):
        # New figure with the shared axis set-up.
        plt.figure(figsize=(15, 8))
        plt.rc('font', size=12)
        plt.xlabel('@p', fontsize=15)
        plt.ylabel(ylabel, fontsize=15)
        plt.title(title, fontsize=20)
        plt.xticks(_p_values())

    def _end(filename):
        # Legend, then optional save of the current figure.
        plt.legend(loc=2)
        if save:
            plt.savefig(f'Evaluation/{question}/{filename}')

    def _category_mean(idx):
        # Column means over the idx-th block of len(cols) rows.
        return list(data_rbo.iloc[len(cols)*idx:len(cols)*(idx+1)].mean(axis=0))

    if to_plot == "category":
        # individual categories with avg metrics
        for idx, name in enumerate(categories):
            _begin('RBO similarity', f"{question}: Average RBO similarity @p for {name}")
            plt.plot(_p_values(), _category_mean(idx), color=colors[9], marker='s', label="RBO")
            _end(f'individual_rbo@p_{name}.pdf')

    elif to_plot == "all":
        # whole RQ1
        _begin('RBO similarity', f"{question}: Average RBO similarity @p")
        plt.plot(_p_values(), data_rbo.mean(axis=0), color=colors[9], marker='s', label="RBO")
        _end('rbo@p_all_metrics.pdf')

        # whole RQ1 with stdev
        _begin('RBO similarity', f"{question}: Average RBO similarity @p with its standard deviation")
        plt.plot(_p_values(), data_rbo.mean(axis=0), color=colors[9], marker='s', label="RBO")
        plt.fill_between(_p_values(), data_rbo.mean(axis=0)+data_rbo.std(axis=0), data_rbo.mean(axis=0)-data_rbo.std(axis=0), color=colors[9], alpha=0.2)
        _end('rbo@p_all_metrics_stdev.pdf')

        # rbo + categories
        _begin('RBO', f"{question}: RBO similarity @p")
        plt.plot(_p_values(), data_rbo.mean(axis=0), color=colors[9], marker='s', label="avg")
        for idx, name in enumerate(categories):
            plt.plot(_p_values(), _category_mean(idx), linestyle='--', color=colors[idx+3], label=name)
        _end('rbo@p_categories.pdf')

In [None]:
def plot_clust_perc(clust, step3, perc):
    """Plot the three percentage series together, with and without bands.

    ``clust``, ``step3`` and ``perc`` are DataFrames with one column per entry
    of the module-level ``cols``. Their column means are drawn as "%cluster",
    "%successful step3s" and "%reviews". The first figure adds a
    standard-deviation band around each line; the second shows the lines only.

    Reads the module-level ``cols``, ``colors``, ``question`` and ``save``.
    When ``save`` is truthy the figures are written to
    ``Evaluation/{question}/``.
    """
    y1 = clust.mean(axis=0)
    y2 = step3.mean(axis=0)
    y3 = perc.mean(axis=0)

    def _figure(title, filename, with_std):
        # Three mean lines on one axis, optionally with +/- one std bands.
        plt.figure(figsize=(15, 8))
        plt.rc('font', size=12)
        plt.xlabel('Number of reviews', fontsize=15)
        plt.ylabel('Percentage', fontsize=15)
        plt.title(title, fontsize=20)
        plt.xticks(range(len(cols)), labels=cols)
        plt.plot(cols, y1, color=colors[1], marker='o', label="%cluster")
        plt.plot(cols, y2, color=colors[12], marker='h', label="%successful step3s")
        plt.plot(cols, y3, color=colors[2], marker='x', label="%reviews")
        if with_std:
            plt.fill_between(cols, y1+clust.std(axis=0), y1-clust.std(axis=0), color=colors[8], alpha=0.3)
            plt.fill_between(cols, y2+step3.std(axis=0), y2-step3.std(axis=0), color=colors[12], alpha=0.2)
            plt.fill_between(cols, y3+perc.std(axis=0), y3-perc.std(axis=0), color=colors[3], alpha=0.3)
        plt.legend(loc=2)
        if save:
            plt.savefig(f'Evaluation/{question}/{filename}')

    _figure(f"{question}: Percentage of cluster, successful step3s & reviews with their standard deviations",
            'clust_step3_perc_stdev.pdf', with_std=True)

    # This figure has no bands, so its title no longer mentions standard
    # deviations.
    _figure(f"{question}: Percentage of cluster, successful step3s & reviews",
            'clust_step3_perc.pdf', with_std=False)

In [None]:
# Driver: evaluates every (question, category, review count) run and plots the
# results. The evaluation and plotting functions read the loop variables and
# accumulators below as module-level names, so those names must not change.
warnings.filterwarnings("ignore")
save = False # save figures or not

questions = {"RQ1": ["CDs_and_Vinyl","Electronics","Home_and_Kitchen","Movies_and_TV","Video_Games"]}

# The first four review counts are shared; the three largest differ per
# category. A category missing from this table only gets the shared counts.
shared_nr_revs = [10, 20, 50, 100]
category_nr_revs = {
    "CDs_and_Vinyl": [253, 499, 1045],
    "Electronics": [250, 481, 1475],
    "Home_and_Kitchen": [257, 459, 4130],
    "Movies_and_TV": [247, 499, 1491],
    "Video_Games": [251, 431, 2593],
}

for question, categories in questions.items():

    # One label per review-count bucket, in the same order as nr_revs.
    # '±250' was previously stored as the mis-decoded 'Â±250'.
    cols = ['10', '20', '50', '100', '±250', '430-500', '>1000']

    # Per-question tables: one row per category, one column per bucket.
    clust_RQ = pd.DataFrame(columns=cols)
    run_RQ = pd.DataFrame(columns=cols)
    len_ranking_RQ = pd.DataFrame(columns=cols)
    perc_ranking_RQ = pd.DataFrame(columns=cols)
    perc_step3_RQ = pd.DataFrame(columns=cols)
    f1_r_RQ = [] 
    f1_nr_rev_RQ = pd.DataFrame(columns=cols)
    jacc_r_RQ = []
    jacc_nr_rev_RQ = pd.DataFrame(columns=cols)
    rbo_r_RQ = []
    rbo_p_RQ = []
    rbo_nr_rev_RQ = pd.DataFrame(columns=cols)

    for category in categories:
        # Opened in append mode: the log grows across categories and re-runs.
        with open(f'Evaluation/{question}/evaluations.txt', 'a') as f:
            # Per-category accumulators: one entry per review count.
            clust_cat = []
            run_cat = []
            len_ranking_cat = []
            perc_ranking_cat = []
            perc_step3_cat = []
            f1_nr_rev_cat = []
            jacc_nr_rev_cat = []
            rbo_nr_rev_cat = []

            nr_revs = shared_nr_revs + category_nr_revs.get(category, [])

            for nr_rev in nr_revs:
                # Per-run accumulators: one entry per cut-off r / value of p.
                f1_r_cat = []
                jacc_r_cat = []
                rbo_r_cat = []
                rbo_p_cat = []
                
                path = f'Results/{question}/{category}/{nr_rev}'

                start_evaluation()

                # Logs the per-category lists as accumulated so far.
                print(f'\nClustered:\n{clust_cat}', file=f)
                print(f'\nRun time:\n{run_cat}\n\n', file=f)
                print(f'\nLength of final ranking:\n{len_ranking_cat}\n', file=f)
                print(f'\nPercentage of final ranking:\n{perc_ranking_cat}\n', file=f)
                print(f'\nPercentage of successful step3s:\n{perc_step3_cat}\n', file=f)
                print(f'\nF1-scores:\n{f1_nr_rev_cat}\n', file=f)
                print(f'\nJaccard distances:\n{jacc_nr_rev_cat}\n', file=f)
                print(f'\nRBO distances:\n{rbo_nr_rev_cat}\n', file=f)

                f1_r_RQ.append(f1_r_cat)
                jacc_r_RQ.append(jacc_r_cat)
                rbo_r_RQ.append(rbo_r_cat)
                rbo_p_RQ.append(rbo_p_cat)
            
        # Add this category as the next row of each per-question table.
        clust_RQ.loc[len(clust_RQ)] = clust_cat
        run_RQ.loc[len(run_RQ)] = run_cat
        len_ranking_RQ.loc[len(len_ranking_RQ)] = len_ranking_cat
        perc_ranking_RQ.loc[len(perc_ranking_RQ)] = perc_ranking_cat
        perc_step3_RQ.loc[len(perc_step3_RQ)] = perc_step3_cat
        f1_nr_rev_RQ.loc[len(f1_nr_rev_RQ)] = f1_nr_rev_cat
        jacc_nr_rev_RQ.loc[len(jacc_nr_rev_RQ)] = jacc_nr_rev_cat
        rbo_nr_rev_RQ.loc[len(rbo_nr_rev_RQ)] = rbo_nr_rev_cat
            

        plot_clust_run(pd.DataFrame([clust_cat]), pd.DataFrame([run_cat]), "category")
        plot_metrics(pd.DataFrame([f1_nr_rev_cat]), pd.DataFrame([jacc_nr_rev_cat]), pd.DataFrame([rbo_nr_rev_cat]), "category")

    plot_clust_run(clust_RQ, run_RQ, "all")
    plot_ranking(len_ranking_RQ, perc_ranking_RQ, perc_step3_RQ)
    plot_metrics(f1_nr_rev_RQ, jacc_nr_rev_RQ, rbo_nr_rev_RQ, "all")
    plot_r_metrics(pd.DataFrame(f1_r_RQ), pd.DataFrame(jacc_r_RQ), pd.DataFrame(rbo_r_RQ), "category")
    plot_r_metrics(pd.DataFrame(f1_r_RQ), pd.DataFrame(jacc_r_RQ), pd.DataFrame(rbo_r_RQ), "all")
    plot_p_metrics(pd.DataFrame(rbo_p_RQ), "category")
    plot_p_metrics(pd.DataFrame(rbo_p_RQ), "all")
    plot_clust_perc(clust_RQ, perc_step3_RQ, perc_ranking_RQ)
    