In [1]:
import pandas as pd
import numpy as np

### Merge times and answer results

In [19]:
def mergeTimesAndResults():
    df_original = pd.read_csv('../data/eval-quant-out.csv')
    df_topone = pd.read_csv('../data/eval-quant-out-topone.csv')
    df_original[['ft_span_topone', 'kg_topone','ft_wiki_topone', 'ft_wiki_top3', 'ft_nernel_topone', 'ft_nernel_top3']] = df_topone[['ft_span_topone', 'kg_topone','ft_wiki_topone', 'ft_wiki_top3', 'ft_nernel_topone', 'ft_nernel_top3']]
    df_original.drop(['ft_answer', 'kg_answer'], inplace=True, axis=1)
    return df_original

In [20]:
df_final = mergeTimesAndResults()

In [21]:
df_final.to_csv('../data/eval-quant-out-final.csv')

### Compute performances

In [5]:
df = pd.read_csv('../data/eval-quant-out-final.csv')

In [120]:
def countAnswers(df):
    return {
        'kg_topone': df['kg_topone'].value_counts().sort_index(ascending=True),
        'ft_span_topone': df['ft_span_topone'].value_counts().sort_index(ascending=True),
        'ft_wiki_topone': df['ft_wiki_topone'].value_counts().sort_index(ascending=True),
        'ft_wiki_top3': df['ft_wiki_top3'].value_counts().sort_index(ascending=True),
        'ft_nernel_topone': df['ft_nernel_topone'].value_counts().sort_index(ascending=True),
        'ft_nernel_top3': df['ft_nernel_top3'].value_counts().sort_index(ascending=True)
    }

def getAvgs(df):
    return {
        'kg_topone': df['kg_topone'].mean(),
        'ft_span_topone': df['ft_span_topone'].mean(),
        'ft_wiki_topone': df['ft_wiki_topone'].mean(),
        'ft_wiki_top3': df['ft_wiki_top3'].mean(),
        'ft_nernel_topone': df['ft_nernel_topone'].mean(),
        'ft_nernel_top3': df['ft_nernel_top3'].mean()
    }

def getTimes(df):
    return {
        'kg': df['kg_time'].mean(),
        'ft_span': df['ft_time'].mean(),
        'ft_wiki': df['ft_wiki_time'].mean(),
        'ft_nernel': df['ft_nernel_time'].mean(),
    }

def percentages(df):
    value_counts = countAnswers(df)
    percentages = dict()
    for key, value in value_counts.items():
        percentages[key] = {}
        for index, n in enumerate(value.values):
            s = np.sum(value.values)
            perc = n/s * 100
            percentages[key][index] = round(perc, 1)
            if len(value.values) < 3 and index == len(value.values)-1:
                percentages[key][index + 1] = 0
    return percentages

def getAcceptable(df):
    value_counts = countAnswers(df)
    perc = percentages(df)
    acceptable = dict()
    for key, value in value_counts.items():
        n = 0
        if 1 in value:
            n = value[1] + value[2]
        acceptable[key] = {'n': n, 'percentage': round(perc[key][1] + perc[key][2], 1)}
    return acceptable

In [121]:
getAvgs(df)

{'kg_topone': 1.2045454545454546,
 'ft_span_topone': 1.2954545454545454,
 'ft_wiki_topone': 0.6590909090909091,
 'ft_wiki_top3': 1.0,
 'ft_nernel_topone': 0.5,
 'ft_nernel_top3': 0.9318181818181818}

In [122]:
getTimes(df)

{'kg': 1.3815106316046282,
 'ft_span': 0.5716171969066967,
 'ft_wiki': 15.452782281609469,
 'ft_nernel': 24.358425974845886}

In [123]:
percentages(df)

{'kg_topone': {0: 36.4, 1: 6.8, 2: 56.8},
 'ft_span_topone': {0: 34.1, 1: 2.3, 2: 63.6},
 'ft_wiki_topone': {0: 63.6, 1: 6.8, 2: 29.5},
 'ft_wiki_top3': {0: 40.9, 1: 18.2, 2: 40.9},
 'ft_nernel_topone': {0: 75.0, 1: 25.0, 2: 0},
 'ft_nernel_top3': {0: 43.2, 1: 20.5, 2: 36.4}}

In [124]:
getAcceptable(df)

{'kg_topone': {'n': 28, 'percentage': 63.6},
 'ft_span_topone': {'n': 29, 'percentage': 65.9},
 'ft_wiki_topone': {'n': 16, 'percentage': 36.3},
 'ft_wiki_top3': {'n': 26, 'percentage': 59.1},
 'ft_nernel_topone': {'n': 0, 'percentage': 25.0},
 'ft_nernel_top3': {'n': 25, 'percentage': 56.9}}