In [83]:
import numpy as np
import pandas as pd

In [84]:
# Per-dataset denominator used to express a run's 'Data Used' count as a percentage.
# NOTE(review): presumably the size of each dataset's labelling pool -- confirm.
num_data = dict(
    netifier=7773,
    dat=500,
    hsd=13169,
    casa=1080,
    hoasa=2854,
)

In [85]:
def average_results(results):
    """Average several result frames element-wise, matching rows by index label.

    Args:
        results: Iterable of DataFrames holding numeric columns.

    Returns:
        A float DataFrame with one row per distinct index label, each cell
        being the mean of that cell across the input frames.
    """
    stacked = pd.concat(results)
    # Rows sharing an index label come from the same position in each frame.
    mean_per_label = stacked.groupby(level=0).mean()
    return mean_per_label.astype(float)

In [86]:
def load_result(dataset, ignored_method=None):
    """Load the trial-averaged metrics of every method plus the passive baseline.

    For each method the five files
    ``../{dataset}/metrics/{dataset}-{method}-{trial}-results.csv``
    (trial 1 to 5) are read and combined with ``average_results``. The passive
    baseline is read from ``{dataset}-passive-results.csv``; the mean of each
    of its metric columns is repeated so the ``'passive'`` frame has as many
    rows as the method frames.

    Args:
        dataset: Dataset directory name and file-name prefix, e.g. ``'netifier'``.
        ignored_method: Optional method name, or iterable of method names, to
            skip. Names that are not known methods are silently ignored.

    Returns:
        dict mapping each loaded method name, and ``'passive'``, to a DataFrame.

    Raises:
        ValueError: If ``ignored_method`` excludes every method.
    """
    methods = ['lc', 'mc', 'kmeans', 'coreset', 'comal', 'besra', 'random']
    if ignored_method is not None:
        # A bare string is one method name, not an iterable of characters.
        if isinstance(ignored_method, str):
            ignored = {ignored_method}
        else:
            ignored = set(ignored_method)
        methods = [method for method in methods if method not in ignored]
    if not methods:
        raise ValueError("ignored_method excludes every method; nothing to load")

    metrics_dir = f"../{dataset}/metrics"
    result = {}
    for method in methods:
        trials = [
            pd.read_csv(f"{metrics_dir}/{dataset}-{method}-{trial}-results.csv")
            for trial in range(1, 6)
        ]
        result[method] = average_results(trials)

    passive_result = pd.read_csv(f"{metrics_dir}/{dataset}-passive-results.csv")
    # Size the baseline from the first loaded method rather than a hard-coded
    # 'lc', which fails when 'lc' is among the ignored methods.
    n_rows = len(result[methods[0]])
    result['passive'] = pd.DataFrame({
        column: [np.mean(passive_result[column])] * n_rows
        for column in ('Accuracy', 'F1 Micro', 'F1 Macro')
    })

    return result

In [87]:
def highlight_cells(val):
    """Return the CSS declaration for a table cell based on the sign of ``val``.

    Args:
        val: Numeric cell value (a difference from the baseline).

    Returns:
        A CSS string: light green when ``val`` is positive, light coral when
        it is negative, and light yellow otherwise.
    """
    if val > 0:  # better than the baseline
        return 'background-color:lightgreen;color:black'
    if val < 0:  # worse than the baseline
        return 'background-color:lightcoral;color:black'
    # Exactly zero -- or NaN, which fails both comparisons -- counts as a tie.
    return 'background-color:lightyellow;color:black'

def compare_results(result, num_data, key, checkpoint_only=False, ignored_method=None):
    """Build a styled table of each method's gap to the passive baseline.

    Args:
        result: Mapping of method name to a DataFrame that has a
            ``'Data Used'`` column and a ``key`` column. It must contain a
            ``'passive'`` entry, whose first ``key`` value is the baseline.
        num_data: Denominator that turns ``'Data Used'`` into a fraction.
        key: Metric column to compare, e.g. ``'F1 Micro'``.
        checkpoint_only: When true, keep only the rows whose data fraction
            rounds to 0.50, 0.60, 0.70 or 0.80 and report the gap as a
            percentage of the baseline. Otherwise keep every row and report
            ``100 * (score - baseline)``.
        ignored_method: Optional container of method names to leave out.

    Returns:
        A pandas ``Styler`` indexed by the percentage of data used, with a
        gray ``'passive'`` column of zeros followed by one column per method
        coloured by ``highlight_cells``.
    """
    # NOTE(review): the two modes scale the gap differently (relative to the
    # baseline vs. plain difference times 100) -- confirm this is intended.
    checkpoints = (0.50, 0.60, 0.70, 0.80)
    passive_value = result['passive'][key].values[0]

    results = {}
    index = []
    for method, frame in result.items():
        if method == 'passive':
            continue
        if ignored_method is not None and method in ignored_method:
            continue

        # Rows recorded at 9216/9217 samples are dropped.
        # NOTE(review): the reason for these two values is not visible here -- confirm.
        filtered = frame[~frame['Data Used'].astype(int).isin([9216, 9217])]

        if checkpoint_only:
            positions = [
                pos
                for pos, used in enumerate(filtered['Data Used'].values)
                if round(used / num_data, 2) in checkpoints
            ]
            selected = filtered.iloc[positions]
            gap = 100 * (selected[key].values - passive_value) / passive_value
        else:
            selected = filtered
            gap = 100 * (selected[key].values - passive_value)

        # The baseline's gap to itself is zero in both modes; assigning it
        # before the method keeps 'passive' as the first column.
        results['passive'] = [0] * len(selected)
        results[method] = gap
        # Label rows from the same filtered rows the values came from, so the
        # labels stay aligned and do not depend on an 'lc' entry existing.
        index = np.round(selected['Data Used'].values / num_data * 100, 1)

    df = pd.DataFrame(results, index=index)

    def style_row(row):
        return [
            'background-color: lightgray' if col == 'passive' else highlight_cells(row[col])
            for col in df.columns
        ]

    return (
        df.style.apply(style_row, axis=1)
        .format("{:+.2f}%")
        .format_index("{:.0f}%")
    )

# NETIFIER

In [88]:
# Load the trial-averaged metrics and the passive baseline for the netifier dataset.
result = load_result('netifier')

In [89]:
# Display each method's F1 Micro gap to the passive baseline at every data-usage level.
print("F1 Micro")
compare_results(result, num_data['netifier'], 'F1 Micro')

F1 Micro


Unnamed: 0,passive,lc,mc,kmeans,coreset,comal,besra,random
5%,+0.00%,-9.58%,-9.58%,-9.58%,-9.58%,-12.19%,-9.96%,-9.51%
12%,+0.00%,-7.38%,-3.51%,-6.35%,-8.03%,-6.37%,-4.57%,-5.48%
19%,+0.00%,-6.18%,-2.69%,-3.48%,-7.22%,-4.85%,-3.24%,-3.40%
25%,+0.00%,-4.69%,-1.12%,-3.01%,-5.17%,-3.51%,-2.24%,-2.69%
31%,+0.00%,-3.68%,-0.54%,-1.99%,-3.39%,-3.75%,-1.79%,-2.07%
36%,+0.00%,-2.24%,-0.67%,-1.45%,-2.35%,-3.39%,-1.29%,-1.39%
40%,+0.00%,-1.52%,-0.33%,-1.26%,-1.79%,-2.65%,-0.97%,-1.23%
44%,+0.00%,-1.07%,-0.05%,-0.74%,-1.21%,-1.95%,-0.64%,-1.02%
48%,+0.00%,-0.62%,+0.16%,-0.53%,-1.13%,-1.38%,-0.44%,-0.81%
50%,+0.00%,-0.71%,+0.37%,-0.42%,-0.84%,-0.94%,-0.35%,-1.03%


# DOCTOR'S ANSWER TEXT

In [90]:
# Load the trial-averaged metrics and the passive baseline for the dat dataset.
result = load_result('dat')

In [91]:
# Display each method's F1 Micro gap to the passive baseline at every data-usage level.
print("F1 Micro")
compare_results(result, num_data['dat'], 'F1 Micro')

F1 Micro


Unnamed: 0,passive,lc,mc,kmeans,coreset,comal,besra,random
5%,+0.00%,-0.65%,-0.65%,-0.65%,-0.65%,-0.68%,-0.65%,-0.65%
13%,+0.00%,-0.56%,-0.65%,-0.65%,-0.58%,-0.58%,-0.62%,-0.62%
19%,+0.00%,-0.42%,-0.46%,-0.39%,-0.39%,-0.50%,-0.48%,-0.58%
26%,+0.00%,-0.39%,-0.51%,-0.50%,-0.30%,-0.47%,-0.44%,-0.53%
31%,+0.00%,-0.37%,-0.20%,-0.37%,-0.20%,-0.34%,-0.27%,-0.44%
36%,+0.00%,-0.17%,-0.22%,-0.16%,-0.27%,-0.34%,-0.16%,-0.43%
41%,+0.00%,-0.15%,-0.20%,-0.21%,-0.28%,-0.45%,-0.15%,-0.37%
45%,+0.00%,-0.13%,+0.00%,-0.18%,-0.05%,-0.35%,-0.12%,-0.27%
48%,+0.00%,-0.03%,-0.17%,+0.01%,-0.03%,-0.38%,-0.03%,-0.23%
50%,+0.00%,-0.05%,+0.04%,-0.03%,-0.14%,-0.28%,-0.07%,-0.27%


# HATE SPEECH DETECTION

In [92]:
# Load the trial-averaged metrics and the passive baseline for the hsd dataset.
result = load_result('hsd')

In [93]:
# Display each method's F1 Micro gap to the passive baseline at every data-usage level.
print("F1 Micro")
compare_results(result, num_data['hsd'], 'F1 Micro')

F1 Micro


Unnamed: 0,passive,lc,mc,kmeans,coreset,comal,besra,random
5%,+0.00%,-19.38%,-19.38%,-18.43%,-19.55%,-15.04%,-21.52%,-19.55%
12%,+0.00%,-17.51%,-8.25%,-9.87%,-12.70%,-8.50%,-10.39%,-10.71%
19%,+0.00%,-16.41%,-5.39%,-7.31%,-8.71%,-6.27%,-7.44%,-7.36%
25%,+0.00%,-13.47%,-3.79%,-4.75%,-7.38%,-4.73%,-5.61%,-5.53%
31%,+0.00%,-12.19%,-2.59%,-3.65%,-5.97%,-3.62%,-4.42%,-4.31%
36%,+0.00%,-10.67%,-2.04%,-2.84%,-4.55%,-3.00%,-3.55%,-3.43%
40%,+0.00%,-9.17%,-1.24%,-2.13%,-3.90%,-2.54%,-2.61%,-2.82%
44%,+0.00%,-7.31%,-0.68%,-1.78%,-3.12%,-2.03%,-2.33%,-2.56%
48%,+0.00%,-5.13%,-0.45%,-1.40%,-2.99%,-2.14%,-2.01%,-2.06%
50%,+0.00%,-4.09%,-0.27%,-1.33%,-2.52%,-1.46%,-1.69%,-1.97%


# CASA

In [94]:
# Load the trial-averaged metrics and the passive baseline for the casa dataset.
result = load_result('casa')

In [95]:
# Display each method's F1 Micro gap to the passive baseline at every data-usage level.
print("F1 Micro")
compare_results(result, num_data['casa'], 'F1 Micro')

F1 Micro


Unnamed: 0,passive,lc,mc,kmeans,coreset,comal,besra,random
5%,+0.00%,-16.48%,-16.48%,-16.48%,-16.48%,-18.38%,-16.82%,-16.48%
12%,+0.00%,-7.70%,-8.49%,-8.01%,-8.01%,-8.55%,-7.80%,-8.19%
19%,+0.00%,-3.64%,-3.47%,-4.81%,-4.40%,-3.84%,-3.22%,-3.77%
25%,+0.00%,-2.13%,-1.93%,-2.61%,-2.87%,-2.92%,-2.07%,-2.15%
31%,+0.00%,-1.64%,-1.45%,-1.60%,-2.18%,-2.28%,-1.46%,-1.57%
36%,+0.00%,-1.02%,-0.65%,-1.17%,-1.70%,-1.98%,-0.88%,-1.37%
40%,+0.00%,-0.76%,-0.59%,-0.73%,-1.30%,-1.73%,-1.01%,-0.96%
44%,+0.00%,-0.56%,-0.52%,-0.71%,-0.94%,-1.36%,-0.78%,-0.85%
48%,+0.00%,-0.48%,-0.45%,-0.43%,-0.83%,-1.88%,-0.66%,-0.49%
50%,+0.00%,-0.49%,-0.35%,-0.35%,-0.90%,-1.71%,-0.47%,-0.62%


# HOASA

In [96]:
# Load the trial-averaged metrics and the passive baseline for the hoasa dataset.
result = load_result('hoasa')

In [97]:
# Display each method's F1 Micro gap to the passive baseline at every data-usage level.
print("F1 Micro")
compare_results(result, num_data['hoasa'], 'F1 Micro')

F1 Micro


Unnamed: 0,passive,lc,mc,kmeans,coreset,comal,besra,random
5%,+0.00%,-12.38%,-12.50%,-12.38%,-12.38%,-10.47%,-13.06%,-12.38%
12%,+0.00%,-4.55%,-4.77%,-4.83%,-4.50%,-4.21%,-4.96%,-4.85%
19%,+0.00%,-2.74%,-2.42%,-2.71%,-2.65%,-2.67%,-2.94%,-2.62%
25%,+0.00%,-1.78%,-1.58%,-1.65%,-1.93%,-2.15%,-2.14%,-1.88%
31%,+0.00%,-1.42%,-1.04%,-1.30%,-1.42%,-1.86%,-1.43%,-1.55%
36%,+0.00%,-1.07%,-0.68%,-0.98%,-1.15%,-1.57%,-1.17%,-0.97%
40%,+0.00%,-0.89%,-0.55%,-0.76%,-0.86%,-1.42%,-1.00%,-0.95%
44%,+0.00%,-0.79%,-0.41%,-0.69%,-0.83%,-1.37%,-0.82%,-0.63%
48%,+0.00%,-0.69%,-0.45%,-0.52%,-0.63%,-1.32%,-0.58%,-0.56%
50%,+0.00%,-0.55%,-0.28%,-0.51%,-0.60%,-1.15%,-0.59%,-0.69%
