In [2]:
import matplotlib
# Select the PDF backend up front; pyplot is only imported in the following
# cell, so the backend choice is in place before any figure machinery loads.
matplotlib.use('pdf')

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import os
# Apply Matplotlib's 'default' style sheet for every figure in this notebook.
plt.style.use('default')

In [61]:
def human_format(x):
    """Format a number with three significant digits and a k/M/B suffix.

    Parameters
    ----------
    x : int or float
        Value to format.

    Returns
    -------
    str
        ``x`` rendered with ``{:.3g}``; values of at least one thousand,
        million or billion are scaled down and suffixed with ``"k"``, ``"M"``
        or ``"B"`` respectively.

    Notes
    -----
    The suffix is chosen from the value *after* rounding to three significant
    digits. Choosing it from the raw value makes inputs that round up across a
    power of 1000 fall back to exponent notation (999.9 -> "1e+03",
    999999 -> "1e+03k") instead of "1k" / "1M". NaN is returned as "nan"
    without a suffix.
    """
    # Largest unit first so the first threshold reached is the right one.
    units = ((1000000000, "B"), (1000000, "M"), (1000, "k"))
    # Round first; float() also guarantees true division below.
    rounded = float("{:.3g}".format(x))
    for scale, suffix in units:
        if rounded >= scale:
            return "{:.3g}{}".format(rounded / scale, suffix)
    return "{:.3g}".format(rounded)
def get_algorithm(alg):
    """Return the display label for a raw algorithm label.

    The raw label "ic2" is reported as "tkdc"; every other label is returned
    unchanged.
    """
    return "tkdc" if alg == "ic2" else alg
def get_dataset(path):
    """Return the canonical dataset name for the first known marker in ``path``.

    Markers are matched as plain substrings of ``path`` and are tried in the
    order listed below. Returns ``None`` when no marker occurs in ``path``.
    """
    marker_to_name = (
        ("bgauss", "gauss"),
        ("hep", "hep"),
        ("tmy3_4", "tmy3_4"),
        ("tmy3.", "tmy3"),
        ("mnist_64", "mnist"),
        ("home", "home"),
        ("energy", "tmy3"),
        ("shuttle", "shuttle"),
    )
    matches = (name for marker, name in marker_to_name if marker in path)
    # next() with a default reproduces the fall-through "no match" result.
    return next(matches, None)
# NOTE(review): presumably a preferred ordering of algorithm labels; it is not
# referenced by any of the cells visible in this notebook excerpt.
algorder = ["tkdc", "simple", "sklearn", "rkde", "nocut", "ks"]

In [52]:
# Load the results table; the path is relative to the notebook's working directory.
df = pd.read_csv("../analysis/results/rawacc_2.csv")
# Preview the first two rows as a sanity check on the columns.
df.head(2)

Unnamed: 0,algorithm,dataset,dimension,true_below,est_below,tp_below,precision,recall,f_score
0,sklearn_t0,energy,2,500,500,500,1.0,1.0,1.0
1,sklearn_t0,energy,4,500,500,500,1.0,1.0,1.0


In [67]:
# Derive the label columns used for plotting: r_dataset via get_dataset and
# r_algorithm via get_algorithm.
# NOTE(review): get_dataset returns None for names it does not recognize, so
# such rows end up without a usable r_dataset value.
df["r_dataset"] = df["dataset"].map(get_dataset)
df["r_algorithm"] = df["algorithm"].map(get_algorithm)

In [73]:
# Algorithm labels (as produced by get_algorithm) to include in the figure.
algorithms = ["tkdc", "sklearn", "ks"]
# Dataset labels (as produced by get_dataset) to include; one bar series each.
datasets = ["tmy3", "home", "shuttle"]
# colors = ["0.2", "0.7", "1.0"]
# One subplot per entry; each entry lists the `dimension` values selected for it.
dimensions = [[2], [4], [7,8]]
# NOTE(review): not referenced by the plotting cell below, which hard-codes color="0.7".
colors = ["C1", "C2", "C3", "C0"]
# Hatch pattern per dataset, indexed in the same order as `datasets`.
hatches = ["", "//", "\\\\"]

In [74]:
# Re-applies the 'default' style sheet; the imports cell above already does this.
plt.style.use('default')

In [76]:
# Grouped bar chart of F-score: one subplot per entry of `dimensions`, one
# group of bars per algorithm, and one hatched bar per dataset inside a group.
# The figure is written to "accuracy.pdf".
group_stride = 4  # x-distance between the first bars of adjacent algorithm groups

fig, axs = plt.subplots(
    nrows=len(dimensions), ncols=1, figsize=(5, 5), dpi=300, squeeze=False
)
axs = axs[:, 0]  # squeeze=False keeps this indexable even for a single subplot
fig.subplots_adjust(hspace=0.5)

for k, dims in enumerate(dimensions):
    ax = axs[k]
    for di, dname in enumerate(datasets):
        data = df[
            (df["r_dataset"] == dname)
            & df["dimension"].isin(dims)
            & df["r_algorithm"].isin(algorithms)
        ]
        if data.empty:
            # No results for this dataset at these dimensions: nothing to draw.
            continue
        # Position every bar by the algorithm of its own row. Relying on row
        # order (and on the row count) misplaces bars whenever the CSV order
        # differs from `algorithms` or an algorithm is missing for a dataset.
        xlocs = data["r_algorithm"].map(algorithms.index).values * group_stride + di
        bars = ax.bar(
            xlocs,  # positional: newer Matplotlib no longer accepts `left=`
            data["f_score"].values,
            label=dname,
            color="0.7",
            edgecolor="0.0",
            hatch=hatches[di % len(hatches)],
        )
        # Annotate each bar with its F-score, centred just above the bar top.
        for bar, score in zip(bars, data["f_score"]):
            ax.text(
                bar.get_x() + bar.get_width() / 2,
                bar.get_height(),
                human_format(float(score)),
                ha='center', va='bottom', size=8,
            )
    # One tick per algorithm group, centred under the group's bars.
    ax.set_xticks(
        np.arange(len(algorithms)) * group_stride + (len(datasets) - 1) / 2.0
    )
    ax.set_xlim(-0.2 * group_stride, (len(algorithms) - 0.2) * group_stride)
    # Label ticks from `algorithms` so labels always match the bar positions,
    # rather than from whichever dataset happened to be filtered last.
    ax.set_xticklabels(algorithms)
    ax.set_ylim(0.0, 1.25)  # headroom above 1.0 for the value annotations
    ax.yaxis.grid(True, ls="dotted", alpha=.5)
    ax.set_ylabel("F-score")
    ax.set_title("Dimensions: {}".format(dims))
    if k == len(dimensions) - 1:
        # Only the bottom subplot carries the shared x-label and the legend.
        ax.set_xlabel("Algorithm")
        ax.legend(loc=0)
fig.tight_layout()
fig.savefig("accuracy.pdf")

