In [1]:
import numpy as np
import pandas as pd
import os
import json

In [2]:
# Root folder of the evaluation-run reports; the figures/ and tables/ output
# folders are both resolved directly beneath it.
base_dir = "/Users/teliov/TUD/Thesis/Medvice/Notebooks/data/07_27_reports/eval-runs"
fig_dir, table_dir = (os.path.join(base_dir, leaf) for leaf in ("figures", "tables"))

In [3]:
from glob import glob

In [4]:
# Paths of every CSV file directly inside base_dir, sorted in place so the
# order is deterministic.
filenames = glob(os.path.join(base_dir, "*.csv"))
filenames.sort()

In [5]:
# Run identifiers. Each one is substituted into the deep-learning result path
# template and used as the prefix of the per-run score column names.
# The order matters: labels in `nicknames` are paired with these by position.
dirmaps = (
    # plain run
    ["output_basic_15k"]
    # "*_cnt_*" runs
    + [
        "output_basic_2_cnt_15k",
        "output_basic_3_cnt_15k",
        "output_basic_4_cnt_15k",
        "output_basic_avg_cnt_15k",
    ]
    # "*_inc_*" runs
    + [
        "output_basic_inc_1_15k",
        "output_basic_inc_2_15k",
        "output_basic_inc_3_15k",
    ]
    # "*_pct_*" runs
    + [
        "output_basic_pct_10_15k",
        "output_basic_pct_20_15k",
        "output_basic_pct_30_15k",
        "output_basic_pct_50_15k",
        "output_basic_pct_70_15k",
    ]
)

In [6]:
# Human-readable labels for the summary table. They are matched to the entries
# of `dirmaps` purely by position, so both lists must keep the same order.
# NOTE(review): by position, "output_basic_avg_cnt_15k" lines up with
# "Min. 5 Symptoms" and "output_basic_inc_1_15k" with "Mean Injected" —
# confirm this pairing is intended.
nicknames = (
    ["Baseline"]
    + [
        "Min. 2 Symptoms",
        "Min. 3 Symptoms",
        "Min. 4 Symptoms",
        "Min. 5 Symptoms",
    ]
    + [
        "Mean Injected",
        "Max Injected",
        "Min Injected",
    ]
    + [
        "Perturbed-10%",
        "Perturbed-20%",
        "Perturbed-30%",
        "Perturbed-50%",
        "Perturbed-70%",
    ]
)

In [7]:
# Path template for the per-run deep-learning evaluation JSON files; the "{}"
# placeholder is filled with an entry of `dirmaps` via str.format.
dl_results_tpl = "/Users/teliov/TUD/Thesis/Medvice/Notebooks/data/07_27_reports/dl_eval/{}_dl_eval.json"

In [8]:
# Load one deep-learning evaluation JSON per run, in the same order as
# `dirmaps`, so that dl_results[i] belongs to dirmaps[i].
dl_results = []
for run_name in dirmaps:
    with open(dl_results_tpl.format(run_name)) as handle:
        dl_results.append(json.load(handle))

In [9]:
# Lookup from run identifier to display label, pairing the two lists by index.
nicknames_map = {}
for position, label in enumerate(nicknames):
    nicknames_map[dirmaps[position]] = label

In [10]:
# Read every discovered CSV into its own DataFrame, keeping the order of `filenames`.
dfs = list(map(pd.read_csv, filenames))

In [11]:
# Column names of the summary table, in output order: the dataset label, then
# accuracy, precision and top-5 for each of the nb / rf / mlp models.
headers = (
    ["dataset"]
    + ["nb_acc", "rf_acc", "mlp_acc"]
    + ["nb_prec", "rf_prec", "mlp_prec"]
    + ["nb_top5", "rf_top5", "mlp_top5"]
)

In [12]:
# Column store for the summary table: every header starts with its own empty list.
_data = dict((column, []) for column in headers)

In [13]:
# Fill the Naive Bayes / Random Forest columns of `_data` from the first
# evaluation CSV: one entry per run in `dirmaps`, labelled via `nicknames`.
df = dfs[0]

# Column-wise means over each model's rows. They are identical for every run,
# so they are computed once instead of on each loop iteration.
# numeric_only=True keeps the string-valued 'model' column out of the
# reduction; without it pandas >= 2.0 raises a TypeError when averaging a
# frame that contains text columns.
rf_values = df[df['model'] == "random_forest"].mean(numeric_only=True)
nb_values = df[df['model'] == 'naive_bayes'].mean(numeric_only=True)

# Empty the lists this cell fills, so that re-running it does not append a
# second copy of every row.
for key in ("dataset", "nb_acc", "nb_prec", "nb_top5", "rf_acc", "rf_prec", "rf_top5"):
    _data[key].clear()

for idx, dirname in enumerate(dirmaps):
    # Score columns in the CSV are named "<run identifier>_<metric>".
    acc_score = "%s_accuracy_score" % dirname
    prec_score = "%s_precision_weighted_score" % dirname
    top5_score = "%s_top_5_score" % dirname

    _data["nb_acc"].append(nb_values[acc_score])
    _data["nb_prec"].append(nb_values[prec_score])
    _data["nb_top5"].append(nb_values[top5_score])
    _data["rf_acc"].append(rf_values[acc_score])
    _data["rf_prec"].append(rf_values[prec_score])
    _data["rf_top5"].append(rf_values[top5_score])

    # The label is taken from the same position in `nicknames`.
    _data["dataset"].append(nicknames[idx])

In [14]:
# Sanity check: number of dataset labels collected so far.
len(_data["dataset"])

13

In [15]:
# Peek at the first loaded deep-learning result to see which keys it carries.
dl_results[0]

{'precision': 0.6304336786270142,
 'accuracy': 0.588480532169342,
 'top5': 0.8553074164024476}

In [16]:
# Fill the MLP columns from the loaded deep-learning results, one value per
# entry of `dl_results`. The lists are assigned rather than appended to, so
# re-running this cell cannot leave the MLP columns longer than the others
# (unequal column lengths make building the summary DataFrame fail).
_data["mlp_acc"] = [item['accuracy'] for item in dl_results]
_data["mlp_prec"] = [item['precision'] for item in dl_results]
_data["mlp_top5"] = [item['top5'] for item in dl_results]

In [17]:
# Assemble the summary table; each key of `_data` becomes one column.
df = pd.DataFrame.from_dict(_data)

In [18]:
# Write the summary table without the index, formatting floats with three
# decimals. The output folder is created first because to_csv does not create
# missing parent directories and fails if the folder does not exist yet.
os.makedirs(table_dir, exist_ok=True)
df.to_csv(os.path.join(table_dir, "dl_nb_rf.csv"), float_format="%.3f", index=False)

In [19]:
# Display the assembled summary table.
df

Unnamed: 0,dataset,nb_acc,rf_acc,mlp_acc,nb_prec,rf_prec,mlp_prec,nb_top5,rf_top5,mlp_top5
0,Baseline,0.587808,0.570958,0.588481,0.63304,0.6116,0.630434,0.85295,0.845333,0.855307
1,Min. 2 Symptoms,0.66903,0.638435,0.662125,0.689971,0.659914,0.690885,0.912903,0.901174,0.911241
2,Min. 3 Symptoms,0.733777,0.699029,0.729088,0.763902,0.725736,0.763307,0.940772,0.930048,0.940992
3,Min. 4 Symptoms,0.789332,0.752272,0.784982,0.827145,0.792578,0.826582,0.957288,0.946805,0.955286
4,Min. 5 Symptoms,0.835615,0.802202,0.832559,0.875735,0.841791,0.874279,0.966103,0.956687,0.966326
5,Mean Injected,0.312353,0.28588,0.332997,0.379986,0.339699,0.392489,0.563293,0.559795,0.585252
6,Max Injected,0.09938,0.099521,0.127096,0.206609,0.177465,0.228431,0.233775,0.270916,0.274561
7,Min Injected,0.480018,0.451291,0.49236,0.514897,0.477554,0.521109,0.753518,0.742924,0.765534
8,Perturbed-10%,0.593631,0.580365,0.594752,0.641889,0.623432,0.638876,0.860754,0.855248,0.863086
9,Perturbed-20%,0.600099,0.589358,0.600969,0.652992,0.636419,0.64985,0.867482,0.864554,0.870134
