In [15]:
import numpy as np
import pandas as pd
import time
import os
import sys

from collections import defaultdict

# Make the parent of the current working directory importable; the
# project-local `data` imports below presumably resolve from there.
parent_folder = os.path.dirname(os.path.abspath("./"))
# Guard the append so re-running this cell does not keep adding duplicate
# entries to sys.path.
if parent_folder not in sys.path:
    sys.path.append(parent_folder)

from data._metrics import *
from data._utils import *
from data.real_world_datasets import Datasets as RealWorldDatasets

from mpire.pool import WorkerPool

In [16]:
# Load every per-dataset result CSV found under results/real_world/ and stack
# them row-wise into one frame. Datasets without a CSV are skipped silently.
# Each file keeps its own row labels, so index values repeat across datasets.
result_frames = []
for dataset in RealWorldDatasets:
    csv_path = f"results/real_world/{dataset.id}.csv"
    if os.path.exists(csv_path):
        result_frames.append(pd.read_csv(csv_path))

df = pd.concat(result_frames)
df

Unnamed: 0,dataset,measure,run,value,time,process_time
0,Synth_low,DISCO,0,0.762296,5.631008,5.561633
1,Synth_low,DC_DUNN,0,1.691528,5.950858,4.355495
2,Synth_low,DBCV,0,0.039533,4.357423,16.296332
3,Synth_low,DCSI,0,0.971977,7.452429,2.802214
4,Synth_low,S_DBW,0,0.092163,1.915402,0.421332
...,...,...,...,...,...,...
125,HAR,DSI,9,0.593152,20.295095,21.312760
126,HAR,SILHOUETTE,9,0.061707,3.309683,10.935886
127,HAR,DUNN,9,0.078785,0.327571,5.393635
128,HAR,DB,9,3.560995,0.123580,2.029655


In [17]:
def convert_dict(df, data_dict, col, agg):
    """Store one aggregate of ``col`` per (dataset, measure) group in ``data_dict``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns ``"dataset"``, ``"measure"`` and ``col``.
    data_dict : dict of dict
        Filled in place. The outer key is ``(dataset, agg)`` and the inner key
        is ``(measure, col)``. The outer entry must already exist or be created
        on access (e.g. a ``defaultdict(dict)``).
    col : str
        Name of the column to aggregate.
    agg : str
        Name of the groupby method to call without arguments, e.g. ``"mean"``
        or ``"std"``.

    Returns
    -------
    None
    """
    grouped_column = df.groupby(["dataset", "measure"])[col]
    # Look the aggregation up by name so the same helper serves mean, std, ...
    aggregated = getattr(grouped_column, agg)()
    for group_key, value in aggregated.to_dict().items():
        dataset, eval_method = group_key
        data_dict[(dataset, agg)][(eval_method, col)] = value

# Collect mean and std of every recorded quantity per (dataset, measure).
# Rows of the resulting frame are keyed by (dataset, aggregation) and columns
# by (measure, quantity).
data_dict = defaultdict(dict)
for col in ["value", "time", "process_time"]:
    for agg in ["mean", "std"]:
        convert_dict(df, data_dict, col, agg)

df_data = pd.DataFrame.from_dict(data_dict, orient="index")

In [18]:
# Row order for the summary table: for each dataset, its "mean" row followed
# directly by its "std" row.
datanames = []
for dataset in RealWorldDatasets:
    datanames.extend((dataset.name, stat) for stat in ("mean", "std"))

# Column order of the summary table, written as three consecutive groups.
METRICS = (
    # Leading group (unlabelled in the original listing)
    ["DISCO", "DC_DUNN"]
    # Competitors
    + ["DBCV", "DCSI", "S_DBW", "CDBW", "CVDD", "CVNN", "DSI"]
    # Gauss
    + ["SILHOUETTE", "DUNN", "DB", "CH"]
)

# Shape the aggregated results into the summary table:
#   1. order the rows as listed in `datanames` (entries absent from df_data
#      come out as all-NaN rows),
#   2. reindex the first column level against METRICS,
#   3. reindex the second column level against ["value"],
#   4. round to three decimals.
df2 = (
    df_data
    .reindex(datanames)
    .pipe(lambda frame: frame.reindex(columns=frame.columns.reindex(METRICS, level=0)[0]))
    .pipe(lambda frame: frame.reindex(columns=frame.columns.reindex(["value"], level=1)[0]))
    .round(3)
)
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,DISCO,DC_DUNN,DBCV,DCSI,S_DBW,CDBW,CVDD,CVNN,DSI,SILHOUETTE,DUNN,DB,CH
Unnamed: 0_level_1,Unnamed: 1_level_1,value,value,value,value,value,value,value,value,value,value,value,value,value
Synth_low,mean,0.762,1.692,0.04,0.972,0.092,0.487,0.0,1.038,0.978,0.614,1.001,0.608,47742.142
Synth_low,std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Synth_high,mean,0.811,1.867,0.037,0.959,0.309,9.357,0.0,1.743,0.814,0.498,0.467,0.831,3698.814
Synth_high,std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HAR,mean,-0.014,0.233,-0.063,0.493,0.602,0.0,0.0,40.721,0.593,0.062,0.079,3.561,3662.527
HAR,std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
letterrec.,mean,,,,,,,,,,,,,
letterrec.,std,,,,,,,,,,,,,
htru2,mean,,,,,,,,,,,,,
htru2,std,,,,,,,,,,,,,


In [19]:
# Export the summary frame `df2` to LaTeX, then patch the generated file in
# place with sed/perl one-liners. The commands are order dependent: later
# patterns match text produced by earlier ones (e.g. `table\*`, `\\_`).
# NOTE(review): IPython may interpolate `{expr}` / `$name` inside `!` lines;
# confirm that these patterns reach the shell unchanged.
# NOTE(review): `sed -i -e` with no backup suffix looks GNU-style; confirm
# behaviour on BSD/macOS sed.
from clustpy.utils import evaluation_df_to_latex_table

# evaluation_df_to_latex_table(df2, "summary.tex", color_by_value="Green")
# Presumably writes df2 as a LaTeX table to summary.tex (every command below
# edits that file) with highlighting and percent scaling switched off and two
# decimal places - confirm against the clustpy documentation.
evaluation_df_to_latex_table(df2, "summary.tex", best_in_bold=False, second_best_underlined=False, in_percent=False, decimal_places=2)

# Turn every occurrence of "table" into "table*" (so `table` environments
# become `table*`); this also hits any other word containing "table".
!sed -i -e 's/table/table\*/g' summary.tex
# Escape every underscore for LaTeX.
!sed -i -e 's/_/\\_/g' summary.tex
# Replace each "$nan \pm nan$" cell with "-", repeating until none is left.
!perl -pi -e '1 while s/\$nan \\pm nan\$/-/g' summary.tex

# Remove second level of first index
# On each line, keep the text up to the first "& " and drop the cell after it.
!perl -pi -e 's/^(.*?& ).*?& /$1/g' summary.tex
# Delete every "\midrule" that is directly followed by a line break.
!perl -pi -e 's/\\midrule\n//g' summary.tex
# Drop the second "l|" from the tabular column spec to match the removed cell.
!perl -pi -e 's/(\{tabular\}\{l\|)l\|/$1/g' summary.tex
# !perl -pi -e 's/(^\\textbf\{Dataset\}.*?$)/$1\n\\midrule/g' summary.tex

# Categories
# Add a leading "r" column to the tabular spec for the rotated category label.
!perl -pi -e 's/(\{tabular\}\{)/$1r/g' summary.tex
# Prefix every line containing "& " with an empty leading cell.
!perl -pi -e 's/^(.*?& )/& $1/g' summary.tex
# Before the row of the first dataset of each category, insert a \midrule and
# a rotated \multirow label. The row counts (8, 2, 3, 5) are hard-coded.
# NOTE(review): the df2 shown above has no Weizmann, COIL20 or Optdigits rows;
# if the table lacks them, those three substitutions match nothing - confirm
# the row counts against the datasets actually present.
!perl -pi -e 's/(^& Synth\\_low)/\\midrule\n\\parbox[t]\{2mm\}\{\\multirow\{8\}\{*\}\{\\rotatebox[origin=c]\{90\}\{Tabular data\}\}\}\n$1/g' summary.tex
!perl -pi -e 's/(^& Weizmann)/\\midrule\n\\parbox[t]\{2mm\}\{\\multirow\{2\}\{*\}\{\\rotatebox[origin=c]\{90\}\{Video\}\}\}\n$1/g' summary.tex
!perl -pi -e 's/(^& COIL20)/\\midrule\n\\parbox[t]\{2mm\}\{\\multirow\{3\}\{*\}\{\\rotatebox[origin=c]\{90\}\{Image\}\}\}\n$1/g' summary.tex
!perl -pi -e 's/(^& Optdigits)/\\midrule\n\\parbox[t]\{2mm\}\{\\multirow\{5\}\{*\}\{\\rotatebox[origin=c]\{90\}\{MNIST\}\}\}\n$1/g' summary.tex

# Set \arraystretch to 1.2 right before \begin{table*} and back to 1 right
# after \end{table*}.
!perl -pi -e 's/(\\begin\{table\*\})/\\renewcommand\{\\arraystretch\}\{1.2\}\n\n\n$1/g' summary.tex
!perl -pi -e 's/(\\end\{table\*\}.*$)/$1\n\n\\renewcommand\{\\arraystretch\}\{1\}\n/g' summary.tex
