## Convert `/usr/bin/time` output to a `csv` file

In [1]:
# -*- coding: utf-8 -*-

import pandas as pd
import os

In [2]:
# Dataset names. Each name is interpolated into the timing-log file names
# (`{prefix}_{dataset}_{ratio}.txt`) and becomes one row of the result frame.
DATASET = [
    "banana",
    "breast_cancer",
    "diabetis",
    "flare_solar",
    "german",
    "heart",
    "image",
    "ringnorm",
    "splice",
    "thyroid",
    "titanic",
    "twonorm",
    "waveform",
]

# Boosters being compared. Each name is part of the timing-log file names and
# becomes one column of the result frame.
BOOSTERS = ["lpb", "erlpb", "mlpb(ss_only)", "mlpb(pfw_only)", "mlpb(ss)", "mlpb(pfw)"]

# Directory that contains the timing logs.
PATH = "./time"

# Number of runs per (booster, dataset) pair. The runs are addressed through
# the file-name suffixes "0.1" ... f"0.{FOLD_SIZE}", and the accumulated time
# is divided by this value to obtain the average.
FOLD_SIZE = 5

# Value charged to a run whose timing log is empty.
# NOTE(review): presumably seconds, since it is added to the
# "User/System time (seconds)" values -- confirm.
TIME_LIMIT = 20_000

# Per-dataset summary indexed by the "Dataset" column. Its "Size" and
# "Feature" columns are read when the TeX table is generated.
shape_df = pd.read_csv("./benchmark_summary.csv", index_col="Dataset")
shape_df

Unnamed: 0_level_0,Size,Feature
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1
banana,5300,3
breast_cancer,263,10
diabetis,768,9
flare_solar,144,10
german,1000,21
heart,270,14
image,2086,19
ringnorm,7400,21
splice,2991,61
thyroid,215,6


In [3]:
# Lines of a timing log whose values are added up to obtain the time of one
# run.
keywords = [
    "User time (seconds): ",
    "System time (seconds): ",
]


def _read_cpu_seconds(path):
    """Return the summed `keywords` values found in the timing log `path`.

    Every line that contains one of `keywords` contributes the number that
    follows its last ": " separator. A log without any such line yields 0.

    Raises:
        OSError: if `path` cannot be opened.
        ValueError: if a matching line does not end in a number.
    """
    with open(path) as log:
        return sum(
            float(line.split(': ')[-1].strip())
            for line in log
            if any(keyword in line for keyword in keywords)
        )


def _report_time_limit(label, reached):
    """Print which datasets (and which ratios) are recorded in `reached`.

    `reached` maps a dataset name to the list of ratios recorded for it.
    Datasets with an empty list are skipped; if nothing is left, a single
    "None" line is printed so that an empty report is visibly empty.
    """
    print(f"REACHED TO THE TIME LIMIT ({label})")
    hits = {name: hit_ratios for name, hit_ratios in reached.items() if hit_ratios}
    for name, hit_ratios in hits.items():
        print(f"\t{name:<8}: {hit_ratios}")
    if not hits:
        print("\tNone")


# Ratios whose log was empty, per dataset, for the two boosters that are
# expected to run into the time limit.
ss_reached_to_limit = {name: [] for name in DATASET}
pfw_reached_to_limit = {name: [] for name in DATASET}
reached_to_limit = {
    "mlpb(ss_only)": ss_reached_to_limit,
    "mlpb(pfw_only)": pfw_reached_to_limit,
}

# File-name suffixes of the individual runs: "0.1", "0.2", ...
ratios = [f"0.{k+1}" for k in range(FOLD_SIZE)]

# booster -> list of average times, one entry per dataset (in DATASET order).
columns = {booster: [] for booster in BOOSTERS}

for booster in BOOSTERS:
    # Logs of a booster named "mlpb" live in a "pairwise" sub-directory.
    # No entry of BOOSTERS as configured in this notebook has that name, so
    # this branch is currently not taken.
    if booster == "mlpb":
        prefix = f"{PATH}/pairwise/{booster}"
    else:
        prefix = f"{PATH}/{booster}"

    for data in DATASET:
        acc = 0.0
        for ratio in ratios:
            file = f"{prefix}_{data}_{ratio}.txt"

            if os.stat(file).st_size == 0:
                # An empty log is treated as a run that hit the time limit.
                acc += TIME_LIMIT
                if booster in reached_to_limit:
                    reached_to_limit[booster][data].append(ratio)
                else:
                    # Still charged TIME_LIMIT above, but say which log
                    # caused it so the result can be checked.
                    print(
                        f"Something is wrong: {file} is empty, but {booster} "
                        "is not expected to reach the time limit"
                    )
            else:
                acc += _read_cpu_seconds(file)

        columns[booster].append(acc / FOLD_SIZE)


print("-----")
_report_time_limit("SS", ss_reached_to_limit)
print("-----")
_report_time_limit("PFW", pfw_reached_to_limit)
print("-----")

# Rows: datasets, columns: boosters, values: average time per run.
df = pd.DataFrame(data=columns, index=DATASET)


df.head(15)

-----
REACHED TO THE TIME LIMIT (SS)
	banana  : ['0.1', '0.2']
	german  : ['0.1', '0.2', '0.3']
	ringnorm: ['0.1', '0.2']
	twonorm : ['0.1', '0.2', '0.3', '0.4', '0.5']
	waveform: ['0.1', '0.2', '0.3', '0.4']
-----
REACHED TO THE TIME LIMIT (PFW)
	None
-----


Unnamed: 0,lpb,erlpb,mlpb(ss_only),mlpb(pfw_only),mlpb(ss),mlpb(pfw)
banana,168.256,3434.746,13899.056,31453.072,1418.406,1398.678
breast_cancer,3.606,73.452,180.162,270.498,23.426,19.814
diabetis,47.528,1478.768,10102.338,3471.766,201.458,270.508
flare_solar,2.296,2.456,13.336,80.732,31.638,46.452
german,77.56,1391.908,15063.942,5692.322,181.432,201.88
heart,10.028,193.576,2089.018,183.09,44.108,24.258
image,8.252,107.524,3698.038,502.832,32.01,10.514
ringnorm,22.09,1148.162,15013.532,3350.872,26.758,36.728
splice,19.346,490.916,10317.42,943.978,122.082,37.876
thyroid,0.7,0.662,367.514,0.352,2.708,0.614


In [4]:
# Datasets whose "mlpb(ss_only)" cell in the TeX table is replaced by the
# given label instead of the measured average.
# NOTE(review): these labels are written by hand; nothing in this notebook
# derives them from `df` or from the time-limit report -- confirm that they
# still match the measurements before regenerating the table.
ss_limits = {
    "banana":   ">10^4",
    "diabetis": ">10^4",
    "german":   ">10^4",
    "splice":   ">10^4",
    "heart":    ">10^3",
    "image":    ">10^3",
    "ringnorm": ">10^4",
    "twonorm":  ">10^4",
    "waveform": ">10^4",
}

# Same as `ss_limits`, for the "mlpb(pfw_only)" column.
pfw_limits = {
    "banana":   ">10^4",
    "waveform": ">10^4",
}

# Alternative labelling (disabled): some entries are marked "TLE" instead of
# a lower bound. The table cell emits "TLE" as plain text, whereas every
# other label is wrapped in `$...$`.
# ss_limits = {
#     "banana":   "TLE",
#     "diabetis": ">10^4",
#     "german":   "TLE",
#     "splice":   ">10^4",
#     "heart":    ">10^3",
#     "image":    ">10^3",
#     "ringnorm": "TLE",
#     "twonorm":  "TLE",
#     "waveform": "TLE",
# }

# pfw_limits = {
#     "banana":   "TLE",
#     "waveform": "TLE",
# }

In [5]:
# Display label of every dataset, keyed by the dataset's internal name.
ROW_NAMES = {
    "banana": "Banana",
    "breast_cancer": "B.Cancer",
    "diabetis": "Diabetes",
    "flare_solar": "F.Solar",
    "german": "German",
    "heart": "Heart",
    "image": "Image",
    "ringnorm": "R.norm",
    "splice": "Splice",
    "thyroid": "Thyroid",
    "titanic": "Titanic",
    "twonorm": "Twonorm",
    "waveform": "Waveform",
}

# Display label of every booster, keyed by the booster's internal name.
COL_NAMES = {
    "lpb":            "LPB.",
    "erlpb":          "ERLPB.",
    "mlpb(ss)":       "MLPB. (SS)",
    "mlpb(pfw)":      "MLPB. (PFW)",
    "mlpb(ss_only)":  "MLPB. (SS only)",
    "mlpb(pfw_only)": "MLPB. (PFW only)",
}

# Field widths: the longest row label, and the longest column label minus 5.
# NOTE(review): the reason for subtracting 5 is not evident from this cell.
MAX_ROW_SIZE = len(max(ROW_NAMES.values(), key=len))
MAX_COL_SIZE = len(max(COL_NAMES.values(), key=len)) - 5

## Create a `TeX` table (Including PFW and SS)

In [6]:
# Row labels of the TeX table, keyed by dataset name.
ROW_NAMES = {
    "banana": "Banana",
    "breast_cancer": "B.Cancer",
    "diabetis": "Diabetes",
    "flare_solar": "F.Solar",
    "german": "German",
    "heart": "Heart",
    "image": "Image",
    "ringnorm": "R.norm",
    "splice": "Splice",
    "thyroid": "Thyroid",
    "titanic": "Titanic",
    "twonorm": "Twonorm",
    "waveform": "Waveform",
}

# Column headers of the TeX table, keyed by booster name. Every header is
# wrapped in its own one-column `tabular`; some contain a `\\` line break.
COL_NAMES = {
    "lpb": "\\begin{tabular}{c} LPB. \\end{tabular}",
    "erlpb": "\\begin{tabular}{c} ERLPB. \\end{tabular}",
    "mlpb(ss)": "\\begin{tabular}{c} MLPB. \\\\ (SS) \\end{tabular}",
    "mlpb(pfw)": "\\begin{tabular}{c} MLPB. \\\\ (PFW) \\end{tabular}",
    "mlpb(ss_only)": "\\begin{tabular}{c} MLPB. \\\\ (SS only) \\end{tabular}",
    "mlpb(pfw_only)": "\\begin{tabular}{c} MLPB. \\\\ (PFW only) \\end{tabular}",
}

# Field widths used to pad the generated TeX source.
MAX_ROW_SIZE = len(max(ROW_NAMES.values(), key=len))
MAX_COL_SIZE = 16
SHAPE_SIZE = 12

# Column specification: one "l" for the row label, one "r" for the shape
# column and one "r" per entry of COL_NAMES.
COL_ALIGNMENT = "{l" + "r" * (len(COL_NAMES) + 1) + "}\n"

PREFIX = "".join(["\\begin{tabular}", COL_ALIGNMENT, "    \\toprule\n"])
SUFFIX = (
    "    \\bottomrule\n"
    "\\end{tabular}"
)

# ++++++
# HEADER
# An empty label field, the shape header, then one header per booster.
SHAPE = "\\begin{tabular}{c} Shape \\end{tabular}"
header_cells = [" " * MAX_ROW_SIZE + f"& {SHAPE:>{SHAPE_SIZE}}"]
header_cells += [f"{COL_NAMES[booster]:>{MAX_COL_SIZE}}" for booster in BOOSTERS]
HEADER = "    " + " & ".join(header_cells) + "\n \\\\ \\midrule \\addlinespace[0.5em]\n"

# Boosters whose cells may be replaced by a hand-written label.
override_labels = {
    "mlpb(ss_only)": ss_limits,
    "mlpb(pfw_only)": pfw_limits,
}


def _time_cell(dataset, booster, seconds):
    """Return the TeX source of one timing cell.

    The cell is `seconds` with two decimals in math mode, unless `booster`
    has an override label for `dataset`. An override is emitted in math mode
    as well, except for the literal "TLE", which is emitted as plain text.
    """
    cell = f"${seconds:>.2f}$"
    labels = override_labels.get(booster)
    if labels is not None and dataset in labels:
        cell = f"{labels[dataset]}"
        if cell != "TLE":
            cell = f"${cell}$"
    return cell


# ++++++
# TABLE
# One TeX row per row of `df`: label, "(Size, Feature)" and the timing cells.
rows = []
for index, item in df.iterrows():
    label = f"    {ROW_NAMES[index]:<{MAX_ROW_SIZE}}"
    shape = f"$({shape_df['Size'][index]}, {shape_df['Feature'][index]})$"
    cells = [label + f"& {shape:>{SHAPE_SIZE}}"]
    cells.extend(
        f"{_time_cell(index, booster, item[booster]):>{MAX_COL_SIZE}}"
        for booster in BOOSTERS
    )
    rows.append(" & ".join(cells) + "\n        \\\\ \\addlinespace[0.5em]\n")


contents = PREFIX + HEADER + "".join(rows) + SUFFIX
print(contents)


# Persist the table next to the other generated TeX tables.
file = "./../table/time.tex"
with open(file, mode='w') as tex_file:
    tex_file.write(contents)


\begin{tabular}{lrrrrrrr}
    \toprule
            & \begin{tabular}{c} Shape \end{tabular} & \begin{tabular}{c} LPB. \end{tabular} & \begin{tabular}{c} ERLPB. \end{tabular} & \begin{tabular}{c} MLPB. \\ (SS only) \end{tabular} & \begin{tabular}{c} MLPB. \\ (PFW only) \end{tabular} & \begin{tabular}{c} MLPB. \\ (SS) \end{tabular} & \begin{tabular}{c} MLPB. \\ (PFW) \end{tabular}
 \\ \midrule \addlinespace[0.5em]
    Banana  &  $(5300, 3)$ &         $168.26$ &        $3434.75$ &          $>10^4$ &          $>10^4$ &        $1418.41$ &        $1398.68$
        \\ \addlinespace[0.5em]
    B.Cancer&  $(263, 10)$ &           $3.61$ &          $73.45$ &         $180.16$ &         $270.50$ &          $23.43$ &          $19.81$
        \\ \addlinespace[0.5em]
    Diabetes&   $(768, 9)$ &          $47.53$ &        $1478.77$ &          $>10^4$ &        $3471.77$ &         $201.46$ &         $270.51$
        \\ \addlinespace[0.5em]
    F.Solar &  $(144, 10)$ &           $2.30$ &           $2.46$ 