In [66]:
import sys
import os
import pandas as pd
from pathlib import Path
import re


def replace_cname(df, oldname, newname):
    new_names = { c: c.replace(oldname, newname).strip() for c in df.columns if oldname in c}
    return df.rename(columns=new_names)

In [67]:
grid_search = pd.DataFrame()

RES_PATH = Path(f"../res/")
files = [os.path.join(path, name) for path, subdirs, files in os.walk(RES_PATH) for name in files]
grid_files = filter(lambda x: "gridsearch" in x, files)

for f in grid_files:
    if "gridsearch" not in f:
        continue
    df = pd.read_pickle(RES_PATH / f)
    
    additional_features = f.replace(".pkl", "").split("__")[1:]
    additional_features = map(lambda x: x.split("_"), additional_features)
    
    for f, v in additional_features:
        df[f"param_{f}"] = v
        
    grid_search = pd.concat([grid_search, df], ignore_index=True)

In [68]:
features_cols = [ c for c in grid_search.columns if "param_" in c ]
grid_search = grid_search[features_cols + ["mean_test_score"]]

grid_search = grid_search.rename(columns={"param_module": "architecture"})
grid_search = replace_cname(grid_search, "_", " ")
grid_search = replace_cname(grid_search, "param", "")
grid_search = replace_cname(grid_search, "module", "")

best_results = grid_search.sort_values("mean test score", ascending=True)
best_results = best_results.groupby("pool").head(5)

In [69]:
latex_str = best_results.to_latex(index=False, float_format="{:0.3f}".format)
print(re.sub(' +',' ', latex_str))

\begin{tabular}{llllllllrrllr}
\toprule
batch size & lr & input size & latent size & pool & rnn layers & rnn size & feature set & overlapping & context len & discretized & architecture & mean test score \\
\midrule
 4096 & 0.000 & 19 & 64 & mean & 3 & 64 & MAGIK & 0.950 & 80 & False & GruLinear & 0.067 \\
 4096 & 0.001 & 19 & 64 & mean & 3 & 64 & MAGIK & 0.950 & 80 & False & GruLinear & 0.069 \\
 4096 & 0.000 & 19 & 128 & mean & 3 & 128 & MAGIK & 0.950 & 80 & False & GruLinear & 0.069 \\
 4096 & 0.000 & 19 & 64 & mean & 3 & 128 & MAGIK & 0.950 & 80 & False & GruLinear & 0.071 \\
 4096 & 0.000 & 19 & 128 & mean & 3 & 64 & MAGIK & 0.950 & 80 & False & GruLinear & 0.071 \\
 4096 & 0.001 & 19 & 128 & last & 3 & 128 & MAGIK & 0.950 & 80 & False & GruLinear & 0.082 \\
 4096 & 0.001 & 19 & 128 & last & 3 & 64 & MAGIK & 0.950 & 80 & False & GruLinear & 0.082 \\
 4096 & 0.001 & 19 & 64 & last & 3 & 128 & MAGIK & 0.950 & 80 & False & GruLinear & 0.084 \\
 4096 & 0.000 & 19 & 64 & last & 3 & 128 