In [41]:
import sys
import os
import pandas as pd
from pathlib import Path
import re


def replace_cname(df, oldname, newname):
    """Return a copy of ``df`` with ``oldname`` substituted in its column labels.

    Only columns whose label contains ``oldname`` are renamed. For those,
    every occurrence of ``oldname`` is replaced by ``newname`` and the
    resulting label is stripped of leading/trailing whitespace. All other
    columns keep their label untouched. ``df`` itself is not modified.

    Args:
        df: DataFrame whose column labels are strings.
        oldname: Substring to look for in each column label.
        newname: Replacement text for ``oldname``.

    Returns:
        The DataFrame produced by ``df.rename`` with the updated labels.
    """
    renamed = {}
    for column in df.columns:
        if oldname not in column:
            continue
        # Strip so that removing a prefix/suffix does not leave stray spaces.
        renamed[column] = column.replace(oldname, newname).strip()
    return df.rename(columns=renamed)

In [45]:
def load_grid_results(res_path):
    """Load and stack every grid-search result pickle found below ``res_path``.

    The directory tree is walked recursively. Every file whose joined path
    contains ``"gridsearch"`` is read with ``pd.read_pickle``. Extra settings
    encoded in the path as ``__<name>_<value>`` chunks (the ``.pkl`` text is
    removed first) are added to that file's frame as constant string columns
    named ``param_<name>``.

    Args:
        res_path: Root directory to scan (``str`` or ``Path``).

    Returns:
        One DataFrame with the rows of all matching files and a fresh
        ``0..n-1`` index, or an empty DataFrame when nothing matches.

    Raises:
        ValueError: If an encoded chunk does not split into exactly two
            parts on ``"_"``.
    """
    frames = []
    for directory, _subdirs, names in os.walk(res_path):
        for name in names:
            # os.walk already yields paths rooted at res_path, so the joined
            # path is used as-is; prefixing res_path again would duplicate it.
            pickle_path = os.path.join(directory, name)
            if "gridsearch" not in pickle_path:
                continue

            # NOTE(review): unpickling can execute arbitrary code -- only
            # point this at result files you produced yourself.
            df = pd.read_pickle(pickle_path)

            # The whole path string (not just the file name) is parsed here.
            encoded_chunks = pickle_path.replace(".pkl", "").split("__")[1:]
            for key, value in (chunk.split("_") for chunk in encoded_chunks):
                df[f"param_{key}"] = value

            frames.append(df)

    # pd.concat raises on an empty list, so handle "no results" explicitly.
    if not frames:
        return pd.DataFrame()
    # A single concat avoids re-copying the accumulated frame on every file.
    return pd.concat(frames, ignore_index=True)


RES_PATH = Path("../res/")
grid_search = load_grid_results(RES_PATH)

In [46]:
# Keep only the hyper-parameter columns plus the score.
features_cols = list(filter(lambda label: "param_" in label, grid_search.columns))

# Tidy the labels for presentation: name the architecture column, turn
# underscores into spaces, then strip the "param" and "module" fragments.
grid_search = (
    grid_search[features_cols + ["mean_test_score"]]
    .rename(columns={"param_module": "architecture"})
    .pipe(replace_cname, "_", " ")
    .pipe(replace_cname, "param", "")
    .pipe(replace_cname, "module", "")
)

# Sort by ascending score, then keep the first five rows of each "pool" group.
best_results = (
    grid_search
    .sort_values("mean test score", ascending=True)
    .groupby("pool")
    .head(5)
)

In [47]:

# Remove the columns that are not reported, then keep only the rows whose
# "pool" value is "last".
best_results = (
    best_results
    .drop(columns=["input size", "feature set", "discretized", "architecture", "context len", "overlapping"])
    .loc[lambda frame: frame["pool"] == "last"]
)


In [48]:
# Render the first five rows as a LaTeX table without the index, formatting
# floats with four decimals.
latex_str = best_results.iloc[:5].to_latex(
    index=False,
    float_format="{:0.4f}".format,
)
# Collapse each run of spaces into a single space before printing.
print(re.compile(' +').sub(' ', latex_str))

\begin{tabular}{llllllr}
\toprule
batch size & lr & latent size & pool & rnn layers & rnn size & mean test score \\
\midrule
 4096 & 0.0005 & 128 & last & 3 & 128 & 0.0820 \\
 4096 & 0.0005 & 128 & last & 3 & 64 & 0.0822 \\
 4096 & 0.0005 & 64 & last & 3 & 128 & 0.0841 \\
 4096 & 0.0001 & 64 & last & 3 & 128 & 0.0849 \\
 4096 & 0.0001 & 64 & last & 3 & 64 & 0.0852 \\
\bottomrule
\end{tabular}

