In [15]:
import sys
import os
import pandas as pd
from pathlib import Path
import re


def replace_cname(df, oldname, newname):
    new_names = { c: c.replace(oldname, newname) for c in df.columns if oldname in c}
    return df.rename(columns=new_names)

In [16]:
grid_search = pd.DataFrame()

RES_PATH = Path(f"../res/")
files = [os.path.join(path, name) for path, subdirs, files in os.walk(RES_PATH) for name in files]
grid_files = filter(lambda x: "gridsearch" in x, files)

for f in grid_files:
    if "gridsearch" not in f:
        continue
    df = pd.read_pickle(RES_PATH / f)
    
    additional_features = f.replace(".pkl", "").split("__")[1:]
    additional_features = map(lambda x: x.split("_"), additional_features)
    
    for f, v in additional_features:
        df[f"param_{f}"] = v
        
    grid_search = pd.concat([grid_search, df], ignore_index=True)

In [42]:
best_results = grid_search.sort_values("mean_test_score", ascending=True)
best_results = best_results.groupby("param_discretized").head(15)

best_results = best_results.drop(columns=["params", "param_feature_set", 
                                          "param_module__input_size", "param_overlapping",
                                          "param_context_len", "param_discretized"])
features_cols = [c for c in best_results.columns if "param" in c]
best_results = best_results[features_cols + ["mean_test_score"]]

best_results = replace_cname(best_results, "_", " ")
best_results = replace_cname(best_results, "param", "")
best_results = replace_cname(best_results, "module", "")

In [56]:
1e-4

0.0001

In [55]:
best_results

Unnamed: 0,batch size,lr,latent size,pool,rnn layers,rnn size,Unnamed: 7,mean test score
18,4096,0.0001,64,mean,3,64,GruLinear,0.067333
2,4096,0.0005,64,mean,3,64,GruLinear,0.06902
27,4096,0.0001,128,mean,3,128,GruLinear,0.069217
19,4096,0.0001,64,mean,3,128,GruLinear,0.070714
26,4096,0.0001,128,mean,3,64,GruLinear,0.071374
16,4096,0.0001,64,mean,1,64,GruLinear,0.072831
0,4096,0.0005,64,mean,1,64,GruLinear,0.073356
17,4096,0.0001,64,mean,1,128,GruLinear,0.075949
24,4096,0.0001,128,mean,1,64,GruLinear,0.076982
3,4096,0.0005,64,mean,3,128,GruLinear,0.077747


In [54]:
latex_str = best_results.to_latex(index=False, float_format="{:0.3f}".format)
print(re.sub(' +',' ', latex_str))

\begin{tabular}{lllllllr}
\toprule
 batch size & lr & latent size & pool & rnn layers & \multicolumn{2}{l}{rnn size} & mean test score \\
\midrule
 4096 & 0.000 & 64 & mean & 3 & 64 & GruLinear & 0.067 \\
 4096 & 0.001 & 64 & mean & 3 & 64 & GruLinear & 0.069 \\
 4096 & 0.000 & 128 & mean & 3 & 128 & GruLinear & 0.069 \\
 4096 & 0.000 & 64 & mean & 3 & 128 & GruLinear & 0.071 \\
 4096 & 0.000 & 128 & mean & 3 & 64 & GruLinear & 0.071 \\
 4096 & 0.000 & 64 & mean & 1 & 64 & GruLinear & 0.073 \\
 4096 & 0.001 & 64 & mean & 1 & 64 & GruLinear & 0.073 \\
 4096 & 0.000 & 64 & mean & 1 & 128 & GruLinear & 0.076 \\
 4096 & 0.000 & 128 & mean & 1 & 64 & GruLinear & 0.077 \\
 4096 & 0.001 & 64 & mean & 3 & 128 & GruLinear & 0.078 \\
 4096 & 0.000 & 128 & mean & 1 & 128 & GruLinear & 0.081 \\
 4096 & 0.001 & 64 & mean & 1 & 128 & GruLinear & 0.082 \\
 4096 & 0.001 & 128 & last & 3 & 128 & GruLinear & 0.082 \\
 4096 & 0.001 & 128 & mean & 3 & 64 & GruLinear & 0.082 \\
 4096 & 0.001 & 128 & last &