In [1]:
# Stdlib imports
import json
import os
from typing import List, Tuple

# library imports
import numpy as np
import pandas as pd

In [2]:
# Load metadata
PARAMETER_PATH = os.getenv("PARAMETER_PATH")
EXPERIMENT_PATH = os.getenv("EXPERIMENT_PATH")

if PARAMETER_PATH == None: PARAMETER_PATH = "../../parameters"
if EXPERIMENT_PATH == None: EXPERIMENT_PATH = "../../experiments"

In [25]:
def get_rows(params, columns) -> List[Tuple[str]]: 

    
    model_path = os.path.join(EXPERIMENT_PATH, params["name"])
    f = [x[:-3] for x in next(os.walk(model_path))[2] if x.endswith(".sd")]
    f = [x for x in f if "step" in x]
    model_sd_files = [x + ".sd" for x in f]
    model_json_files = [x + ".json" for x  in f]
    
    x = [params[x] for x in columns]
    x.append(model_path)
    
    model_json_files
    
    res = []
    for model_file, json_file in zip(model_sd_files, model_json_files):
        assert model_file[:-3] == json_file[:-5], f"{model_file} != {json_file}"
        y = x.copy()
        y.append(model_file)
    
        
        # TODO here
        json_file = os.path.join(model_path, json_file)
        json_file = json.load(open(json_file, mode='r'))
        train_performance = json_file["train_performance"]
        test_performance = json_file["test_performance"]
        
        y.append(train_performance)
        y.append(test_performance)
        
        res.append(y)
    return res

In [33]:
# Load all sets of parameters
parameter_files = next(os.walk(PARAMETER_PATH))[2]
parameter_files = [x for x in parameter_files if x.endswith(".json")]
print(f"[INFO] Found {len(parameter_files)} parameter files in '{PARAMETER_PATH}'.")
print()

parameter_sets = [json.load(open(os.path.join(PARAMETER_PATH, x), mode='r')) for x in parameter_files]

columns = ["name", "learning_algorithm", "reward_function", "representation_generator"]

rows = []
for p in parameter_sets:
    rows.extend(get_rows(p, columns))
    
# Update columns
columns.extend(["model_path", "model_file", "train_performance", "test_performance"])
columns[0] = "parameter_name"
    
masterframe = pd.DataFrame(rows, columns=columns)

masterframe.sort_values("train_performance")

[INFO] Found 8 parameter files in '../../parameters'.



Unnamed: 0,parameter_name,learning_algorithm,reward_function,representation_generator,model_path,model_file,train_performance,test_performance
26,params_004,REINFORCE,binary_reward,quantile_data_sketch,../../experiments/params_004,-6647563406032993054step_0000.sd,-0.015138,0.018936
7,params_007,REINFORCE,staggered_reward,random_sampling,../../experiments/params_007,-7286741115737232906step_0050.sd,-0.004166,0.010557
88,params_003,PPO,staggered_reward,random_sampling,../../experiments/params_003,7013026371365559731step_0025.sd,-0.002037,0.008928
97,params_003,PPO,staggered_reward,random_sampling,../../experiments/params_003,7013026371365559731step_0000.sd,-0.001524,0.009447
136,params_002,PPO,staggered_reward,quantile_data_sketch,../../experiments/params_002,-6114245208928726178step_0400.sd,0.000816,-0.002426
...,...,...,...,...,...,...,...,...
4,params_007,REINFORCE,staggered_reward,random_sampling,../../experiments/params_007,-7286741115737232906step_0175.sd,0.030132,0.019646
73,params_005,REINFORCE,binary_reward,random_sampling,../../experiments/params_005,-3142818812113644152step_0375.sd,0.032159,0.013321
78,params_005,REINFORCE,binary_reward,random_sampling,../../experiments/params_005,-3142818812113644152step_0275.sd,0.034983,0.018905
76,params_005,REINFORCE,binary_reward,random_sampling,../../experiments/params_005,-3142818812113644152step_0150.sd,0.036306,0.006428


In [32]:
masterframe["parameter_name"].value_counts()

params_007    21
params_004    21
params_006    21
params_005    21
params_003    21
params_000    21
params_002    16
params_001     2
Name: parameter_name, dtype: int64

In [6]:
df = masterframe[["parameter_name", "test_performance"]]

topperformer = df.groupby("parameter_name").mean().rename(columns={"test_performance": "mean"})
b = df.groupby("parameter_name").max()

topperformer["max"] = b
topperformer.reset_index(inplace=True)

topperformer.sort_values("mean")

display(topperformer)

Unnamed: 0,parameter_name,mean,max
0,params_000,0.0,0.0
1,params_001,0.0,0.0
2,params_002,0.0,0.0
3,params_003,0.0,0.0
4,params_004,0.0,0.0
5,params_005,0.0,0.0
6,params_006,0.0,0.0
7,params_007,0.0,0.0


In [166]:
parameter_names = a["parameter_name"].unique()

In [164]:
selector = df["parameter_name"] == "params_004"

# Given a selector, select the model with the best performance

max_test = masterframe[selector]["test_performance"].max()

display(max_test)

0.0

In [155]:

best_parameter = masterframe[selector][masterframe["test_performance"] == max_test].iloc[0]

best_parameter

level_0                                                    0
index                                                      0
parameter_name                                    params_004
learning_algorithm                                 REINFORCE
reward_function                                binary_reward
representation_generator                quantile_data_sketch
model_path                      ../../experiments/params_004
model_file                  -4437039769131941435step_0000.sd
train_performance                                          0
test_performance                                           0
Name: 0, dtype: object

In [45]:
masterframe[["parameter_name", "model_path", "test_performance"]].sort_values("test_performance", ascending=False).head(30)

Unnamed: 0,parameter_name,model_path,test_performance
1,params_007,../../experiments/params_007,0.029481
21,params_004,../../experiments/params_004,0.027959
13,params_007,../../experiments/params_007,0.02793
6,params_007,../../experiments/params_007,0.027215
17,params_007,../../experiments/params_007,0.026696
5,params_007,../../experiments/params_007,0.025638
23,params_004,../../experiments/params_004,0.024012
18,params_007,../../experiments/params_007,0.02374
36,params_004,../../experiments/params_004,0.023696
34,params_004,../../experiments/params_004,0.023029


In [50]:
# local imports
from src.util.experiment import Experiment, load_savepoint


model_dir = "../../experiments/params_007/"
model_path = "-7286741115737232906.exp"

# Get the experiment file
files = next(os.walk(model_dir))[2]
files = [x for x in files if x.endswith(".exp")]
assert len(files) == 1, "To many experiment files"

# Load experiment
exp


ModuleNotFoundError: No module named 'src'