In [1]:
# Stdlib imports
import json
import os
import warnings
from typing import List, Tuple
import traceback

# library imports
import numpy as np
import pandas as pd

# local imports
from src.agent.rollout import evaluate, transform_dataset
from src.util.dataset_loader import load_datasets
from src.util.experiment import Experiment, load_savepoint
from src.util.score import evaluate as ev_function
from src.transformations import real

In [2]:
# Load metadata
PARAMETER_PATH = os.getenv("PARAMETER_PATH")
EXPERIMENT_PATH = os.getenv("EXPERIMENT_PATH")
DATA_PATH = os.getenv("DATA_PATH")
VALIDATION_DATA_PATH = os.getenv("VALIDATION_DATA_PATH")

# Set defaults
if PARAMETER_PATH == None: PARAMETER_PATH = "/home/tobias/ma/test/parameters"
if EXPERIMENT_PATH == None: EXPERIMENT_PATH = "/home/tobias/ma/test/experiments"
if DATA_PATH == None: DATA_PATH = "/home/tobias/ma/data"
if VALIDATION_DATA_PATH == None: VALIDATION_DATA_PATH = "/home/tobias/ma/data/validation"


In [3]:
transformations = real.all_transformations

In [4]:
def get_rows(params, columns) -> List[Tuple[str]]: 

    
    model_path = os.path.join(EXPERIMENT_PATH, params["name"])
    
    if not os.path.exists(model_path):
        print[f"[ERROR] Path ''{model_path}' does not exist.'"]
    
    
    f = [x[:-3] for x in next(os.walk(model_path))[2] if x.endswith(".sd")]
    f = [x for x in f if "step" in x]
    model_sd_files = [x + ".sd" for x in f]
    model_json_files = [x + ".json" for x  in f]
   
    #  Load shared
    x = [params[x] for x in columns]
    x.append(model_path)
    
    x.append(params["policy_parameters"]["enable_local_context"])
    x.append(params["policy_parameters"]["enable_domain_information"])
    
    
    # Load train step specific
    res = []
    for model_file, json_file in zip(model_sd_files, model_json_files):
        assert model_file[:-3] == json_file[:-5], f"{model_file} != {json_file}"
        y = x.copy()
        y.append(model_file)
    
        # TODO here
        json_file = os.path.join(model_path, json_file)
        json_file = json.load(open(json_file, mode='r'))

        
        
        train_performance = json_file["train_performance"]
        test_performance = json_file["test_performance"]
        
        y.append(train_performance)
        y.append(test_performance)
        
        res.append(y)
    return res

In [5]:
# Load all sets of parameters
parameter_files = next(os.walk(PARAMETER_PATH))[2]
parameter_files = [x for x in parameter_files if x.endswith(".json")]

print(f"[INFO] Found {len(parameter_files)} parameter files in '{PARAMETER_PATH}'.")
print()

parameter_sets = [json.load(open(os.path.join(PARAMETER_PATH, x), mode='r')) for x in parameter_files]

columns = ["name", "learning_algorithm", "reward_function",  "representation_generator"]

rows = []
for p in parameter_sets:
    rows.extend(get_rows(p, columns))

# Update columns
columns.extend(["model_path","enable_local_context", "enable_domain_information", "model_file", "train_performance", "test_performance"])
columns[0] = "parameter_name"
    
masterframe = pd.DataFrame(rows, columns=columns)

masterframe.sort_values("train_performance")

[INFO] Found 14 parameter files in '/home/tobias/ma/test/parameters'.



Unnamed: 0,parameter_name,learning_algorithm,reward_function,representation_generator,model_path,enable_local_context,enable_domain_information,model_file,train_performance,test_performance
42,params_004,REINFORCE,binary_reward,quantile_data_sketch,/home/tobias/ma/test/experiments/params_004,False,False,-2795312369091581638step_0025.sd,-0.026787,0.002246
98,params_005,REINFORCE,binary_reward,random_sampling,/home/tobias/ma/test/experiments/params_005,False,False,-3204051483301470282step_0050.sd,-0.015515,0.006007
164,params_005_01,REINFORCE,binary_reward,random_sampling,/home/tobias/ma/test/experiments/params_005_01,True,False,-8764017441242044382step_0000.sd,-0.007299,0.004947
70,params_006,REINFORCE,staggered_reward,quantile_data_sketch,/home/tobias/ma/test/experiments/params_006,False,False,4186393700938150504step_0000.sd,-0.006822,0.019954
87,params_005,REINFORCE,binary_reward,random_sampling,/home/tobias/ma/test/experiments/params_005,False,False,-3204051483301470282step_0125.sd,-0.002613,0.007962
...,...,...,...,...,...,...,...,...,...,...
59,params_004,REINFORCE,binary_reward,quantile_data_sketch,/home/tobias/ma/test/experiments/params_004,False,False,-2795312369091581638step_0250.sd,0.027599,0.001565
88,params_005,REINFORCE,binary_reward,random_sampling,/home/tobias/ma/test/experiments/params_005,False,False,-3204051483301470282step_0225.sd,0.029027,0.013610
52,params_004,REINFORCE,binary_reward,quantile_data_sketch,/home/tobias/ma/test/experiments/params_004,False,False,-2795312369091581638step_0375.sd,0.030385,0.006081
53,params_004,REINFORCE,binary_reward,quantile_data_sketch,/home/tobias/ma/test/experiments/params_004,False,False,-2795312369091581638step_0400.sd,0.031416,0.001708


In [6]:
masterframe["parameter_name"].value_counts()

params_007       21
params_003_10    21
params_004       21
params_003_11    21
params_001       21
params_006       20
params_003       20
params_005       19
params_000       19
params_002       19
params_005_01     1
params_003_01     1
Name: parameter_name, dtype: int64

In [7]:
df = masterframe[["parameter_name", "test_performance"]]

topperformer = df.groupby("parameter_name").mean().rename(columns={"test_performance": "mean"})
b = df.groupby("parameter_name").max()

topperformer["max"] = b
topperformer.reset_index(inplace=True)

topperformer.sort_values("mean")

display(topperformer)

Unnamed: 0,parameter_name,mean,max
0,params_000,0.008986,0.018923
1,params_001,0.019896,0.024472
2,params_002,0.016909,0.029514
3,params_003,0.006373,0.009736
4,params_003_01,-0.003713,-0.003713
5,params_003_10,0.006144,0.016859
6,params_003_11,0.016756,0.024442
7,params_004,0.00386,0.007718
8,params_005,0.010905,0.015994
9,params_005_01,0.004947,0.004947


In [8]:
# Example code on how to evaluate specific models

# local imports
# model_file_name = "-3841776332189409403step_0000.sd"

# Get the model parameters
# candidates = masterframe[masterframe["model_file"] == model_file_name]
# assert candidates.shape[0] == 1, f"1 != {candidates.shape}"
# candidate = candidates.iloc[0]

# model_dir = os.path.join(EXPERIMENT_PATH, candidate["parameter_name"])

# Get the experiment file
# files = next(os.walk(model_dir))[2]
# files = [x for x in files if x.endswith(".exp")]
# assert len(files) == 1, "To many experiment files"

# Load experiment with good parameters
# exp = load_savepoint(model_dir)
# exp.load_agent_parameters(os.path.join(model_dir, model_file_name))

In [9]:
validation_data = load_datasets(VALIDATION_DATA_PATH)
evaluation_function = lambda x,c: ev_function(dataset=x, context=c, modeltype="decision_tree")
tfds = lambda ds, fv, experiment: transform_dataset(
    root_dataset = ds,
    feature_values = fv,
    evaluation_function = evaluation_function,
    experiment = experiment,
    predict = lambda agent, repre: agent(repre)
)

In [10]:
# Demo
if False:
    overall = []
    for vdata, context in validation_data:
        res = [tfds(vdata, context) for _ in range(10)]
        res = np.array(res)
    
        root_performance = evaluation_function(vdata,context, experiment)
        overall.append(res.mean() - root_performance)
        print(
            "| ",
            vdata.name.rjust(20), " | ",
            f"{root_performance:1.3f} | ",
            f"{res.mean():1.3f}+-{res.std():1.3f}"
        )
    

In [11]:
def get_mean_validation_performance(model_dir, model_file_name):
    # Get the experiment file
    files = next(os.walk(model_dir))[2]
    files = [x for x in files if x.endswith(".exp")]
    assert len(files) == 1, "To many experiment files"

    # Load experiment with good parameters
    exp = load_savepoint(model_dir)
    
    # Hack, can be removed later
    exp.dir_path = model_dir
    
    exp.load_agent_parameters(os.path.join(model_dir, model_file_name))
    
    
    overall = []
    for vdata, context in validation_data:
        res = []
        for _ in range(5):
            tmp = []
            for _ in range(5):
                try:
                    tmp.append(tfds(vdata, context, exp))
                except:
                    print("ERROR")
                    traceback.print_exc()
                    
            assert len(tmp) > 0
            res.append(max(tmp))
        res = np.array(res)
    
        root_performance = evaluation_function(vdata,context)
        overall.append(res.mean() - root_performance)
    
    overall = np.array(overall)
    return overall.mean(), overall.std()

In [12]:

experiment_a_table_1 = [
    {"reward_function": "binary_reward", "representation_generator": "random_sampling", "learning_algorithm": "REINFORCE"},
    {"reward_function": "binary_reward", "representation_generator": "random_sampling", "learning_algorithm": "PPO"},
    {"reward_function": "staggered_reward", "representation_generator": "random_sampling", "learning_algorithm": "REINFORCE"},
    {"reward_function": "staggered_reward", "representation_generator": "random_sampling", "learning_algorithm": "PPO"},
    {"reward_function": "binary_reward", "representation_generator": "quantile_data_sketch", "learning_algorithm": "REINFORCE"},
    {"reward_function": "binary_reward", "representation_generator": "quantile_data_sketch", "learning_algorithm": "PPO"},
    {"reward_function": "staggered_reward", "representation_generator": "quantile_data_sketch", "learning_algorithm": "REINFORCE"},
    {"reward_function": "staggered_reward", "representation_generator": "quantile_data_sketch", "learning_algorithm": "PPO"},    
]

experiment_a_table_2 = experiment_a_table_1.copy()

experiment_b = [
    {"enable_local_context": False, "enable_domain_information": False},
    {"enable_local_context": True, "enable_domain_information": False},
    {"enable_local_context": False, "enable_domain_information": True},
    {"enable_local_context": True, "enable_domain_information": True},
]



In [13]:
def select_best(a):
    """
        Select best model for a given set of parameters.
    """
    selectors = []
    for x in a:
        selectors.append(masterframe[x] == a[x])
    
    if len(a) == 0:
        selected = masterframe
    else:
        selector = selectors[0]
        for s in selectors[1:]: selector = selector & s
        selected = masterframe[selector]
    
    max_value = selected["test_performance"].max()

    chosen_model = selected[selected["test_performance"] == max_value]
    chosen_model = chosen_model.iloc[0,:]
    return chosen_model

In [14]:
def latexify(s: str)-> str:
    return s.replace("_", "\\_")
    
def latex_table_row(lst) -> str:
    s = ""
    for i in lst:
        s += f"{latexify(i)} & "
        
    # Cut off last &
    s = s[:-2]
    s += "\\\\"
        
    return s
    
def evaluate_model(params):
    # Get best model with given parameters
    best_model = select_best(params)
    
    # Load dir and filename
    model_dir = best_model["model_path"]
    model_file_name = best_model["model_file"]
    
    # Generate values
    mean, std = get_mean_validation_performance(model_dir, model_file_name)
        
    c1 = best_model["reward_function"]
    c2 = best_model["representation_generator"]
    c3 = best_model["learning_algorithm"]
    return [ c1, c2, c3, f"{mean:1.3f}$\pm${std:1.3f}"]
    

In [15]:
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    # Do stuff here

    print("Experiment A Tabelle 1")
    for p in experiment_a_table_1:
        #s = evaluate_model(p)
        #s = latex_table_row(s)
        #print(s)
        pass
        

Experiment A Tabelle 1


In [16]:
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    # Do stuff here

    print("Experiment A Tabelle 1")
    for p in experiment_b:
        #s = evaluate_model(p)
        #s = latex_table_row(s)
        print(p)
        

Experiment A Tabelle 1
{'enable_local_context': False, 'enable_domain_information': False}
{'enable_local_context': True, 'enable_domain_information': False}
{'enable_local_context': False, 'enable_domain_information': True}
{'enable_local_context': True, 'enable_domain_information': True}


In [17]:
def get_mean_dataset(model_dir, model_file_name, dataset, context, a=3, b=5):
    # Get the experiment file
    files = next(os.walk(model_dir))[2]
    files = [x for x in files if x.endswith(".exp")]
    assert len(files) == 1, "To many experiment files"

    # Load experiment with good parameters
    exp = load_savepoint(model_dir)
    
    # Hack, can be removed later
    exp.dir_path = model_dir
    
    exp.load_agent_parameters(os.path.join(model_dir, model_file_name))
    
    
    res = []
    for _ in range(a):
        tmp = []
        for _ in range(b):
            try:
                tmp.append(tfds(dataset, context, exp))
            except:
                print("ERROR")
                traceback.print_exc()
                
        assert len(tmp) > 0
        res.append(max(tmp))
    res = np.array(res)
    
    root_performance = evaluation_function(dataset,context)
    
    print(dataset.name, res.mean(), res.std(), root_performance)
    

# Get best model
best_model = select_best({})

# Load dir and filename
model_dir = best_model["model_path"]
model_file_name = best_model["model_file"]

data = load_datasets(DATA_PATH)
relevant = []
for ds in [x for x in data if x[0].name in relevant]:
    get_mean_dataset(model_dir, model_file_name, ds[0], ds[1])

In [18]:
# Get best model
best_model = select_best({})

# Load dir and filename
model_dir = best_model["model_path"]
model_file_name = best_model["model_file"]

data = load_datasets(DATA_PATH)
relevant = []
for ds in [x for x in data if x[0].name in relevant]:
    get_mean_dataset(model_dir, model_file_name, ds[0], ds[1])

In [19]:
# Get best model
best_model = select_best({})

# Load dir and filename
model_dir = best_model["model_path"]
model_file_name = best_model["model_file"]

data = load_datasets(DATA_PATH)

print([x[0].name for x in data])

relevant = ["default_credit"]
for ds in [x for x in data if x[0].name in relevant]:
    get_mean_dataset(model_dir, model_file_name, ds[0], ds[1], a=2, b=2)

['mushrooms', 'ionosphere', 'churn_modelling', 'baloons', 'fourclass', 'campus_recruit', 'glass', 'diabetes', 'lymphography', 'sklearn_boston', 'australian', 'credit_approval', 'pokemon', 'liver_disorders', 'world_happiness', 'svmguide1', 'splice', 'soybean', 'skin_nonskin', 'ges_classification', 'blood_transfusions', 'sklearn_cancer', 'accelerometer', 'balance_scale', 'ecoli', 'svmguide3', 'spect_heart', 'madelon', 'spambase', 'iris', 'fertility', 'sklearn_wine', 'default_credit']
default_credit 0.7268999999999999 0.004799999999999971 0.7153


In [20]:
# Get best model
best_model = select_best({})

# Load dir and filename
model_dir = best_model["model_path"]
model_file_name = best_model["model_file"]

data = load_datasets(DATA_PATH)
relevant = ["diabetes", "ecoli"]
for ds in [x for x in data if x[0].name in relevant]:
    get_mean_dataset(model_dir, model_file_name, ds[0], ds[1])

diabetes 0.7152777777777777 0.0026755269110108684 0.6979166666666666
ecoli 0.8329014227642277 0.003633218914145104 0.8258384146341463


In [21]:
# Get best model
best_model = select_best({})

# Load dir and filename
model_dir = best_model["model_path"]
model_file_name = best_model["model_file"]

data = load_datasets(DATA_PATH)
relevant = ["sklearn_digits"]
for ds in [x for x in data if x[0].name in relevant]:
    get_mean_dataset(model_dir, model_file_name, ds[0], ds[1])