In [3]:
import pandas as pd
from pandas.io.json import json_normalize 
import os
import json 

def read_jsonl(path):
    """Load a JSON-lines file into a flat pandas DataFrame.

    Each non-blank line of the file is parsed as one JSON document. Nested
    objects are flattened into dot-separated column names (for example
    ``model_params.n_estimators``) by ``pandas.json_normalize``.

    Parameters
    ----------
    path : str
        Location of the ``.jsonl`` file, read as UTF-8.

    Returns
    -------
    pandas.DataFrame
        One row per JSON document; empty if the file holds no documents.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        # Skip blank / whitespace-only lines (e.g. a trailing newline at the
        # end of the file) instead of letting json.loads fail on them.
        records = [json.loads(line) for line in f if line.strip()]

    # pd.json_normalize is the public entry point since pandas 1.0; the
    # pandas.io.json.json_normalize alias is deprecated.
    return pd.json_normalize(records)

def _model_param(row, name):
    """Return hyper-parameter ``name`` from ``row``.

    The flattened ``model_params.<name>`` key is preferred; the bare
    ``name`` key is used as a fallback so rows that store the value at the
    top level keep working. Raises ``KeyError`` if neither key is present.
    """
    prefixed = "model_params." + name
    return row[prefixed] if prefixed in row else row[name]


def nice_name(row):
    """Build a human-readable label for one experiment result row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or dict)
        Must contain a ``"model"`` entry; depending on the model, further
        entries such as ``"model_params.n_estimators"`` are read.

    Returns
    -------
    str
        The model name, extended with the hyper-parameters relevant for
        that model. Unknown models yield just ``row["model"]``.
    """
    model = row["model"]

    if model in ["RandomForestClassifier", "ExtraTreesClassifier", "GradientBoostingClassifier"]:
        return "{} with T = {}".format(model, row["model_params.n_estimators"])

    if model == "RiverModel":
        return "{} with {}".format(model, row["river_model"])

    if model == "BiasedProxEnsemble":
        # All hyper-parameters are looked up the same way; previously this
        # branch mixed prefixed and bare keys.
        max_trees = _model_param(row, "max_trees")
        max_depth = _model_param(row, "max_depth")
        init_mode = _model_param(row, "init_mode")
        next_mode = _model_param(row, "next_mode")
        step_size = _model_param(row, "step_size")
        l_reg = _model_param(row, "l_reg")

        if int(max_trees) == 0:
            # max_trees == 0: the label reports λ instead of a tree count.
            return "{} with λ = {}, max_depth = {}, modes = {}/{}, stepsize = {}".format(model, l_reg, max_depth, init_mode, next_mode, step_size)

        # Argument order matches the placeholders: modes = init/next, then
        # stepsize (the previous order put step_size into the "modes" slot).
        return "{} with T = {}, max_depth = {}, modes = {}/{}, stepsize = {} with λ = {}".format(model, max_trees, max_depth, init_mode, next_mode, step_size, l_reg)

    if model == "JaxModel":
        return "{} with T = {}, max_depth = {}, with temp_scaling = {}".format(model, row["model_params.n_trees"], row["model_params.max_depth"], row["model_params.temp_scaling"])

    if model == "ProxPruningClassifier":
        return "{} with {} and R1 = {}, λ1 = {}, R2 = {}, λ2 = {}".format(model, row["base_ensemble"], row.get("model_params.ensemble_regularizer", "None"), row.get("model_params.l_ensemble_reg", "None"), row.get("model_params.tree_regularizer", "None"), row.get("model_params.l_tree_reg", "None"))

    if model == "AdaBoostClassifier":
        return "{} with base = {} with T = {}".format(model, row["model_params.base_estimator"], row["model_params.n_estimators"])

    if model == "PyBiasedProxEnsemble":
        return "{} with max_depth {} and R1 = {}, λ1 = {}, R2 = {}, λ2 = {}".format(model, row.get("model_params.max_depth", None), row.get("model_params.ensemble_regularizer", "None"), row.get("model_params.l_ensemble_reg", "None"), row.get("model_params.tree_regularizer", "None"), row.get("model_params.l_tree_reg", "None"))

    # Any other model: the plain model name is the label.
    return model

# Select the dataset to inspect (swap the comment to switch datasets).
#dataset = "covertype"
dataset = "magic"
dataset = os.path.join(dataset, "results")

# Collect every sub-directory of the results folder, in listing order.
all_subdirs = list(
    filter(
        os.path.isdir,
        (os.path.join(dataset, d) for d in os.listdir(dataset)),
    )
)

# Work on the sub-directory with the most recent modification time.
latest_folder = max(all_subdirs, key=os.path.getmtime)
# To pin a specific folder instead of the latest one:
#latest_folder = dataset + '/04-01-2021-14:58:21'

# Load the flattened results, attach a readable label per row and round all
# numeric columns to three decimals.
df = read_jsonl(os.path.join(latest_folder, "results.jsonl"))
df = df.assign(nice_name=df.apply(nice_name, axis=1)).round(decimals = 3)
df

Unnamed: 0,X,Y,experiment_id,idx,loss,model,out_path,repetitions,seed,model_params.bootstrap,...,model_params.normalize_weights,model_params.out_file,model_params.scale_batch,model_params.seed,model_params.sliding_window,model_params.step_size,model_params.tree_regularizer,model_params.var_batch,model_params.verbose,nice_name
0,X,Y,2,idx,mse,ExtraTreesClassifier,/home/buschjae/projects/psgd-ensemble/batch/ma...,5,0,True,...,,,,,,,,,,ExtraTreesClassifier with T = 16.0
1,X,Y,6,idx,mse,ExtraTreesClassifier,/home/buschjae/projects/psgd-ensemble/batch/ma...,5,0,True,...,,,,,,,,,,ExtraTreesClassifier with T = 32.0
2,X,Y,1,idx,mse,RandomForestClassifier,/home/buschjae/projects/psgd-ensemble/batch/ma...,5,0,True,...,,,,,,,,,,RandomForestClassifier with T = 16.0
3,X,Y,5,idx,mse,RandomForestClassifier,/home/buschjae/projects/psgd-ensemble/batch/ma...,5,0,True,...,,,,,,,,,,RandomForestClassifier with T = 32.0
4,X,Y,10,idx,mse,ExtraTreesClassifier,/home/buschjae/projects/psgd-ensemble/batch/ma...,5,0,True,...,,,,,,,,,,ExtraTreesClassifier with T = 64.0
5,X,Y,9,idx,mse,RandomForestClassifier,/home/buschjae/projects/psgd-ensemble/batch/ma...,5,0,True,...,,,,,,,,,,RandomForestClassifier with T = 64.0
6,X,Y,3,idx,,GradientBoostingClassifier,/home/buschjae/projects/psgd-ensemble/batch/ma...,5,0,,...,,,,,,,,,,GradientBoostingClassifier with T = 16.0
7,X,Y,7,idx,,GradientBoostingClassifier,/home/buschjae/projects/psgd-ensemble/batch/ma...,5,0,,...,,,,,,,,,,GradientBoostingClassifier with T = 32.0
8,X,Y,14,idx,mse,ExtraTreesClassifier,/home/buschjae/projects/psgd-ensemble/batch/ma...,5,0,True,...,,,,,,,,,,ExtraTreesClassifier with T = 128.0
9,X,Y,13,idx,mse,RandomForestClassifier,/home/buschjae/projects/psgd-ensemble/batch/ma...,5,0,True,...,,,,,,,,,,RandomForestClassifier with T = 128.0


In [4]:
from IPython.display import display, HTML
# Columns shown in the summary table: the readable label plus the mean
# score columns read from the flattened results.
score_columns = [
    "nice_name",
    "scores.mean_test_accuracy",
    "scores.mean_test_loss",
    "scores.mean_train_accuracy",
    "scores.mean_train_loss",
    "scores.mean_n_estimators",
    "scores.mean_fit_time",
    "scores.mean_n_parameters",
]

# Highest mean test accuracy first.
tabledf = df[score_columns].sort_values(by=['scores.mean_test_accuracy'], ascending = False)
display(HTML(tabledf.to_html()))

Unnamed: 0,nice_name,scores.mean_test_accuracy,scores.mean_test_loss,scores.mean_train_accuracy,scores.mean_train_loss,scores.mean_n_estimators,scores.mean_fit_time,scores.mean_n_parameters
11,RandomForestClassifier with T = 256.0,88.08,0.09,100.0,0.012,256.0,13.278,702091.6
15,GradientBoostingClassifier with T = 256.0,88.065,0.214,100.0,0.157,256.0,66.342,776333.2
5,RandomForestClassifier with T = 64.0,87.965,0.091,99.988,0.013,64.0,3.149,175333.6
9,RandomForestClassifier with T = 128.0,87.87,0.09,100.0,0.013,128.0,6.808,350828.4
3,RandomForestClassifier with T = 32.0,87.796,0.093,99.862,0.014,32.0,1.617,87709.2
13,GradientBoostingClassifier with T = 128.0,87.665,0.215,100.0,0.157,128.0,36.177,450311.6
12,ExtraTreesClassifier with T = 256.0,87.654,0.094,100.0,0.013,256.0,2.41,1481491.2
8,ExtraTreesClassifier with T = 128.0,87.576,0.094,100.0,0.013,128.0,1.264,740814.0
19,"PyBiasedProxEnsemble with max_depth nan and R1 = hard-L1, λ1 = 256.0, R2 = nan, λ2 = 0.0",87.56,0.092,93.802,0.048,257.0,200.641,215383.8
18,"PyBiasedProxEnsemble with max_depth nan and R1 = hard-L1, λ1 = 128.0, R2 = nan, λ2 = 0.0",87.486,0.092,93.813,0.048,129.0,152.069,108303.8


In [32]:
from IPython.display import display, HTML
# NOTE(review): this cell repeats the summary-table code of an earlier cell,
# and its saved output shows a label format that the nice_name() defined in
# this notebook does not produce — the output looks stale; re-run the
# notebook top to bottom (or drop this duplicate) before relying on it.

# Columns shown in the summary table: the readable label plus the mean
# score columns read from the flattened results.
score_columns = [
    "nice_name",
    "scores.mean_test_accuracy",
    "scores.mean_test_loss",
    "scores.mean_train_accuracy",
    "scores.mean_train_loss",
    "scores.mean_n_estimators",
    "scores.mean_fit_time",
    "scores.mean_n_parameters",
]

# Highest mean test accuracy first.
tabledf = df[score_columns].sort_values(by=['scores.mean_test_accuracy'], ascending = False)
display(HTML(tabledf.to_html()))


Unnamed: 0,nice_name,scores.mean_test_accuracy,scores.mean_test_loss,scores.mean_train_accuracy,scores.mean_train_loss,scores.mean_n_estimators,scores.mean_fit_time,scores.mean_n_parameters
0,"ProxPruningClassifier with base RandomForestClassifier(n_estimators=128, random_state=0) and λ = 0.0",87.602,0.229,99.958,0.177,108.2,10.373,296658.2
