# Compile the results on the MOF case study 

In [480]:
from glob import glob
from pathlib import Path
import matplotlib.pyplot as plt
plt.style.use(['science', 'nature'])
import pandas as pd 
from fastcore.helpers import load_pickle
import numpy as np 
from gpt3forchem.helpers import get_else_nan
from gpt3forchem.api_wrappers import extract_regression_prediction
from gpt3forchem.output import get_regression_metrics

In [481]:
def compile_table_row(row):
    template =  "\\num⁍ {accuracy} \\pm {accuracy_std} ⁌ &  \\num⁍ {f1_micro} \\pm {f1_micro_std}  ⁌ & \\num⁍ {f1_macro} \\pm {f1_macro_std} ⁌\\\\"
    row = row.round(2)
    string = template.format(
        accuracy=row['accuracy']['mean'].values[0],
        accuracy_std=row['accuracy']['std'].values[0],
        f1_micro=row['f1_micro']['mean'].values[0],
        f1_micro_std=row['f1_micro']['std'].values[0],
        f1_macro=row['f1_macro']['mean'].values[0],
        f1_macro_std=row['f1_macro']['std'].values[0]
    )

    return string.replace("⁍", "{").replace("⁌", "}")

def compile_table_baseline_row(row):
    template =  "\\num⁍ {accuracy} \\pm {accuracy_std} ⁌ &  \\num⁍ {f1_micro} \\pm {f1_micro_std}  ⁌ & \\num⁍ {f1_macro} \\pm {f1_macro_std} ⁌\\\\"
    row = row.round(2)
    string = template.format(
        accuracy=row['baseline_accuracy']['mean'].values[0],
        accuracy_std=row['baseline_accuracy']['std'].values[0],
        f1_micro=row['baseline_f1_micro']['mean'].values[0],
        f1_micro_std=row['baseline_f1_micro']['std'].values[0],
        f1_macro=row['baseline_f1_macro']['mean'].values[0],
        f1_macro_std=row['baseline_f1_macro']['std'].values[0]
    )

    return string.replace("⁍", "{").replace("⁌", "}")

## QMOF 

### Classification 

In [482]:
all_qmof_classification_res = glob('/Users/kevinmaikjablonka/git/kjappelbaum/gpt3forchem/experiments/results/20220913_mof_classification/*.pkl')

In [490]:
compiled_qmof_classification_data = []
compiled_qmof_baseline_data = []


compiled_qmof_classification_data_skip_hyperopt = []
compiled_qmof_baseline_data_skip_hyperopt = []


# ToDo: handle the case in which the baseline used the wrong test data - skip them if we have both numerics and string
for filename in all_qmof_classification_res:
    loaded_res = load_pickle(filename)

    res = {
        "accuracy": get_else_nan(loaded_res["cm"], "ACC_Macro"),
        "f1_macro": get_else_nan(loaded_res["cm"], "F1_Macro"),
        "f1_micro": get_else_nan(loaded_res["cm"], "F1_Micro"),
        "train_size": loaded_res["train_size"],
        "mcc": get_else_nan(loaded_res["cm"], "Overall_MCC"),
        "representation": loaded_res["representation"],
        "target": loaded_res["target"],
    }

    try:
        if len(loaded_res["baseline_cm"].classes) == 5:
            baseline_res = {
                "baseline_accuracy": get_else_nan(loaded_res["baseline_cm"], "ACC_Macro"),
                "baseline_f1_macro": get_else_nan(loaded_res["baseline_cm"], "F1_Macro"),
                "baseline_f1_micro": get_else_nan(loaded_res["baseline_cm"], "F1_Micro"),
                "baseline_mcc": get_else_nan(loaded_res["baseline_cm"], "Overall_MCC"),
                "train_size": loaded_res["train_size"],
                "representation": loaded_res["representation"],
                "target": loaded_res["target"],
            }
        else: 
            raise ValueError
    except Exception: 
           baseline_res = {
                "baseline_accuracy": np.nan,
                "baseline_f1_macro": np.nan,
                "baseline_f1_micro": np.nan,
                "baseline_mcc": np.nan,
                "train_size": loaded_res["train_size"],
                "representation": loaded_res["representation"],
                "target": loaded_res["target"],
            } 
    if 'skip' in filename:
        compiled_qmof_classification_data.append(res)
        compiled_qmof_baseline_data_skip_hyperopt.append(baseline_res)
    else:
        compiled_qmof_classification_data.append(res)
        compiled_qmof_baseline_data.append(baseline_res)


In [491]:
compiled_qmof_classification_data = pd.DataFrame(compiled_qmof_classification_data)
compiled_qmof_baseline_data = pd.DataFrame(compiled_qmof_baseline_data)

compiled_qmof_classification_data_skip_hyperopt = pd.DataFrame(compiled_qmof_classification_data_skip_hyperopt)
compiled_qmof_baseline_data_skip_hyperopt = pd.DataFrame(compiled_qmof_baseline_data_skip_hyperopt)

In [492]:
compiled_qmof_baseline_data_skip_hyperopt.groupby(['target', 'train_size']).agg(['mean', 'std'])

  compiled_qmof_baseline_data_skip_hyperopt.groupby(['target', 'train_size']).agg(['mean', 'std'])


Unnamed: 0_level_0,Unnamed: 1_level_0,baseline_accuracy,baseline_accuracy,baseline_f1_macro,baseline_f1_macro,baseline_f1_micro,baseline_f1_micro,baseline_mcc,baseline_mcc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std
target,train_size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
outputs.pbe.bandgap_cat,10,0.709063,0.013206,0.185662,0.013295,0.272657,0.033015,0.003969,0.025721
outputs.pbe.bandgap_cat,50,0.735119,0.005988,0.229145,0.015604,0.337797,0.014969,0.042884,0.018518
outputs.pbe.bandgap_cat,100,0.740346,0.005675,0.238457,0.01735,0.350865,0.014188,0.055555,0.023818
outputs.pbe.bandgap_cat,200,0.751736,0.002329,0.282819,0.015911,0.379341,0.005823,0.103591,0.010545
outputs.pbe.bandgap_cat,500,0.771794,0.003894,0.357373,0.02366,0.429485,0.009736,0.177773,0.014886
outputs.pbe.bandgap_cat,1000,0.787106,0.002284,0.404268,0.025026,0.467765,0.005709,0.232906,0.009743
outputs.pbe.bandgap_cat,2000,0.802023,0.003151,0.452838,0.036692,0.505057,0.007876,0.287331,0.012027
outputs.pbe.bandgap_cat,3000,0.813426,0.006305,0.491458,0.049201,0.533565,0.015762,0.329239,0.021908


In [493]:
compiled_qmof_baseline_data_skip_hyperopt

Unnamed: 0,baseline_accuracy,baseline_f1_macro,baseline_f1_micro,baseline_mcc,train_size,representation,target
0,0.723307,0.198947,0.308267,0.011310,10,chemical_name,outputs.pbe.bandgap_cat
1,0.746867,0.255328,0.367168,0.082474,100,chemical_name,outputs.pbe.bandgap_cat
2,0.739223,0.263108,0.348057,0.062488,50,chemical_name,outputs.pbe.bandgap_cat
3,0.753487,0.250687,0.383718,0.107179,200,chemical_name,outputs.pbe.bandgap_cat
4,0.733144,0.218896,0.332861,0.021617,100,chemical_name,outputs.pbe.bandgap_cat
...,...,...,...,...,...,...,...
80,0.807253,0.517046,0.518132,0.307009,2000,chemical_name,outputs.pbe.bandgap_cat
81,0.820096,0.470443,0.550239,0.351770,3000,chemical_name,outputs.pbe.bandgap_cat
82,0.775248,0.347948,0.438120,0.188616,500,chemical_name,outputs.pbe.bandgap_cat
83,0.749702,0.254978,0.374256,0.086916,100,chemical_name,outputs.pbe.bandgap_cat


In [494]:
compiled_qmof_classification_data.groupby(['target', 'representation', 'train_size']).agg(['mean', 'std'])

  compiled_qmof_classification_data.groupby(['target', 'representation', 'train_size']).agg(['mean', 'std'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,f1_macro,f1_macro,f1_micro,f1_micro
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std
target,representation,train_size,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
outputs.pbe.bandgap_cat,chemical_name,10,0.735467,0.021914,0.194741,0.038253,0.338667,0.054784
outputs.pbe.bandgap_cat,chemical_name,50,0.7688,0.041899,0.182454,0.098772,0.422,0.104747
outputs.pbe.bandgap_cat,chemical_name,100,0.818133,0.014048,0.292088,0.05274,0.545333,0.035119
outputs.pbe.bandgap_cat,chemical_name,200,0.842667,0.01121,0.368055,0.016604,0.606667,0.028024
outputs.pbe.bandgap_cat,chemical_name,500,0.8608,0.00792,0.51858,0.079594,0.652,0.019799
outputs.pbe.bandgap_cat,chemical_name,1000,0.885022,0.016213,0.505309,0.067886,0.694667,0.039716
outputs.pbe.bandgap_cat,chemical_name,2000,0.900356,0.019524,0.57597,0.072864,0.738,0.026907
outputs.pbe.bandgap_cat,chemical_name,3000,0.9164,0.008485,0.664919,0.0611,0.791,0.021213
outputs.pbe.bandgap_cat,info.mofid.mofid_clean,10,0.717433,0.037845,0.141765,0.090571,0.2525,0.168312
outputs.pbe.bandgap_cat,info.mofid.mofid_clean,50,0.763985,0.024521,0.174914,0.070117,0.341667,0.137802


In [489]:
compiled_qmof_baseline_data[['target', 'train_size', 'baseline_accuracy', 'baseline_f1_micro', 'baseline_f1_macro']].groupby(['target', 'train_size']).agg(['std', 'mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,baseline_accuracy,baseline_accuracy,baseline_f1_micro,baseline_f1_micro,baseline_f1_macro,baseline_f1_macro
Unnamed: 0_level_1,Unnamed: 1_level_1,std,mean,std,mean,std,mean
target,train_size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
outputs.pbe.bandgap_cat,10,,,,,,
outputs.pbe.bandgap_cat,50,,0.747274,,0.368186,,0.218125
outputs.pbe.bandgap_cat,100,,,,,,
outputs.pbe.bandgap_cat,200,,,,,,
outputs.pbe.bandgap_cat,500,,,,,,
outputs.pbe.bandgap_cat,1000,,,,,,
outputs.pbe.bandgap_cat,2000,,,,,,
outputs.pbe.bandgap_cat,3000,,,,,,


### Regression 

In [398]:
qmof_regression_results = glob('/Users/kevinmaikjablonka/git/kjappelbaum/gpt3forchem/experiments/results/20220915_mof_regression/*.pkl')

In [399]:
load_pickle(qmof_regression_results[0])

{'model_type': 'ada',
 'train_set_size': 2000,
 'prefix': '',
 'train_size': 2000,
 'test_size': 1627,
 'metrics': {'r2': 0.7192669331671564,
  'max_error': 2.5827999999999998,
  'mean_absolute_error': 0.435709634,
  'mean_squared_error': 0.374205485176178},
 'completions': {'choices': [<OpenAIObject at 0x2b4fb1ea0> JSON: {
     "finish_reason": "length",
     "index": 0,
     "logprobs": null,
     "text": " 0.9595@@@4.936"
   },
   <OpenAIObject at 0x2b4fb1e00> JSON: {
     "finish_reason": "length",
     "index": 1,
     "logprobs": null,
     "text": " 1.934@@@+1.9"
   },
   <OpenAIObject at 0x2b4fb1cc0> JSON: {
     "finish_reason": "length",
     "index": 2,
     "logprobs": null,
     "text": " 0.49@@@@@@ 0.49"
   },
   <OpenAIObject at 0x17f631720> JSON: {
     "finish_reason": "length",
     "index": 3,
     "logprobs": null,
     "text": " 0.03934@@@0.49"
   },
   <OpenAIObject at 0x17f6319f0> JSON: {
     "finish_reason": "length",
     "index": 4,
     "logprobs": null,
   

In [400]:
def metrics_ignoring_nan(valid_file, completions): 
    valid_frame = pd.read_json(valid_file, lines=True, orient='records')
    true = np.array([
            float(valid_frame.iloc[i]["completion"].split("@")[0])
            for i in range(len(valid_frame))
        ])
    prd = np.array([extract_regression_prediction(completions, i) for i in range(len(completions['choices']))])

    true = true[:len(prd)]
    prd_no_nan = prd[~np.isnan(prd)]
    true_no_nan = true[~np.isnan(prd)]
    
    metrics = get_regression_metrics(true_no_nan, prd_no_nan)
    return metrics

In [401]:
compiled_regression_results = []
compiled_regression_baseline_results = []


# ToDo: handle nan metrics (ignore nan in the computation of the metrics)
for res_file in qmof_regression_results:
    if '3000' in res_file:
        print(res_file)
    try:
        res = load_pickle(res_file)
        res_dict = {
            'r2': res['metrics']['r2'],
            'max_error': res['metrics']['max_error'],
            'mean_absolute_error': res['metrics']['mean_absolute_error'],
            'mean_squared_error': res['metrics']['mean_squared_error'],
        }
        if np.isnan(res_dict['r2']):
            # recompute metrics
            print('recomputing metrics')
            res_dict = metrics_ignoring_nan(res['valid_filename'], res['completions'])
        res_dict['train_size'] = res['train_size']
        res_dict['representation'] = res['representation']
        res_dict['target'] = res['target']
        compiled_regression_results.append(res_dict)
        
        baseline_res_dict = res['baseline_metrics']
        baseline_res_dict['train_size'] = res['train_size']
        baseline_res_dict['target'] = res['target']

        
        compiled_regression_baseline_results.append(baseline_res_dict)
    
    except Exception as e: 
        pass
    
 

/Users/kevinmaikjablonka/git/kjappelbaum/gpt3forchem/experiments/results/20220915_mof_regression/2022-09-16-08-16-14_results_mof_regression_3000__ada_info.mofid.mofid_clean_outputs.pbe.bandgap.pkl
/Users/kevinmaikjablonka/git/kjappelbaum/gpt3forchem/experiments/results/20220915_mof_regression/2022-09-23-08-55-39_results_mof_regression_3000__ada_info.mofid.mofid_clean_outputs.pbe.bandgap.pkl
/Users/kevinmaikjablonka/git/kjappelbaum/gpt3forchem/experiments/results/20220915_mof_regression/2022-09-25-22-10-44_results_mof_regression_3000__ada_chemical_name_outputs.pbe.bandgap.pkl
/Users/kevinmaikjablonka/git/kjappelbaum/gpt3forchem/experiments/results/20220915_mof_regression/2022-09-26-00-19-44_results_mof_regression_3000__ada_chemical_name_outputs.pbe.bandgap.pkl
/Users/kevinmaikjablonka/git/kjappelbaum/gpt3forchem/experiments/results/20220915_mof_regression/2022-09-26-04-27-25_results_mof_regression_3000__ada_chemical_name_outputs.pbe.bandgap.pkl
recomputing metrics
/Users/kevinmaikjablon

In [402]:
compiled_regression_results = pd.DataFrame(compiled_regression_results)
compiled_regression_baseline_results = pd.DataFrame(compiled_regression_baseline_results)

In [403]:
compiled_regression_results.groupby(['target', 'representation', 'train_size']).agg(['mean', 'std', 'count'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,r2,r2,r2,max_error,max_error,max_error,mean_absolute_error,mean_absolute_error,mean_absolute_error,mean_squared_error,mean_squared_error,mean_squared_error
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count,mean,std,count,mean,std,count,mean,std,count
target,representation,train_size,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
outputs.pbe.bandgap,chemical_name,10,-1.053107,0.670171,6,4.504935,0.788691,6,1.33309,0.230264,6,2.804753,0.931255,6
outputs.pbe.bandgap,chemical_name,50,-0.582105,0.468003,5,3.755916,0.849512,5,1.161561,0.198497,5,2.111223,0.685125,5
outputs.pbe.bandgap,chemical_name,100,0.244085,0.165655,5,3.450556,0.611235,5,0.779229,0.135535,5,1.076367,0.260011,5
outputs.pbe.bandgap,chemical_name,200,0.434591,0.115752,5,3.042948,0.300688,5,0.659303,0.050815,5,0.785407,0.124984,5
outputs.pbe.bandgap,chemical_name,500,0.552918,0.032685,5,3.197736,0.304235,5,0.57681,0.03164,5,0.615811,0.053024,5
outputs.pbe.bandgap,chemical_name,1000,0.617889,0.035222,5,3.09762,0.309496,5,0.538628,0.034173,5,0.537305,0.04708,5
outputs.pbe.bandgap,chemical_name,2000,0.668723,0.048312,5,2.921456,0.074627,5,0.495943,0.028959,5,0.464963,0.049448,5
outputs.pbe.bandgap,chemical_name,3000,0.7265,0.006635,5,3.332256,0.420635,5,0.438706,0.016082,5,0.388084,0.016777,5
outputs.pbe.bandgap,info.mofid.mofid_clean,10,-0.594362,0.50792,5,3.45942,0.439433,5,1.177635,0.188368,5,2.164952,0.702307,5
outputs.pbe.bandgap,info.mofid.mofid_clean,50,-0.327317,0.305812,6,3.614013,0.589225,6,1.054364,0.133961,6,1.762382,0.374967,6


In [404]:
compiled_regression_baseline_results.groupby(['target', 'train_size']).agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,r2,r2,max_error,max_error,mean_absolute_error,mean_absolute_error,mean_squared_error,mean_squared_error
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std
target,train_size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
outputs.pbe.bandgap,10,-1.866861,0.000411,5.688,0.0,1.645561,0.000322,3.948074,0.001489
outputs.pbe.bandgap,50,-0.158858,,4.121102,,1.047274,,1.59744,
outputs.pbe.bandgap,100,-0.11388,,4.027462,,1.025447,,1.53236,
outputs.pbe.bandgap,200,-0.150069,,3.622514,,1.042389,,1.587646,
outputs.pbe.bandgap,500,-0.41504,,4.86339,,1.162051,,1.959075,
outputs.pbe.bandgap,1000,,,,,,,,
outputs.pbe.bandgap,2000,,,,,,,,
outputs.pbe.bandgap,3000,,,,,,,,


## CoRE MOF

In [405]:
all_core_results = glob('results/20220914_mof_classification/*pkl')

In [406]:
compiled_core_classification_data = []
compiled_core_baseline_data = []

compiled_core_classification_data_skip_hyperopt = []
compiled_core_baseline_data_skip_hyperopt = []

for filename in all_core_results:
    loaded_res = load_pickle(filename)

    res = {
        "accuracy": get_else_nan(loaded_res["cm"], "ACC_Macro"),
        "f1_macro": get_else_nan(loaded_res["cm"], "F1_Macro"),
        "f1_micro": get_else_nan(loaded_res["cm"], "F1_Micro"),
        "train_size": loaded_res["train_size"],
        "mcc": get_else_nan(loaded_res["cm"], "Overall_MCC"),
        "representation": loaded_res["representation"],
        "target": loaded_res["target"],
    }

    try:
        if len(loaded_res["baseline_cm"].classes) == 5:
            baseline_res = {
                "baseline_accuracy": get_else_nan(loaded_res["baseline_cm"], "ACC_Macro"),
                "baseline_f1_macro": get_else_nan(loaded_res["baseline_cm"], "F1_Macro"),
                "baseline_f1_micro": get_else_nan(loaded_res["baseline_cm"], "F1_Micro"),
                "baseline_mcc": get_else_nan(loaded_res["baseline_cm"], "Overall_MCC"),
                "train_size": loaded_res["train_size"],
                "representation": loaded_res["representation"],
                "target": loaded_res["target"],
            }
        else: 
            raise ValueError
    except Exception:
        baseline_res = {
                "baseline_accuracy": np.nan,
                "baseline_f1_macro": np.nan,
                "baseline_f1_micro": np.nan,
                "baseline_mcc": np.nan,
                "train_size": loaded_res["train_size"],
                "representation": loaded_res["representation"],
                "target": loaded_res["target"],
            }
    if 'skip_hyperopt' in filename:
        compiled_core_classification_data_skip_hyperopt.append(res)
        compiled_core_baseline_data_skip_hyperopt.append(baseline_res)
    else:
        compiled_core_classification_data.append(res)
        compiled_core_baseline_data.append(baseline_res)


In [407]:
compiled_core_classification_data = pd.DataFrame(compiled_core_classification_data)
compiled_core_baseline_data = pd.DataFrame(compiled_core_baseline_data)

compiled_core_classification_data_skip_hyperopt = pd.DataFrame(compiled_core_classification_data_skip_hyperopt)
compiled_core_baseline_data_skip_hyperopt = pd.DataFrame(compiled_core_baseline_data_skip_hyperopt)

In [408]:
compiled_core_classification_data_agg = compiled_core_classification_data.groupby(['target', 'representation', 'train_size']).agg(['mean', 'std'])

  compiled_core_classification_data_agg = compiled_core_classification_data.groupby(['target', 'representation', 'train_size']).agg(['mean', 'std'])


In [409]:
compiled_core_classification_data_agg

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,f1_macro,f1_macro,f1_micro,f1_micro
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std
target,representation,train_size,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
outputs.CH4DC_cat,chemical_name_y,10,0.7178,0.010701,0.149823,0.027937,0.2945,0.026752
outputs.CH4DC_cat,chemical_name_y,50,0.7272,0.005581,0.130674,0.023026,0.318,0.013952
outputs.CH4DC_cat,chemical_name_y,100,0.7214,0.006,0.153043,0.016027,0.3035,0.015
outputs.CH4DC_cat,chemical_name_y,200,0.8068,0.124025,0.132933,0.123551,0.225333,0.195185
outputs.CH4DC_cat,chemical_name_y,500,0.736533,0.005478,0.249662,0.076365,0.3075,0.07994
outputs.CH4DC_cat,chemical_name_y,1000,0.773067,0.008992,0.35793,0.034869,0.432667,0.02248
outputs.CH4DC_cat,clean_mofid,10,0.745229,0.04321,0.124279,0.026044,0.3155,0.016442
outputs.CH4DC_cat,clean_mofid,50,0.7206,0.008941,0.159902,0.047351,0.3015,0.022353
outputs.CH4DC_cat,clean_mofid,100,0.719733,0.002013,0.135585,0.020836,0.299333,0.005033
outputs.CH4DC_cat,clean_mofid,200,0.7364,0.014816,0.198593,0.056259,0.341,0.037041


In [410]:
compiled_core_baseline_data.groupby(['target',  "train_size"]).agg(['mean', 'std'])

  compiled_core_baseline_data.groupby(['target',  "train_size"]).agg(['mean', 'std'])


Unnamed: 0_level_0,Unnamed: 1_level_0,baseline_accuracy,baseline_accuracy,baseline_f1_macro,baseline_f1_macro,baseline_f1_micro,baseline_f1_micro,baseline_mcc,baseline_mcc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std
target,train_size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
outputs.CH4DC_cat,10,,,,,,,,
outputs.CH4DC_cat,50,0.786994,0.025932,0.411602,0.023767,0.467484,0.064831,0.281864,0.084508
outputs.CH4DC_cat,100,0.827163,0.010223,0.536074,0.06102,0.567908,0.025558,0.419658,0.036628
outputs.CH4DC_cat,200,0.861877,,0.647754,,0.654693,,0.540553,
outputs.CH4DC_cat,500,,,,,,,,
outputs.CH4DC_cat,1000,,,,,,,,
outputs.logKH_CO2_cat,10,,,,,,,,
outputs.logKH_CO2_cat,50,,,,,,,,
outputs.logKH_CO2_cat,100,,,,,,,,
outputs.logKH_CO2_cat,200,0.803013,,0.27786,,0.507532,,0.214539,


In [411]:
compiled_core_baseline_data_skip_hyperopt_agg =compiled_core_baseline_data_skip_hyperopt.groupby(['target',  "train_size"]).agg(['mean', 'std'])
compiled_core_baseline_data_skip_hyperopt_agg

  compiled_core_baseline_data_skip_hyperopt_agg =compiled_core_baseline_data_skip_hyperopt.groupby(['target',  "train_size"]).agg(['mean', 'std'])


Unnamed: 0_level_0,Unnamed: 1_level_0,baseline_accuracy,baseline_accuracy,baseline_f1_macro,baseline_f1_macro,baseline_f1_micro,baseline_f1_micro,baseline_mcc,baseline_mcc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std
target,train_size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
outputs.CH4DC_cat,10,0.722651,0.010529,0.228574,0.021924,0.306628,0.026323,0.080307,0.029159
outputs.CH4DC_cat,50,0.804115,0.010768,0.476998,0.02591,0.510288,0.026919,0.34697,0.037424
outputs.CH4DC_cat,100,0.812859,0.009818,0.51683,0.027868,0.532147,0.024545,0.375949,0.032717
outputs.CH4DC_cat,200,0.844959,0.004801,0.597759,0.015077,0.612399,0.012004,0.483121,0.015312
outputs.CH4DC_cat,500,0.870937,0.003918,0.670434,0.010805,0.677341,0.009795,0.569829,0.012482
outputs.CH4DC_cat,1000,0.887257,0.004515,0.714128,0.014921,0.718143,0.011288,0.625027,0.014852
outputs.logKH_CO2_cat,10,0.737323,0.016218,0.196766,0.023445,0.343308,0.040546,0.018442,0.046429
outputs.logKH_CO2_cat,50,0.769486,0.008209,0.239585,0.015373,0.423715,0.020523,0.079805,0.032316
outputs.logKH_CO2_cat,100,0.784836,0.008639,0.252245,0.00966,0.462091,0.021597,0.142702,0.0294
outputs.logKH_CO2_cat,200,0.7914,0.006498,0.301413,0.020863,0.478499,0.016244,0.172343,0.024734


In [432]:
print(compile_table_baseline_row(compiled_core_baseline_data_skip_hyperopt_agg.loc['outputs.logKH_CO2_cat'].loc[1000:1000]))

\num{ 0.82 \pm 0.0 } &  \num{ 0.55 \pm 0.01  } & \num{ 0.41 \pm 0.02 }\\
