# Compile the results on the MOF case study 

In [85]:
from glob import glob
from pathlib import Path
import matplotlib.pyplot as plt
plt.style.use(['science', 'nature'])
import pandas as pd 
from fastcore.helpers import load_pickle
from gpt3forchem.helpers import get_else_nan

## QMOF 

### Classification 

In [86]:
all_qmof_classification_res = glob('/Users/kevinmaikjablonka/git/kjappelbaum/gpt3forchem/experiments/results/20220913_mof_classification/*.pkl')

In [87]:
compiled_qmof_classification_data = []
compiled_qmof_baseline_data = []


compiled_qmof_classification_data_skip_hyperopt = []
compiled_qmof_baseline_data_skip_hyperopt = []

for filename in all_qmof_classification_res:
    loaded_res = load_pickle(filename)

    res = {
        "accuracy": get_else_nan(loaded_res["cm"], "ACC_Macro"),
        "f1_macro": get_else_nan(loaded_res["cm"], "F1_Macro"),
        "f1_micro": get_else_nan(loaded_res["cm"], "F1_Micro"),
        "train_size": loaded_res["train_size"],
        "mcc": get_else_nan(loaded_res["cm"], "Overall_MCC"),
        "representation": loaded_res["representation"],
        "target": loaded_res["target"],
    }

    baseline_res = {
        "baseline_accuracy": get_else_nan(loaded_res["baseline_cm"], "ACC_Macro"),
        "baseline_f1_macro": get_else_nan(loaded_res["baseline_cm"], "F1_Macro"),
        "baseline_f1_micro": get_else_nan(loaded_res["baseline_cm"], "F1_Micro"),
        "baseline_mcc": get_else_nan(loaded_res["baseline_cm"], "Overall_MCC"),
        "train_size": loaded_res["train_size"],
        "representation": loaded_res["representation"],
        "target": loaded_res["target"],
    }

    if 'skip_hyperopt' in filename:
        compiled_qmof_classification_data_skip_hyperopt.append(res)
        compiled_qmof_baseline_data_skip_hyperopt.append(baseline_res)
    else:
        compiled_qmof_classification_data.append(res)
        compiled_qmof_baseline_data.append(baseline_res)


In [88]:
compiled_qmof_classification_data = pd.DataFrame(compiled_qmof_classification_data)
compiled_qmof_baseline_data = pd.DataFrame(compiled_qmof_baseline_data)

compiled_qmof_classification_data_skip_hyperopt = pd.DataFrame(compiled_qmof_classification_data_skip_hyperopt)
compiled_qmof_baseline_data_skip_hyperopt = pd.DataFrame(compiled_qmof_baseline_data_skip_hyperopt)

In [89]:
compiled_qmof_baseline_data_skip_hyperopt

Unnamed: 0,baseline_accuracy,baseline_f1_macro,baseline_f1_micro,baseline_mcc,train_size,representation,target
0,0.777778,0.0,0.0,0.0,100,info.mofid.mofid_clean,outputs.pbe.bandgap_cat
1,0.8,0.0,0.0,0.0,1000,info.mofid.mofid_clean,outputs.pbe.bandgap_cat
2,0.777778,0.0,0.0,0.0,100,info.mofid.mofid_clean,outputs.pbe.bandgap_cat
3,0.777778,0.0,0.0,0.0,50,info.mofid.mofid_clean,outputs.pbe.bandgap_cat
4,0.8,0.0,0.0,0.0,2000,info.mofid.mofid_clean,outputs.pbe.bandgap_cat
5,0.8,0.0,0.0,0.0,500,info.mofid.mofid_clean,outputs.pbe.bandgap_cat
6,0.777778,0.0,0.0,0.0,50,info.mofid.mofid_clean,outputs.pbe.bandgap_cat
7,0.8,0.0,0.0,0.0,3000,info.mofid.mofid_clean,outputs.pbe.bandgap_cat
8,0.777778,0.0,0.0,0.0,200,info.mofid.mofid_clean,outputs.pbe.bandgap_cat
9,0.8,0.0,0.0,0.0,100,info.mofid.mofid_clean,outputs.pbe.bandgap_cat


In [90]:
compiled_qmof_classification_data.groupby(['target', 'representation', 'train_size']).agg(['mean', 'std'])

  compiled_qmof_classification_data.groupby(['target', 'representation', 'train_size']).agg(['mean', 'std'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,f1_macro,f1_macro,f1_micro,f1_micro
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std
target,representation,train_size,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
outputs.pbe.bandgap_cat,chemical_name,10,0.735467,0.021914,0.194741,0.038253,0.338667,0.054784
outputs.pbe.bandgap_cat,chemical_name,50,0.7688,0.041899,0.182454,0.098772,0.422,0.104747
outputs.pbe.bandgap_cat,chemical_name,100,0.818133,0.014048,0.292088,0.05274,0.545333,0.035119
outputs.pbe.bandgap_cat,chemical_name,200,0.8364,0.00396,0.360514,0.014498,0.591,0.009899
outputs.pbe.bandgap_cat,chemical_name,500,0.8552,,0.462298,,0.638,
outputs.pbe.bandgap_cat,chemical_name,1000,0.879533,0.018573,0.469777,0.04052,0.672,0.008485
outputs.pbe.bandgap_cat,chemical_name,2000,0.8892,0.00396,0.608783,0.064483,0.723,0.009899
outputs.pbe.bandgap_cat,chemical_name,3000,0.9224,,0.621715,,0.806,
outputs.pbe.bandgap_cat,info.mofid.mofid_clean,10,0.717433,0.037845,0.141765,0.090571,0.2525,0.168312
outputs.pbe.bandgap_cat,info.mofid.mofid_clean,50,0.763985,0.024521,0.174914,0.070117,0.341667,0.137802


In [91]:
compiled_qmof_baseline_data[['target', 'train_size', 'baseline_accuracy', 'baseline_f1_micro', 'baseline_f1_macro']].groupby(['target', 'train_size']).agg(['std', 'mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,baseline_accuracy,baseline_accuracy,baseline_f1_micro,baseline_f1_micro,baseline_f1_macro,baseline_f1_macro
Unnamed: 0_level_1,Unnamed: 1_level_1,std,mean,std,mean,std,mean
target,train_size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
outputs.pbe.bandgap_cat,10,,,,,,
outputs.pbe.bandgap_cat,50,,0.777778,,0.0,,0.0
outputs.pbe.bandgap_cat,100,,,,,,
outputs.pbe.bandgap_cat,200,,,,,,
outputs.pbe.bandgap_cat,500,,,,,,
outputs.pbe.bandgap_cat,1000,,,,,,
outputs.pbe.bandgap_cat,2000,,,,,,
outputs.pbe.bandgap_cat,3000,,,,,,


### Regression 

In [92]:
qmof_regression_results = glob('/Users/kevinmaikjablonka/git/kjappelbaum/gpt3forchem/experiments/results/20220915_mof_regression/*.pkl')

In [93]:
load_pickle(qmof_regression_results[0])

{'model_type': 'ada',
 'train_set_size': 2000,
 'prefix': '',
 'train_size': 2000,
 'test_size': 1627,
 'metrics': {'r2': 0.7192669331671564,
  'max_error': 2.5827999999999998,
  'mean_absolute_error': 0.435709634,
  'mean_squared_error': 0.374205485176178},
 'completions': {'choices': [<OpenAIObject at 0x2a08d0ea0> JSON: {
     "finish_reason": "length",
     "index": 0,
     "logprobs": null,
     "text": " 0.9595@@@4.936"
   },
   <OpenAIObject at 0x14f10a220> JSON: {
     "finish_reason": "length",
     "index": 1,
     "logprobs": null,
     "text": " 1.934@@@+1.9"
   },
   <OpenAIObject at 0x14f10a4f0> JSON: {
     "finish_reason": "length",
     "index": 2,
     "logprobs": null,
     "text": " 0.49@@@@@@ 0.49"
   },
   <OpenAIObject at 0x2a08cc8b0> JSON: {
     "finish_reason": "length",
     "index": 3,
     "logprobs": null,
     "text": " 0.03934@@@0.49"
   },
   <OpenAIObject at 0x2a08cc9f0> JSON: {
     "finish_reason": "length",
     "index": 4,
     "logprobs": null,
   

In [94]:
compiled_regression_results = []
compiled_regression_baseline_results = []

for res_file in qmof_regression_results:
    try:
        res = load_pickle(res_file)
        res_dict = {
            'r2': res['metrics']['r2'],
            'max_error': res['metrics']['max_error'],
            'mean_absolute_error': res['metrics']['mean_absolute_error'],
            'mean_squared_error': res['metrics']['mean_squared_error'],
        }
        res_dict['train_size'] = res['train_size']
        res_dict['representation'] = res['representation']
        res_dict['target'] = res['target']
    except Exception: 
        pass
    baseline_res_dict = res['baseline_metrics']
    baseline_res_dict['train_size'] = res['train_size']
    baseline_res_dict['target'] = res['target']

    compiled_regression_results.append(res_dict)
    compiled_regression_baseline_results.append(baseline_res_dict)
    

In [95]:
compiled_regression_results = pd.DataFrame(compiled_regression_results)
compiled_regression_baseline_results = pd.DataFrame(compiled_regression_baseline_results)

In [96]:
compiled_regression_results.groupby(['target', 'representation', 'train_size']).agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,r2,r2,max_error,max_error,mean_absolute_error,mean_absolute_error,mean_squared_error,mean_squared_error
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
target,representation,train_size,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
outputs.pbe.bandgap,chemical_name,10,-0.443025,,2.9188,,1.107028,,1.942161,
outputs.pbe.bandgap,chemical_name,50,-1.133713,,4.822,,1.416127,,2.895841,
outputs.pbe.bandgap,chemical_name,100,0.157699,,2.96148,,0.845537,,1.297949,
outputs.pbe.bandgap,chemical_name,200,0.434276,,2.7548,,0.665363,,0.805371,
outputs.pbe.bandgap,chemical_name,1000,0.613104,,2.8516,,0.564574,,0.566019,
outputs.pbe.bandgap,info.mofid.mofid_clean,10,-0.386953,0.017388,3.491233,0.22932,1.110821,0.028184,1.898458,0.08367
outputs.pbe.bandgap,info.mofid.mofid_clean,50,-0.281432,0.295598,3.484084,0.568373,1.041856,0.143033,1.728283,0.401773
outputs.pbe.bandgap,info.mofid.mofid_clean,100,-0.477597,0.515338,3.94692,0.554811,1.10519,0.233195,2.097595,0.776169
outputs.pbe.bandgap,info.mofid.mofid_clean,200,0.385412,0.091779,3.638514,0.534815,0.699816,0.054406,0.876516,0.124047
outputs.pbe.bandgap,info.mofid.mofid_clean,500,0.587263,0.039966,3.07575,0.722592,0.550163,0.037568,0.552069,0.069055


In [97]:
compiled_regression_baseline_results.groupby(['target', 'train_size']).agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,r2,r2,max_error,max_error,mean_absolute_error,mean_absolute_error,mean_squared_error,mean_squared_error
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std
target,train_size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
outputs.pbe.bandgap,10,-1.858794,0.000729,5.688,0.0,1.644053,6.5e-05,3.945551,0.00022
outputs.pbe.bandgap,50,-0.095206,0.057805,4.510446,0.022751,1.032713,0.02086,1.510529,0.075435
outputs.pbe.bandgap,100,-0.164208,0.015492,4.392919,0.106019,1.059205,0.006257,1.602024,0.021638
outputs.pbe.bandgap,200,-0.125452,0.057295,4.324363,0.207612,1.035032,0.012469,1.540818,0.068257
outputs.pbe.bandgap,500,-0.255081,0.081829,4.590697,0.157389,1.09524,0.039725,1.731735,0.104383
outputs.pbe.bandgap,1000,-0.140141,,4.219677,,1.050336,,1.575476,
outputs.pbe.bandgap,2000,-0.142327,,3.987293,,1.051185,,1.569617,
outputs.pbe.bandgap,3000,-0.320016,,4.786718,,1.106718,,1.77404,


## CoRE MOF

In [78]:
all_core_results = glob('results/20220914_mof_classification/*pkl')

In [79]:
compiled_core_classification_data = []
compiled_core_baseline_data = []

compiled_core_classification_data_skip_hyperopt = []
compiled_core_baseline_data_skip_hyperopt = []

for filename in all_core_results:
    loaded_res = load_pickle(filename)

    res = {
        "accuracy": get_else_nan(loaded_res["cm"], "ACC_Macro"),
        "f1_macro": get_else_nan(loaded_res["cm"], "F1_Macro"),
        "f1_micro": get_else_nan(loaded_res["cm"], "F1_Micro"),
        "train_size": loaded_res["train_size"],
        "mcc": get_else_nan(loaded_res["cm"], "Overall_MCC"),
        "representation": loaded_res["representation"],
        "target": loaded_res["target"],
    }

    baseline_res = {
        "baseline_accuracy": get_else_nan(loaded_res["baseline_cm"], "ACC_Macro"),
        "baseline_f1_macro": get_else_nan(loaded_res["baseline_cm"], "F1_Macro"),
        "baseline_f1_micro": get_else_nan(loaded_res["baseline_cm"], "F1_Micro"),
        "baseline_mcc": get_else_nan(loaded_res["baseline_cm"], "Overall_MCC"),
        "train_size": loaded_res["train_size"],
        "representation": loaded_res["representation"],
        "target": loaded_res["target"],
    }

    if 'skip_hyperopt' in filename:
        compiled_core_classification_data_skip_hyperopt.append(res)
        compiled_core_baseline_data_skip_hyperopt.append(baseline_res)
    else:
        compiled_core_classification_data.append(res)
        compiled_core_baseline_data.append(baseline_res)


In [80]:
compiled_core_classification_data = pd.DataFrame(compiled_core_classification_data)
compiled_core_baseline_data = pd.DataFrame(compiled_core_baseline_data)

compiled_core_classification_data_skip_hyperopt = pd.DataFrame(compiled_core_classification_data_skip_hyperopt)
compiled_core_baseline_data_skip_hyperopt = pd.DataFrame(compiled_core_baseline_data_skip_hyperopt)

In [81]:
compiled_core_classification_data_agg = compiled_core_classification_data.groupby(['target', 'representation', 'train_size']).agg(['mean', 'std'])

  compiled_core_classification_data_agg = compiled_core_classification_data.groupby(['target', 'representation', 'train_size']).agg(['mean', 'std'])


In [82]:
compiled_core_classification_data_agg

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,f1_macro,f1_macro,f1_micro,f1_micro
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std
target,representation,train_size,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
outputs.CH4DC_cat,chemical_name_y,10,0.7178,0.010701,0.149823,0.027937,0.2945,0.026752
outputs.CH4DC_cat,chemical_name_y,50,0.7272,0.005581,0.130674,0.023026,0.318,0.013952
outputs.CH4DC_cat,chemical_name_y,100,0.7214,0.006,0.153043,0.016027,0.3035,0.015
outputs.CH4DC_cat,chemical_name_y,200,0.8068,0.124025,0.132933,0.123551,0.225333,0.195185
outputs.CH4DC_cat,chemical_name_y,500,0.736533,0.005478,0.249662,0.076365,0.3075,0.07994
outputs.CH4DC_cat,chemical_name_y,1000,0.773067,0.008992,0.35793,0.034869,0.432667,0.02248
outputs.CH4DC_cat,clean_mofid,10,0.745229,0.04321,0.124279,0.026044,0.3155,0.016442
outputs.CH4DC_cat,clean_mofid,50,0.7206,0.008941,0.159902,0.047351,0.3015,0.022353
outputs.CH4DC_cat,clean_mofid,100,0.719733,0.002013,0.135585,0.020836,0.299333,0.005033
outputs.CH4DC_cat,clean_mofid,200,0.7364,0.014816,0.198593,0.056259,0.341,0.037041


In [83]:
compiled_core_baseline_data.groupby(['target',  "train_size"]).agg(['mean', 'std'])

  compiled_core_baseline_data.groupby(['target',  "train_size"]).agg(['mean', 'std'])


Unnamed: 0_level_0,Unnamed: 1_level_0,baseline_accuracy,baseline_accuracy,baseline_f1_macro,baseline_f1_macro,baseline_f1_micro,baseline_f1_micro,baseline_mcc,baseline_mcc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std
target,train_size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
outputs.CH4DC_cat,10,,,,,,,,
outputs.CH4DC_cat,50,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0
outputs.CH4DC_cat,100,0.8,1.110223e-16,0.0,0.0,0.0,0.0,0.0,0.0
outputs.CH4DC_cat,200,0.8,1.570092e-16,0.0,0.0,0.0,0.0,0.0,0.0
outputs.CH4DC_cat,500,0.8,,0.0,,0.0,,0.0,
outputs.CH4DC_cat,1000,,,,,,,,
outputs.logKH_CO2_cat,10,,,,,,,,
outputs.logKH_CO2_cat,50,,,,,,,,
outputs.logKH_CO2_cat,100,,,,,,,,
outputs.logKH_CO2_cat,200,0.792593,0.01283001,0.0,0.0,0.0,0.0,0.0,0.0


In [84]:
compiled_core_baseline_data_skip_hyperopt

Unnamed: 0,baseline_accuracy,baseline_f1_macro,baseline_f1_micro,baseline_mcc,train_size,representation,target
0,0.8,0.0,0.0,0.0,100,clean_mofid,outputs.CH4DC_cat
1,0.8,0.0,0.0,0.0,10,clean_mofid,outputs.CH4DC_cat
2,0.8,0.0,0.0,0.0,10,clean_mofid,outputs.CH4DC_cat
3,0.8,0.0,0.0,0.0,50,clean_mofid,outputs.logKH_CO2_cat
4,0.8,0.0,0.0,0.0,100,clean_mofid,outputs.CH4DC_cat
5,0.8,0.0,0.0,0.0,200,clean_mofid,outputs.logKH_CO2_cat
6,0.8,0.0,0.0,0.0,50,clean_mofid,outputs.CH4DC_cat
7,0.8,0.0,0.0,0.0,50,clean_mofid,outputs.CH4DC_cat
8,0.8,0.0,0.0,0.0,500,clean_mofid,outputs.CH4DC_cat
9,0.777778,0.0,0.0,0.0,10,clean_mofid,outputs.logKH_CO2_cat
