In [116]:
# Load IPython's autoreload extension and switch it to mode 2 so that edits to
# imported project modules are picked up without restarting the kernel.
# NOTE(review): re-running this cell in a live kernel prints the
# "already loaded" notice; it is harmless.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [117]:
import pandas as pd
from glob import glob 
from fastcore.xtras import load_pickle

from gpt3forchem.output import get_regression_metrics
from gpt3forchem.api_wrappers import extract_prediction

from pycm import ConfusionMatrix

## Classification

In [118]:
# Collect every classification result file. `glob` yields paths in a
# filesystem-dependent order, so sort them to make positional access
# (e.g. `all_res[-1]`) reproducible across machines and re-runs.
all_res = sorted(glob('results/20221130_esol/*'))

In [119]:
def _normalized_cm(cm):
    """Rebuild a pycm ``ConfusionMatrix`` from ``cm`` with string labels.

    Every entry of ``cm.actual_vector`` and ``cm.predict_vector`` is cast to
    ``str`` and stripped of surrounding whitespace before the matrix is
    recomputed (presumably so that labels differing only in type or padding
    are counted as the same class -- confirm against the stored results).
    """
    actual = [str(label).strip() for label in cm.actual_vector]
    predicted = [str(label).strip() for label in cm.predict_vector]
    return ConfusionMatrix(actual, predicted)


def _classification_row(res, cm):
    """Build one summary record for a loaded result dict and its confusion matrix."""
    return {
        'train_size': res['train_size'],
        'representation': res['representation'],
        # NOTE(review): pycm's `ACC_Macro` is the macro-average of per-class
        # accuracies, not the overall accuracy (`Overall_ACC`) -- confirm this
        # is the quantity that should be reported as 'accuracy'.
        'accuracy': cm.ACC_Macro,
        'f1_macro': cm.F1_Macro,
        'f1_micro': cm.F1_Micro,
    }


# One record per result file: `metrics` for the model, `baselines` for the
# baseline stored alongside it in the same pickle.
metrics = []
baselines = []

for res_path in all_res:
    # Keep the path and the loaded dict under separate names instead of
    # rebinding the loop variable.
    res = load_pickle(res_path)
    cm = _normalized_cm(res['cm'])
    baseline_cm = _normalized_cm(res['baseline']['cm'])
    metrics.append(_classification_row(res, cm))
    baselines.append(_classification_row(res, baseline_cm))

In [120]:
# Load the result stored at the last path in `all_res` into `res`
# (presumably kept around for ad-hoc inspection -- later loops rebind `res`).
last_result_path = all_res[-1]
res = load_pickle(last_result_path)

In [121]:
# Convert the per-run record lists into DataFrames; the names are reused
# because the summary cells below refer to `metrics` and `baselines`.
metrics, baselines = pd.DataFrame(data=metrics), pd.DataFrame(data=baselines)

In [122]:
# Mean, standard deviation and number of runs for every metric column,
# per (representation, train_size) combination.
(
    metrics
    .groupby(['representation', 'train_size'])
    .agg(['mean', 'std', 'count'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,accuracy,accuracy,accuracy,f1_macro,f1_macro,f1_macro,f1_micro,f1_micro,f1_micro
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,count,mean,std,count,mean,std,count
representation,train_size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
inchi,10,0.750667,0.021939,3,0.126942,0.040486,3,0.376667,0.054848,3
inchi,50,0.796667,0.027227,3,0.213825,0.092108,3,0.491667,0.068069,3
inchi,500,0.899,0.004243,2,0.622984,0.055124,2,0.7475,0.010607,2
iupac_name,10,0.756,0.025456,2,0.150165,0.067264,2,0.39,0.06364,2
iupac_name,50,0.774,0.005657,2,0.184889,0.025109,2,0.435,0.014142,2
iupac_name,500,0.886,,1,0.63401,,1,0.715,,1
selfies,10,0.785083,0.029413,4,0.162095,0.050342,4,0.43875,0.063163,4
selfies,50,0.787,0.017626,4,0.227685,0.044624,4,0.4675,0.044064,4
selfies,500,0.877333,0.003055,3,0.617188,0.029482,3,0.693333,0.007638,3
smiles,10,0.7572,0.031515,5,0.136093,0.047727,5,0.393,0.078788,5


In [123]:
# Mean and standard deviation of the baseline metrics per training-set size.
# `representation` holds string labels and is not a grouping key here, so it
# cannot be averaged: older pandas warns and silently drops it, while
# pandas >= 2.0 raises TypeError. Drop it explicitly before aggregating.
(
    baselines
    .drop(columns=['representation'])
    .groupby(['train_size'])
    .agg(['mean', 'std'])
)

  baselines.groupby(['train_size']).agg(['mean', 'std'])


Unnamed: 0_level_0,accuracy,accuracy,f1_macro,f1_macro,f1_micro,f1_micro
Unnamed: 0_level_1,mean,std,mean,std,mean,std
train_size,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
10,0.763857,0.008018,0.116184,0.004229,0.409643,0.020045
50,0.852,0.0,0.36869,0.0,0.63,0.0
500,0.818182,0.0,0.0,0.0,0.0,0.0


## Regression

In [124]:
# Collect every regression result file. `glob` yields paths in a
# filesystem-dependent order, so sort them to keep the iteration order
# reproducible across machines and re-runs.
all_res_regression = sorted(glob('results/20221129_esol_regression/*'))

In [125]:
# Metric names copied from each result's 'metrics' and 'baseline' dicts,
# in the column order used by the summary tables.
regression_metric_names = (
    'r2',
    'max_error',
    'mean_absolute_error',
    'mean_squared_error',
    'rmse',
)

metrics_regression, baselines_regression = [], []

for res_path in all_res_regression:
    res = load_pickle(res_path)

    # Model scores for this run, prefixed with the run's identifying fields.
    model_row = {
        'train_size': res['train_size'],
        'representation': res['representation'],
    }
    model_row.update({name: res['metrics'][name] for name in regression_metric_names})
    metrics_regression.append(model_row)

    # Baseline scores stored in the same result file, same layout.
    baseline_row = {
        'train_size': res['train_size'],
        'representation': res['representation'],
    }
    baseline_row.update({name: res['baseline'][name] for name in regression_metric_names})
    baselines_regression.append(baseline_row)

In [126]:
# Convert the per-run record lists into DataFrames; the names are reused
# because the summary cells below refer to them.
metrics_regression, baselines_regression = (
    pd.DataFrame(data=metrics_regression),
    pd.DataFrame(data=baselines_regression),
)

In [127]:
# Mean, standard deviation and number of runs for every regression metric,
# per (representation, train_size) combination.
(
    metrics_regression
    .groupby(['representation', 'train_size'])
    .agg(['mean', 'std', 'count'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,r2,r2,r2,max_error,max_error,max_error,mean_absolute_error,mean_absolute_error,mean_absolute_error,mean_squared_error,mean_squared_error,mean_squared_error,rmse,rmse,rmse
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,count,mean,std,count,mean,std,count,mean,std,count,mean,std,count
representation,train_size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
inchi,10,-0.113511,0.28454,2,6.44,1.640488,2,1.64545,0.194172,2,4.459442,1.139537,2,2.103031,0.270927,2
inchi,50,0.184728,0.18359,2,5.48,0.707107,2,1.41655,0.168079,2,3.36222,0.757135,2,1.827777,0.207119,2
inchi,500,0.697506,0.05654,2,4.7,1.92333,2,0.78905,0.026375,2,1.17175,0.219014,2,1.080098,0.101386,2
iupac_name,10,-0.685451,0.015444,2,5.98,0.0,2,2.1912,0.00502,2,6.749972,0.06185,2,2.598057,0.011903,2
iupac_name,50,-0.307868,0.134366,2,8.43,0.141421,2,1.700925,0.111829,2,5.393711,0.554132,2,2.320902,0.119379,2
iupac_name,500,0.564888,0.03731,2,6.32,0.989949,2,0.90275,0.021779,2,1.685464,0.144526,2,1.297657,0.055687,2
selfies,10,-0.485522,0.342497,3,5.576667,0.561456,3,2.03095,0.272153,3,5.949288,1.371647,3,2.428841,0.273916,3
selfies,50,-0.18418,0.154835,3,7.353333,0.041633,3,1.67575,0.130111,3,4.883616,0.638546,3,2.206746,0.144336,3
selfies,500,0.712676,0.025338,2,3.475,0.190919,2,0.79835,0.039739,2,1.112986,0.098151,2,1.054468,0.04654,2
smiles,10,-0.213162,0.257878,6,6.503333,0.744222,6,1.742408,0.216714,6,4.822443,1.069518,6,2.185279,0.237478,6


In [128]:
# Mean, standard deviation and run count of the baseline regression metrics
# per training-set size. `representation` holds string labels and is not a
# grouping key here, so it cannot be averaged: older pandas warns and silently
# drops it, while pandas >= 2.0 raises TypeError. Drop it explicitly first.
(
    baselines_regression
    .drop(columns=['representation'])
    .groupby(['train_size'])
    .agg(['mean', 'std', 'count'])
)

  baselines_regression.groupby(['train_size']).agg(['mean', 'std', 'count'])


Unnamed: 0_level_0,r2,r2,r2,max_error,max_error,max_error,mean_absolute_error,mean_absolute_error,mean_absolute_error,mean_squared_error,mean_squared_error,mean_squared_error,rmse,rmse,rmse
Unnamed: 0_level_1,mean,std,count,mean,std,count,mean,std,count,mean,std,count,mean,std,count
train_size,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
10,0.010333,0.03011,13,5.634251,0.204666,13,1.549004,0.020711,13,3.737158,0.083279,13,1.933059,0.021871,13
50,0.594279,0.017111,12,4.406837,0.165991,12,0.953239,0.022597,12,1.604161,0.096878,12,1.265973,0.040095,12
500,0.896202,0.0,10,1.975797,0.0,10,0.49226,0.0,10,0.437248,0.0,10,0.661248,0.0,10
