In [1]:
%load_ext autoreload
%autoreload 2

In [1]:
import pandas as pd
from glob import glob 
from fastcore.xtras import load_pickle

from gpt3forchem.output import get_regression_metrics
from gpt3forchem.api_wrappers import extract_prediction
from gpt3forchem.helpers import compile_table_row


from pycm import ConfusionMatrix

## Classification

In [None]:
# One path per classification run. glob() yields matches in arbitrary,
# filesystem-dependent order, so sort to keep the record order reproducible.
all_res = sorted(glob('results/20221130_freesolv/*'))

In [5]:
def _with_stripped_labels(cm):
    """Rebuild a pycm ConfusionMatrix with every label cast to ``str`` and stripped.

    Both the actual and the predicted vector of ``cm`` are normalised the same
    way, so labels that differ only by type or surrounding whitespace end up
    in the same class.
    """
    def _clean(labels):
        return [str(label).strip() for label in labels]

    return ConfusionMatrix(
        actual_vector=_clean(cm.actual_vector),
        predict_vector=_clean(cm.predict_vector),
    )


def _classification_row(res, cm):
    """Return one table record: run settings from ``res`` plus scores from ``cm``."""
    return {
        'train_size': res['train_size'],
        'representation': res['representation'],
        # NOTE(review): this is pycm's ACC_Macro, not Overall_ACC -- confirm
        # that the macro-averaged accuracy is what the 'acc' column should report.
        'acc': cm.ACC_Macro,
        'f1_macro': cm.F1_Macro,
        'f1_micro': cm.F1_Micro,
    }


# Start from empty lists so that re-running this cell does not duplicate rows.
metrics = []
baselines = []

for res_path in all_res:
    # Each result file holds the run settings, the model's confusion matrix
    # under 'cm' and the baseline's confusion matrix under 'baseline' -> 'cm'.
    res = load_pickle(res_path)
    cm = _with_stripped_labels(res['cm'])
    baseline_cm = _with_stripped_labels(res['baseline']['cm'])
    metrics.append(_classification_row(res, cm))
    baselines.append(_classification_row(res, baseline_cm))

In [6]:
# Turn the per-run record lists into DataFrames (one row per result file).
# The names are rebound in place, so from here on `metrics` / `baselines`
# are DataFrames rather than lists of dicts.
metrics = pd.DataFrame(data=metrics)
baselines = pd.DataFrame(data=baselines)

In [7]:
# Mean, spread and number of runs for every (representation, train_size) pair;
# the count shows how many result files back each row.
(
    metrics
    .groupby(['representation', 'train_size'])
    .agg(['mean', 'std', 'count'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,f1_macro,f1_macro,f1_macro,f1_micro,f1_micro,f1_micro
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,count,mean,std,count,mean,std,count
representation,train_size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
smiles,10,0.83,0.019799,2,0.193592,0.075424,2,0.575,0.049497,2
smiles,50,0.894,,1,0.364279,,1,0.735,,1


In [122]:
# LaTeX table of the classification scores: mean and std over runs,
# rounded to two decimals, per (representation, train_size) pair.
print(
    metrics
    .groupby(['representation', 'train_size'])
    .agg(['mean', 'std'])
    .round(2)
    .to_latex()
)

\begin{tabular}{llrrrrrr}
\toprule
       &     & \multicolumn{2}{l}{acc} & \multicolumn{2}{l}{f1\_macro} & \multicolumn{2}{l}{f1\_micro} \\
       &     &  mean &   std &     mean &   std &     mean &   std \\
representation & train\_size &       &       &          &       &          &       \\
\midrule
inchi & 10  &  0.80 &  0.02 &     0.20 &  0.03 &     0.50 &  0.06 \\
       & 50  &  0.84 &  0.01 &     0.23 &  0.03 &     0.59 &  0.03 \\
       & 500 &  0.94 &  0.01 &     0.76 &  0.16 &     0.84 &  0.03 \\
iupac\_name & 10  &  0.81 &  0.02 &     0.19 &  0.04 &     0.53 &  0.06 \\
       & 50  &  0.86 &  0.03 &     0.38 &  0.11 &     0.66 &  0.07 \\
       & 500 &  0.94 &  0.01 &     0.75 &  0.10 &     0.85 &  0.02 \\
selfies & 10  &  0.79 &  0.04 &     0.19 &  0.06 &     0.48 &  0.10 \\
       & 50  &  0.86 &  0.03 &     0.25 &  0.07 &     0.65 &  0.09 \\
       & 500 &  0.93 &  0.01 &     0.72 &  0.17 &     0.83 &  0.04 \\
smiles & 10  &  0.83 &  0.02 &     0.25 &  0.01 &     0.58 

In [123]:
# LaTeX table of the baseline classification scores, pooled over
# representations and grouped by training-set size only.
# 'representation' is a string column that is not a grouping key here, so it
# must be dropped before aggregating: pandas < 2.0 silently discarded such
# columns, while pandas >= 2.0 raises a TypeError for mean/std on strings.
print(
    baselines
    .drop(columns=['representation'])
    .groupby(['train_size'])
    .agg(['mean', 'std'])
    .round(2)
    .to_latex()
)

\begin{tabular}{lrrrrrr}
\toprule
{} & \multicolumn{2}{l}{acc} & \multicolumn{2}{l}{f1\_macro} & \multicolumn{2}{l}{f1\_micro} \\
{} &  mean &   std &     mean &   std &     mean &   std \\
train\_size &       &       &          &       &          &       \\
\midrule
10         &  0.83 &  0.00 &     0.22 &  0.04 &     0.58 &  0.01 \\
50         &  0.90 &  0.01 &     0.47 &  0.08 &     0.74 &  0.02 \\
500        &  0.86 &  0.07 &     0.31 &  0.38 &     0.35 &  0.42 \\
\bottomrule
\end{tabular}



## Regression

In [103]:
# One path per regression run. glob() yields matches in arbitrary,
# filesystem-dependent order, so sort to keep the record order reproducible.
all_res_regression = sorted(glob('results/20221129_freesolv_regression/*'))

In [104]:
# Scores copied verbatim from each result file, in table-column order.
score_names = ('r2', 'max_error', 'mean_absolute_error', 'mean_squared_error', 'rmse')

# Start from empty lists so that re-running this cell does not duplicate rows.
metrics_regression = []
baselines_regression = []

for res_path in all_res_regression:
    res = load_pickle(res_path)
    # Run settings shared by the model record and the baseline record.
    run_info = {
        'train_size': res['train_size'],
        'representation': res['representation'],
    }
    # Model scores live under 'metrics', baseline scores under 'baseline'.
    metrics_regression.append(
        {**run_info, **{name: res['metrics'][name] for name in score_names}}
    )
    baselines_regression.append(
        {**run_info, **{name: res['baseline'][name] for name in score_names}}
    )

In [105]:
# Turn the per-run record lists into DataFrames (one row per result file).
# The names are rebound in place, so from here on both are DataFrames
# rather than lists of dicts.
metrics_regression = pd.DataFrame(data=metrics_regression)
baselines_regression = pd.DataFrame(data=baselines_regression)

In [124]:
# LaTeX table of the regression scores: mean and std over runs,
# rounded to two decimals, per (representation, train_size) pair.
print(
    metrics_regression
    .groupby(['representation', 'train_size'])
    .agg(['mean', 'std'])
    .round(2)
    .to_latex()
)

\begin{tabular}{llrrrrrrrrrr}
\toprule
       &     & \multicolumn{2}{l}{r2} & \multicolumn{2}{l}{max\_error} & \multicolumn{2}{l}{mean\_absolute\_error} & \multicolumn{2}{l}{mean\_squared\_error} & \multicolumn{2}{l}{rmse} \\
       &     &  mean &   std &      mean &   std &                mean &   std &               mean &   std &  mean &   std \\
representation & train\_size &       &       &           &       &                     &       &                    &       &       &       \\
\midrule
inchi & 10  & -0.22 &  0.11 &     20.21 &  2.34 &                3.14 &  0.17 &              18.34 &  1.38 &  4.28 &  0.16 \\
       & 50  & -0.05 &  0.12 &     18.26 &  0.54 &                2.85 &  0.15 &              15.40 &  1.59 &  3.92 &  0.20 \\
       & 500 &  0.62 &  0.04 &     15.47 &  2.49 &                1.41 &  0.10 &               5.94 &  0.79 &  2.43 &  0.16 \\
iupac\_name & 10  & -0.31 &  0.21 &     18.03 &  3.30 &                3.43 &  0.31 &              19.64 &  3.23 &

In [125]:
# LaTeX table of the baseline regression scores, pooled over
# representations and grouped by training-set size only.
# 'representation' is a string column that is not a grouping key here, so it
# must be dropped before aggregating: pandas < 2.0 silently discarded such
# columns, while pandas >= 2.0 raises a TypeError for mean/std on strings.
print(
    baselines_regression
    .drop(columns=['representation'])
    .groupby(['train_size'])
    .agg(['mean', 'std'])
    .round(2)
    .to_latex()
)

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} & \multicolumn{2}{l}{r2} & \multicolumn{2}{l}{max\_error} & \multicolumn{2}{l}{mean\_absolute\_error} & \multicolumn{2}{l}{mean\_squared\_error} & \multicolumn{2}{l}{rmse} \\
{} &  mean &   std &      mean &   std &                mean &   std &               mean &   std &  mean &   std \\
train\_size &       &       &           &       &                     &       &                    &       &       &       \\
\midrule
10         &  0.13 &  0.05 &     20.27 &  0.60 &                2.71 &  0.07 &              13.93 &  0.34 &  3.73 &  0.05 \\
50         &  0.69 &  0.04 &     10.60 &  2.43 &                1.60 &  0.03 &               4.81 &  0.54 &  2.19 &  0.12 \\
500        &  0.91 &  0.01 &      4.36 &  1.26 &                0.86 &  0.06 &               1.39 &  0.24 &  1.17 &  0.11 \\
\bottomrule
\end{tabular}

