In [67]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up live, without restarting the kernel.
# Re-running this cell on a live kernel only prints an "already loaded" notice.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [68]:
import pandas as pd
from glob import glob 
from fastcore.xtras import load_pickle

from gpt3forchem.output import get_regression_metrics
from gpt3forchem.api_wrappers import extract_prediction

from pycm import ConfusionMatrix

## Classification

In [69]:
# Paths of all pickled classification results. glob() yields matches in a
# filesystem-dependent order, so sort them to keep the order in which the
# records are collected reproducible across machines and re-runs.
all_res = sorted(glob('results/20221130_lipophilicity/*'))

In [70]:
# Sanity check: number of result files matched by the glob
# (56 in the recorded run).
len(all_res)

56

In [71]:
def _stripped(labels):
    """Return every label as a string with surrounding whitespace removed."""
    return [str(label).strip() for label in labels]


def _normalized_cm(raw_cm):
    """Rebuild a confusion matrix from whitespace-stripped string labels.

    The ``actual_vector`` and ``predict_vector`` of the stored matrix are
    converted to stripped strings and a fresh ``ConfusionMatrix`` is built
    from them.
    Presumably this prevents labels that differ only in type or whitespace
    from being counted as distinct classes -- TODO confirm.
    """
    return ConfusionMatrix(_stripped(raw_cm.actual_vector), _stripped(raw_cm.predict_vector))


def _classification_record(run, run_cm):
    """Build one summary row from a loaded result dict and its confusion matrix."""
    # NOTE(review): 'accuracy' is filled from pycm's ACC_Macro, which looks like
    # the macro average of per-class accuracies rather than the overall
    # accuracy (Overall_ACC) -- confirm this is the quantity meant to be reported.
    return {
        'train_size': run['train_size'],
        'representation': run['representation'],
        'accuracy': run_cm.ACC_Macro,
        'f1_macro': run_cm.F1_Macro,
        'f1_micro': run_cm.F1_Micro,
    }


# One record per result file: `metrics` for the model, `baselines` for the
# baseline stored alongside it in the same pickle.
metrics = []
baselines = []

for res_path in all_res:
    # Result pickles are produced by this project; do not point this at untrusted files.
    res = load_pickle(res_path)
    cm = _normalized_cm(res['cm'])
    baseline_cm = _normalized_cm(res['baseline']['cm'])
    metrics.append(_classification_record(res, cm))
    baselines.append(_classification_record(res, baseline_cm))

In [72]:
# Turn both record lists into DataFrames (one row per result file), keeping
# the same variable names for the cells that follow.
metrics, baselines = pd.DataFrame(metrics), pd.DataFrame(baselines)

In [76]:
# Mean and standard deviation of every metric per (representation, train_size),
# rounded to two decimals and emitted as a LaTeX table.
metrics_summary = (
    metrics
    .groupby(['representation', 'train_size'])
    .agg(['mean', 'std'])
    .round(2)
)
print(metrics_summary.to_latex())

\begin{tabular}{llrrrrrr}
\toprule
       &     & \multicolumn{2}{l}{accuracy} & \multicolumn{2}{l}{f1\_macro} & \multicolumn{2}{l}{f1\_micro} \\
       &     &     mean &   std &     mean &   std &     mean &   std \\
representation & train\_size &          &       &          &       &          &       \\
\midrule
inchi & 10  &     0.76 &  0.01 &     0.14 &  0.04 &     0.37 &  0.02 \\
       & 50  &     0.75 &  0.01 &     0.13 &  0.03 &     0.38 &  0.02 \\
       & 500 &     0.76 &  0.01 &     0.22 &  0.02 &     0.39 &  0.02 \\
iupac\_name & 10  &     0.76 &  0.06 &     0.12 &  0.03 &     0.25 &  0.07 \\
       & 50  &     0.73 &  0.02 &     0.12 &  0.04 &     0.33 &  0.06 \\
       & 500 &     0.75 &  0.02 &     0.24 &  0.04 &     0.37 &  0.04 \\
selfies & 10  &     0.74 &  0.03 &     0.11 &  0.03 &     0.32 &  0.04 \\
       & 50  &     0.75 &  0.01 &     0.15 &  0.03 &     0.38 &  0.01 \\
       & 500 &     0.76 &  0.01 &     0.23 &  0.04 &     0.39 &  0.02 \\
smiles & 10  &     0.

In [78]:
# Baseline scores are aggregated per training-set size only. 'representation'
# is a string column that is not part of the group key, so drop it explicitly:
# older pandas silently discarded it during mean/std, whereas pandas >= 2.0
# raises a TypeError when asked to average a non-numeric column.
baselines_summary = (
    baselines
    .drop(columns=['representation'])
    .groupby(['train_size'])
    .agg(['mean', 'std'])
    .round(2)
)
print(baselines_summary.to_latex())

\begin{tabular}{lrrrrrr}
\toprule
{} & \multicolumn{2}{l}{accuracy} & \multicolumn{2}{l}{f1\_macro} & \multicolumn{2}{l}{f1\_micro} \\
{} &     mean &   std &     mean &   std &     mean &   std \\
train\_size &          &       &          &       &          &       \\
\midrule
10         &     0.75 &  0.00 &     0.11 &  0.00 &     0.38 &  0.00 \\
50         &     0.75 &  0.01 &     0.15 &  0.04 &     0.37 &  0.01 \\
500        &     0.79 &  0.01 &     0.29 &  0.04 &     0.46 &  0.02 \\
\bottomrule
\end{tabular}



## Regression

In [79]:
# Paths of all pickled regression results. glob() yields matches in a
# filesystem-dependent order, so sort them to keep the order in which the
# records are collected reproducible across machines and re-runs.
all_res_regression = sorted(glob('results/20221129_lipophilicity_regression/*'))

In [80]:
def _regression_record(run, source):
    """Build one summary row from a loaded regression result.

    ``run`` is the dict loaded from a result pickle and ``source`` is the key
    ('metrics' or 'baseline') whose scores are copied into the row.
    """
    record = {
        'train_size': run['train_size'],
        'representation': run['representation'],
    }
    scores = run[source]
    for name in ('r2', 'max_error', 'mean_absolute_error', 'mean_squared_error', 'rmse'):
        record[name] = scores[name]
    return record


# One record per result file: model scores and the baseline scores stored
# alongside them in the same pickle.
metrics_regression = []
baselines_regression = []

for res_file in all_res_regression:
    res = load_pickle(res_file)
    metrics_regression.append(_regression_record(res, 'metrics'))
    baselines_regression.append(_regression_record(res, 'baseline'))

In [81]:
# Turn both record lists into DataFrames (one row per result file), keeping
# the same variable names for the cells that follow.
metrics_regression, baselines_regression = (
    pd.DataFrame(metrics_regression),
    pd.DataFrame(baselines_regression),
)

In [85]:
# Mean and standard deviation of every regression metric per
# (representation, train_size), rounded to two decimals, as a LaTeX table.
metrics_regression_summary = (
    metrics_regression
    .groupby(['representation', 'train_size'])
    .agg(['mean', 'std'])
    .round(2)
)
print(metrics_regression_summary.to_latex())

\begin{tabular}{llrrrrrrrrrr}
\toprule
       &     & \multicolumn{2}{l}{r2} & \multicolumn{2}{l}{max\_error} & \multicolumn{2}{l}{mean\_absolute\_error} & \multicolumn{2}{l}{mean\_squared\_error} & \multicolumn{2}{l}{rmse} \\
       &     &  mean &   std &      mean &   std &                mean &   std &               mean &   std &  mean &   std \\
representation & train\_size &       &       &           &       &                     &       &                    &       &       &       \\
\midrule
inchi & 10  & -0.60 &  0.15 &      4.17 &  0.75 &                1.26 &  0.08 &               2.47 &  0.34 &  1.57 &  0.11 \\
       & 50  & -0.19 &  0.30 &      3.86 &  0.42 &                1.09 &  0.15 &               1.87 &  0.49 &  1.36 &  0.18 \\
       & 500 & -0.04 &  0.10 &      3.55 &  0.20 &                1.02 &  0.05 &               1.61 &  0.17 &  1.27 &  0.07 \\
iupac\_name & 10  & -0.29 &  0.28 &      4.25 &  0.48 &                1.12 &  0.13 &               1.98 &  0.48 &

In [87]:
# Baseline scores are aggregated per training-set size only. 'representation'
# is a string column that is not part of the group key, so drop it explicitly:
# older pandas silently discarded it during mean/std, whereas pandas >= 2.0
# raises a TypeError when asked to average a non-numeric column.
baselines_regression_summary = (
    baselines_regression
    .drop(columns=['representation'])
    .groupby(['train_size'])
    .agg(['mean', 'std'])
    .round(2)
)
print(baselines_regression_summary.to_latex())

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} & \multicolumn{2}{l}{r2} & \multicolumn{2}{l}{max\_error} & \multicolumn{2}{l}{mean\_absolute\_error} & \multicolumn{2}{l}{mean\_squared\_error} & \multicolumn{2}{l}{rmse} \\
{} &  mean &   std &      mean &   std &                mean &   std &               mean &   std &  mean &   std \\
train\_size &       &       &           &       &                     &       &                    &       &       &       \\
\midrule
10         & -0.09 &  0.06 &      3.82 &  0.17 &                1.01 &  0.05 &               1.69 &  0.19 &  1.30 &  0.07 \\
50         &  0.07 &  0.07 &      3.26 &  0.30 &                0.96 &  0.06 &               1.43 &  0.16 &  1.19 &  0.07 \\
500        &  0.38 &  0.02 &      3.11 &  0.44 &                0.79 &  0.03 &               1.00 &  0.05 &  1.00 &  0.03 \\
\bottomrule
\end{tabular}

