In [129]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [130]:
import pandas as pd
from glob import glob 
from fastcore.xtras import load_pickle

from gpt3forchem.output import get_regression_metrics
from gpt3forchem.api_wrappers import extract_prediction

from pycm import ConfusionMatrix

## Classification

In [131]:
# Paths of all pickled ESOL classification results.
# glob() yields entries in arbitrary (filesystem-dependent) order; sort them so
# that positional look-ups such as `all_res[-1]` pick the same file on every run.
all_res = sorted(glob('results/20221130_esol/*'))

In [132]:
def _restripped_cm(cm):
    """Rebuild a ConfusionMatrix from `cm` with normalised labels.

    Every entry of `cm.actual_vector` and `cm.predict_vector` is cast to `str`
    and stripped of surrounding whitespace before a new ConfusionMatrix is
    built from the two vectors.
    (Presumably this makes labels that differ only by type or padding compare
    equal -- TODO confirm against how the stored matrices were created.)
    """
    actual = [str(label).strip() for label in cm.actual_vector]
    predicted = [str(label).strip() for label in cm.predict_vector]
    return ConfusionMatrix(actual, predicted)


def _classification_row(res, cm):
    """Return one summary record for a result dict `res` scored by `cm`."""
    return {
        'train_size': res['train_size'],
        'representation': res['representation'],
        # NOTE(review): ACC_Macro looks like pycm's class-averaged accuracy,
        # not the overall fraction of correct predictions -- confirm this is
        # the quantity that should be reported as 'accuracy'.
        'accuracy': cm.ACC_Macro,
        'f1_macro': cm.F1_Macro,
        'f1_micro': cm.F1_Micro,
    }


# One record per result file: the model's scores and the baseline's scores.
metrics = []
baselines = []

for res_path in all_res:
    # NOTE(review): load_pickle presumably unpickles the file; only point this
    # at result files you trust.
    res = load_pickle(res_path)
    cm = _restripped_cm(res['cm'])
    baseline_cm = _restripped_cm(res['baseline']['cm'])
    metrics.append(_classification_row(res, cm))
    baselines.append(_classification_row(res, baseline_cm))

In [133]:
# Keep the last entry of `all_res` loaded as `res` for ad-hoc inspection.
last_result_path = all_res[-1]
res = load_pickle(last_result_path)

In [134]:
# Turn the collected record lists into DataFrames (one row per result file),
# rebinding the same two names that the following cells read.
metrics, baselines = (
    pd.DataFrame(records) for records in (metrics, baselines)
)

In [142]:
# Mean and standard deviation of each classification metric for every
# (representation, train_size) pair, rounded to two decimals, as a LaTeX table.
metrics_by_setting = metrics.groupby(['representation', 'train_size'])
metrics_summary = metrics_by_setting.agg(['mean', 'std']).round(2)
print(metrics_summary.to_latex())

\begin{tabular}{llrrrrrr}
\toprule
       &     & \multicolumn{2}{l}{accuracy} & \multicolumn{2}{l}{f1\_macro} & \multicolumn{2}{l}{f1\_micro} \\
       &     &     mean &   std &     mean &   std &     mean &   std \\
representation & train\_size &          &       &          &       &          &       \\
\midrule
inchi & 10  &     0.76 &  0.02 &     0.13 &  0.03 &     0.39 &  0.04 \\
       & 50  &     0.80 &  0.02 &     0.21 &  0.07 &     0.49 &  0.05 \\
       & 500 &     0.90 &  0.01 &     0.66 &  0.05 &     0.75 &  0.02 \\
iupac\_name & 10  &     0.76 &  0.02 &     0.14 &  0.04 &     0.40 &  0.05 \\
       & 50  &     0.77 &  0.01 &     0.17 &  0.04 &     0.42 &  0.02 \\
       & 500 &     0.88 &  0.01 &     0.62 &  0.03 &     0.70 &  0.02 \\
selfies & 10  &     0.78 &  0.03 &     0.17 &  0.06 &     0.44 &  0.07 \\
       & 50  &     0.80 &  0.02 &     0.22 &  0.04 &     0.49 &  0.05 \\
       & 500 &     0.89 &  0.01 &     0.64 &  0.04 &     0.71 &  0.03 \\
smiles & 10  &     0.

  print(metrics.groupby(['representation', 'train_size']).agg(['mean', 'std']).round(2).to_latex())


In [144]:
# Mean and standard deviation of each baseline classification metric per
# training-set size, rounded to two decimals, as a LaTeX table.
# `representation` holds strings and is not a grouping key here, so it must not
# reach mean/std: older pandas silently dropped it with a warning, newer pandas
# raises a TypeError. Dropping it explicitly gives the same table on both.
baselines_summary = (
    baselines
    .drop(columns=['representation'])
    .groupby(['train_size'])
    .agg(['mean', 'std'])
    .round(2)
)
print(baselines_summary.to_latex())

\begin{tabular}{lrrrrrr}
\toprule
{} & \multicolumn{2}{l}{accuracy} & \multicolumn{2}{l}{f1\_macro} & \multicolumn{2}{l}{f1\_micro} \\
{} &     mean &   std &     mean &   std &     mean &   std \\
train\_size &          &       &          &       &          &       \\
\midrule
10         &     0.76 &  0.02 &     0.13 &  0.04 &     0.41 &  0.05 \\
50         &     0.85 &  0.01 &     0.41 &  0.07 &     0.63 &  0.03 \\
500        &     0.87 &  0.05 &     0.39 &  0.36 &     0.43 &  0.39 \\
\bottomrule
\end{tabular}



  print(baselines.groupby(['train_size']).agg(['mean', 'std']).round(2).to_latex())
  print(baselines.groupby(['train_size']).agg(['mean', 'std']).round(2).to_latex())


## Regression

In [145]:
# Paths of all pickled ESOL regression results.
regression_results_pattern = 'results/20221129_esol_regression/*'
all_res_regression = glob(regression_results_pattern)

In [146]:
# Regression scores copied from each result file, in output-column order.
regression_keys = (
    'r2',
    'max_error',
    'mean_absolute_error',
    'mean_squared_error',
    'rmse',
)

# One record per result file: the model's scores (res['metrics']) and the
# baseline's scores (res['baseline']).
metrics_regression = []
baselines_regression = []

for res_path in all_res_regression:
    res = load_pickle(res_path)

    model_row = {
        'train_size': res['train_size'],
        'representation': res['representation'],
    }
    model_row.update({key: res['metrics'][key] for key in regression_keys})
    metrics_regression.append(model_row)

    baseline_row = {
        'train_size': res['train_size'],
        'representation': res['representation'],
    }
    baseline_row.update({key: res['baseline'][key] for key in regression_keys})
    baselines_regression.append(baseline_row)

In [147]:
# Turn the collected regression record lists into DataFrames (one row per
# result file), rebinding the same two names that the following cells read.
metrics_regression, baselines_regression = (
    pd.DataFrame(records)
    for records in (metrics_regression, baselines_regression)
)

In [150]:
# Mean and standard deviation of each regression metric for every
# (representation, train_size) pair, rounded to two decimals, as a LaTeX table.
regression_by_setting = metrics_regression.groupby(['representation', 'train_size'])
regression_summary = regression_by_setting.agg(['mean', 'std']).round(2)
print(regression_summary.to_latex())

\begin{tabular}{llrrrrrrrrrr}
\toprule
       &     & \multicolumn{2}{l}{r2} & \multicolumn{2}{l}{max\_error} & \multicolumn{2}{l}{mean\_absolute\_error} & \multicolumn{2}{l}{mean\_squared\_error} & \multicolumn{2}{l}{rmse} \\
       &     &  mean &   std &      mean &   std &                mean &   std &               mean &   std &  mean &   std \\
representation & train\_size &       &       &           &       &                     &       &                    &       &       &       \\
\midrule
inchi & 10  & -0.20 &  0.21 &      6.09 &  0.90 &                1.73 &  0.18 &               4.75 &  0.89 &  2.17 &  0.21 \\
       & 50  &  0.16 &  0.13 &      5.81 &  0.78 &                1.42 &  0.12 &               3.45 &  0.54 &  1.85 &  0.15 \\
       & 500 &  0.71 &  0.05 &      4.60 &  1.13 &                0.78 &  0.06 &               1.15 &  0.21 &  1.07 &  0.10 \\
iupac\_name & 10  & -0.46 &  0.30 &      5.91 &  0.23 &                1.99 &  0.28 &               5.81 &  1.27 &

  print(metrics_regression.groupby(['representation', 'train_size']).agg(['mean', 'std']).round(2).to_latex())


In [151]:
# Mean and standard deviation of each baseline regression metric per
# training-set size, rounded to two decimals, as a LaTeX table.
# `representation` holds strings and is not a grouping key here, so it must not
# reach mean/std: older pandas silently dropped it with a warning, newer pandas
# raises a TypeError. Dropping it explicitly gives the same table on both.
baselines_regression_summary = (
    baselines_regression
    .drop(columns=['representation'])
    .groupby(['train_size'])
    .agg(['mean', 'std'])
    .round(2)
)
print(baselines_regression_summary.to_latex())

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} & \multicolumn{2}{l}{r2} & \multicolumn{2}{l}{max\_error} & \multicolumn{2}{l}{mean\_absolute\_error} & \multicolumn{2}{l}{mean\_squared\_error} & \multicolumn{2}{l}{rmse} \\
{} &  mean &   std &      mean &   std &                mean &   std &               mean &   std &  mean &   std \\
train\_size &       &       &           &       &                     &       &                    &       &       &       \\
\midrule
10         &  0.01 &  0.03 &      5.66 &  0.25 &                1.55 &  0.02 &               3.74 &  0.07 &  1.93 &  0.02 \\
50         &  0.60 &  0.03 &      4.28 &  0.30 &                0.94 &  0.04 &               1.55 &  0.14 &  1.24 &  0.06 \\
500        &  0.89 &  0.01 &      2.28 &  0.60 &                0.50 &  0.01 &               0.45 &  0.03 &  0.67 &  0.02 \\
\bottomrule
\end{tabular}



  print(baselines_regression.groupby(['train_size']).agg(['mean', 'std']).round(2).to_latex())
  print(baselines_regression.groupby(['train_size']).agg(['mean', 'std']).round(2).to_latex())
