In [1]:
import os
import re
import json
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
from scipy.optimize import curve_fit

In [2]:
import sys
sys.path.append("/g/g92/dskoda/prj/dskrc/python/inits")

from plotting import *
plt.rcParams['figure.dpi'] = 150
plt.style.use("paper")

In [3]:
hpr = pd.read_csv("02_data/mace_hyperparams.csv", index_col=0)
lcv = pd.read_csv("02_data/mace_learning.csv", index_col=0)
lls = pd.read_csv("02_data/mace_losslands.csv", index_col=0)
sdf = pd.read_csv("02_data/mace_entropy.csv", index_col=0)
exp = pd.read_csv("02_data/mace_explosion.csv", index_col=0)
ext = pd.read_csv("02_data/mace_extrapolation_errors.csv", index_col=0)

In [4]:
LABELS = {
    "no_rescaling": "v=2, L=3, no rescaling",
    "no_bessel": "v=2, L=3, all opt",
    "only_rescaling": "v=2, L=3, rescaling",
    "rescaling+bessel": "v=2, L=3, Bessel",
    "v2-base": "v=2, L=3, baseline",
    "v2-amsgrad": "v=2, L=3, AMSGrad",
    "v2-ema": "v=2, L=3, EMA",
    "v2-swa": "v=2, L=3, SWA",
    "v3_l3": "v=3, L=3",
    "v2_l3": "v=2, L=3",
    "v1_l3": "v=1, L=3",
    "v2_l2": "v=2, L=2",
    "v2_l1": "v=2, L=1",
    "v2_l0": "v=2, L=0",
    "v1_l0": "v=1, L=0",
}
FIGS_DIR = "04_si"

In [5]:
MODELS = {
    "BIG": ["no_rescaling", "no_bessel", "only_rescaling", "rescaling+bessel"],
    "OPT":  ["v2-base", "v2-swa", "v2-ema", "v2-amsgrad"],
    "V1": ["v1_l0", "v1_l3"],
    "V2": ["v2_l0", "v2_l1", "v2_l2", "v2_l3"] ,
    "VL3": ["v1_l3", "v2_l3", "v3_l3"],
}

MODELS_ORDER = MODELS["BIG"] + MODELS["OPT"] + MODELS["V1"] + MODELS["V2"] + MODELS["VL3"]
MODELS_L3 = MODELS["BIG"] + MODELS["OPT"] + MODELS["VL3"]

In [6]:
CMAPS = {
    "BIG": cm.Reds_r,
    "OPT": cm.Blues_r,
    "V1": cm.Greys_r,
    "V2": cm.Greens_r,
    "VL3": cm.Purples_r,
}

In [7]:
labels = pd.DataFrame(LABELS.values(), index=LABELS.keys(), columns=["label"])

In [8]:
_colors = {}
for mset, cases in MODELS.items():
    norm = plt.Normalize(0, len(cases))
    for i, name in enumerate(cases):
        _colors[name] = CMAPS[mset](norm(i))
    
labels["color"] = pd.Series(_colors)

## Hyperparameters

In [9]:
def df_to_latex_table(df, caption="", **kwargs):
    latex_str = df.to_latex(index=False, escape=False, **kwargs)
    latex_str = latex_str.replace('\\toprule', '\\hline \\hline')
    latex_str = latex_str.replace('\\midrule', '\\hline')
    latex_str = latex_str.replace('\\bottomrule', '\\hline \\hline')
    latex_str = latex_str.replace("True", r'\TrueMarker')
    latex_str = latex_str.replace("False", r'\FalseMarker')
    
    latex_lines = [
        re.sub(r'\s+', ' ', line)
        for line in latex_str.split(r"\\")
    ]
    latex_str = "\\\\ \n".join(latex_lines)
    return f'\\begin{{table}}[h]\n\\centering\n\\caption{{{caption}}}\n{latex_str}\\end{{table}}'

In [10]:
HPR_COLS = ["num_interactions", "hidden_irreps", "max_ell", "amsgrad", "ema", "swa", "scaling"]

In [11]:
_hpr = hpr.loc[MODELS_ORDER, HPR_COLS].rename(index=LABELS).reset_index()

In [12]:
print(df_to_latex_table(_hpr.copy(), "MACE hyperparameters"))

\begin{table}[h]
\centering
\caption{MACE hyperparameters}
\begin{tabular}{lrlrllll} \hline \hline index & num_interactions & hidden_irreps & max_ell & amsgrad & ema & swa & scaling \\ 
 \hline v=2, L=3, no rescaling & 2 & 256x0e + 256x1o + 256x2e & 3 & \TrueMarker & \TrueMarker & \TrueMarker & no_scaling \\ 
 v=2, L=3, all opt & 2 & 256x0e + 256x1o + 256x2e & 3 & \TrueMarker & \TrueMarker & \TrueMarker & rms_forces_scaling \\ 
 v=2, L=3, rescaling & 2 & 256x0e + 256x1o + 256x2e & 3 & \FalseMarker & \FalseMarker & \FalseMarker & rms_forces_scaling \\ 
 v=2, L=3, Bessel & 2 & 256x0e + 256x1o + 256x2e & 3 & \TrueMarker & \TrueMarker & \TrueMarker & rms_forces_scaling \\ 
 v=2, L=3, baseline & 2 & 256x0e + 256x1o + 256x2e & 3 & \FalseMarker & \FalseMarker & \FalseMarker & rms_forces_scaling \\ 
 v=2, L=3, SWA & 2 & 256x0e + 256x1o + 256x2e & 3 & \FalseMarker & \FalseMarker & \TrueMarker & rms_forces_scaling \\ 
 v=2, L=3, EMA & 2 & 256x0e + 256x1o + 256x2e & 3 & \FalseMarker & \TrueMarker

  latex_str = df.to_latex(index=False, escape=False, **kwargs)


## Table with the entropy results

In [19]:
ENTROPY_COLS = ["Se", "Sf", "S", "explosion_mean", "explosion_std"]

In [20]:
_sdf = sdf.loc[MODELS_ORDER, ENTROPY_COLS]

_failure = [
    f"${row['explosion_mean'] / 1000:0.2f} \\pm ${row['explosion_std'] / 1000:0.2f}"
    for _, row in _sdf.iterrows()
]
_sdf["Time to failure (ps)"] = _failure
_sdf = _sdf.drop(["explosion_mean", "explosion_std"], axis=1)
_sdf = _sdf.reset_index()
_sdf = _sdf.rename(index=LABELS)

In [22]:
print(df_to_latex_table(_sdf.copy(), "MACE entropy", float_format="%.2f"))

\begin{table}[h]
\centering
\caption{MACE entropy}
\begin{tabular}{lrrrl} \hline \hline index & Se & Sf & S & Time to failure (ps) \\ 
 \hline no_rescaling & -0.31 & -0.05 & -0.11 & $0.39 \pm $0.00 \\ 
 no_bessel & 0.63 & 2.48 & 2.11 & $2.53 \pm $1.40 \\ 
 only_rescaling & 0.28 & 2.45 & 2.02 & $1.94 \pm $0.90 \\ 
 rescaling+bessel & 0.52 & 2.53 & 2.13 & $1.86 \pm $0.81 \\ 
 v2-base & 0.28 & 2.45 & 2.02 & $2.25 \pm $1.37 \\ 
 v2-swa & 0.55 & 2.56 & 2.16 & $2.68 \pm $1.30 \\ 
 v2-ema & 0.08 & 2.45 & 1.97 & $2.13 \pm $0.90 \\ 
 v2-amsgrad & 0.48 & 2.48 & 2.08 & $2.71 \pm $1.51 \\ 
 v1_l0 & 1.25 & 1.52 & 1.47 & $0.25 \pm $0.00 \\ 
 v1_l3 & 2.04 & 2.17 & 2.14 & $1.92 \pm $0.84 \\ 
 v2_l0 & 0.92 & 1.93 & 1.73 & $0.39 \pm $0.00 \\ 
 v2_l1 & 1.15 & 2.74 & 2.42 & $0.89 \pm $0.10 \\ 
 v2_l2 & 0.42 & 2.47 & 2.06 & $3.42 \pm $1.97 \\ 
 v2_l3 & 0.47 & 2.44 & 2.04 & $1.72 \pm $0.91 \\ 
 v1_l3 & 2.04 & 2.17 & 2.14 & $1.92 \pm $0.84 \\ 
 v2_l3 & 0.47 & 2.44 & 2.04 & $1.72 \pm $0.91 \\ 
 v3_l3 & 0.63 &

  latex_str = df.to_latex(index=False, escape=False, **kwargs)


## Table with the extrapolation errors

## 3BPA, energies

In [25]:
ext.label.unique()

array(['v2-base', 'v2-amsgrad', 'v2-ema', 'v2-swa', 'v3_l3', 'v2_l3',
       'v1_l3', 'v2_l2', 'v2_l1', 'v2_l0'], dtype=object)

In [29]:
EXT_ORDER

['v2-base',
 'v2-swa',
 'v2-ema',
 'v2-amsgrad',
 'v2_l0',
 'v2_l1',
 'v2_l2',
 'v1_l3',
 'v2_l3',
 'v3_l3']

In [36]:
EXT_COLS = ["T", 25, 125, 250, 500]
EXT_ORDER = MODELS["OPT"] + MODELS["V2"][:-1] + MODELS["VL3"]

_ext = ext.loc[ext.nsamples > 5]
_ext = _ext.groupby(["nsamples", "label"]).first().reset_index()

_exts = []

for T in [300, 600, 1200]:
    vals = f"test_{T}K_energy"
    _tmp = _ext.pivot(index="label", columns="nsamples", values=vals)
    _tmp = _tmp * 1000
    _tmp["T"] = T
    _exts.append(_tmp)

_ext_e = pd.concat(_exts)
_ext_e = _ext_e.sort_values(["label", "T"])[EXT_COLS].loc[EXT_ORDER].reset_index()

In [None]:
print(df_to_latex_table(_ext_e.copy(), "NequIP Energy RMSE", float_format="%.1f"))

## 3BPA, forces

In [37]:
EXT_COLS = ["T", 25, 125, 250, 500]
EXT_ORDER = MODELS["OPT"] + MODELS["V2"][:-1] + MODELS["VL3"]

_ext = ext.loc[ext.nsamples > 5]
_ext = _ext.groupby(["nsamples", "label"]).first().reset_index()

_exts = []

for T in [300, 600, 1200]:
    vals = f"test_{T}K_forces"
    _tmp = _ext.pivot(index="label", columns="nsamples", values=vals)
    _tmp = _tmp * 1000
    _tmp["T"] = T
    _exts.append(_tmp)

_ext_f = pd.concat(_exts)
_ext_f = _ext_f.sort_values(["label", "T"])[EXT_COLS].loc[EXT_ORDER].reset_index()

In [38]:
print(df_to_latex_table(_ext_f.copy(), "NequIP Forces RMSE", float_format="%.1f"))

\begin{table}[h]
\centering
\caption{NequIP Forces RMSE}
\begin{tabular}{lrrrrr} \hline \hline label & T & 25 & 125 & 250 & 500 \\ 
 \hline v2-base & 300 & 65.3 & 23.9 & 17.3 & 12.0 \\ 
 v2-base & 600 & 102.4 & 47.8 & 39.9 & 31.0 \\ 
 v2-base & 1200 & 203.9 & 127.2 & 121.9 & 107.2 \\ 
 v2-swa & 300 & 71.5 & 26.2 & 18.3 & 12.2 \\ 
 v2-swa & 600 & 116.1 & 50.2 & 41.3 & 31.5 \\ 
 v2-swa & 1200 & 243.5 & 128.4 & 125.8 & 109.8 \\ 
 v2-ema & 300 & 66.6 & 24.4 & 17.5 & 12.0 \\ 
 v2-ema & 600 & 104.4 & 49.6 & 40.2 & 31.0 \\ 
 v2-ema & 1200 & 208.8 & 136.6 & 121.3 & 109.1 \\ 
 v2-amsgrad & 300 & 64.3 & 23.6 & 16.1 & 10.6 \\ 
 v2-amsgrad & 600 & 103.4 & 46.6 & 35.5 & 26.7 \\ 
 v2-amsgrad & 1200 & 207.9 & 117.1 & 98.2 & 87.3 \\ 
 v2_l0 & 300 & 194.5 & 90.6 & 61.4 & 47.5 \\ 
 v2_l0 & 600 & 311.3 & 179.6 & 130.1 & 108.4 \\ 
 v2_l0 & 1200 & 491.6 & 353.8 & 320.6 & 288.1 \\ 
 v2_l1 & 300 & 125.6 & 42.7 & 29.0 & 19.4 \\ 
 v2_l1 & 600 & 188.7 & 84.8 & 63.4 & 46.9 \\ 
 v2_l1 & 1200 & 346.2 & 196.5 & 163

  latex_str = df.to_latex(index=False, escape=False, **kwargs)


In [39]:
_ext = pd.concat([
    _ext_e.set_index(["label", "T"]),
    _ext_f.set_index(["label", "T"]),
], axis=1).reset_index()

In [41]:
print(df_to_latex_table(_ext.copy(), "MACE RMSE", float_format="%.1f"))

\begin{table}[h]
\centering
\caption{MACE RMSE}
\begin{tabular}{lrrrrrrrrr} \hline \hline label & T & 25 & 125 & 250 & 500 & 25 & 125 & 250 & 500 \\ 
 \hline v2-base & 300 & 3.3 & 0.7 & 0.3 & 0.1 & 65.3 & 23.9 & 17.3 & 12.0 \\ 
 v2-base & 600 & 3.6 & 1.0 & 0.6 & 0.4 & 102.4 & 47.8 & 39.9 & 31.0 \\ 
 v2-base & 1200 & 5.9 & 1.9 & 1.9 & 1.6 & 203.9 & 127.2 & 121.9 & 107.2 \\ 
 v2-swa & 300 & 1.5 & 0.3 & 0.2 & 0.2 & 71.5 & 26.2 & 18.3 & 12.2 \\ 
 v2-swa & 600 & 1.8 & 0.8 & 0.6 & 0.5 & 116.1 & 50.2 & 41.3 & 31.5 \\ 
 v2-swa & 1200 & 5.6 & 2.2 & 2.1 & 1.9 & 243.5 & 128.4 & 125.8 & 109.8 \\ 
 v2-ema & 300 & 2.2 & 0.3 & 0.2 & 0.7 & 66.6 & 24.4 & 17.5 & 12.0 \\ 
 v2-ema & 600 & 2.5 & 0.8 & 0.6 & 0.8 & 104.4 & 49.6 & 40.2 & 31.0 \\ 
 v2-ema & 1200 & 5.0 & 2.3 & 1.9 & 2.1 & 208.8 & 136.6 & 121.3 & 109.1 \\ 
 v2-amsgrad & 300 & 1.9 & 0.4 & 0.2 & 0.1 & 64.3 & 23.6 & 16.1 & 10.6 \\ 
 v2-amsgrad & 600 & 2.0 & 0.8 & 0.5 & 0.4 & 103.4 & 46.6 & 35.5 & 26.7 \\ 
 v2-amsgrad & 1200 & 4.1 & 1.9 & 1.6 & 1.4 

  latex_str = df.to_latex(index=False, escape=False, **kwargs)
