In [None]:
import torch
from pathlib import Path
import numpy as np
import pandas as pd
from tensorboard.backend.event_processing.event_multiplexer import EventMultiplexer
import matplotlib as mpl
import matplotlib.pyplot as plt
from itertools import product
from scipy.interpolate import Rbf
from uncertainties import ufloat
from scipy.special import erf
from matplotlib.patches import Rectangle

from deepqmc.wf.paulinet import DistanceBasis

In [None]:
# Global matplotlib styling shared by every figure in this notebook.
# This has to live in its own cell, apart from the matplotlib import, see
# https://github.com/ipython/ipython/issues/11098
mpl.rcParams['figure.dpi'] = 100
mpl.rc('font', **{'family': 'serif', 'serif': 'STIXGeneral', 'size': 10})
mpl.rc('mathtext', **{'fontset': 'stix'})
# Colors of the default property cycle, so that lines can reuse them explicitly.
COLORS = plt.rcParams['axes.prop_cycle'].by_key()['color']

In [None]:
def savefig(fig, name, ext='pdf', **kwargs):
    """Write ``fig`` to ``../pub/figs/{name}.{ext}`` with fixed export settings.

    The figure is saved with a transparent background and a tight bounding box
    without padding. Any additional keyword arguments are forwarded unchanged
    to ``fig.savefig``.
    """
    target = f'../pub/figs/{name}.{ext}'
    fig.savefig(target, transparent=True, bbox_inches='tight', pad_inches=0, **kwargs)

## Learning curves

In [None]:
# Reference energies per system as a pair (ref[0], ref[1]). to_corr() maps ref[0]
# to 0 and ref[1] to 1; the axis label used in plot_mean_var() identifies them as
# E_HF and E_0, respectively.
# NOTE(review): presumably total energies in Hartree -- confirm units and sources.
ref_energies = {
    'H2': (-1.133509, -1.17447),
    'LiH': (-7.98737, -8.070548),
    'Be': (-14.57301, -14.66736),
    'B': (-24.53316, -24.65391),
    'H10-1.8': (-5.3974043, -5.6655),
}

def to_corr(x, ref):
    """Express ``x`` as a fraction of the gap between the two reference values.

    The result is 0 where ``x == ref[0]`` and 1 where ``x == ref[1]``.
    ``x`` may be a scalar or an array.
    """
    start, target = ref[0], ref[1]
    gap = start - target
    return (start - x) / gap

In [None]:
# Gather the TensorBoard event files of both run directories in one multiplexer.
mplx = EventMultiplexer()
for run_dir in ('../runs/r007', '../runs/r009'):
    mplx.AddRunsFromDirectory(run_dir)
mplx.Reload()

In [None]:
# Names of all runs known to the multiplexer; shown for inspection.
runs = list(mplx.Runs())
runs

In [None]:
# Assemble the logged 'E_loc/mean' and 'E_loc/var' scalars of all selected runs
# into a single frame, one row per (system, ansatz, step).
# NOTE(review): assumes every run name has the form '<system>/<ansatz>' -- confirm.
df = (
    pd.concat(
        {
            (*run.split('/'), var):
            pd.DataFrame(mplx.Scalars(run, f'E_loc/{var}')).set_index('step')
            for var in ['mean', 'var']
            for run in runs
            # of the runs whose name contains 'H10', keep only those containing '1.8'
            if ('H10' not in run or '1.8' in run)
        },
        names=['system', 'ansatz', 'var']
    )
    # move the 'mean'/'var' key level from the index into the columns
    .unstack('var')
    # a tuple is a single MultiIndex column label here, not a list of labels
    .drop(columns=('wall_time', 'var'))
)
# Positional rename: relies on the column order left by unstack/drop.
# NOTE(review): presumably (wall_time, mean), (value, mean), (value, var) -- confirm.
df.columns = ['wall_time', 'E_mean', 'E_var']
df.head()

In [None]:
# Inspect the distinct values of every index level of the assembled frame.
df.index.levels

In [None]:
def ewm(x, X, Y, widths):
    """Causal, exponentially weighted mean of the samples ``(X, Y)`` at points ``x``.

    For every evaluation point ``x[i]`` only samples with ``X[j] <= x[i]``
    contribute, with weight ``exp(-(x[i] - X[j]) / widths) / widths``.

    Parameters
    ----------
    x : 1-D array of evaluation points.
    X : 1-D array of sample positions.
    Y : 1-D array of sample values, same length as ``X``.
    widths : scalar, or array broadcastable against ``X``, giving the decay
        length of each sample.

    Returns
    -------
    1-D array with one weighted mean per entry of ``x``. Entries are NaN where
    no sample carries any weight (e.g. ``x[i]`` lies before all samples).
    """
    lag = x[:, None] - X
    causal = lag >= 0
    # np.where evaluates both branches, so the exponential would also be computed
    # for future samples (negative lag), where it can overflow to inf and emit a
    # RuntimeWarning. Those entries are discarded anyway; clamp their lag to zero.
    safe_lag = np.where(causal, lag, 0)
    ws = np.where(causal, 1 / widths * np.exp(-safe_lag / widths), 0)
    return (Y * ws).sum(axis=-1) / ws.sum(axis=-1)
    

def plot_mean_var(ax, df, ref_enes, bs, plot_sd=True):
    """Plot a smoothed learning curve of the relative energy error on ``ax``.

    The plotted quantity is ``1 - to_corr(E, ref_enes)``, where ``E`` is the
    causally smoothed ``E_mean`` column. A dotted horizontal line marks 0.01.

    Parameters
    ----------
    ax : matplotlib axes to draw on; both axes are switched to log scale and
        axis labels are set.
    df : frame with columns ``E_mean`` and ``E_var`` that yields a ``step``
        column after ``reset_index()``.
    ref_enes : pair of reference energies, forwarded to ``to_corr``.
    bs : the standard deviation curve is divided by ``sqrt(bs)``.
        NOTE(review): presumably the number of samples per step -- confirm.
    plot_sd : if true, also draw ``sqrt(E_var)`` (smoothed with a fixed width of
        20, scaled by the reference energy gap and ``sqrt(bs)``) as a dashed
        black line.

    Returns
    -------
    ``(l_mean, l_var)`` line handles; ``l_var`` is ``None`` if ``plot_sd`` is false.
    """
    step, E_mean, E_var = df.reset_index()[['step', 'E_mean', 'E_var']].values.T
    # Evaluation grid: 0 followed by 1000 log-spaced points from 1 to the last step.
    x = np.hstack([[0], np.logspace(0, np.log10(step.max()), 1000)])
    ene_gap = ref_enes[0] - ref_enes[1]
    err = 1 - to_corr(df.E_mean.values, ref_enes)
    # Per-sample smoothing width, inversely proportional to the raw error; the
    # error is floored at 0.3e-2, which caps the width.
    ws = 2 / np.where(err > 0.3e-2, err, 0.3e-2)
    if plot_sd:
        sd = ewm(x, step, np.sqrt(E_var), 20) / ene_gap / np.sqrt(bs)
        l_var, = ax.plot(x, sd, linestyle='dashed', color='black')
    else:
        l_var = None
    l_mean, = ax.plot(x, 1 - to_corr(ewm(x, step, E_mean, ws), ref_enes))
    ax.axhline(0.01, color='black', ls='dotted')
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlabel('iterations')
    ax.set_ylabel(r'$1-(E-E_\mathrm{HF})/(E_0-E_\mathrm{HF})$')
    return l_mean, l_var


# Preview of a single learning curve (LiH with the MD-SJBF ansatz).
fig, ax = plt.subplots()
ax.set_xscale('log')
ax.set_yscale('log')

lih_curve = df.loc(axis=0)['LiH', 'MD-SJBF']
plot_mean_var(ax, lih_curve, ref_energies['LiH'], 10_000);

In [None]:
# Grid of learning curves: one row per system, one column per ansatz.
fig, axes = plt.subplots(5, 4, figsize=(3.4, 5.5), gridspec_kw={'hspace': 0.06, 'wspace': 0.04})
for (i, system), (j, ansatz) in product(
    enumerate(['H2', 'LiH', 'Be', 'B', 'H10-1.8']),
    enumerate(['SD-SJ', 'SD-SJBF', 'MD-SJ', 'MD-SJBF']),
):
    ax = axes[i, j]
    l_mean, l_var = plot_mean_var(
        ax,
        df.loc(0)[system, ansatz],
        ref_energies[system],
        # value passed as `bs`: 4_000 for H10, 10_000 for every other system
        10_000 if 'H10' not in system else 4_000
    )
    ax.set_xticks([1, 10, 100, 1000])
    ax.set_xticklabels([1, '', '', r'$10^3$'])
    ax.set_yticks([1e-3, 1e-2, 1e-1, 1])
    ax.set_yticklabels(['0.1%', '1%', '10%', '100%'])
    # ax.set_title(f'{system} – {ansatz}', fontdict={'fontsize': 10})
    # column headers on the top row only
    if i == 0:
        ax.set_title(ansatz, fontdict={'fontsize': 10})
    # row labels (system name without the '-1.8' suffix) on the first column only
    if j == 0:
        ax.set_ylabel(system.replace('-1.8', ''), labelpad=17)
    ax.axhline(1, color='black', ls='dotted')
    ax.set_xlim(1, 1e4)
    ax.set_ylim(1e-3, 8)
    # ax2.set_ylim(1e-5, 10)
    # x ticks are kept on the bottom row only (this overrides the ticks set above)
    if i != 4:
        ax.set_xticks([])
    # else:
    #     ax.set_xticks([2.5, 5, 7.5, 25, 50, 75, 250, 500, 750, 2500, 5000, 7500], minor=True)
    # per-panel x labels set by plot_mean_var are removed; a shared label is added below
    ax.set_xlabel(None)
    # y ticks and labels are kept on the first column only
    if j != 0:
        ax.set_yticks([])
        ax.set_ylabel(None)
    # else:
    #     ax.set_yticks(np.array([0.25, 0.5, 0.75, 2.5, 5, 7.5, 25, 50, 75, 250, 500])/100, minor=True)
    # if j != 3:
    #     ax2.set_yticks([])
    #     ax2.set_ylabel(None)
# The legend reuses the line handles of the last panel drawn in the loop.
fig.legend(
    [l_mean, l_var],
    ['mean', 'SD of mean'],
    loc='lower center',
    bbox_to_anchor=(0.65, 0.0),
    ncol=2,
)
# Shared axis captions placed in figure coordinates.
fig.text(-0.03, 0.6, 'correlation energy error', rotation='vertical', ha='center')
fig.text(0.5, 0.08, 'iterations', ha='center')
fig.tight_layout()
# make room at the bottom for the legend and the shared x caption
fig.subplots_adjust(bottom=0.15) 

In [None]:
# Export whatever figure `fig` currently refers to; this cell is meant to run
# right after the learning-curve grid cell.
fig.savefig('../pub/figs/learning-curves.pdf', bbox_inches='tight')

## Interaction curves

In [None]:
# H2 dissociation-curve energies, one array per method.
# NOTE(review): judging by the file names these are Hartree-Fock and FCI energies
# in the cc-pVQZ basis plus the mean energies of the neural-network ansatz;
# array layout and units are not visible here -- confirm.
ene_h2_hf = np.load('../data/h2/discurve_H2_E_hf_ccpVQZ.npy')
ene_h2_fci = np.load('../data/h2/discurve_H2_E_fci_ccpVQZ.npy')
ene_h2_dl = np.load('../data/h2/discurve_H2_mean.npy')

In [None]:
def _load_h10_curve(pattern):
    """Collect the files matching ``pattern`` below the N_10_OBC directory.

    Returns an array with one row per matching file, in sorted path order:
    the number parsed from the parent directory name (with its first two
    characters stripped), followed by the values read from the file.
    """
    root = Path('../data/motta-hydrogen/N_10_OBC')
    rows = [
        (float(path.parent.name[2:]), *np.loadtxt(path))
        for path in sorted(root.glob(pattern))
    ]
    return np.array(rows)


ene_h10_hf = _load_h10_curve('*/RHF_CBS')
ene_h10_exact = _load_h10_curve('*/MRCI+Q+F12_CBS')

In [None]:
# H10 energies of the PauliNet ansatz: 9 values, plotted below against
# ene_h10_hf[1:, 0] after division by 10.
ene_h10_paulinet = np.array([-5.129,-5.460,-5.616,-5.651,-5.626,-5.495,-5.340,-5.218,-5.107])
# DeepWF reference curve given as a two-column table; only the second column is
# kept, and the plots below use it without dividing by 10.
# NOTE(review): presumably (distance, energy per atom) pairs -- confirm the source.
ene_h10_deepwf2 = np.array("""
0.99967873	-0.43235004
1.2030338	-0.5072482
1.4010332	-0.5403461
1.5985633	-0.5509359
1.800484	-0.55702364
1.9976655	-0.5508931
2.3964996	-0.5379886
2.7951126	-0.5144731
3.1983907	-0.4996388
3.6016958	-0.4860907
""".split()).astype(float).reshape(-1, 2)[:, 1]

In [None]:
# Wide draft of the interaction-curve figure (displayed only, not written to disk).
fig, axes = plt.subplots(1, 2, figsize=(5.5, 1.5), sharey=True, sharex=True)
ax_h2, ax_h10 = axes
dist_h2 = np.arange(.8, 3.65, .2)
dist_h10 = ene_h10_hf[1:, 0]

# (a) H2, energies divided by the number of atoms
ax_h2.plot(dist_h2, ene_h2_hf[1:] / 2)
l1, = ax_h2.plot(dist_h2, ene_h2_dl[1:] / 2)
ax_h2.plot(dist_h2, ene_h2_fci[1:] / 2, color='black', ls='dotted')

# (b) H10; the DeepWF values are used as stored, without division by 10
l0, = ax_h10.plot(dist_h10, ene_h10_hf[1:, 1] / 10)
ax_h10.plot(dist_h10, ene_h10_paulinet / 10, color=COLORS[1])
l2, = ax_h10.plot(dist_h10, ene_h10_deepwf2[1:], color=COLORS[2])
l3, = ax_h10.plot(ene_h10_exact[1:, 0], ene_h10_exact[1:, 1] / 10, color='black', ls='dotted')

for panel, tag in zip(axes, ['(a)', '(b)']):
    panel.set_xlabel(r'$d_\mathrm{HH}/a_0$')
    panel.text(.8, -.48, tag)
ax_h2.set_ylabel(r'$E_0/(N_\mathrm{H}E_\mathrm{h})$')
fig.legend(
    [l0, l2, l1, l3],
    ['Hartree–Fock', 'DeepWF', 'PauliNet', 'exact'],
    loc='center left', bbox_to_anchor=(.95, 0.7), ncol=1
)

In [None]:
# Interaction-curve figure as exported: H2 in panel (a), H10 in panel (b).
fig, axes = plt.subplots(1, 2, figsize=(3.5, 1.85), sharey=True, sharex=True)
ax_h2, ax_h10 = axes
dist_h2 = np.arange(.8, 3.65, .2)
dist_h10 = ene_h10_hf[1:, 0]

# (a) H2, energies divided by the number of atoms
ax_h2.plot(dist_h2, ene_h2_hf[1:] / 2)
l1, = ax_h2.plot(dist_h2, ene_h2_dl[1:] / 2)
ax_h2.plot(dist_h2, ene_h2_fci[1:] / 2, color='black', ls='dotted')

# (b) H10; the DeepWF values are used as stored, without division by 10
l0, = ax_h10.plot(dist_h10, ene_h10_hf[1:, 1] / 10)
ax_h10.plot(dist_h10, ene_h10_paulinet / 10, color=COLORS[1])
l2, = ax_h10.plot(dist_h10, ene_h10_deepwf2[1:], color=COLORS[2])
l3, = ax_h10.plot(ene_h10_exact[1:, 0], ene_h10_exact[1:, 1] / 10, color='black', ls='dotted')

for panel, tag in zip(axes, ['(a)', '(b)']):
    panel.set_xlabel(r'$d_\mathrm{HH}/a_0$')
    panel.text(.8, -.48, tag)
ax_h2.set_ylabel(r'$E_0/(N_\mathrm{H}E_\mathrm{h})$')
ax_h2.set_yticks([-0.5, -0.6])
fig.legend(
    [l0, l1, l2, l3],
    ['Hartree–Fock', 'PauliNet', 'DeepWF', 'exact'],
    loc='center left', bbox_to_anchor=(.2, 1.05), ncol=2
)
fig.tight_layout()
fig.savefig('../pub/figs/int-curves.pdf', bbox_inches='tight')

## Distance basis

In [None]:
# Output of DistanceBasis(32, envelope='nocusp') on 300 equidistant points in [0, 12].
fig, ax = plt.subplots(1, 1, figsize=(2, 2))
x = torch.linspace(0, 12, 300)
basis = DistanceBasis(32, envelope='nocusp')
features = basis(x)
ax.plot(x.numpy(), features.numpy())
ax.set_xlabel(r'$r/a_0$')
ax.set_ylabel(r'$\mathbf{e}(r)$')
ax.set_yticks([0, 0.4])
fig.savefig('../pub/figs/dist-features.pdf', bbox_inches='tight')

## Determinants

- NN-QMC: 1s to 10s
- standard MD-QMC: 100s to 100,000s
- NN-CI: 100,000s
- FCI-QMC: 1,000,000s to 100,000,000s
- FCI: 100,000,000s to 1,000,000,000s

In [None]:
def sstep(x):
    """Smooth step function: 0 for ``x <= 0``, 1 for ``x >= 1``.

    In between, the polynomial ``-20 x^7 + 70 x^6 - 84 x^5 + 35 x^4`` is used,
    which evaluates to 0 at ``x = 0`` and to 1 at ``x = 1``.

    ``x`` may be a scalar or an array of any real dtype; the result is always a
    float array of the same shape.
    """
    # np.piecewise allocates the output with the dtype of its input, so integer
    # input would silently truncate the polynomial values.
    x = np.asarray(x, dtype=float)
    return np.piecewise(
        x,
        # The conditions must be mutually exclusive: where several are true,
        # np.piecewise uses the last one, so a bare `x < 1` would apply the
        # polynomial to negative x as well.
        [x <= 0, (x > 0) & (x < 1)],
        [0, lambda t: -20 * t ** 7 + 70 * t ** 6 - 84 * t ** 5 + 35 * t ** 4, 1],
    )


def get_bar(ws, vmax=1, dens=100):
    """Return a 1-D trapezoidal profile: linear rise, plateau, linear fall.

    Parameters
    ----------
    ws : sequence whose first three entries are the widths of the rising,
        flat and falling segment.
    vmax : height of the plateau.
    dens : number of samples per unit width.

    Returns
    -------
    1-D array with ``round(dens * w)`` samples per segment, concatenated.
    """
    # np.linspace / np.ones require integer sample counts; round so that
    # fractional widths work as well (integer widths are unaffected).
    n_rise, n_flat, n_fall = (int(round(dens * ws[k])) for k in range(3))
    return np.hstack(
        [
            np.linspace(0, vmax, n_rise),
            vmax * np.ones(n_flat),
            np.linspace(vmax, 0, n_fall),
        ]
    )


# Horizontal bars showing the range of determinant counts of each method.
fig, ax = plt.subplots(figsize=(2.7, 1.6))
# (label, lower end of the range, upper end of the range)
payload = [
    ('multideterminant\nQMC + NNs', 2, 50),
    ('multideterminant QMC', 100, 1e5),
    ('configuration\ninteraction + NNs', 1e5, 1e6),
    ('configuration interaction', 2e6, 2e9),
]
labels = [label for label, _, _ in payload]

# one grey bar of height 0.8 per method, stacked bottom to top
for row, (_, lower, upper) in enumerate(payload):
    bar = Rectangle((lower, row + 0.1), upper - lower, 0.8, color='grey')
    ax.add_patch(bar)
ax.set_xlim(1, 1e10)
ax.set_ylim(0, 4)
ax.set_xscale('log')
ax.set_xlabel('number of determinants')
# dashed divider between the two regimes, captioned above the axes
ax.axvline(1e5, color='black', ls='dashed')
ax.text(2.5, 4.3, '1st quantization', fontstyle='italic')
ax.text(2.0e5, 4.3, '2nd quantization', fontstyle='italic')
ax.set_yticks([row + 0.5 for row in range(len(payload))])
ax.set_yticklabels(labels, ha='right', fontsize=9)
savefig(fig, 'ndets')

## Cyclobutadiene

In [None]:
# Training trajectories; the plotting cell below reads the columns
# 'batch', 'state', 'step' and 'energy_ewm'.
results = pd.read_csv('../data/processed/cyclobutadiene-fit.csv')

In [None]:
# Training curves: one panel per value of 'batch', one line per state.
fig, axes = plt.subplots(1, 2, figsize=(6, 3), sharex=True, sharey=True)
for (batch, state), traj in results.groupby(['batch', 'state']):
    col = {250: 0, 375: 1}[batch]
    ax = axes[col]
    ax.plot(traj['step'], traj['energy_ewm'], label=state)
    # All states of a batch share one panel and are told apart by the legend, so
    # the title names the batch only. Titling with the (batch, state) tuple left
    # each panel labelled with whichever state happened to be plotted last.
    ax.set_title(f'batch = {batch}')
for ax in axes.flat:
    ax.set_ylim(-154.65, -154.0)
    ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(1000))
    ax.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(250))
    ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(0.1))
    ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.02))
    # solid grid on major ticks, dotted grid on minor ticks
    ax.grid(axis='y', which='major')
    ax.grid(axis='y', which='minor', ls='dotted')
    ax.grid(axis='x', which='major')
    ax.grid(axis='x', which='minor', ls='dotted')
axes[1].legend()
fig.tight_layout()
savefig(fig, 'cyclobutadiene-training')

In [None]:
# Mean sampled energy with its standard error for every (batch, state), followed
# by the scaled difference between the 'ground' and 'transition' columns.
# Note that this rebinds `results`, which held the training trajectories before.
results = pd.read_csv('../data/processed/cyclobutadiene-sample.csv')
(
    results
    .groupby(['batch', 'state'])
    # ufloat(mean, standard error), with standard error = sample std / sqrt(n)
    .apply(lambda x: ufloat(x['energy'].mean(), x['energy'].std() / np.sqrt(len(x))))
    .unstack()
    # NOTE(review): 632 is presumably a unit conversion factor; if Hartree -> kcal/mol
    # is intended, the standard value is 627.509 -- confirm.
    .pipe(lambda x: 632 * (x['ground'] - x['transition']))
)