# APDFT Analysis of Multiplicity Gaps (MG)

Predictions can use

- **QC**: HF, CCSD, or CCSD(T) energy differences for the species of interest (the target),
- **APDFT**: HF, CCSD, or CCSD(T) energy differences by changing the nuclear charge of other systems that have the same number of electrons as the target,
- **APDFT*n***: Taylor series approximation with order *n* of the APDFT potential energy surface.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import colors

from qa_tools.utils import hartree_to_ev, all_atom_systems
from qa_tools.data import prepare_dfs
from qa_tools.prediction import get_qc_excitation, get_qats_excitation
from qa_tools.analysis import qats_error_excitation_energy

# Relative path to the post-HF atom data (JSON) read by this notebook.
json_path = '../../json-data/atom-pyscf.qa-data.posthf.json'
# prepare_dfs returns two objects: df_qc and df_qats, which are passed to every
# prediction/analysis call below.
# NOTE(review): get_CBS=False presumably skips the complete-basis-set
# ('CBS-aug') entries - confirm against qa_tools.data.prepare_dfs.
df_qc, df_qats = prepare_dfs(json_path, get_CBS=False)

## APDFT prediction errors

There is some intrinsic error in modeling a target system (e.g., N atom) by changing the nuclear charge of a reference system's basis set (e.g., C<sup>&ndash;</sup>).
The following cell computes this error, which represents the best performance of APDFT without fortuitous errors.

In [2]:
# Target whose multiplicity gap is predicted in the single-system cells below.
system_label = 'n'
target_charge = 0  # System charge

# Level of theory / state selection shared by the QC and APDFT look-ups.
basis_set = 'aug-cc-pV5Z'  # aug-cc-pVTZ, aug-cc-pVQZ, aug-cc-pV5Z, CBS-aug
excitation_level = 1

In [3]:
# Whether to use finite differences with Taylor series for APDFT predictions.
# Switched off in this cell.
use_qats = False

# Multiplicity gap of the target from the QC data, converted to eV.
mg_qc_prediction = hartree_to_ev(
    get_qc_excitation(
        df_qc, system_label, target_charge=target_charge,
        excitation_level=excitation_level,
        basis_set=basis_set
    )
)

# APDFT predictions of the same gap, keyed by the label of each returned
# system (the column headers in the printed table), converted to eV.
mg_qats_predictions = {
    ref_label: hartree_to_ev(ref_mg)
    for ref_label, ref_mg in get_qats_excitation(
        df_qc, df_qats, system_label, target_charge=target_charge,
        excitation_level=excitation_level,
        basis_set=basis_set, use_qats=use_qats
    ).items()
}

# Signed error of every APDFT prediction with respect to the QC value.
mg_qats_errors = {
    ref_label: ref_mg - mg_qc_prediction
    for ref_label, ref_mg in mg_qats_predictions.items()
}

print(f'PySCF prediction of MG for {system_label}: {mg_qc_prediction:.3f} eV\n')
# Plain strings: these messages have no placeholders, so no f-prefix is needed.
print('APDFT prediction errors in eV:')
print(pd.DataFrame(mg_qats_errors, index=['APDFT']))

PySCF prediction of MG for n: 2.679 eV

APDFT prediction errors in eV:
              b        c         f         o
APDFT  0.007094 -0.00066  0.001433  0.000469


## APDFT*n* prediction errors

Now, we can look at approximating the APDFT prediction by using a Taylor series centered on $\Delta Z = 0$.

In [4]:
use_qats = True  # Use finite differences with Taylor series for APDFT predictions.

# Multiplicity gap of the target from the QC data, converted to eV.
mg_qc_prediction = hartree_to_ev(
    get_qc_excitation(
        df_qc, system_label, target_charge=target_charge,
        excitation_level=excitation_level,
        basis_set=basis_set
    )
)

# Taylor-series (APDFTn) predictions keyed by the label of each returned
# system (the column headers in the printed table), converted to eV.
mg_qats_predictions = {
    ref_label: hartree_to_ev(ref_mg)
    for ref_label, ref_mg in get_qats_excitation(
        df_qc, df_qats, system_label, target_charge=target_charge,
        excitation_level=excitation_level,
        basis_set=basis_set, use_qats=use_qats
    ).items()
}

# Signed error of every APDFTn prediction with respect to the QC value.
mg_qats_errors = {
    ref_label: ref_mg - mg_qc_prediction
    for ref_label, ref_mg in mg_qats_predictions.items()
}

print(f'PySCF prediction of MG for {system_label}: {mg_qc_prediction:.3f} eV\n')
# Plain string: this message has no placeholders, so no f-prefix is needed.
print('APDFTn prediction errors in eV:')
# Rows are labelled APDFT0 ... APDFT4, one per Taylor-series order.
print(pd.DataFrame(mg_qats_errors, index=[f'APDFT{n}' for n in range(5)]))

PySCF prediction of MG for n: 2.679 eV

APDFTn prediction errors in eV:
                 b             c         f         o
APDFT0   -2.365337     -1.196606  1.829270  0.945991
APDFT1   -2.452939      0.597803  0.100157  0.042022
APDFT2   -8.922763      6.668563  0.045459  0.013302
APDFT3  -26.595237   -543.715144  0.019743  0.004300
APDFT4  850.395494 -36837.482472  0.018841  0.001096


### Specifying lambda values

We can also restrict the predictions to specific lambda values. For example, we could look only at lambda values of ±1.

In [5]:
# Lambda values to include; passed straight to get_qats_excitation.
considered_lambdas = [-1, 1]

use_qats = True  # Use finite differences with Taylor series for APDFT predictions.

# Multiplicity gap of the target from the QC data, converted to eV.
mg_qc_prediction = hartree_to_ev(
    get_qc_excitation(
        df_qc, system_label, target_charge=target_charge,
        excitation_level=excitation_level,
        basis_set=basis_set
    )
)

# Taylor-series (APDFTn) predictions for the selected lambda values, keyed by
# the label of each returned system and converted to eV.
mg_qats_predictions = {
    ref_label: hartree_to_ev(ref_mg)
    for ref_label, ref_mg in get_qats_excitation(
        df_qc, df_qats, system_label, target_charge=target_charge,
        excitation_level=excitation_level,
        basis_set=basis_set, use_qats=use_qats,
        considered_lambdas=considered_lambdas
    ).items()
}

# Signed error of every APDFTn prediction with respect to the QC value.
mg_qats_errors = {
    ref_label: ref_mg - mg_qc_prediction
    for ref_label, ref_mg in mg_qats_predictions.items()
}

print(f'PySCF prediction of MG for {system_label}: {mg_qc_prediction:.3f} eV\n')
# Plain string: this message has no placeholders, so no f-prefix is needed.
print('APDFTn prediction errors in eV:')
# Rows are labelled APDFT0 ... APDFT4, one per Taylor-series order.
print(pd.DataFrame(mg_qats_errors, index=[f'APDFT{n}' for n in range(5)]))

PySCF prediction of MG for n: 2.679 eV

APDFTn prediction errors in eV:
                   c         o
APDFT0     -1.196606  0.945991
APDFT1      0.597803  0.042022
APDFT2      6.668563  0.013302
APDFT3   -543.715144  0.004300
APDFT4 -36837.482472  0.001096


## APDFT*n* errors with respect to APDFT

Or, you can compute the difference between APDFT*n* (predictions with Taylor series) and APDFT.

In [6]:
compute_difference = True  # Returns APDFTn - APDFT instead of energy predictions.

use_qats = True  # Use finite differences with Taylor series for APDFT predictions.

# With compute_difference set, the returned values are already differences
# (APDFTn - APDFT), so no QC reference is subtracted here; the values are only
# converted to eV.
mg_qats_predictions = {
    ref_label: hartree_to_ev(ref_diff)
    for ref_label, ref_diff in get_qats_excitation(
        df_qc, df_qats, system_label, target_charge=target_charge,
        excitation_level=excitation_level,
        basis_set=basis_set, use_qats=use_qats,
        compute_difference=compute_difference
    ).items()
}

# Plain string: this message has no placeholders, so no f-prefix is needed.
print('Differences between APDFTn and APDFT in eV:')
# Rows are labelled APDFT0 ... APDFT4, one per Taylor-series order.
print(pd.DataFrame(mg_qats_predictions, index=[f'APDFT{n}' for n in range(5)]))

Differences between APDFTn and APDFT in eV:
                 b             c         f         o
APDFT0   -2.372431     -1.195946  1.827837  0.945521
APDFT1   -2.460033      0.598463  0.098724  0.041553
APDFT2   -8.929857      6.669223  0.044026  0.012833
APDFT3  -26.602331   -543.714485  0.018310  0.003830
APDFT4  850.388400 -36837.481812  0.017408  0.000627


## Overall statistics

We can also compute the mean absolute error (MAE), root-mean-square error (RMSE), and maximum absolute error over all systems.

In [7]:
# Settings for the statistics over every system in all_atom_systems.
all_systems = all_atom_systems[:]
basis_set = 'aug-cc-pV5Z'
target_charge = 0
excitation_level = 1

use_qats = True
considered_lambdas = None
compute_difference = False

# Highest Taylor-series order reported (rows 0..max_qa_order of the table).
max_qa_order = 4

# Gather the per-system error tables and join them column-wise with a single
# pd.concat call, instead of re-concatenating (and re-copying) the growing
# table on every loop iteration.
all_error = pd.concat(
    [
        qats_error_excitation_energy(
            df_qc, df_qats, system, target_charge=target_charge,
            excitation_level=excitation_level, basis_set=basis_set,
            use_qats=use_qats,
            compute_difference=compute_difference,
            considered_lambdas=considered_lambdas
        )
        for system in all_systems
    ],
    axis=1
)

# With Taylor-series predictions (or differences) there is one row per order;
# otherwise only the first row is reported, labelled plain 'APDFT'.
if use_qats or compute_difference:
    row_labels = [f'APDFT{n}' for n in range(max_qa_order + 1)]
else:
    row_labels = ['APDFT']

# (label, reducer) pairs applied to each row of errors, in print order.
error_metrics = (
    ('MAE', lambda errors: np.mean(np.abs(errors))),
    ('RMSE', lambda errors: np.sqrt(np.mean(errors**2))),
    ('Max Abs.', lambda errors: np.max(np.abs(errors))),
)

for metric_position, (metric_name, reduce_errors) in enumerate(error_metrics):
    # A blank line separates consecutive metric groups.
    if metric_position > 0:
        print()
    for row_position, row_label in enumerate(row_labels):
        row_errors = all_error.iloc[row_position].values
        print(f'{row_label} {metric_name}: {reduce_errors(row_errors):.1f} eV')


APDFT0 MAE: 12.5 eV
APDFT1 MAE: 6.6 eV
APDFT2 MAE: 447.9 eV
APDFT3 MAE: 22890.7 eV
APDFT4 MAE: 4341314.0 eV

APDFT0 RMSE: 26.5 eV
APDFT1 RMSE: 14.7 eV
APDFT2 RMSE: 2189.1 eV
APDFT3 RMSE: 99484.6 eV
APDFT4 RMSE: 21888096.7 eV

APDFT0 Max Abs.: 137.3 eV
APDFT1 Max Abs.: 74.4 eV
APDFT2 Max Abs.: 16434.6 eV
APDFT3 Max Abs.: 565038.7 eV
APDFT4 Max Abs.: 164423334.1 eV
