# APDFT Analysis

Predictions can use

- **QC**: HF, CCSD, or CCSD(T) energy differences with the species of interests or target,
- **APDFT**: HF, CCSD, or CCSD(T) energy differences by changing the nuclear charge of other systems that have the same number of electrons as the target,
- **APDFT*n***: Taylor series approximation with order *n* of the APDFT potential energy surface.

In [1]:
import numpy as np
import pandas as pd

from apdft_tools.utils import hartree_to_ev, all_dimer_systems
from apdft_tools.data import prepare_dfs
from apdft_tools.prediction import get_qc_change_charge_dimer, get_apdft_change_charge_dimer
from apdft_tools.analysis import apdft_error_change_charge_dimer

json_path = '/home/alex/Dropbox/keith/projects/apdft/apdft_tools/json-data/dimer-pyscf.apdft-data.posthf.json'
df_qc, df_apdft = prepare_dfs(json_path, get_CBS=False)

## APDFT prediction errors

There is some intrinsic error in modeling a target system (e.g., N atom) by changing the nuclear charge of a reference system's basis set (e.g., C<sup> &ndash;</sup> ).
The following cell computes this error, and represents the best performance for APDFT without fortuitous errors.

In [2]:
system_label = 'c.h'
delta_charge = 1
target_initial_charge = 0  # Initial charge of the system.
basis_set = 'cc-pV5Z'
lambda_specific_atom = 0
change_signs = False  # Multiple all predictions by negative one (e.g., for electron affinities)

n_points = 2
poly_order = 4

In [3]:
use_fin_diff = False  # Use finite differences with Taylor series for APDFT predictions.
remove_outliers = True

ip_qc_prediction = get_qc_change_charge_dimer(
    df_qc, system_label, delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs, basis_set=basis_set,
    n_points=n_points, poly_order=poly_order, remove_outliers=remove_outliers
)
ip_apdft_predictions = get_apdft_change_charge_dimer(
    df_qc, df_apdft, system_label, delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs, basis_set=basis_set,
    use_fin_diff=use_fin_diff, lambda_specific_atom=lambda_specific_atom,
    n_points=n_points, poly_order=poly_order, remove_outliers=remove_outliers
)

ip_qc_prediction = hartree_to_ev(ip_qc_prediction)
ip_apdft_predictions = {key:hartree_to_ev(value) for (key,value) in ip_apdft_predictions.items()}
ip_apdft_errors = {key:value-ip_qc_prediction for (key,value) in ip_apdft_predictions.items()}

print(f'PySCF prediction of IP for {system_label}: {ip_qc_prediction:.3f} eV\n')
print(f'APDFT prediction errors in eV:')
print(pd.DataFrame(ip_apdft_errors, index=[f'APDFT']))

PySCF prediction of IP for c.h: 10.625 eV

APDFT prediction errors in eV:
            n.h       b.h
APDFT -0.357684 -0.724528


## APDFT*n* prediction errors

Now, we can look at approximating the APDFT prediction by using a Taylor series centered on $\Delta Z = 0$.

In [4]:
use_fin_diff = True  # Use finite differences with Taylor series for APDFT predictions.
remove_outliers = True

ip_qc_prediction = get_qc_change_charge_dimer(
    df_qc, system_label, delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs, basis_set=basis_set,
    n_points=n_points, poly_order=poly_order, remove_outliers=remove_outliers
)
ip_apdft_predictions = get_apdft_change_charge_dimer(
    df_qc, df_apdft, system_label, delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs, basis_set=basis_set,
    use_fin_diff=use_fin_diff, lambda_specific_atom=lambda_specific_atom,
    n_points=n_points, poly_order=poly_order, remove_outliers=remove_outliers
)

ip_qc_prediction = hartree_to_ev(ip_qc_prediction)
ip_apdft_predictions = {key:hartree_to_ev(value) for (key,value) in ip_apdft_predictions.items()}
ip_apdft_errors = {key:value-ip_qc_prediction for (key,value) in ip_apdft_predictions.items()}

print(f'PySCF prediction of IP for {system_label}: {ip_qc_prediction:.3f} eV\n')
print(f'APDFTn prediction errors in eV:')
print(pd.DataFrame(ip_apdft_errors, index=[f'APDFT{n}' for n in range(5)]))

PySCF prediction of IP for c.h: 10.625 eV

APDFTn prediction errors in eV:
              n.h        b.h
APDFT0  15.824991 -10.676304
APDFT1  -2.474227  -2.933397
APDFT2  -0.037280  -0.197748
APDFT3  -0.089914   0.440954
APDFT4  -0.336727   1.183081


  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(


### Specifying lambda values

We can also specify specific lambda values to include. For example, we could only look at lambda values of +-1.

In [5]:
considered_lambdas = [-1]
use_fin_diff = True  # Use finite differences with Taylor series for APDFT predictions.
remove_outliers = True

ip_qc_prediction = get_qc_change_charge_dimer(
    df_qc, system_label, delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs, basis_set=basis_set,
    n_points=n_points, poly_order=poly_order, remove_outliers=remove_outliers
)
ip_apdft_predictions = get_apdft_change_charge_dimer(
    df_qc, df_apdft, system_label, delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs, basis_set=basis_set,
    use_fin_diff=use_fin_diff, lambda_specific_atom=lambda_specific_atom,
    n_points=n_points, poly_order=poly_order, remove_outliers=remove_outliers,
    considered_lambdas=considered_lambdas
)

ip_qc_prediction = hartree_to_ev(ip_qc_prediction)
ip_apdft_predictions = {key:hartree_to_ev(value) for (key,value) in ip_apdft_predictions.items()}
ip_apdft_errors = {key:value-ip_qc_prediction for (key,value) in ip_apdft_predictions.items()}

ip_qc_prediction = hartree_to_ev(ip_qc_prediction)
ip_apdft_predictions = {key:hartree_to_ev(value) for (key,value) in ip_apdft_predictions.items()}
ip_apdft_errors = {key:value-ip_qc_prediction for (key,value) in ip_apdft_predictions.items()}

print(f'PySCF prediction of IP for {system_label}: {ip_qc_prediction:.3f} eV\n')
print(f'APDFTn prediction errors in eV:')
print(pd.DataFrame(ip_apdft_errors, index=[f'APDFT{n}' for n in range(5)]))

  bond_lengths_fit, poly_coeffs = fit_dimer_poly(


PySCF prediction of IP for c.h: 289.111 eV

APDFTn prediction errors in eV:
               n.h
APDFT0  430.619930
APDFT1  -67.327145
APDFT2   -1.014431
APDFT3   -2.446685
APDFT4   -9.162808


## APDFT*n* errors with respect to APDFT

Or you, can compute the difference between APDFT*n* (predictions with Taylor series) and APDFT.

In [6]:
compute_difference = True
use_fin_diff = True  # Use finite differences with Taylor series for APDFT predictions.
remove_outliers = True

ip_apdft_predictions = get_apdft_change_charge_dimer(
    df_qc, df_apdft, system_label, delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs, basis_set=basis_set,
    use_fin_diff=use_fin_diff, lambda_specific_atom=lambda_specific_atom,
    n_points=n_points, poly_order=poly_order, remove_outliers=remove_outliers,
    compute_difference=compute_difference
)

ip_apdft_predictions = {key:hartree_to_ev(value) for (key,value) in ip_apdft_predictions.items()}

print(f'Differences between APDFTn and APDFT in eV:')
print(pd.DataFrame(ip_apdft_predictions, index=[f'APDFT{n}' for n in range(5)]))

Differences between APDFTn and APDFT in eV:
              n.h        b.h
APDFT0  26.449619  -0.051676
APDFT1   8.150401   7.691231
APDFT2  10.587348  10.426880
APDFT3  10.534714  11.065582
APDFT4  10.287901  11.807709


  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(


## Overall statistics

We can also compute mean absolute errors (MAEs), root mean squared error (RMSE) and max error.

In [7]:
all_systems = all_dimer_systems[1:-1]
target_initial_charge = 0

considered_lambdas = None
compute_difference = False
use_fin_diff = True  # Use finite differences with Taylor series for APDFT predictions.
remove_outliers = True
delta_charge = 1
max_apdft_order = 4

for i in range(len(all_systems)):
    sys_error = apdft_error_change_charge_dimer(
        df_qc, df_apdft, all_systems[i], delta_charge,
        target_initial_charge=target_initial_charge,
        change_signs=change_signs, basis_set=basis_set,
        use_fin_diff=use_fin_diff, lambda_specific_atom=lambda_specific_atom,
        n_points=n_points, poly_order=poly_order, remove_outliers=remove_outliers,
        compute_difference=compute_difference, considered_lambdas=considered_lambdas
    )
    if i == 0:
        all_error = sys_error
    else:
        all_error = pd.concat(
            [all_error, sys_error], axis=1
        )

if use_fin_diff or compute_difference == True:
    # MAE
    for n in range(0, max_apdft_order+1):
        apdftn_errors = all_error.iloc[n].values
        apdftn_mae = np.mean(np.abs(apdftn_errors))
        print(f'APDFT{n} MAE: {apdftn_mae:.4f} eV')

    # RMSE
    print()
    for n in range(0, max_apdft_order+1):
        apdftn_errors = all_error.iloc[n].values
        apdftn_rmse = np.sqrt(np.mean((apdftn_errors)**2))
        print(f'APDFT{n} RMSE: {apdftn_rmse:.4f} eV')
    
    # Max
    print()
    for n in range(0, max_apdft_order+1):
        apdftn_errors = all_error.iloc[n].values
        apdftn_max = np.max(np.abs(apdftn_errors))
        print(f'APDFT{n} max abs.: {apdftn_max:.4f} eV')
else:
    # MAE
    apdftn_errors = all_error.iloc[0].values
    apdftn_mae = np.mean(np.abs(apdftn_errors))
    print(f'APDFT MAE: {apdftn_mae:.4f} eV')

    # RMSE
    print()
    apdftn_rmse = np.sqrt(np.mean((apdftn_errors)**2))
    print(f'APDFT RMSE: {apdftn_rmse:.4f} eV')

    # Max
    print()
    apdftn_max = np.max(np.abs(apdftn_errors))
    print(f'APDFT max Abs.: {apdftn_max:.4f} eV')


  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(


APDFT0 MAE: 12.7656 eV
APDFT1 MAE: 2.3468 eV
APDFT2 MAE: 0.1797 eV
APDFT3 MAE: 0.6371 eV
APDFT4 MAE: 1.3647 eV

APDFT0 RMSE: 12.9730 eV
APDFT1 RMSE: 2.4321 eV
APDFT2 RMSE: 0.2127 eV
APDFT3 RMSE: 0.9307 eV
APDFT4 RMSE: 2.5191 eV

APDFT0 max abs.: 15.8250 eV
APDFT1 max abs.: 3.2526 eV
APDFT2 max abs.: 0.4482 eV
APDFT3 max abs.: 2.1746 eV
APDFT4 max abs.: 8.0994 eV
