# Electron affinities


In [1]:
import numpy as np
import pandas as pd

from qa_tools.utils import *
from qa_tools.data import prepare_dfs
from qa_tools.prediction import *
from qa_tools.analysis import *

# Path (relative to this notebook) of the JSON data file; the file name
# indicates PySCF post-HF data for dimers.
json_path = '../../json-data/dimer-pyscf.qa-data.posthf.json'
# prepare_dfs returns two objects that the rest of the notebook passes to the
# prediction functions as `df_qc` and `df_qats`.
# NOTE(review): get_CBS=False presumably skips complete-basis-set data —
# confirm against qa_tools.data.prepare_dfs.
df_qc, df_qats = prepare_dfs(json_path, get_CBS=False)

## QA prediction errors

There is some intrinsic error in modeling a target system (e.g., N atom) by changing the nuclear charge of a reference system (e.g., C<sup>&ndash;</sup>) while keeping the reference system's basis set.

In [2]:
# Target system and the basis set used for every prediction below.
system_label, basis_set = 'o.h', 'cc-pV5Z'

# Charge bookkeeping for the predicted property.
target_initial_charge = 0  # Initial charge of the system.
delta_charge = -1
change_signs = True  # Multiply all predictions by negative one (e.g., for electron affinities).
lambda_specific_atom = 0

# Passed as `n_points` / `poly_order` to the prediction functions in later cells.
n_points, poly_order = 2, 4

In [3]:
# Plain QA: Taylor-series mode is switched off for this cell.
use_ts = False
remove_outliers = False

# Reference value from the quantum-chemistry data (converted to eV below).
ea_qc_prediction = energy_change_charge_qc_dimer(
    df_qc,
    system_label,
    delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs,
    basis_set=basis_set,
    n_points=n_points,
    poly_order=poly_order,
    remove_outliers=remove_outliers,
)
# Alchemical predictions; the dict keys become the columns of the printed table.
ea_qats_predictions = energy_change_charge_qa_dimer(
    df_qc,
    df_qats,
    system_label,
    delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs,
    basis_set=basis_set,
    use_ts=use_ts,
    lambda_specific_atom=lambda_specific_atom,
    n_points=n_points,
    poly_order=poly_order,
    remove_outliers=remove_outliers,
)

# Convert everything to eV, then take each prediction's error against the
# quantum-chemistry reference.
ea_qc_prediction = hartree_to_ev(ea_qc_prediction)
ea_qats_predictions = {
    ref_label: hartree_to_ev(energy)
    for ref_label, energy in ea_qats_predictions.items()
}
ea_qats_errors = {
    ref_label: energy - ea_qc_prediction
    for ref_label, energy in ea_qats_predictions.items()
}

print(f'PySCF prediction of EA for {system_label}: {ea_qc_prediction:.3f} eV\n')
print('QA prediction errors in eV:')
print(pd.DataFrame(ea_qats_errors, index=['QA']))

PySCF prediction of EA for o.h: 1.523 eV

QA prediction errors in eV:
        ne.h      f.h
QA -0.549268 -0.23036


## QATS-*n* prediction errors

Now, we can look at approximating the QA prediction by using a Taylor series centered on $\Delta Z = 0$.

In [4]:
# QATS-n: same workflow as the QA cell, with Taylor-series mode switched on.
use_ts = True
remove_outliers = False

# Quantum-chemistry reference value (converted to eV further down).
ea_qc_prediction = energy_change_charge_qc_dimer(
    df_qc,
    system_label,
    delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs,
    basis_set=basis_set,
    n_points=n_points,
    poly_order=poly_order,
    remove_outliers=remove_outliers,
)
# Taylor-series predictions; the printed table has one row per QATS order and
# one column per dict key.
ea_qats_predictions = energy_change_charge_qa_dimer(
    df_qc,
    df_qats,
    system_label,
    delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs,
    basis_set=basis_set,
    use_ts=use_ts,
    lambda_specific_atom=lambda_specific_atom,
    n_points=n_points,
    poly_order=poly_order,
    remove_outliers=remove_outliers,
)

# Unit conversion to eV, followed by the error against the reference value.
ea_qc_prediction = hartree_to_ev(ea_qc_prediction)
ea_qats_predictions = dict(
    (ref_label, hartree_to_ev(energies))
    for ref_label, energies in ea_qats_predictions.items()
)
ea_qats_errors = dict(
    (ref_label, energies - ea_qc_prediction)
    for ref_label, energies in ea_qats_predictions.items()
)

# Row labels QATS-0 ... QATS-4 for the five Taylor-series orders.
qats_row_labels = ['QATS-' + str(order) for order in range(5)]

print(f'PySCF prediction of EA for {system_label}: {ea_qc_prediction:.3f} eV\n')
print('QATS-n prediction errors in eV:')
print(pd.DataFrame(ea_qats_errors, index=qats_row_labels))

PySCF prediction of EA for o.h: 1.523 eV

QATS-n prediction errors in eV:
             ne.h        f.h
QATS-0  29.667766  14.568295
QATS-1 -11.567414  -2.749806
QATS-2   0.953872  -0.067438
QATS-3   0.778176  -0.178758
QATS-4  -1.915091 -12.184128


  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(


### Specifying lambda values

We can also restrict the predictions to specific lambda values. For example, we could only look at lambda values of $\pm 1$; the cell below considers only $\lambda = -1$.

In [5]:
# Only lambda = -1 is considered here; in the recorded output this leaves a
# single column (f.h).
considered_lambdas = [-1]
use_ts = True
remove_outliers = False

# Quantum-chemistry reference value for the target system.
ea_qc_prediction = energy_change_charge_qc_dimer(
    df_qc,
    system_label,
    delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs,
    basis_set=basis_set,
    n_points=n_points,
    poly_order=poly_order,
    remove_outliers=remove_outliers,
)
# Same QATS call as before, now with the lambda filter passed through.
ea_qats_predictions = energy_change_charge_qa_dimer(
    df_qc,
    df_qats,
    system_label,
    delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs,
    basis_set=basis_set,
    use_ts=use_ts,
    lambda_specific_atom=lambda_specific_atom,
    n_points=n_points,
    poly_order=poly_order,
    remove_outliers=remove_outliers,
    considered_lambdas=considered_lambdas,
)

# Convert to eV before differencing so the errors are reported in eV.
ea_qc_prediction = hartree_to_ev(ea_qc_prediction)
ea_qats_predictions = {
    reference: hartree_to_ev(predicted)
    for reference, predicted in ea_qats_predictions.items()
}
ea_qats_errors = {
    reference: predicted - ea_qc_prediction
    for reference, predicted in ea_qats_predictions.items()
}

print(f'PySCF prediction of EA for {system_label}: {ea_qc_prediction:.3f} eV\n')
print('QATS-n prediction errors in eV:')
print(
    pd.DataFrame(
        ea_qats_errors,
        index=['QATS-{}'.format(order) for order in range(5)],
    )
)

PySCF prediction of EA for o.h: 1.523 eV

QATS-n prediction errors in eV:
              f.h
QATS-0  14.568295
QATS-1  -2.749806
QATS-2  -0.067438
QATS-3  -0.178758
QATS-4 -12.184128


  bond_lengths_fit, poly_coeffs = fit_dimer_poly(


## QATS-*n* errors with respect to QA

Alternatively, you can compute the difference between QATS-*n* (predictions with Taylor series) and QA.

In [6]:
# With return_qats_vs_qa=True the values printed below are labelled as
# differences between QATS-n and QA, so no quantum-chemistry reference is
# computed in this cell.
return_qats_vs_qa = True
use_ts = True
remove_outliers = False

ea_qats_predictions = energy_change_charge_qa_dimer(
    df_qc,
    df_qats,
    system_label,
    delta_charge,
    target_initial_charge=target_initial_charge,
    change_signs=change_signs,
    basis_set=basis_set,
    use_ts=use_ts,
    lambda_specific_atom=lambda_specific_atom,
    n_points=n_points,
    poly_order=poly_order,
    remove_outliers=remove_outliers,
    return_qats_vs_qa=return_qats_vs_qa,
)

# Convert the returned values to eV for display.
ea_qats_predictions = {
    ref_label: hartree_to_ev(difference)
    for ref_label, difference in ea_qats_predictions.items()
}

# One row per Taylor-series order, QATS-0 through QATS-4.
order_labels = ['QATS-%d' % order for order in range(5)]

print('Differences between QATS-n and QA in eV:')
print(pd.DataFrame(ea_qats_predictions, index=order_labels))

Differences between QATS-n and QA in eV:
             ne.h        f.h
QATS-0  30.217033  14.798655
QATS-1 -11.018146  -2.519446
QATS-2   1.503140   0.162922
QATS-3   1.327444   0.051602
QATS-4  -1.365824 -11.953768


  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(


## Overall statistics

We can also compute the mean absolute error (MAE), root mean squared error (RMSE), and maximum absolute error over all systems.

In [7]:
# Every entry of `all_dimer_systems` except the last two.
all_systems = all_dimer_systems[0:-2]
target_initial_charge = 0

considered_lambdas = None
return_qats_vs_qa = False
use_ts = True
remove_outliers = False
max_qats_order = 4

# Collect one error table per system and concatenate them once at the end.
# Calling pd.concat inside the loop re-copies the growing frame on every
# iteration and needs a special case for the first system.
sys_errors = []
for system in all_systems:
    sys_errors.append(
        error_change_charge_qats_dimer(
            df_qc, df_qats, system, delta_charge,
            target_initial_charge=target_initial_charge,
            change_signs=change_signs, basis_set=basis_set,
            use_ts=use_ts, lambda_specific_atom=lambda_specific_atom,
            n_points=n_points, poly_order=poly_order,
            remove_outliers=remove_outliers,
            return_qats_vs_qa=return_qats_vs_qa,
            considered_lambdas=considered_lambdas,
        )
    )
# Columns are the per-system results placed side by side; row n is read below
# as the QATS-n errors (or, without Taylor series, row 0 as the QA errors).
all_error = pd.concat(sys_errors, axis=1)

# Statistics reported for each row, in print order.
error_metrics = (
    ('MAE', lambda errors: np.mean(np.abs(errors))),
    ('RMSE', lambda errors: np.sqrt(np.mean(errors**2))),
    ('max abs.', lambda errors: np.max(np.abs(errors))),
)

# With Taylor series (or QATS-vs-QA differences) there is one row per order;
# otherwise only the first row is used and labelled QA.
if use_ts or return_qats_vs_qa:
    row_labels = [f'QATS-{n}' for n in range(max_qats_order + 1)]
else:
    row_labels = ['QA']

for metric_idx, (metric_name, metric_fn) in enumerate(error_metrics):
    if metric_idx > 0:
        print()  # Blank line between the MAE, RMSE and max blocks.
    for row, row_label in enumerate(row_labels):
        row_errors = all_error.iloc[row].values
        print(f'{row_label} {metric_name}: {metric_fn(row_errors):.3f} eV')


  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(


QATS-0 MAE: 18.770 eV
QATS-1 MAE: 6.710 eV
QATS-2 MAE: 1.244 eV
QATS-3 MAE: 5.974 eV
QATS-4 MAE: 22.025 eV

QATS-0 RMSE: 20.350 eV
QATS-1 RMSE: 7.840 eV
QATS-2 RMSE: 1.943 eV
QATS-3 RMSE: 9.249 eV
QATS-4 RMSE: 49.371 eV

QATS-0 max abs.: 29.668 eV
QATS-1 max abs.: 11.567 eV
QATS-2 max abs.: 4.621 eV
QATS-3 max abs.: 18.047 eV
QATS-4 max abs.: 159.445 eV


  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
  bond_lengths_fit, poly_coeffs = fit_dimer_poly(
