This nb is written in python3. The nb has to be run WITHOUT the source of the BigDFT package, otherwise the import yaml gives rise to a conflict.

# Comparison of results

In [1]:
import yaml
import numpy as np
from tabulate import tabulate

AuToA = 0.5291772085**3

## Load the BG and the refernce data

In [2]:
# Functions to rearrange the BG data using the psp as the leading parameter
def get_field(full_results,study):
    """
    Retrieve the value of the field that provided the tolerance of the options
    """
    tol=full_results['options']['atol']
    field=full_results[study]['alpha_convergence']
    if field is not None: field=field[0]['f']
    return tol,field
def alpha_is_reliable(full_results,study):
    """
    Provide a boolean stating if the given study has a reliable statical polarizability
    """
    tol,field=get_field(full_results,study)
    if field is None or field == 0.0001: return False
    return True

def study_matches(study,key):
    """
    Returns the rest of the tuple of the study
    """
    #case of a string
    if type(key) == type('a'): return [i for i in study if i != key]
    #case of a tuple
    return [i for i in study if i not in list(key)]

def get_result(full_results,study,key):
    null=(None,None,None)
    if study == 'options' or not alpha_is_reliable(full_results,study): return null
    dataname=study_matches(study,key)
    th=2 if type(key) == type('a') else 3-len(key)
    if len(dataname)> th: return null
    result=full_results[study]['alpha_convergence']
    tol,field=get_field(full_results,study)
    return result,dataname,field

def reformat_result(full_results,key):
    """
    Works with a name of a (psp) as key
    """
    res={}
    import numpy
    for study in full_results:
        result,dataname,field = get_result(full_results,study,key)
        if result is None: continue
        alpha=numpy.array(result[1])#*AuToA
        st=tuple(dataname)
        mol=st[0]
        xc=st[1] if st[1] != 'lda_pw' else 'lda-SPW92'
        res.setdefault(mol,{})
        res[mol].update({xc:alpha,xc+'-f':field})
    return res

In [3]:
HG_data=yaml.load(open('HG Dataset/hg_data.yaml'))
HG_data['H']['spin_pol'] = 'sp'

MWdata=yaml.load(open('mw_data_0001v2.yaml'))
MW_data={mol:{xcref: MWdata[mol][xc]['diagonal'] \
              for xcref,xc in [('lda-SPW92','lda'),('pbe','pbe')]} for mol in MWdata}

full_results = yaml.load(open('Calculations/full_results.yaml'))

BG_data={}
for psp in ['hgh_k','nlcc_aw','nlcc_ss']:
    BG_data[psp]=reformat_result(full_results,(psp))

Define the list of the nsp and sp molecules used the attribution made in HG dataset

In [4]:
molecule_nsp = []
molecule_sp = []
for mol in HG_data:
    if HG_data[mol]['spin_pol'] == 'nsp' : molecule_nsp.append(mol)
    if HG_data[mol]['spin_pol'] == 'sp' : molecule_sp.append(mol)
molecule_nsp = sorted(molecule_nsp)
molecule_sp = sorted(molecule_sp)

In [5]:
# Routines to compute the errors
def relative_error(alpha,alpha_ref):
    """
    Convert the reference value in a.u. and
    compute the relative error
    """
    alpha_ref = np.array(alpha_ref)/AuToA
    return 100.0*(alpha-alpha_ref)/alpha_ref


## PSP = hgh_k

In [27]:
data = BG_data['hgh_k']
molecule_bg = list(data.keys())
molecule_mw = list(MW_data.keys())
molecule_hg = list(HG_data.keys())

missing_mw = ['CH30','PS','S2','SH']

In [28]:
data['CO']

{'lda-SPW92': array([12.628823, 12.628823, 15.86413 ]),
 'lda-SPW92-f': 0.005,
 'pbe': array([12.509689 , 12.509689 , 15.9052996]),
 'pbe-f': 0.005,
 'pbe0': array([12.010789, 12.010789, 15.320584]),
 'pbe0-f': 0.005}

In [29]:
MW_data['co']

{'lda-SPW92': [1.868431750384668, 1.8684317500142003, 2.3535110461925104],
 'pbe': [1.8532465936431202, 1.8532465936431202, 2.359389168196766]}

In [30]:
HG_data['CO']

{'CCSD(T)': [1.753, 1.753, 2.283],
 'field_int': 0.01,
 'lda-SPW92': [1.872, 1.872, 2.358],
 'lda-Slater': [1.993, 1.993, 2.49],
 'mpol_ref': '1',
 'pbe': [1.856, 1.856, 2.363],
 'pbe0': [1.778, 1.778, 2.274],
 'spin_pol': 'nsp'}

In [31]:
np.array(MW_data['no']['pbe'])/AuToA

array([ 9.77822692, 11.34321721, 16.17884364])

Some molecules are completely absent (but in some other cases converged is reached only for some of the xc)

In [32]:
for mol in molecule_nsp+molecule_sp:
    if mol not in molecule_bg:
        print(mol)

CH3O
Li2


If the highest component of the relative error is greater (in modulus) than the tol print the associated study

In [33]:
tol = 5.
xcs = ['lda-SPW92','pbe']
for xc in xcs:
    for mol in molecule_bg:
        if mol.lower() in molecule_mw:
            if xc in data[mol]:
                a = data[mol][xc]
                a_ref = MW_data[mol.lower()][xc]
                err = relative_error(a,a_ref)
                if np.max(np.abs(err)) > tol:
                    print(mol,xc,err)
            else:
                print(mol,xc,' not computed')
            

BN lda-SPW92 [ -0.64232147 -20.55503217   0.02941355]
FH-OH lda-SPW92 [111.56053631  17.1816963  -63.23432392]
H2O-Li lda-SPW92 [ 32.60781534  16.5429592  -35.97750077]
HO2 lda-SPW92 [ 8.63374911  4.68361008 -9.76457341]
NO lda-SPW92 [  0.20277828 -12.71023128   0.28450784]
NaCN lda-SPW92 [-3.47496044 -5.40371794 -1.98253719]
NaCl lda-SPW92 [-0.34201838 -0.34201839 -5.37147614]
OCl lda-SPW92 [ 0.40699611 -6.74974754  0.60581672]
OF lda-SPW92 [ 0.06281782 -6.92652484  0.57052459]
OH lda-SPW92 [  0.07646118 -20.97083438   0.43860241]
SCl lda-SPW92 [ 0.18829126 -8.46693999  0.37214949]
SF lda-SPW92 [ -0.19722138 -13.90047307   0.23595501]
BN pbe [ -0.7376914  -21.02162607  -1.37802874]
C2H pbe  not computed
FH-OH pbe [ 48.8856349   16.73046258 -49.63852461]
H2O-Li pbe [ 20.69602961  16.09242202 -28.88848219]
HO2 pbe [  8.64864464   6.45554578 -10.19711305]
NO pbe [  0.0411381  -13.7612432    0.26796948]
OCl pbe [ 0.47570435 -6.65071455  0.56499067]
OF pbe [-0.14140664 -7.30135885  0.37409

For some molecules the error is high only in one direction. Many of the y components are very high!

There is something really strange, for instance the posinp of NO is along the z axis why the y error is high and the x one is low?


We build a table with the relative errors

In [18]:
table_nsp = [[] for line in range(len(molecule_nsp)+1)]
table_sp = [[] for line in range(len(molecule_sp)+1)]

In [19]:
table_nsp[0] = ['molecule','err_lda','err_pbe','err_pbe0']
table_sp[0] = ['molecule','err_lda','err_pbe','err_pbe0']

In [20]:
def format_decimal(ll):
    if ll == 0.: return ''
    out = [ '%.2f' % elem for elem in ll ]
    out = [float(elem) for elem in out]
    return out

For molecules missing in the mw data the comparison is performed with th HG data

In [34]:
for ind,mol in enumerate(molecule_nsp):
    err_lda = 0.
    err_pbe = 0.
    err_pbe0 = 0.
    if mol in molecule_bg:
        if 'lda-SPW92' in data[mol] and mol.lower() in MW_data:
            alpha = data[mol]['lda-SPW92']
            alpha_ref = MW_data[mol.lower()]['lda-SPW92']
            err_lda = list(relative_error(alpha,alpha_ref))
        if 'lda-SPW92' in data[mol] and mol in missing_mw:
            alpha = data[mol]['lda-SPW92']
            alpha_ref = HG_data[mol]['lda-SPW92']
            err_lda = list(relative_error(alpha,alpha_ref))
        if 'pbe' in data[mol] and mol.lower() in MW_data:
            alpha = data[mol]['pbe']
            alpha_ref = MW_data[mol.lower()]['pbe']
            err_pbe = list(relative_error(alpha,alpha_ref))
        if 'pbe' in data[mol] and mol in missing_mw:
            alpha = data[mol]['pbe']
            alpha_ref = HG_data[mol]['pbe']
            err_pbe = list(relative_error(alpha,alpha_ref))
        if 'pbe0' in data[mol] :
            alpha = data[mol]['pbe0']
            alpha_ref = HG_data[mol]['pbe0']
            err_pbe0 = list(relative_error(alpha,alpha_ref))
    table_nsp[ind+1] = [mol,format_decimal(err_lda),format_decimal(err_pbe),format_decimal(err_pbe0)]

In [39]:
for ind,mol in enumerate(molecule_sp):
    err_lda = 0.
    err_pbe = 0.
    err_pbe0 = 0.
    if mol in molecule_bg:
        if 'lda-SPW92' in data[mol] and mol.lower() in MW_data:
            alpha = data[mol]['lda-SPW92']
            alpha_ref = MW_data[mol.lower()]['lda-SPW92']
            err_lda = list(relative_error(alpha,alpha_ref))
        if 'lda-SPW92' in data[mol] and mol in missing_mw:
            alpha = data[mol]['lda-SPW92']
            alpha_ref = HG_data[mol]['lda-SPW92']
            err_lda = list(relative_error(alpha,alpha_ref))
        if 'pbe' in data[mol] and mol.lower() in MW_data:
            alpha = data[mol]['pbe']
            alpha_ref = MW_data[mol.lower()]['pbe']
            err_pbe = list(relative_error(alpha,alpha_ref))
        if 'pbe' in data[mol] and mol in missing_mw:
            alpha = data[mol]['pbe']
            alpha_ref = HG_data[mol]['pbe']
            err_pbe = list(relative_error(alpha,alpha_ref))
        if 'pbe0' in data[mol] :
            alpha = data[mol]['pbe0']
            alpha_ref = HG_data[mol]['pbe0']
            err_pbe0 = list(relative_error(alpha,alpha_ref))
    table_sp[ind+1] = [mol,format_decimal(err_lda),format_decimal(err_pbe),format_decimal(err_pbe0)]

In [40]:
def colorset(ll):
    if ll == '': return ''
    return 'colorset{%s}{%s}{%s}'%(ll[0],ll[1],ll[2])

In [41]:
#table_sp

In [42]:
t = table_sp
for ind in range (1,len(t)):
    print(t[ind][0] +' & ' + '\\' + colorset(t[ind][1])+' & ' + '\\' + colorset(t[ind][2])+' & ' + '\\' + colorset(t[ind][3])+ r' \\')

BH2 & \colorset{-1.38}{-1.38}{-1.03} & \colorset{-1.93}{-2.66}{-1.56} & \colorset{-2.33}{-3.52}{-1.78} \\
BN & \colorset{-0.64}{-20.56}{0.03} & \colorset{-0.74}{-21.02}{-1.38} & \colorset{-20.55}{-20.55}{-1.73} \\
BO & \colorset{-0.76}{-0.76}{-0.68} & \colorset{-1.06}{-1.06}{-0.8} & \colorset{-1.2}{-1.2}{-0.7} \\
BS & \colorset{-0.13}{-0.13}{-0.37} & \colorset{-0.24}{-0.24}{-0.54} & \colorset{-0.56}{-0.56}{-0.66} \\
Be & \colorset{0.14}{0.14}{0.14} & \colorset{-0.07}{-0.07}{-0.07} & \colorset{-0.85}{-0.85}{-0.85} \\
BeH & \colorset{-1.54}{-1.54}{-3.06} & \colorset{-0.06}{-0.06}{-0.16} & \colorset{-0.83}{-0.83}{-1.53} \\
C2H & \colorset{-1.96}{-1.96}{-0.1} & \ & \colorset{-2.43}{-2.43}{-0.4} \\
C2H3 & \colorset{-0.39}{-0.2}{0.08} & \colorset{-0.86}{-0.47}{-0.09} & \colorset{-0.86}{-0.6}{-0.46} \\
CH2-t & \colorset{-1.36}{-0.72}{-1.03} & \colorset{-2.13}{-1.02}{-1.51} & \colorset{-2.14}{-1.03}{-1.53} \\
CH2F & \colorset{-1.29}{-0.14}{-0.61} & \colorset{-2.69}{-0.67}{-1.02} & \colorset{-2

## PSP = nlcc-aw and nlcc-ss

In [52]:
data_aw = BG_data['nlcc_aw']
data_ss = BG_data['nlcc_ss']
molecule_bg_aw = list(data_aw.keys())
molecule_bg_ss = list(data_ss.keys())
molecule_mw = list(MW_data.keys())
molecule_hg = list(HG_data.keys())

missing_mw = ['CH30','PS','S2','SH']

In [64]:
data_aw['CO']

{'pbe': array([12.540766, 12.540766, 15.889695]), 'pbe-f': 0.005}

In [68]:
table_nsp = [[] for line in range(len(molecule_nsp)+1)]
table_sp = [[] for line in range(len(molecule_sp)+1)]

In [69]:
table_nsp[0] = ['molecule','err_nlcc-aw','err_nlcc-ss']
table_sp[0] = ['molecule','err_nlcc-aw','err_nlcc-ss']

In [70]:
def format_decimal(ll):
    if ll == 0.: return ''
    out = [ '%.2f' % elem for elem in ll ]
    out = [float(elem) for elem in out]
    return out

For molecules missing in the mw data the comparison is performed with th HG data

In [71]:
for ind,mol in enumerate(molecule_nsp):
    err_aw = 0.
    err_ss = 0.
    if mol in molecule_bg_aw:
        if 'pbe' in data_aw[mol] and mol.lower() in MW_data:
            alpha = data_aw[mol]['pbe']
            alpha_ref = MW_data[mol.lower()]['pbe']
            err_aw = list(relative_error(alpha,alpha_ref))
        if 'pbe' in data_aw[mol] and mol in missing_mw:
            alpha = data_aw[mol]['pbe']
            alpha_ref = HG_data[mol]['pbe']
            err_aw = list(relative_error(alpha,alpha_ref))
    if mol in molecule_bg_ss:        
        if 'pbe' in data_ss[mol] and mol.lower() in MW_data:
            alpha = data_ss[mol]['pbe']
            alpha_ref = MW_data[mol.lower()]['pbe']
            err_ss = list(relative_error(alpha,alpha_ref))
        if 'pbe' in data_ss[mol] and mol in missing_mw:
            alpha = data_ss[mol]['pbe']
            alpha_ref = HG_data[mol]['pbe']
            err_ss = list(relative_error(alpha,alpha_ref))
    table_nsp[ind+1] = [mol,format_decimal(err_aw),format_decimal(err_ss)]

In [72]:
for ind,mol in enumerate(molecule_sp):
    err_aw = 0.
    err_ss = 0.
    if mol in molecule_bg_aw:
        if 'pbe' in data_aw[mol] and mol.lower() in MW_data:
            alpha = data_aw[mol]['pbe']
            alpha_ref = MW_data[mol.lower()]['pbe']
            err_aw = list(relative_error(alpha,alpha_ref))
        if 'pbe' in data_aw[mol] and mol in missing_mw:
            alpha = data_aw[mol]['pbe']
            alpha_ref = HG_data[mol]['pbe']
            err_aw = list(relative_error(alpha,alpha_ref))
    if mol in molecule_bg_ss:        
        if 'pbe' in data_ss[mol] and mol.lower() in MW_data:
            alpha = data_ss[mol]['pbe']
            alpha_ref = MW_data[mol.lower()]['pbe']
            err_ss = list(relative_error(alpha,alpha_ref))
        if 'pbe' in data_ss[mol] and mol in missing_mw:
            alpha = data_ss[mol]['pbe']
            alpha_ref = HG_data[mol]['pbe']
            err_ss = list(relative_error(alpha,alpha_ref))
    table_sp[ind+1] = [mol,format_decimal(err_aw),format_decimal(err_ss)]

In [78]:
t = table_sp
for ind in range (1,len(t)):
    print(t[ind][0] +' & ' + '\\' + colorset(t[ind][1])+' & ' + '\\' + colorset(t[ind][2])+ r'\\')

BH2 & \colorset{-0.96}{-0.46}{-0.62} & \colorset{-0.43}{0.5}{-0.26}\\
BN & \colorset{-0.33}{-20.7}{0.65} & \colorset{-0.05}{-20.48}{2.44}\\
BO & \colorset{-0.34}{-0.34}{-0.04} & \colorset{0.05}{0.05}{1.0}\\
BS & \colorset{-0.34}{-0.34}{-0.3} & \\\
Be & \ & \colorset{-0.77}{-0.77}{-0.77}\\
BeH & \ & \colorset{-1.68}{-1.68}{-1.86}\\
C2H & \colorset{3.09}{3.09}{-0.13} & \colorset{2.23}{2.23}{-0.36}\\
C2H3 & \colorset{-0.01}{-0.09}{0.05} & \colorset{-0.24}{-0.31}{-0.24}\\
CH2-t & \colorset{0.29}{-0.15}{0.16} & \colorset{0.04}{-0.37}{-0.08}\\
CH2F & \colorset{0.39}{0.08}{-0.29} & \colorset{0.37}{0.09}{-0.43}\\
CH2NH & \colorset{-0.01}{0.2}{0.07} & \colorset{-0.25}{-0.09}{-0.21}\\
CH2PH & \colorset{-0.1}{0.03}{-0.1} & \colorset{-0.27}{-0.09}{-0.15}\\
CH3 & \colorset{-0.14}{-0.14}{0.64} & \colorset{-0.37}{-0.37}{0.41}\\
CH3O & \ & \\\
CN & \colorset{-0.41}{-0.41}{-0.16} & \colorset{-0.75}{-0.75}{-0.35}\\
F2 & \colorset{-0.29}{-0.29}{0.31} & \colorset{0.01}{0.01}{0.37}\\
FCO & \colorset{-0.01}