In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os, sys, yaml
sys.path.insert(0,'../')
import StatPol as SP, Routines as R

# Working directory at the time this cell is run.
basepath = os.getcwd()

# Data analysis

In [2]:
# Load the stored results of the nsp dataset from the YAML file.
# The file is opened in a `with` block so that the handle is closed as soon as
# the content has been parsed.
# NOTE(review): yaml.load can build arbitrary Python objects and must only be
# used on trusted files. The loader is left unchanged because the results shown
# further down contain tuple keys and numpy matrices, which presumably need it;
# confirm before switching to yaml.safe_load.
with open('nsp_results.yaml') as results_file:
    nsp_results = yaml.load(results_file)

In [3]:
computed_mol = R.build_computed_mol_list(nsp_results)
print 'Number of computed molecules', len(computed_mol)

Number of computed molecules 74


## First checks of the convergence procedure

We check if convergence has been achieved, both on the field intensity and on the value of rmult

In [4]:
for mol in computed_mol:
    results = nsp_results[mol]['results']
    for study in results:
        if results[study]['field_conv']['converged']==False:
            print mol, study, 'field convergence failed'
        if results[study]['rmult_conv']['converged']==False:
            print mol, study, 'rmult convergence failed'

NaCl ('lda_pw', 'hgh_k') field convergence failed


The field convergence on this study has to be further investigated

For each study we extract the converged values of the field intensity and of rmult

In [5]:
print 'mol', 'study', 'field_conv', 'rmult_conv'
for mol in computed_mol:
    print mol
    results = nsp_results[mol]['results']
    for study in results:
        print ' ', study,results[study]['field_conv']['converged_value'],\
        results[study]['rmult_conv']['converged_value']

mol study field_conv rmult_conv
AlF
  ('lda_pw', 'hgh_k') 0.01 6.0
  ('lda_pt', 'hgh_k') 0.01 6.0
  ('pbe0', 'hgh_k') 0.01 6.0
  ('pbe', 'nlcc_ss') 0.01 6.0
  ('pbe', 'nlcc_aw') 0.01 6.0
  ('pbe', 'hgh_k') 0.01 6.0
Ar
  ('lda_pw', 'hgh_k') 0.01 7.0
  ('pbe', 'hgh_k') 0.01 7.0
  ('pbe0', 'hgh_k') 0.01 7.0
  ('lda_pt', 'hgh_k') 0.01 7.0
BF
  ('lda_pw', 'hgh_k') 0.01 6.0
  ('lda_pt', 'hgh_k') 0.01 6.0
  ('pbe0', 'hgh_k') 0.01 6.0
  ('pbe', 'nlcc_ss') 0.01 6.0
  ('pbe', 'nlcc_aw') 0.01 6.0
  ('pbe', 'hgh_k') 0.01 6.0
BH2Cl
  ('lda_pw', 'hgh_k') 0.01 6.0
  ('lda_pt', 'hgh_k') 0.01 6.0
  ('pbe0', 'hgh_k') 0.01 6.0
  ('pbe', 'nlcc_ss') 0.01 6.0
  ('pbe', 'nlcc_aw') 0.01 6.0
  ('pbe', 'hgh_k') 0.01 6.0
BH2F
  ('lda_pw', 'hgh_k') 0.01 6.0
  ('lda_pt', 'hgh_k') 0.01 6.0
  ('pbe0', 'hgh_k') 0.01 6.0
  ('pbe', 'nlcc_ss') 0.01 6.0
  ('pbe', 'nlcc_aw') 0.01 6.0
  ('pbe', 'hgh_k') 0.01 6.0
BH3
  ('lda_pw', 'hgh_k') 0.01 5.0
  ('lda_pt', 'hgh_k') 0.01 5.0
  ('pbe0', 'hgh_k') 0.01 5.0
  ('pbe', 'nlcc_s

We see that both the field and the rmult convergence values change across the molecules and studies of the dataset. This indicates that the (quite complex) procedure for the determination of these parameters has been useful.

## RMSRE and MRE for each study of the nsp dataset

We compute the RMSRE and MRE for each study and collect the results in a table. This analysis provides a measure of the effect of the psp on the determination of the static polarizability for the molecules of the dataset.

Compute the RMSRE and MRE errors for all the performed studies. lda_pt is not considered here since we have no reference results for this study.

In [6]:
# (xc, psp) pairs included in the error analysis; the lda_pt studies are left
# out because no reference results are available for them.
reduced_study_set = [
    ('lda_pw', 'hgh_k'),
    ('pbe', 'hgh_k'),
    ('pbe', 'nlcc_aw'),
    ('pbe', 'nlcc_ss'),
    ('pbe0', 'hgh_k'),
]

In [7]:
print 'study', 'RMSRE', 'MRE'
for study in reduced_study_set:
    print '{} {:1.2f} {:1.2f}'.format(study,R.eval_rmsre(nsp_results,study[0],study[1]),\
                                 R.eval_mre(nsp_results,study[0],study[1]))

study RMSRE MRE
('lda_pw', 'hgh_k') 1.03 -0.22
('pbe', 'hgh_k') 0.58 -0.18
('pbe', 'nlcc_aw') 0.51 -0.25
('pbe', 'nlcc_ss') 0.89 -0.34
('pbe0', 'hgh_k') 0.51 -0.14


Compute the array with the relative error for each molecule and study

In [8]:
print 'molecule', 'study', 'Relative error'
for mol in computed_mol:
    print mol
    for study in reduced_study_set:
        re = R.eval_relative_error(nsp_results,mol,study[0],study[1])
        re_str = ''
        if not (re is None):
            re_str = ['{:1.2f}'.format(x) for x in re]
        print ' ',study,re_str

molecule study Relative error
AlF
  ('lda_pw', 'hgh_k') ['0.15', '0.15', '0.19']
  ('pbe', 'hgh_k') ['-0.59', '-0.59', '-0.04']
  ('pbe', 'nlcc_aw') ['1.00', '1.00', '0.75']
  ('pbe', 'nlcc_ss') ['-0.35', '-0.35', '0.11']
  ('pbe0', 'hgh_k') ['-0.68', '-0.68', '0.12']
Ar
  ('lda_pw', 'hgh_k') ['-0.35', '-0.35', '-0.35']
  ('pbe', 'hgh_k') ['-0.31', '-0.31', '-0.31']
  ('pbe', 'nlcc_aw') 
  ('pbe', 'nlcc_ss') 
  ('pbe0', 'hgh_k') ['-0.23', '-0.23', '-0.23']
BF
  ('lda_pw', 'hgh_k') ['0.57', '0.57', '0.37']
  ('pbe', 'hgh_k') ['0.39', '0.39', '0.14']
  ('pbe', 'nlcc_aw') ['0.54', '0.54', '0.47']
  ('pbe', 'nlcc_ss') ['0.39', '0.39', '1.04']
  ('pbe0', 'hgh_k') ['0.65', '0.65', '0.50']
BH2Cl
  ('lda_pw', 'hgh_k') ['0.26', '-0.06', '0.04']
  ('pbe', 'hgh_k') ['0.29', '-0.07', '-0.01']
  ('pbe', 'nlcc_aw') ['-0.19', '-0.53', '-0.57']
  ('pbe', 'nlcc_ss') ['-0.29', '-0.71', '-0.56']
  ('pbe0', 'hgh_k') ['0.14', '-0.19', '-0.04']
BH2F
  ('lda_pw', 'hgh_k') ['0.06', '-0.43', '-0.00']
  ('pbe',

## Plot of the average relative error

In [9]:
# Display the list of studies that the plotting cells below select from.
reduced_study_set

[('lda_pw', 'hgh_k'),
 ('pbe', 'hgh_k'),
 ('pbe', 'nlcc_aw'),
 ('pbe', 'nlcc_ss'),
 ('pbe0', 'hgh_k')]

In [17]:
def get_rmult_convergence_value(dataset,molecule,study):
    """Return the converged rmult value stored for a molecule and a study.

    The value is read from
    ``dataset[molecule]['results'][study]['rmult_conv']['converged_value']``.

    Args:
        dataset: nested mapping of results, indexed by molecule.
        molecule: key of the molecule inside ``dataset``.
        study: key of the study inside the molecule's ``'results'`` mapping.

    Returns:
        The stored ``'converged_value'``, or None when ``study`` is not among
        the results of ``molecule``.

    NOTE(review): the raw dump at the end of the notebook shows a 'gs_conv'
    entry labelled 'rmult'; confirm that 'rmult_conv' is still the key used
    by the current data file.
    """
    studies = dataset[molecule]['results']
    if study not in studies:
        return None
    return studies[study]['rmult_conv']['converged_value']
        

In [20]:
%matplotlib auto

# Plot the average relative error of every molecule for a single study.
# Each point is drawn with the marker and colour associated with the rmult
# value at which the calculation of that molecule converged.
study = reduced_study_set[1]

# Marker and colour for each converged rmult value (the keys are rmult values).
symbols = {5:'v',6:'^',7:'o',8:'s',9:'>'}
colors = {5:'black',6:'blue',7:'red',8:'crimson',9:'teal'}

# Records which rmult values already own a legend entry, so that each value
# is labelled only once.
legend_shown = {5:False,6:False,7:False,8:False,9:False}

molecules = []
values = []
for mol in computed_mol:
    rel_err = R.eval_re_molecule(nsp_results,mol,study[0],study[1])
    if rel_err is None:
        # The helper returned no value for this molecule and study: skip it.
        continue
    molecules.append(mol)
    values.append(rel_err)
    rmult = get_rmult_convergence_value(nsp_results,mol,study)
    # label=None is the scatter default, so repeated rmult values add no
    # further legend entries.
    label = None if legend_shown[rmult] else 'rmult conv = '+str(rmult)
    legend_shown[rmult] = True
    plt.scatter(mol,rel_err,marker=symbols[rmult],s=150,color=colors[rmult],label=label)

# Tick positions: one per plotted molecule, in plotting order.
number = list(range(len(molecules)))

plt.plot(molecules,values,linestyle='--',linewidth=2)
title = 'Average relative error for %s-%s'%(study[0],study[1])
plt.axhline(y=0., color='black', linestyle='--',linewidth=1)
plt.title(title,size = 30)
plt.ylabel('Relative error (%)',size=24)
plt.xticks(number,molecules,rotation='vertical')#,size=24)
# Use the axes that already hold the plot: plt.axes() would add a new Axes
# to the figure, and the grid would then be drawn on that empty overlay.
ax = plt.gca()
ax.yaxis.grid(linestyle='--') # horizontal lines
ax.xaxis.grid(linestyle='--') # vertical lines
plt.legend(loc='best',prop={'size': 24})
plt.show()

Using matplotlib backend: TkAgg


In [25]:
def get_psp_list(reduced_study_set,xc):
    """Return the psp of every (xc, psp) pair whose xc equals ``xc``.

    Args:
        reduced_study_set: iterable of two-element (xc, psp) pairs.
        xc: value that the first element of a pair must equal.

    Returns:
        List of the matching second elements, in the order in which the
        pairs appear in ``reduced_study_set`` (empty when nothing matches).
    """
    selected = []
    for study_xc, study_psp in reduced_study_set:
        if study_xc == xc:
            selected.append(study_psp)
    return selected

In [76]:
%matplotlib auto
# Overlay, for one xc functional, the average relative error obtained with
# each pseudopotential (psp) listed for it in reduced_study_set.
xc='pbe'

# One colour per psp curve. This is the same sequence the cell used to borrow
# from the rmult colour map of the previous plot (colors[5] ... colors[9]);
# keeping it local makes the cell independent of that map.
psp_colors = ['black','blue','red','crimson','teal']

# Zero line drawn once, before the curves.
plt.axhline(y=0., color='black', linestyle='--',linewidth=1)

for i,psp in enumerate(get_psp_list(reduced_study_set,xc)):
    mols=[]
    vals=[]
    for mol in computed_mol:
        val=R.eval_re_molecule(nsp_results,mol,xc,psp)
        if val is None: continue
        mols.append(mol)
        vals.append(val)
    # Number of molecules with a value for this psp.
    print(len(vals))
    color = psp_colors[i % len(psp_colors)]
    plt.plot(mols,vals,alpha=0.5,color=color,label=psp)
    # Empty label: the markers share the legend entry of the line above.
    plt.scatter(mols,vals,label='',color=color)

# Figure-level decorations are applied once, after all the curves are drawn.
# The title names only the xc functional because every psp is overlaid on the
# same axes (inside the loop it ended up naming only the last psp).
title = 'Average relative error for %s'%xc
plt.title(title,size = 30)
plt.xticks(rotation='vertical')
#plt.axhline(R.eval_mre(nsp_results,xc,psp))
# Explicit True: grid() without a visibility argument toggles the grid, so
# calling it once per curve left it off for an even number of curves.
plt.grid(True,axis='both')
plt.legend(loc='best')
plt.show()

Using matplotlib backend: TkAgg
74
63
61


In [56]:
# Spot check: value returned by eval_re_molecule for LiH with the ('pbe', 'nlcc_ss') study.
R.eval_re_molecule(nsp_results,'LiH','pbe','nlcc_ss')

3.916729797431575

In [52]:
# Scratch check: plot two series on string x values where the second series
# lacks one of the labels ('B') used by the first.
plt.plot(['A','B','C'],[1,1.5,3])
plt.plot(['A','C'],[2,2.5])

[<matplotlib.lines.Line2D at 0x7f1143702450>]

In [75]:
# Inspect the raw stored results of LiH for the ('pbe', 'hgh_k') study.
nsp_results['LiH']['results'][('pbe','hgh_k')]

{'field_conv': {'converged': True,
  'converged_value': 0.001,
  'label': 'field_int',
  'results': {0.0005: matrix([[3.298067e+01, 3.790000e-05, 2.010000e-03],
           [3.790000e-05, 3.298067e+01, 2.010000e-03],
           [2.000000e-03, 2.000000e-03, 3.099800e+01]]),
   0.001: matrix([[ 3.2987965e+01, -6.9100000e-05,  1.7664500e-03],
           [-6.9100000e-05,  3.2987965e+01,  1.7664500e-03],
           [ 1.5000000e-03,  1.5000000e-03,  3.1019500e+01]]),
   0.005: matrix([[ 3.321976e+01, -2.309800e-04,  1.318210e-03],
           [-2.309800e-04,  3.321976e+01,  1.318210e-03],
           [ 1.400000e-03,  1.400000e-03,  3.171760e+01]]),
   0.01: matrix([[ 3.3978095e+01, -2.1170000e-04, -1.2059150e-03],
           [-2.1170000e-04,  3.3978095e+01, -1.2059150e-03],
           [-7.0000000e-04, -7.0000000e-04,  3.4385400e+01]])},
  'tolerance': 0.01,
  'values': [0.01, 0.005, 0.001, 0.0005]},
 'gs_conv': {'converged': True,
  'converged_value': 5.0,
  'label': 'rmult',
  'results': {3.0: