# Demo for parsing the PAO files

In [1]:
from pathlib import Path
from pao_file_utils import parse_pao_file, append_samples

In [2]:
# Find and parse all .pao files.
# Each file corresponds to a molecular configuration, i.e. a frame.
# Since the system contains multiple atoms, each .pao file contains multiple samples.
samples = {}
# Path.glob() yields matches in a filesystem-dependent order; sorting makes the
# order in which samples are appended (and hence indices such as
# samples['H'][0]) reproducible across runs and machines.
# Note: the sort is lexicographic on the path, so frame_10 precedes frame_2.
for path in sorted(Path().glob("2H2O_MD/frame_*/2H2O_pao44-1_0.pao")):
    kinds, atom2kind, coords, xblocks = parse_pao_file(path)
    # `samples` is passed in and read afterwards, so append_samples is
    # expected to fill it in place, keyed by kind (see samples['H'] below).
    append_samples(samples, kinds, atom2kind, coords, xblocks)

In [3]:
# The system consists of six atoms, namely a water dimer:
atom2kind

['O', 'H', 'H', 'O', 'H', 'H']

In [4]:
# Samples for Hydrogen atoms:
len(samples['H'])

324

In [5]:
# Samples for Oxygen atoms:
len(samples['O'])

162

In [6]:
# Each sample has coordinates relative to the atom for which the PAO basis was optimized:
samples['H'][0].rel_coords

array([[-0.19109217, -0.63418089,  0.69283366],
       [ 0.        ,  0.        ,  0.        ],
       [-0.56070176, -1.4123194 ,  0.28088054],
       [ 2.85104974, -0.60561731,  0.25819602],
       [ 3.60174453, -0.78088106,  0.82914859],
       [ 2.08629638, -0.71205968,  0.82551853]])

In [7]:
# Each sample has contraction coefficients of shape (pao_basis_size, prim_basis_size):
samples['H'][0].xblock

array([[ 0.88347126,  0.45774537, -0.0808372 ,  0.05511535, -0.01935442],
       [-0.08659683,  0.41451374,  0.68942075, -0.54766492,  0.21321479],
       [-0.08092542,  0.13795269,  0.27721878,  0.66766525,  0.67215655],
       [ 0.0731936 , -0.13749382, -0.55284951, -0.44616767,  0.6863184 ]])

In [8]:
# So, the first pao basis vector of the first Hydrogen sample is:
samples['H'][0].xblock[0,:]

array([ 0.88347126,  0.45774537, -0.0808372 ,  0.05511535, -0.01935442])

In [9]:
# For the pao basis I chose a size of four for both Hydrogen and Oxygen:
assert samples['H'][0].xblock.shape[0] == 4
assert samples['O'][0].xblock.shape[0] == 4

In [10]:
# The primary basis set is organized in shells (i.e. channels in equivar lingo).
# I used the DZVP-MOLOPT-GTH basis set, which has the following number of shells:
prim_basis_shells = {
    'H': [2, 1, 0], # two s-shells, one p-shell, no d-shells
    'O': [2, 2, 1], # two s-shells, two p-shells, one d-shell
}

In [11]:
# From the number of shells one can calculate the size of the primary basis:
def calc_basis_size(shells):
    """Return the total number of basis functions for the given shell counts.

    Args:
        shells: Sequence where ``shells[l]`` is the number of shells with
            angular momentum ``l`` (index 0 = s, 1 = p, 2 = d, ...). Any
            length is accepted, so f-shells and higher are counted too.

    Returns:
        The sum over ``l`` of ``shells[l] * (2*l + 1)``; an empty sequence
        gives 0.
    """
    # A shell of angular momentum l holds 2*l + 1 functions (m = -l .. l).
    return sum((2 * l + 1) * n_shells for l, n_shells in enumerate(shells))

assert calc_basis_size(prim_basis_shells['H']) == samples['H'][0].xblock.shape[1]
assert calc_basis_size(prim_basis_shells['O']) == samples['O'][0].xblock.shape[1]

In [12]:
# The basis functions are enumerated in the following way:
def list_basis_functions(shells):
    """Return a label "l=<l>,m=<m>" for every basis function.

    ``shells[l]`` is the number of shells with angular momentum ``l``.
    Labels are ordered by ``l`` first, then by shell, then by ``m`` running
    from ``-l`` to ``l``; shells of equal ``l`` produce identical labels.
    """
    return [
        "l={},m={}".format(ang_mom, m)
        for ang_mom, n_shells in enumerate(shells)
        for _ in range(n_shells)
        for m in range(-ang_mom, ang_mom + 1)
    ]

assert len(list_basis_functions(prim_basis_shells['H'])) == samples['H'][0].xblock.shape[1]
assert len(list_basis_functions(prim_basis_shells['O'])) == samples['O'][0].xblock.shape[1]

print(list_basis_functions(prim_basis_shells['H']))

['l=0,m=0', 'l=0,m=0', 'l=1,m=-1', 'l=1,m=0', 'l=1,m=1']
