# Demo for parsing the PAO files

In [1]:
from glob import glob
from pao_utils import parse_pao_file, append_samples

In [2]:
# Find and parse all .pao files matched by the glob pattern below.
# Each file corresponds to one molecular configuration.
# Since the system contains multiple atoms, each .pao file contributes several samples.
# `samples` is passed to append_samples and its return value is ignored, so it is
# presumably filled in place; later cells index it by element symbol ('H', 'O').
samples = {}
for fn in glob("2H2O_MD/frame_*/2H2O_pao44-1_0.pao"):
    kinds, atom2kind, coords, xblocks = parse_pao_file(fn)
    append_samples(samples, kinds, atom2kind, coords, xblocks)
# NOTE: the loop variables (e.g. atom2kind) keep the values of the last parsed file;
# a later cell displays atom2kind, so they must not be renamed or scoped away.

In [3]:
# The system consists of six atoms, namely a water dimer:
atom2kind

['O', 'H', 'H', 'O', 'H', 'H']

In [4]:
# Samples for Hydrogen atoms:
samples['H']

[{'rel_coords': array([[ 0.3425041 ,  0.92194901,  0.15595922],
         [ 0.        ,  0.        ,  0.        ],
         [ 0.52840828,  1.27305239, -0.72802381],
         [ 2.6687522 ,  0.7242059 ,  1.88612184],
         [ 3.17263631, -0.03532663,  1.66453891],
         [ 2.00662405,  0.76007126,  1.18317671]]),
  'xblock': array([[-0.88063604, -0.46333423, -0.09075214,  0.00440029, -0.03911771],
         [-0.15097514,  0.50220279, -0.78525686,  0.01779201, -0.32875757],
         [ 0.0582941 , -0.0833491 , -0.01659599, -0.97781942, -0.18232613],
         [ 0.01481885, -0.02151606, -0.3815705 , -0.16234312,  0.9095979 ]])},
 {'rel_coords': array([[-0.18590418, -0.35110337,  0.88398302],
         [-0.52840828, -1.27305239,  0.72802381],
         [ 0.        ,  0.        ,  0.        ],
         [ 2.14034392, -0.54884648,  2.61414565],
         [ 2.64422803, -1.30837902,  2.39256272],
         [ 1.47821578, -0.51298113,  1.91120052]]),
  'xblock': array([[ 0.88227228,  0.46069934, -0.05

In [5]:
# Samples for Oxygen atoms:
samples['O']

[{'rel_coords': array([[ 0.        ,  0.        ,  0.        ],
         [-0.3425041 , -0.92194901, -0.15595922],
         [ 0.18590418,  0.35110337, -0.88398302],
         [ 2.3262481 , -0.19774311,  1.73016262],
         [ 2.83013221, -0.95727564,  1.50857969],
         [ 1.66411996, -0.16187776,  1.0272175 ]]),
  'xblock': array([[ 0.97458692, -0.1563819 , -0.0671153 , -0.1426199 , -0.01318631,
           0.00820553,  0.02048499,  0.00374336,  0.00829572, -0.00161727,
           0.00595436, -0.00124107, -0.00854245],
         [-0.01343772, -0.00572206,  0.79812598, -0.49398908,  0.33223322,
           0.06268018, -0.0375763 ,  0.02582455, -0.01035123, -0.023253  ,
           0.03539641, -0.01275172,  0.01733211],
         [-0.15376477, -0.07710096, -0.45828867, -0.84276595, -0.16160562,
          -0.07037928, -0.13099804, -0.02582749,  0.00863505,  0.0206023 ,
           0.02287792,  0.00282344, -0.01286454],
         [-0.00297852,  0.00097801, -0.36188119,  0.0267922 ,  0.90881859,

In [6]:
# Each sample has coordinates relative to the atom for which the pao basis was optimized:
samples['H'][0]['rel_coords']

array([[ 0.3425041 ,  0.92194901,  0.15595922],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.52840828,  1.27305239, -0.72802381],
       [ 2.6687522 ,  0.7242059 ,  1.88612184],
       [ 3.17263631, -0.03532663,  1.66453891],
       [ 2.00662405,  0.76007126,  1.18317671]])

In [7]:
# Each sample has contraction coefficients of shape (pao_basis_size, prim_basis_size):
samples['H'][0]['xblock']

array([[-0.88063604, -0.46333423, -0.09075214,  0.00440029, -0.03911771],
       [-0.15097514,  0.50220279, -0.78525686,  0.01779201, -0.32875757],
       [ 0.0582941 , -0.0833491 , -0.01659599, -0.97781942, -0.18232613],
       [ 0.01481885, -0.02151606, -0.3815705 , -0.16234312,  0.9095979 ]])

In [8]:
# So, the first pao basis vector of the first Hydrogen sample is:
samples['H'][0]['xblock'][0,:]

array([-0.88063604, -0.46333423, -0.09075214,  0.00440029, -0.03911771])

In [9]:
# For the pao basis I chose a size of four for both Hydrogen and Oxygen:
# (axis 0 of an xblock is pao_basis_size, i.e. one row per pao basis vector)
assert samples['H'][0]['xblock'].shape[0] == 4
assert samples['O'][0]['xblock'].shape[0] == 4

In [10]:
# The primary basis set is organized in shells (what you call channels).
# The basis set used here is DZVP-MOLOPT-GTH. For each element the list below
# gives the number of shells per angular momentum, in the order [s, p, d]:
prim_basis_shells = dict(
    H=[2, 1, 0],  # two s-shells, one p-shell, no d-shells
    O=[2, 2, 1],  # two s-shells, two p-shells, one d-shell
)

In [11]:
# From the number of shells one can calculate the size of the primary basis:
def calc_basis_size(shells):
    """Return the number of primary basis functions for the given shell counts.

    ``shells[l]`` is the number of shells with angular momentum ``l``
    (index 0 = s, 1 = p, 2 = d, ...). Every shell of angular momentum ``l``
    contributes ``2*l + 1`` basis functions, one for each m in ``-l..l``.

    The list may have any length: a shorter list simply has no higher
    angular momenta, and entries beyond d (f, g, ...) are counted as well.
    For the three-entry [s, p, d] lists used in this notebook the result is
    ``s + 3*p + 5*d``.
    """
    return sum(
        (2 * ang_mom + 1) * n_shells for ang_mom, n_shells in enumerate(shells)
    )

# Axis 1 of an xblock is prim_basis_size, so it must match the size derived from the shells.
assert calc_basis_size(prim_basis_shells['H']) == samples['H'][0]['xblock'].shape[1]
assert calc_basis_size(prim_basis_shells['O']) == samples['O'][0]['xblock'].shape[1]

In [12]:
# The basis functions are enumerated in the following way:
def list_basis_functions(shells):
    """Return one "l=..,m=.." label per primary basis function, in storage order.

    ``shells[l]`` is the number of shells with angular momentum ``l``.
    Labels are grouped by increasing l; within one l the shells follow one
    another, and each shell lists m from -l up to +l.
    """
    return [
        "l={},m={}".format(ang_mom, m)
        for ang_mom, n_shells in enumerate(shells)
        for _ in range(n_shells)  # one pass per shell of this angular momentum
        for m in range(-ang_mom, ang_mom + 1)
    ]

# There is one label per primary basis function, so the number of labels
# must equal axis 1 (prim_basis_size) of the xblock.
assert len(list_basis_functions(prim_basis_shells['H'])) == samples['H'][0]['xblock'].shape[1]
assert len(list_basis_functions(prim_basis_shells['O'])) == samples['O'][0]['xblock'].shape[1]

# Show the enumeration for Hydrogen.
print(list_basis_functions(prim_basis_shells['H']))

['l=0,m=0', 'l=0,m=0', 'l=1,m=-1', 'l=1,m=0', 'l=1,m=1']
