In [2]:
import numpy as np
import scipy as sp
import scipy.linalg
import matplotlib.pyplot as plt
import time
from scipy.optimize import minimize
import multiprocessing as mp
from MD_Parser import *
import math

### Define helper functions from previous version.

In [2]:
# get two body, one component kernel
# x1, x2 are lists containing the central atom type and chemical environments
# containing distances, Cartesian coordinate differences, and atom types
def tb_kern(x1, x2, d1, d2, sig, ls):
    # loop over atoms
    tot_kern = 0
    
    # record central atom types
    c1 = x1[0][0]
    c2 = x2[0][0]
    
    # get size of environments
    l1 = len(x1[1][0])
    l2 = len(x2[1][0])
    
    for m in range(l1):
        e1 = x1[1][4][m]
        dist1 = x1[1][0][m]
        coord1 = x1[1][d1][m]
        for n in range(l2):
            e2 = x2[1][4][n]
            dist2 = x2[1][0][n]
            coord2 = x2[1][d2][n]
            
            # check that atom types match
            if c1==c2 and e1==e2:
                # calculate covariance for current pair of atoms
                base = sig**2*np.exp(-(dist1-dist2)**2/(2*ls**2))
                doub_fac = -ls**2+(dist1-dist2)**2
                kern = -base*coord1*coord2*doub_fac/(dist1*dist2*ls**4)

                tot_kern+=kern
                
    return tot_kern

# get 3Nx3N noiseless kernel matrix
# assume all 3 force components are known for each configuration
# X is assumed to be a list of environments
def get_K(X,sig,ls,noise):
    # initialize matrix
    size = len(X)*3
    K = np.zeros([size, size])
    
    # calculate elements
    for m in range(size):
        x1 = X[int(math.floor(m/3))]
        d1 = m%3+1
        for n in range(m,size):
            x2 = X[int(math.floor(n/3))]
            d2 = n%3+1
           
            # calculate kernel
            cov = tb_kern(x1, x2, d1, d2, sig, ls)
            K[m,n] = cov
            K[n,m] = cov
    # perform cholesky decomposition
    L = np.linalg.cholesky(K+noise**2*np.eye(size))
    
    return K, L

# get row of covariances
def get_cov_row(x1, d1, m, size, X, sig, ls):
    covs = []
    for n in range(m,size):
        x2 = X[int(math.floor(n/3))]
        d2 = n%3+1

        # calculate kernel
        cov = tb_kern(x1, x2, d1, d2, sig, ls)
        covs.append(cov)
        
    return covs

# get covariance matrix with multiple processors
def get_K_par(X,sig,ls,noise,pool):
    # initialize matrix
    size = len(X)*3
    K = np.zeros([size, size])

    # calculate elements
    results = []
    for m in range(size):
        x1 = X[int(math.floor(m/3))]
        d1 = m%3+1

        results.append(pool.apply_async(get_cov_row, \
            args=(x1, d1, m, size, X, sig, ls)))

    # construct covariance matrix
    for m in range(size):
        res_cur = results[m].get()
        for n in range(m,size):
            K[m,n]=res_cur[n-m]
            K[n,m]=res_cur[n-m]

    # perform cholesky decomposition
    L = np.linalg.cholesky(K+noise**2*np.eye(size))
    
    return K,L

# get kernel vector
def get_kv(X,x,d1,sig,ls):
    size = len(X)*3
    kv=np.zeros([size,1])
    for m in range(size):
        x2 = X[int(math.floor(m/3))]
        d2 = m%3+1
        kv[m]=tb_kern(x,x2,d1,d2,sig,ls)
        
    return kv

# get alpha
def get_alpha(K,L,y): 
    # get alpha
    ts1 = sp.linalg.solve_triangular(L,y,lower=True)
    alpha = sp.linalg.solve_triangular(L.transpose(),ts1)
            
    return alpha

# get likelihood
def get_like(K,L,y,alpha): 
    # get log marginal likelihood
    like = -(1/2)*np.matmul(y.transpose(),alpha)-\
            np.sum(np.log(np.diagonal(L)))-\
            np.log(2*np.pi)*K.shape[1]/2
            
    return like

# get likelihood as a function of hyperparameters
def like_hyp(hyp,X,y):
    # unpack hyperparameters
    sig = hyp[0]
    ls = hyp[1]
    noise = hyp[2]
    
    # calculate likelihood
    K, L = get_K(X,sig,ls,noise)
    alpha = get_alpha(K,L,y)
    like = get_like(K,L,y,alpha)
    
    # print
    print('sig: '+str(sig))
    print('ls: '+str(ls))
    print('noise: '+str(noise))
    print('log like: '+str(like))
    
    return like

# get minus likelihood as a function of hyperparameters
def minus_like_hyp(hyp,X,y):
    like = like_hyp(hyp,X,y)
    minus_like = -like
    return minus_like

# make GP prediction with SE kernel
def GP_pred(X,y,K,L,alpha,sig,ls,xt,d):
    # get kernel vector
    kv = get_kv(X,xt,d,sig,ls)
    
    # get predictive mean
    f = np.matmul(kv.transpose(),alpha)
    
    # get predictive variance
    v = sp.linalg.solve_triangular(L,kv,lower=True)
    self_kern = tb_kern(xt, xt, d, d, sig, ls)
    var = self_kern - np.matmul(v.transpose(),v)
    
    return f, var

# given list of Cartesian coordinates, return list of atomic environments
def get_cutoff_vecs(vec, brav_mat, brav_inv, vec1, vec2, vec3, cutoff):
    # get bravais coefficients
    coeff = np.matmul(brav_inv, vec)
    
    # get bravais coefficients for atoms within one super-super-cell
    coeffs = [[],[],[]]
    for n in range(3):
        coeffs[n].append(coeff[n])
        coeffs[n].append(coeff[n]-1)
        coeffs[n].append(coeff[n]+1)
        coeffs[n].append(coeff[n]-2)
        coeffs[n].append(coeff[n]+2)

    # get vectors within cutoff
    vecs = []
    dists = []
    for m in range(len(coeffs[0])):
        for n in range(len(coeffs[1])):
            for p in range(len(coeffs[2])):
                vec_curr = coeffs[0][m]*vec1 + coeffs[1][n]*vec2 + coeffs[2][p]*vec3
                
                dist = np.linalg.norm(vec_curr)

                if dist < cutoff:
                    vecs.append(vec_curr)
                    dists.append(dist)
                    
    return vecs, dists

# given list of cartesian coordinates, get chemical environment of specified atom
# pos = list of cartesian coordinates
# typs = list of atom types
def get_env(pos, typs, atom, brav_mat, brav_inv, vec1, vec2, vec3, cutoff):
    env = [[],[[],[],[],[],[]]]
    pos_atom = np.array(pos[atom]).reshape(3,1)
    typ = typs[atom]
    
    # append central atom type
    env[0].append(typ)
    
    # loop through positions to find all atoms and images in the neighborhood
    for n in range(len(pos)):
        # position relative to reference atom
        diff_curr = np.array(pos[n]).reshape(3,1) - pos_atom

        # get images within cutoff
        vecs, dists = get_cutoff_vecs(diff_curr, brav_mat, \
            brav_inv, vec1, vec2, vec3, cutoff)

        for vec, dist in zip(vecs, dists):
            # ignore self interaction
            if dist != 0:
                # append distance
                env[1][0].append(dist)
                
                # append coordinate differences
                env[1][1].append(-vec[0][0])
                env[1][2].append(-vec[1][0])
                env[1][3].append(-vec[2][0])
                
                # append atom type
                env[1][4].append(typs[n])

    return env

# given list of cartesian coordinates, return list of chemical environments
def get_envs(pos, typs, brav_mat, brav_inv, vec1, vec2, vec3, cutoff):
    envs = []
    for n in range(len(pos)):
        atom = n
        env = get_env(pos, typs, atom, brav_mat, brav_inv, vec1, vec2, vec3, cutoff)
        envs.append(env)
        
    return envs

# convert list of triplets to column vector
def fc_conv(fcs):
    comp_len = len(fcs)*3
    comps = []
    for n in range(comp_len):
        fc_ind = int(math.floor(n/3))
        d = n%3
        comps.append(fcs[fc_ind][d])
    
    return np.array(comps).reshape(comp_len,1)

# rotate input vector by angle theta
def rotate_xy(x, theta):
    new_x = np.zeros(x.shape)
    for n in range(x.shape[1]):
        vec = x[:,n]
        x_new = np.cos(theta)*vec[1]-np.sin(theta)*vec[2]
        y_new = np.sin(theta)*vec[1]+np.cos(theta)*vec[2]
        new_x[0,n]=x[0,n]
        new_x[1,n]=x_new
        new_x[2,n]=y_new
        new_x[3,n]=x[3,n]
    return new_x

### Parse multicomponent file.

In [97]:
outfile ='/Users/jonpvandermause/Research/GP/Datasets/SiC_MD/sic_md.out'
Si_MD_Parsed = parse_qe_pwscf_md_output(outfile)

In [98]:
# set crystal structure
dim = 3
alat = 4.344404578
unit_cell = [[0.0, alat/2, alat/2], [alat/2, 0.0, alat/2], \
                    [alat/2, alat/2, 0.0]] # fcc primitive cell
unit_pos = [['Si',[0,0,0]],['Si',[alat/4, alat/4, alat/4]]]
brav_mat = np.array([[0.0, alat/2, alat/2], [alat/2, 0.0, alat/2], \
                    [alat/2, alat/2, 0.0]])*dim
brav_inv = np.linalg.inv(brav_mat)

# bravais vectors
vec1 = brav_mat[:,0].reshape(3,1)
vec2 = brav_mat[:,1].reshape(3,1)
vec3 = brav_mat[:,2].reshape(3,1)

In [99]:
# build force field from single snapshot
cutoff = 4.5
pos = Si_MD_Parsed[1]['positions']
typs = Si_MD_Parsed[1]['elements']
envs = get_envs(pos, typs, brav_mat, brav_inv, vec1, vec2, vec3, cutoff)
fcs = fc_conv(Si_MD_Parsed[2]['forces'])

In [100]:
len(envs[0][1][0])

33

### Check derivative formula.

In [101]:
ri1 = 1
ri2 = 2
ri3 = 3

ci1 = 2
ci2 = 3
ci3 = 4

rj1 = 4
rj2 = 5
rj3 = 6

cj1 = 5
cj2 = 6
cj3 = 7

ri_vec = np.array([[ri1],[ri2],[ri3]])
ci_vec = np.array([[ci1],[ci2],[ci3]])
rj_vec = np.array([[rj1],[rj2],[rj3]])
cj_vec = np.array([[cj1],[cj2],[cj3]])

sig = 1
ls = 1

In [102]:
def get_k3_vec(ri, rj, sig, ls):
    k3 = sig**2*np.exp(-np.sum((ri_vec-rj_vec)**2)/(2*sig**2))
    return k3

In [103]:
def get_k3(ri1,ri2,ri3,rj1,rj2,rj3,sig,ls):
    k3 = sig**2*np.exp(-((ri1-rj1)**2+(ri2-rj2)**2+(ri3-rj3)**2)/(2*sig**2))
    return k3

In [104]:
def get_k3_math(ri1,ri2,ri3,rj1,rj2,rj3,sig,ls):
    k3 = sig**2*math.exp(-((ri1-rj1)**2+(ri2-rj2)**2+(ri3-rj3)**2)/(2*ls**2))
    return k3

In [105]:
# test array function
it = 100000
times = []

for n in range(it):
    time1 = time.time()
    test = get_k3_vec(ri_vec, rj_vec, sig, ls)
    time2 = time.time()
    times.append(time2-time1)
print(np.mean(times))

7.083418369293213e-06


In [106]:
# test np function
it = 100000
times = []

for n in range(it):
    time1 = time.time()
    test = get_k3(ri1,ri2,ri3,rj1,rj2,rj3,sig,ls)
    time2 = time.time()
    times.append(time2-time1)
print(np.mean(times))

2.6282382011413576e-06


In [107]:
# test math function
it = 100000
times = []

for n in range(it):
    time1 = time.time()
    test = get_k3_math(ri1,ri2,ri3,rj1,rj2,rj3,sig,ls)
    time2 = time.time()
    times.append(time2-time1)
print(np.mean(times))

1.9296956062316896e-06


In [108]:
# test power function
x = 2
y = 3

times = []
for n in range(it):
    time1 = time.time()
    test = math.pow(2, 3)
    time2 = time.time()
    times.append(time2-time1)
print(np.mean(times))

3.36155891418457e-07


In [109]:
# test power function
x = 2
y = 3

times = []
for n in range(it):
    time1 = time.time()
    test = 2**3
    time2 = time.time()
    times.append(time2-time1)
print(np.mean(times))

1.5333414077758789e-07


In [110]:
# test math exp
times = []
for n in range(1000):
    time1 = time.time()
    test = math.exp(2)
    time2 = time.time()
    time_diff = time2 - time1
    times.append(time_diff)
    
np.mean(times)

2.2029876708984376e-07

In [111]:
# test np exp
times = []
for n in range(1000):
    time1 = time.time()
    test = np.exp(2)
    time2 = time.time()
    time_diff = time2 - time1
    times.append(time_diff)
    
np.mean(times)

1.2617111206054687e-06

In [112]:
# test nothing
times = []
for n in range(1000000):
    time1 = time.time()
    time2 = time.time()
    time_diff = time2 - time1
    times.append(time_diff)
    
np.mean(times)

1.2826013565063478e-07

In [113]:
# test addition
times = []
for n in range(1000000):
    time1 = time.time()
    test = 1+1
    time2 = time.time()
    time_diff = time2 - time1
    times.append(time_diff)
    
np.mean(times)

1.3772249221801758e-07

In [114]:
# define three-body derivative function
def k3_derv(ri1,ri2,ri3,ci1,ci2,ci3,rj1,rj2,rj3,cj1,cj2,cj3,sig,ls):
    k3 = get_k3_math(ri1,ri2,ri3,rj1,rj2,rj3,sig,ls)
    fac1 = (ci1/ri1)*(cj1/rj1)+(ci2/ri2)*(cj2/rj2)+(ci3/ri3)*(cj3/rj3)
    fac2 = (ri1-rj1)*(ci1/ri1)+(ri2-rj2)*(ci2/ri2)+(ri3-rj3)*(ci3/ri3)
    fac3 = (ri1-rj1)*(cj1/rj1)+(ri2-rj2)*(cj2/rj2)+(ri3-rj3)*(cj3/rj3)
    derv = (1/sig**4)*k3*(sig**2*fac1-fac2*fac3)
    return derv

In [115]:
# test kernel derivative
times = []
for n in range(100000):
    time1 = time.time()
    test = k3_derv(ri1,ri2,ri3,ci1,ci2,ci3,rj1,rj2,rj3,cj1,cj2,cj3,sig,ls)
    time2 = time.time()
    time_diff = time2 - time1
    times.append(time_diff)
    
np.mean(times)

3.3971261978149415e-06

In [116]:
# test k3 derivative against mathematica
x0=1
y0=2
z0=3
xi1=4
xi2=5
xi3=6
yi1=7
yi2=8
yi3=9
zi1=1
zi2=2
zi3=3
x0p=4
y0p=5
z0p=6
xj1=7
xj2=8
xj3=9
yj1=1
yj2=2
yj3=3
zj1=4
zj2=5
zj3=6
sig=1

ri1 = np.sqrt((xi1-x0)**2+(yi1-y0)**2+(zi1-z0)**2)
ri2 = np.sqrt((xi2-x0)**2+(yi2-y0)**2+(zi2-z0)**2)
ri3 = np.sqrt((xi3-x0)**2+(yi3-y0)**2+(zi3-z0)**2)

rj1 = np.sqrt((xj1-x0p)**2+(yj1-y0p)**2+(zj1-z0p)**2)
rj2 = np.sqrt((xj2-x0p)**2+(yj2-y0p)**2+(zj2-z0p)**2)
rj3 = np.sqrt((xj3-x0p)**2+(yj3-y0p)**2+(zj3-z0p)**2)

ci1 = yi1-y0
ci2 = yi2-y0
ci3 = yi3-y0

cj1 = xj1-x0p
cj2 = xj2-x0p
cj3 = xj3-x0p

sig = 1
ls = 1

# test kernel derivative
times = []
for n in range(100000):
    time1 = time.time()
    test = k3_derv(ri1,ri2,ri3,ci1,ci2,ci3,rj1,rj2,rj3,cj1,cj2,cj3,sig,ls)
    time2 = time.time()
    time_diff = time2 - time1
    times.append(time_diff)

print(test)
np.mean(times)

-0.009306122227359713


5.953099727630615e-06

In [117]:
0.001487/5.958898067474365e-06

249.54278176304734

### Develop efficient way to sum over permutation group.

In [118]:
# test list sort
test1 = ['Si','C']
test2 = ['C','Si']
sorted(test1) == sorted(test2)

times = []
for n in range(100000):
    time1 = time.time()
    sorted(test1) == sorted(test2)
    time2 = time.time()
    time_diff = time2 - time1
    times.append(time_diff)

np.mean(times)

7.413220405578613e-07

In [119]:
def perm_test(ti1,ti2,ti3,tj1,tj2,tj3):
    if (sorted(ti1)==sorted(tj1)) and \
    (sorted(ti2)==sorted(tj2)) and \
    (sorted(ti3)==sorted(tj3)):
        return True
    else:
        return False

In [120]:
ti1 = ['Si','C']
ti2 = ['Si','Si']
ti3 = ['C','Si']

tj1 = ['C','Si']
tj2 = ['Si','Si']
tj3 = ['Si','Si']

# test perm test
test1 = ['Si','C']
test2 = ['C','Si']
sorted(test1) == sorted(test2)

times = []
for n in range(100000):
    time1 = time.time()
    perm_test(ti1,ti2,ti3,tj1,tj2,tj3)
    time2 = time.time()
    time_diff = time2 - time1
    times.append(time_diff)

np.mean(times)

1.818244457244873e-06

In [121]:
def perm_sum(ri1,ri2,ri3,ci1,ci2,ci3,ti1,ti2,ti3,\
             rj1,rj2,rj3,cj1,cj2,cj3,tj1,tj2,tj3,\
             sig,ls):
    kern = 0
    # sum over six permutations of second triplet
    # 1,2,3
    if perm_test(ti1,ti2,ti3,tj1,tj2,tj3):
        kern+=k3_derv(ri1,ri2,ri3,ci1,ci2,ci3,rj1,rj2,rj3,cj1,cj2,cj3,sig,ls)
    # 1,3,2
    if perm_test(ti1,ti2,ti3,tj1,tj3,tj2):
        kern+=k3_derv(ri1,ri2,ri3,ci1,ci2,ci3,rj1,rj3,rj2,cj1,cj3,cj2,sig,ls)
    # 2,1,3
    if perm_test(ti1,ti2,ti3,tj2,tj1,tj3):
        kern+=k3_derv(ri1,ri2,ri3,ci1,ci2,ci3,rj2,rj1,rj3,cj2,cj1,cj3,sig,ls)
    # 3,1,2
    if perm_test(ti1,ti2,ti3,tj3,tj1,tj2):
        kern+=k3_derv(ri1,ri2,ri3,ci1,ci2,ci3,rj3,rj1,rj2,cj3,cj1,cj2,sig,ls)
    # 2,3,1
    if perm_test(ti1,ti2,ti3,tj2,tj3,tj1):
        kern+=k3_derv(ri1,ri2,ri3,ci1,ci2,ci3,rj2,rj3,rj1,cj2,cj3,cj1,sig,ls)
    # 3,2,1
    if perm_test(ti1,ti2,ti3,tj3,tj2,tj1):
        kern+=k3_derv(ri1,ri2,ri3,ci1,ci2,ci3,rj3,rj2,rj1,cj3,cj2,cj1,sig,ls)
    return kern

In [122]:
# test perm sum
ri1=1
ri2=2
ri3=3
ci1=4
ci2=5
ci3=6
ti1=['C','C']
ti2=['C','C']
ti3=['C','C']
rj1=7
rj2=8
rj3=9
cj1=1
cj2=2
cj3=3
tj1=['C','C']
tj2=['C','C']
tj3=['C','C']
sig=1
ls=1

times = []
for n in range(100000):
    time1 = time.time()
    test=perm_sum(ri1,ri2,ri3,ci1,ci2,ci3,ti1,ti2,ti3,\
             rj1,rj2,rj3,cj1,cj2,cj3,tj1,tj2,tj3,\
             sig,ls)
    time2 = time.time()
    time_diff = time2 - time1
    times.append(time_diff)

print(test)
np.mean(times)

-1.4792437143746116e-21


3.0006508827209473e-05

In [123]:
test = {'label':{'test2':'hello','test3':[1,2,3,4,5]}}

In [124]:
test['label']['test3']

[1, 2, 3, 4, 5]

### Develop new env data structure that includes array of distances.

In [125]:
# given list of cartesian coordinates, get chemical environment of specified atom
# pos = list of cartesian coordinates
# typs = list of atom types
def get_env(pos, typs, atom, brav_mat, brav_inv, vec1, vec2, vec3, cutoff):
    env = [[],[[],[],[],[],[]]]
    pos_atom = np.array(pos[atom]).reshape(3,1)
    typ = typs[atom]
    
    # append central atom type
    env[0].append(typ)
    
    # loop through positions to find all atoms and images in the neighborhood
    for n in range(len(pos)):
        # position relative to reference atom
        diff_curr = np.array(pos[n]).reshape(3,1) - pos_atom

        # get images within cutoff
        vecs, dists = get_cutoff_vecs(diff_curr, brav_mat, \
            brav_inv, vec1, vec2, vec3, cutoff)

        for vec, dist in zip(vecs, dists):
            # ignore self interaction
            if dist != 0:
                # append distance
                env[1][0].append(dist)
                
                # append coordinate differences
                env[1][1].append(-vec[0][0])
                env[1][2].append(-vec[1][0])
                env[1][3].append(-vec[2][0])
                
                # append atom type
                env[1][4].append(typs[n])

    return env

In [126]:
# given list of cartesian coordinates, get chemical environment of specified atom
# pos = list of cartesian coordinates
# typs = list of atom types
def get_env_struc(pos, typs, atom, brav_mat, brav_inv, vec1, vec2, vec3, cutoff):
    pos_atom = np.array(pos[atom]).reshape(3,1)
    typ = typs[atom]
    env = {'central_atom':typ, 'dists':[],'xs':[],'ys':[],'zs':[],'types':[]}
    
    # loop through positions to find all atoms and images in the neighborhood
    for n in range(len(pos)):
        # position relative to reference atom
        diff_curr = np.array(pos[n]).reshape(3,1) - pos_atom

        # get images within cutoff
        vecs, dists = get_cutoff_vecs(diff_curr, brav_mat, \
            brav_inv, vec1, vec2, vec3, cutoff)

        for vec, dist in zip(vecs, dists):
            # ignore self interaction
            if dist != 0:
                # append distance
                env['dists'].append(dist)
                
                # append coordinate differences
                env['xs'].append(vec[0][0])
                env['ys'].append(vec[1][0])
                env['zs'].append(vec[2][0])
                
                # append atom type
                env['types'].append(typs[n])

    return env

In [127]:
atom = 1
test = get_env_struc(pos, typs, atom, brav_mat, brav_inv, vec1, vec2, vec3, cutoff)

In [128]:
ind1 = 0
ind2 = 1

c = test['central_atom']

r1 = test['dists'][ind1]
t1 = test['types'][ind1]
x1 = test['xs'][ind1]
y1 = test['ys'][ind1]
z1 = test['zs'][ind1]

r2 = test['dists'][ind2]
t2 = test['types'][ind2]
x2 = test['xs'][ind2]
y2 = test['ys'][ind2]
z2 = test['zs'][ind2]

# calculate third distance
r3 = np.sqrt((x1-x2)**2+(y1-y2)**2+(z1-z2)**2)

# define unordered labels
trip_labs_init = [c, t1, t2]
trip_dists_init = [r1,r2,r3]
trip_xs_init = [x1, x2, 0]
trip_ys_init = [y1, y2, 0]
trip_zs_init = [z1, z2, 0]

# order the labels
# all atoms the same
if (c==t1) and (c==t2):
    trip_type = 1
# two alike, one different: put different atom first
if (c==t1) and (c!=t2):
    trip_type = 2
    trip_labs = [t2,c,t1]
    trip_dists = [r2,r3,r1]
    trip_xs = [x2,0,x1]
    trip_ys = [y2,0,y1]
    trip_zs = [z2,0,z1]  
if (c==t2) and (c!=t1):
    trip_type = 2
    trip_labs = [t1,t2,c]
    trip_dists = [r3,r1,r2]
    trip_xs = [0,x1,x2]
    trip_ys = [0,y1,y2]
    trip_zs = [0,z1,z2]
if (t1==t2) and (c!=t1):
    trip_type = 2
# all atoms different: sort atom labels alphabetically
if (c!=t1) and (c!=t2) and (t1!=t2):
    trip_type = 3
    inds = np.argsort(trip_labs)
    # check all 6 possible orderings
    if inds[0]==0 and inds[1]==1 and inds[2]==2:
        pass
    if inds[0]==0 and inds[1]==2 and inds[2]==1:
        trip_labs = [c,t2,t1]
        trip_dists = [r2,r1,r3]
        trip_xs = [x2,x1,0]
        trip_ys = [y2,y1,0]
        trip_zs = [z2,z1,0]
    if inds[0]==1 and inds[1]==0 and inds[2]==2:
        trip_labs = [t1,c,t2]
        trip_dists = [r1,r3,r2]
        trip_xs = [x1,0,x2]
        trip_ys = [y1,0,y2]
        trip_zs = [z1,0,z2]
    if inds[0]==1 and inds[1]==2 and inds[2]==0:
        trip_labs = [t1,t2,c]
        trip_dists = [r3,r1,r2]
        trip_xs = [0,x1,x2]
        trip_ys = [0,y1,y2]
        trip_zs = [0,z1,z2]
    if inds[0]==2 and inds[1]==0 and inds[2]==1:
        trip_labs = [t2,c,t1]
        trip_dists = [r2,r3,r1]
        trip_xs = [x2,0,x1]
        trip_ys = [y2,0,y1]
        trip_zs = [z2,0,z1]
    if inds[0]==2 and inds[1]==1 and inds[2]==0:
        trip_labs = [t2,t1,c]
        trip_dists = [r3,r2,r1]
        trip_xs = [0,x2,x1]
        trip_ys = [0,y2,y1]
        trip_zs = [0,z2,z1]

In [129]:
print(trip_labs_init)
print(trip_dists_init)
print(trip_xs_init)
print(trip_ys_init)
print(trip_zs_init)

['C', 'Si', 'Si']
[1.9585744543984007, 1.6860527982322402, 3.0577115688525995]
[-0.858030091, 0.961059953, 0]
[-1.3834083060000004, 1.0658480969999997, 0]
[-1.089026958, -0.8849325619999999, 0]


In [132]:
# ordering convention: atoms (a,b,c) correspond to distances (ab, ac, bc)
def order_triplet(c,r1,t1,x1,y1,z1,r2,t2,x2,y2,z2):
    # calculate third distance
    r3 = np.sqrt((x1-x2)**2+(y1-y2)**2+(z1-z2)**2)
    
    trip_labs_init = [c,t1,t2]

    # order the labels
    # all atoms the same
    if (c==t1) and (c==t2):
        trip_type = 1
        trip_labs = [c,t1,t2]
        trip_dists = [r1,r2,r3]
        trip_xs = [x1,x2,0]
        trip_ys = [y1,y2,0]
        trip_zs = [z1,z2,0]  
    # two alike, one different: put different atom first
    if (c==t1) and (c!=t2):
        trip_type = 2
        trip_labs = [t2,c,t1]
        trip_dists = [r2,r3,r1]
        trip_xs = [x2,0,x1]
        trip_ys = [y2,0,y1]
        trip_zs = [z2,0,z1]  
    if (c==t2) and (c!=t1):
        trip_type = 2
        trip_labs = [t1,t2,c]
        trip_dists = [r3,r1,r2]
        trip_xs = [0,x1,x2]
        trip_ys = [0,y1,y2]
        trip_zs = [0,z1,z2]
    if (t1==t2) and (c!=t1):
        trip_type = 2
        trip_labs = [c,t1,t2]
        trip_dists = [r1,r2,r3]
        trip_xs = [x1,x2,0]
        trip_ys = [y1,y2,0]
        trip_zs = [z1,z2,0] 
    # all atoms different: sort atom labels alphabetically
    if (c!=t1) and (c!=t2) and (t1!=t2):
        trip_type = 3
        inds = np.argsort(trip_labs_init)
        # check all 6 possible orderings
        if inds[0]==0 and inds[1]==1 and inds[2]==2:
            trip_labs = [c,t1,t2]
            trip_dists = [r1,r2,r3]
            trip_xs = [x1,x2,0]
            trip_ys = [y1,y2,0]
            trip_zs = [z1,z2,0] 
        if inds[0]==0 and inds[1]==2 and inds[2]==1:
            trip_labs = [c,t2,t1]
            trip_dists = [r2,r1,r3]
            trip_xs = [x2,x1,0]
            trip_ys = [y2,y1,0]
            trip_zs = [z2,z1,0]
        if inds[0]==1 and inds[1]==0 and inds[2]==2:
            trip_labs = [t1,c,t2]
            trip_dists = [r1,r3,r2]
            trip_xs = [x1,0,x2]
            trip_ys = [y1,0,y2]
            trip_zs = [z1,0,z2]
        if inds[0]==1 and inds[1]==2 and inds[2]==0:
            trip_labs = [t1,t2,c]
            trip_dists = [r3,r1,r2]
            trip_xs = [0,x1,x2]
            trip_ys = [0,y1,y2]
            trip_zs = [0,z1,z2]
        if inds[0]==2 and inds[1]==0 and inds[2]==1:
            trip_labs = [t2,c,t1]
            trip_dists = [r2,r3,r1]
            trip_xs = [x2,0,x1]
            trip_ys = [y2,0,y1]
            trip_zs = [z2,0,z1]
        if inds[0]==2 and inds[1]==1 and inds[2]==0:
            trip_labs = [t2,t1,c]
            trip_dists = [r3,r2,r1]
            trip_xs = [0,x2,x1]
            trip_ys = [0,y2,y1]
            trip_zs = [0,z2,z1]
            
    return trip_type, trip_labs, trip_dists, trip_xs, trip_ys, trip_zs

In [133]:
# test triplet ordering function
c = 'c'

t1 = 'c'
x1 = 1
y1 = 2
z1 = 3
r1 = np.sqrt(x1**2+y1**2+z1**2)

t2 = 'c'
x2 = 4
y2 = 5
z2 = 6
r2 = np.sqrt(x2**2+y2**2+z2**2)

r3 = np.sqrt((x1-x2)**2+(y1-y2)**2+(z1-z2)**2)

trip_type, trip_labs, trip_dists, trip_xs, trip_ys, trip_zs = order_triplet(c,r1,t1,x1,y1,z1,r2,t2,x2,y2,z2)

print('unordered:')
print([c,t1,t2])
print([r1,r2,r3])
print([x1,x2,0])
print([y1,y2,0])
print([z1,z2,0])

print('ordered:')
print(trip_labs)
print(trip_dists)
print(trip_xs)
print(trip_ys)
print(trip_zs)
print(trip_type)

unordered:
['c', 'c', 'c']
[3.7416573867739413, 8.774964387392123, 5.196152422706632]
[1, 4, 0]
[2, 5, 0]
[3, 6, 0]
ordered:
['c', 'c', 'c']
[3.7416573867739413, 8.774964387392123, 5.196152422706632]
[1, 4, 0]
[2, 5, 0]
[3, 6, 0]
1


In [134]:
test.keys()

dict_keys(['central_atom', 'dists', 'xs', 'ys', 'zs', 'types'])

In [137]:
# create triplet dictionary from 2-body dictionary
def get_trip_dict(tb_dict):
    trip_dict = {'typs':[],'labs':[],'dists':[],'xs':[],'ys':[],'zs':[]}


    # pull relevant information from 2-body dictionary
    dist_no = len(tb_dict['dists'])
    c = tb_dict['central_atom']
    dists = tb_dict['dists']
    xs = tb_dict['xs']
    ys = tb_dict['ys']
    zs = tb_dict['zs']
    types = tb_dict['types']

    for m in range(dist_no):
        r1 = dists[m]
        t1 = types[m]
        x1 = xs[m]
        y1 = ys[m]
        z1 = zs[m]
        for n in range(m,dist_no):
            r2 = dists[n]
            t2 = types[n]
            x2 = xs[n]
            y2 = ys[n]
            z2 = zs[n]

            trip_type, trip_labs, trip_dists, trip_xs, trip_ys, trip_zs = order_triplet(c,r1,t1,x1,y1,z1,r2,t2,x2,y2,z2)

            # triplet already in dictionary:
            if trip_labs in trip_dict['labs']:
                lab_ind = trip_dict['labs'].index(trip_labs)
                trip_dict['dists'][lab_ind].append(trip_dists)
                trip_dict['xs'][lab_ind].append(trip_xs)
                trip_dict['ys'][lab_ind].append(trip_ys)
                trip_dict['zs'][lab_ind].append(trip_zs)
            # or else it needs to be appended:
            else:
                trip_dict['typs'].append(trip_type)
                trip_dict['labs'].append(trip_labs)
                trip_dict['dists'].append([trip_dists])
                trip_dict['xs'].append([trip_xs])
                trip_dict['ys'].append([trip_ys])
                trip_dict['zs'].append([trip_zs])
                
    return trip_dict

In [141]:
trip_dict = get_trip_dict(test)

In [144]:
trip_dict.keys()

dict_keys(['typs', 'labs', 'dists', 'xs', 'ys', 'zs'])

In [147]:
trip_dict['xs']

[['C', 'Si', 'Si'], ['Si', 'C', 'C'], ['C', 'C', 'C']]

### Write kernel function for type 1 triplet (three atoms of the same kind).

In [14]:
def get_k3_math(ri1,ri2,ri3,rj1,rj2,rj3,sig,ls):
    k3 = sig**2*math.exp(-((ri1-rj1)**2+(ri2-rj2)**2+(ri3-rj3)**2)/(2*ls**2))
    return k3

# define three-body derivative function
def k3_derv(ri1,ri2,ri3,ci1,ci2,ci3,rj1,rj2,rj3,cj1,cj2,cj3,sig,ls):
    k3 = get_k3_math(ri1,ri2,ri3,rj1,rj2,rj3,sig,ls)
    fac1 = (ci1/ri1)*(cj1/rj1)+(ci2/ri2)*(cj2/rj2)+(ci3/ri3)*(cj3/rj3)
    fac2 = (ri1-rj1)*(ci1/ri1)+(ri2-rj2)*(ci2/ri2)+(ri3-rj3)*(ci3/ri3)
    fac3 = (ri1-rj1)*(cj1/rj1)+(ri2-rj2)*(cj2/rj2)+(ri3-rj3)*(cj3/rj3)
    derv = (1/ls**4)*k3*(ls**2*fac1-fac2*fac3)
    return derv

# define three-body derivative function
def k3_toy(ri1,ri2,ri3,ci1,ci2,ci3,rj1,rj2,rj3,cj1,cj2,cj3,sig,ls):
    derv = (sig**2/ls**4)*math.exp(-((ri1-rj1)**2+(ri2-rj2)**2+(ri3-rj3)**2)/(2*ls**2))*\
    (ls**2*(ci1*cj1+ci2*cj2+ci3*cj3)-\
     ((ri1-rj1)*ci1+(ri2-rj2)*ci2+(ri3-rj3)*ci3)*\
     ((ri1-rj1)*cj1+(ri2-rj2)*cj2+(ri3-rj3)*cj3))
    return derv

# define three-body derivative function
def k3_toy_2(ri1,ri2,ri3,ci1,ci2,ci3,rj1,rj2,rj3,cj1,cj2,cj3,d,e,f):
    a = ri1-rj1
    b = ri2-rj2
    c = ri3-rj3
    derv = d*math.exp(-f*(a**2+b**2+c**2))*(e*(ci1*cj1+ci2*cj2+ci3*cj3)-(a*ci1+b*ci2+c*ci3)*(a*cj1+b*cj2+c*cj3))
    return derv

# define three-body derivative function
def k3_toy_3(ri1,ri2,ri3,ci1,ci2,ci3,rj1,rj2,rj3,cj1,cj2,cj3,d,e,f):
    a = ri1-rj1
    b = ri2-rj2
    c = ri3-rj3
    derv = (e*(ci1*cj1+ci2*cj2+ci3*cj3)-(a*ci1+b*ci2+c*ci3)*(a*cj1+b*cj2+c*cj3))*d*math.exp(-f*(a*a+b*b+c*c))
    return derv

In [192]:
# trip type 1: all atoms the same
# slow version
def k3dt1_slow(ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f):
    kern = 0
    # sum over six permutations of second triplet
    kern+=k3_toy_3(ri1,ri2,ri3,ci1,ci2,ci3,rj1,rj2,rj3,cj1,cj2,cj3,d,e,f)
    kern+=k3_toy_3(ri1,ri2,ri3,ci1,ci2,ci3,rj1,rj3,rj2,cj1,cj3,cj2,d,e,f)
    kern+=k3_toy_3(ri1,ri2,ri3,ci1,ci2,ci3,rj2,rj1,rj3,cj2,cj1,cj3,d,e,f)
    kern+=k3_toy_3(ri1,ri2,ri3,ci1,ci2,ci3,rj3,rj1,rj2,cj3,cj1,cj2,d,e,f)
    kern+=k3_toy_3(ri1,ri2,ri3,ci1,ci2,ci3,rj2,rj3,rj1,cj2,cj3,cj1,d,e,f)
    kern+=k3_toy_3(ri1,ri2,ri3,ci1,ci2,ci3,rj3,rj2,rj1,cj3,cj2,cj1,d,e,f)
    
    return kern

In [191]:
# trip type 1: all atoms the same
def k3dt1(ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f):
    
    # define quantities that will reappear
    rsum = ri1*ri1+ri2*ri2+ri3*ri3+rj1*rj1+rj2*rj2+rj3*rj3
    
    r11 = ri1-rj1
    r12 = ri1-rj2
    r13 = ri1-rj3
    r21 = ri2-rj1
    r22 = ri2-rj2
    r23 = ri2-rj3
    r31 = ri3-rj1
    r32 = ri3-rj2
    r33 = ri3-rj3
    
    rr11 = ri1*rj1
    rr12 = ri1*rj2
    rr13 = ri1*rj3
    rr21 = ri2*rj1
    rr22 = ri2*rj2
    rr23 = ri2*rj3
    rr31 = ri3*rj1
    rr32 = ri3*rj2
    rr33 = ri3*rj3
    
    cc11 = ci1*cj1
    cc12 = ci1*cj2
    cc13 = ci1*cj3
    cc21 = ci2*cj1
    cc22 = ci2*cj2
    cc23 = ci2*cj3
    cc31 = ci3*cj1
    cc32 = ci3*cj2
    cc33 = ci3*cj3
    
    rci11 = r11*ci1
    rci12 = r12*ci1
    rci13 = r13*ci1
    rci21 = r21*ci2
    rci22 = r22*ci2
    rci23 = r23*ci2
    rci31 = r31*ci3
    rci32 = r32*ci3
    rci33 = r33*ci3
    
    rcj11 = r11*cj1
    rcj12 = r12*cj2
    rcj13 = r13*cj3
    rcj21 = r21*cj1
    rcj22 = r22*cj2
    rcj23 = r23*cj3
    rcj31 = r31*cj1
    rcj32 = r32*cj2
    rcj33 = r33*cj3
    
    # sum over six permutations
    derv = d*math.exp(-f*rsum)*\
        ((math.exp(2*f*(rr11+rr22+rr33))*(e*(cc11+cc22+cc33)-(rci11+rci22+rci33)*(rcj11+rcj22+rcj33)))+
        (math.exp(2*f*(rr11+rr23+rr32))*(e*(cc11+cc23+cc32)-(rci11+rci23+rci32)*(rcj11+rcj23+rcj32)))+
        (math.exp(2*f*(rr12+rr21+rr33))*(e*(cc12+cc21+cc33)-(rci12+rci21+rci33)*(rcj12+rcj21+rcj33)))+
        (math.exp(2*f*(rr12+rr23+rr31))*(e*(cc12+cc23+cc31)-(rci12+rci23+rci31)*(rcj12+rcj23+rcj31)))+
        (math.exp(2*f*(rr13+rr21+rr32))*(e*(cc13+cc21+cc32)-(rci13+rci21+rci32)*(rcj13+rcj21+rcj32)))+
        (math.exp(2*f*(rr13+rr22+rr31))*(e*(cc13+cc22+cc31)-(rci13+rci22+rci31)*(rcj13+rcj22+rcj31))))

    return derv

In [92]:
# trip type 1: all atoms the same
def k3dt1_mat(ri,ci,rj,cj,d,e,f,risum,rjsum):
    
    # define quantities that will reappear
    rsum = risum+rjsum
    
    r = ri-rj
    rr = ri*rj
    cc = ci*cj
    rci = r*ci
    rcj = r*cj
    
    # sum over six permutations
    derv = d*math.exp(-f*rsum)*\
        ((math.exp(2*f*(rr[0,0]+rr[1,1]+rr[2,2]))*(e*(cc[0,0]+cc[1,1]+cc[2,2])-(rci[0,0]+rci[1,1]+rci[2,2])*(rcj[0,0]+rcj[1,1]+rcj[2,2])))+
        (math.exp(2*f*(rr[0,0]+rr[1,2]+rr[2,1]))*(e*(cc[0,0]+cc[1,2]+cc[2,1])-(rci[0,0]+rci[1,2]+rci[2,1])*(rcj[0,0]+rcj[1,2]+rcj[2,1])))+
        (math.exp(2*f*(rr[0,1]+rr[1,0]+rr[2,2]))*(e*(cc[0,1]+cc[1,0]+cc[2,2])-(rci[0,1]+rci[1,0]+rci[2,2])*(rcj[0,1]+rcj[1,0]+rcj[2,2])))+
        (math.exp(2*f*(rr[0,1]+rr[1,2]+rr[2,0]))*(e*(cc[0,1]+cc[1,2]+cc[2,0])-(rci[0,1]+rci[1,2]+rci[2,0])*(rcj[0,1]+rcj[1,2]+rcj[2,0])))+
        (math.exp(2*f*(rr[0,2]+rr[1,0]+rr[2,1]))*(e*(cc[0,2]+cc[1,0]+cc[2,1])-(rci[0,2]+rci[1,0]+rci[2,1])*(rcj[0,2]+rcj[1,0]+rcj[2,1])))+
        (math.exp(2*f*(rr[0,2]+rr[1,1]+rr[2,0]))*(e*(cc[0,2]+cc[1,1]+cc[2,0])-(rci[0,2]+rci[1,1]+rci[2,0])*(rcj[0,2]+rcj[1,1]+rcj[2,0]))))

    return derv

### Write kernel function for type 2 triplet (two different atoms in the triplet)

In [152]:
# trip type 1: all atoms the same
# slow version
def k3dt2_slow(ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f):
    kern = 0
    # sum over six permutations of second triplet
    kern+=k3_toy_3(ri1,ri2,ri3,ci1,ci2,ci3,rj1,rj2,rj3,cj1,cj2,cj3,d,e,f)
    kern+=k3_toy_3(ri1,ri2,ri3,ci1,ci2,ci3,rj2,rj1,rj3,cj2,cj1,cj3,d,e,f)
    
    return kern

In [187]:
# triplet type 2: two different atoms
def k3dt2(ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f):
    
    # define quantities that will reappear
    rsum = ri1*ri1+ri2*ri2+ri3*ri3+rj1*rj1+rj2*rj2+rj3*rj3
    
    r11 = ri1-rj1
    r12 = ri1-rj2
    r21 = ri2-rj1
    r22 = ri2-rj2
    
    r33 = ri3-rj3
    rr33 = ri3*rj3
    cc33 = ci3*cj3
    rci33 = r33*ci3
    rcj33 = r33*cj3
    
    # sum over permutations
    derv = d*math.exp(-f*rsum)*\
        ((math.exp(2*f*(ri1*rj1+ri2*rj2+rr33))*(e*(ci1*cj1+ci2*cj2+cc33)-(r11*ci1+r22*ci2+rci33)*(r11*cj1+r22*cj2+rcj33)))+\
        (math.exp(2*f*(ri1*rj2+ri2*rj1+rr33))*(e*(ci1*cj2+ci2*cj1+cc33)-(r12*ci1+r21*ci2+rci33)*(r12*cj2+r21*cj1+rcj33))))

    return derv

In [173]:
r11

-6

### Write kernel function for type 3 triplet.

In [189]:
# triplet type 2: two different atoms
def k3dt3(ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f):
    
    r1 = ri1-rj1
    r2 = ri2-rj2
    r3 = ri3-rj3
    derv = (e*(ci1*cj1+ci2*cj2+ci3*cj3)-(r1*ci1+r2*ci2+r3*ci3)*(r1*cj1+r2*cj2+r3*cj3))*d*math.exp(-f*(r1*r1+r2*r2+r3*r3))

    return derv

### Gather helper functions.

#### Environment helper functions.

In [3]:
# given list of Cartesian coordinates, return list of atomic environments
def get_cutoff_vecs(vec, brav_mat, brav_inv, vec1, vec2, vec3, cutoff):
    # get bravais coefficients
    coeff = np.matmul(brav_inv, vec)
    
    # get bravais coefficients for atoms within one super-super-cell
    coeffs = [[],[],[]]
    for n in range(3):
        coeffs[n].append(coeff[n])
        coeffs[n].append(coeff[n]-1)
        coeffs[n].append(coeff[n]+1)
        coeffs[n].append(coeff[n]-2)
        coeffs[n].append(coeff[n]+2)

    # get vectors within cutoff
    vecs = []
    dists = []
    for m in range(len(coeffs[0])):
        for n in range(len(coeffs[1])):
            for p in range(len(coeffs[2])):
                vec_curr = coeffs[0][m]*vec1 + coeffs[1][n]*vec2 + coeffs[2][p]*vec3
                
                dist = np.linalg.norm(vec_curr)

                if dist < cutoff:
                    vecs.append(vec_curr)
                    dists.append(dist)
                    
    return vecs, dists

# ordering convention: atoms (a,b,c) correspond to distances (ab, ac, bc)
def order_triplet(c,r1,t1,x1,y1,z1,r2,t2,x2,y2,z2):
    # calculate third distance
    r3 = np.sqrt((x1-x2)**2+(y1-y2)**2+(z1-z2)**2)
    
    labs_init = [c,t1,t2]
    dists_init = [r1,r2,r3]
    xs_init = [x1,x2,0]
    ys_init = [y1,y2,0]
    zs_init = [z1,z2,0]
    xrel_init = [x1/r1,x2/r2,0]
    yrel_init = [y1/r1,y2/r2,0]
    zrel_init = [z1/r1,z2/r2,0]
    
    # order the labels
    # all atoms the same
    if (c==t1) and (c==t2):
        trip_type = 1
        atom_order = [0,1,2]
        dist_order = [0,1,2]
    # two alike, one different: put different atom first
    if (c==t1) and (c!=t2):
        trip_type = 2
        atom_order = [2,0,1]
        dist_order = [1,2,0]
    if (c==t2) and (c!=t1):
        trip_type = 2
        atom_order = [1,2,0]
        dist_order = [2,0,1]
    if (t1==t2) and (c!=t1):
        trip_type = 2
        atom_order = [0,1,2]
        dist_order = [0,1,2]
    # all atoms different: sort atom labels alphabetically
    if (c!=t1) and (c!=t2) and (t1!=t2):
        trip_type = 3
        atom_order = list(np.argsort(labs_init))
        # check all 6 possible orderings
        if inds[0]==0 and inds[1]==1 and inds[2]==2:
            dist_order = [0,1,2]
        if inds[0]==0 and inds[1]==2 and inds[2]==1:
            dist_order = [1,0,2]
        if inds[0]==1 and inds[1]==0 and inds[2]==2:
            dist_order = [0,2,1]
        if inds[0]==1 and inds[1]==2 and inds[2]==0:
            dist_order = [2,0,1]
        if inds[0]==2 and inds[1]==0 and inds[2]==1:
            dist_order = [1,2,0]
        if inds[0]==2 and inds[1]==1 and inds[2]==0:
            dist_order = [2,1,0]
            
    trip_labs = [labs_init[n] for n in atom_order]
    trip_dists = [dists_init[n] for n in dist_order]
    trip_xs = [xs_init[n] for n in dist_order]
    trip_ys = [ys_init[n] for n in dist_order]
    trip_zs = [zs_init[n] for n in dist_order]
    trip_xrel = [xrel_init[n] for n in dist_order]
    trip_yrel = [yrel_init[n] for n in dist_order]
    trip_zrel = [zrel_init[n] for n in dist_order]
    
    return trip_type, trip_labs, trip_dists, trip_xs, trip_ys, trip_zs, trip_xrel, trip_yrel, trip_zrel

# create triplet dictionary from 2-body dictionary
def get_trip_dict(tb_dict):
    trip_dict = {'typs':[],'labs':[],'dists':[],'xs':[],'ys':[],'zs':[], 'xrel':[], 'yrel':[],'zrel':[]}


    # pull relevant information from 2-body dictionary
    dist_no = len(tb_dict['dists'])
    c = tb_dict['central_atom']
    dists = tb_dict['dists']
    xs = tb_dict['xs']
    ys = tb_dict['ys']
    zs = tb_dict['zs']
    types = tb_dict['types']

    for m in range(dist_no):
        r1 = dists[m]
        t1 = types[m]
        x1 = xs[m]
        y1 = ys[m]
        z1 = zs[m]
        for n in range(m,dist_no):
            r2 = dists[n]
            t2 = types[n]
            x2 = xs[n]
            y2 = ys[n]
            z2 = zs[n]

            trip_type, trip_labs, trip_dists, trip_xs, trip_ys, trip_zs, trip_xrel, trip_yrel, trip_zrel =\
                order_triplet(c,r1,t1,x1,y1,z1,r2,t2,x2,y2,z2)

            # triplet already in dictionary:
            if trip_labs in trip_dict['labs']:
                lab_ind = trip_dict['labs'].index(trip_labs)
                trip_dict['dists'][lab_ind].append(trip_dists)
                trip_dict['xs'][lab_ind].append(trip_xs)
                trip_dict['ys'][lab_ind].append(trip_ys)
                trip_dict['zs'][lab_ind].append(trip_zs)
                trip_dict['xrel'][lab_ind].append(trip_xrel)
                trip_dict['yrel'][lab_ind].append(trip_yrel)
                trip_dict['zrel'][lab_ind].append(trip_zrel)
            # or else it needs to be appended:
            else:
                trip_dict['typs'].append(trip_type)
                trip_dict['labs'].append(trip_labs)
                trip_dict['dists'].append([trip_dists])
                trip_dict['xs'].append([trip_xs])
                trip_dict['ys'].append([trip_ys])
                trip_dict['zs'].append([trip_zs])
                trip_dict['xrel'].append([trip_xrel])
                trip_dict['yrel'].append([trip_yrel])
                trip_dict['zrel'].append([trip_zrel])
                
    return trip_dict

# given list of cartesian coordinates, get chemical environment of specified atom
# pos = list of cartesian coordinates
# typs = list of atom types
def get_env_struc(pos, typs, atom, brav_mat, brav_inv, vec1, vec2, vec3, cutoff):
    pos_atom = np.array(pos[atom]).reshape(3,1)
    typ = typs[atom]
    env = {'central_atom':typ, 'dists':[],'xs':[],'ys':[],'zs':[],\
           'xrel':[],'yrel':[],'zrel':[],'types':[]}
    
    # loop through positions to find all atoms and images in the neighborhood
    for n in range(len(pos)):
        # position relative to reference atom
        diff_curr = np.array(pos[n]).reshape(3,1) - pos_atom

        # get images within cutoff
        vecs, dists = get_cutoff_vecs(diff_curr, brav_mat, \
            brav_inv, vec1, vec2, vec3, cutoff)

        for vec, dist in zip(vecs, dists):
            # ignore self interaction
            if dist != 0:
                # append distance
                env['dists'].append(dist)
                
                # append coordinate differences
                env['xs'].append(vec[0][0])
                env['ys'].append(vec[1][0])
                env['zs'].append(vec[2][0])
                
                # append relative coordinate differences
                env['xrel'].append(vec[0][0]/dist)
                env['yrel'].append(vec[1][0]/dist)
                env['zrel'].append(vec[2][0]/dist)
                
                # append atom type
                env['types'].append(typs[n])

    env['trip_dict']=get_trip_dict(env)
    
    return env

# given list of cartesian coordinates, return list of chemical environments
def get_envs(pos, typs, brav_mat, brav_inv, vec1, vec2, vec3, cutoff):
    envs = []
    for n in range(len(pos)):
        atom = n
        env = get_env_struc(pos, typs, atom, brav_mat, brav_inv, vec1, vec2, vec3, cutoff)
        envs.append(env)
        
    return envs

#### 2-body kernel helper functions.

In [4]:
# two body kernel
def k2(ri,ci,rj,cj,d,e,f):
    rr = (ri-rj)*(ri-rj)
    k2 = d*math.exp(-f*rr)*ci*cj*(e-rr)
    return k2

#### 3-body kernel helper functions.

In [5]:
# trip type 1: all atoms the same
def k3dt1(ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f):
    
    # define quantities that will reappear
    rsum = ri1*ri1+ri2*ri2+ri3*ri3+rj1*rj1+rj2*rj2+rj3*rj3
    
    r11 = ri1-rj1
    r12 = ri1-rj2
    r13 = ri1-rj3
    r21 = ri2-rj1
    r22 = ri2-rj2
    r23 = ri2-rj3
    r31 = ri3-rj1
    r32 = ri3-rj2
    r33 = ri3-rj3
    
    rr11 = ri1*rj1
    rr12 = ri1*rj2
    rr13 = ri1*rj3
    rr21 = ri2*rj1
    rr22 = ri2*rj2
    rr23 = ri2*rj3
    rr31 = ri3*rj1
    rr32 = ri3*rj2
    rr33 = ri3*rj3
    
    cc11 = ci1*cj1
    cc12 = ci1*cj2
    cc13 = ci1*cj3
    cc21 = ci2*cj1
    cc22 = ci2*cj2
    cc23 = ci2*cj3
    cc31 = ci3*cj1
    cc32 = ci3*cj2
    cc33 = ci3*cj3
    
    rci11 = r11*ci1
    rci12 = r12*ci1
    rci13 = r13*ci1
    rci21 = r21*ci2
    rci22 = r22*ci2
    rci23 = r23*ci2
    rci31 = r31*ci3
    rci32 = r32*ci3
    rci33 = r33*ci3
    
    rcj11 = r11*cj1
    rcj12 = r12*cj2
    rcj13 = r13*cj3
    rcj21 = r21*cj1
    rcj22 = r22*cj2
    rcj23 = r23*cj3
    rcj31 = r31*cj1
    rcj32 = r32*cj2
    rcj33 = r33*cj3
    
    # sum over six permutations
    derv = d*math.exp(-f*rsum)*\
        ((math.exp(2*f*(rr11+rr22+rr33))*(e*(cc11+cc22+cc33)-(rci11+rci22+rci33)*(rcj11+rcj22+rcj33)))+
        (math.exp(2*f*(rr11+rr23+rr32))*(e*(cc11+cc23+cc32)-(rci11+rci23+rci32)*(rcj11+rcj23+rcj32)))+
        (math.exp(2*f*(rr12+rr21+rr33))*(e*(cc12+cc21+cc33)-(rci12+rci21+rci33)*(rcj12+rcj21+rcj33)))+
        (math.exp(2*f*(rr12+rr23+rr31))*(e*(cc12+cc23+cc31)-(rci12+rci23+rci31)*(rcj12+rcj23+rcj31)))+
        (math.exp(2*f*(rr13+rr21+rr32))*(e*(cc13+cc21+cc32)-(rci13+rci21+rci32)*(rcj13+rcj21+rcj32)))+
        (math.exp(2*f*(rr13+rr22+rr31))*(e*(cc13+cc22+cc31)-(rci13+rci22+rci31)*(rcj13+rcj22+rcj31))))

    return derv

# triplet type 2: two different atoms
def k3dt2(ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f):
    
    # define quantities that will reappear
    rsum = ri1*ri1+ri2*ri2+ri3*ri3+rj1*rj1+rj2*rj2+rj3*rj3
    
    r11 = ri1-rj1
    r12 = ri1-rj2
    r21 = ri2-rj1
    r22 = ri2-rj2
    
    r33 = ri3-rj3
    rr33 = ri3*rj3
    cc33 = ci3*cj3
    rci33 = r33*ci3
    rcj33 = r33*cj3
    
    # sum over permutations
    derv = d*math.exp(-f*rsum)*\
        ((math.exp(2*f*(ri1*rj1+ri2*rj2+rr33))*(e*(ci1*cj1+ci2*cj2+cc33)-(r11*ci1+r22*ci2+rci33)*(r11*cj1+r22*cj2+rcj33)))+\
        (math.exp(2*f*(ri1*rj2+ri2*rj1+rr33))*(e*(ci1*cj2+ci2*cj1+cc33)-(r12*ci1+r21*ci2+rci33)*(r12*cj2+r21*cj1+rcj33))))

    return derv

# triplet type 3: three different atoms
def k3dt3(ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f):
    
    r1 = ri1-rj1
    r2 = ri2-rj2
    r3 = ri3-rj3
    derv = (e*(ci1*cj1+ci2*cj2+ci3*cj3)-(r1*ci1+r2*ci2+r3*ci3)*(r1*cj1+r2*cj2+r3*cj3))*\
            d*math.exp(-f*(r1*r1+r2*r2+r3*r3))

    return derv

def k3(typ,ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f):
    if typ==1:
        k3 = k3dt1(ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f)
    if typ==2:
        k3 = k3dt2(ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f)
    if typ==3:
        k3 = k3dt3(ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f)
        
    return k3     

In [6]:
# get two body kernel between chemical environments
def two_body(x1, x2, d1, d2, sig, ls):
    d= sig**2/ls**4
    e= ls**2
    f= 1/(2*ls**2)
    kern = 0
    
    # record central atom types
    c1 = x1['central_atom']
    c2 = x2['central_atom']
    
    for m in range(len(x1['dists'])):
        e1 = x1['types'][m]
        r1 = x1['dists'][m]
        coord1 = x1[d1][m]
        for n in range(len(x2['dists'])):
            e2 = x2['types'][n]
            r2 = x2['dists'][n]
            coord2 = x2[d2][n]
            
            # check that atom types match
            if (c1==c2 and e1==e2) or (c1==e2 and c2==e1):
                kern+=k2(r1,coord1,r2,coord2,d,e,f)
                
    return kern

# get three body kernel between two chemical environments
def three_body(x1, x2, d1, d2, sig, ls):
    d= sig**2/ls**4
    e= ls**2
    f= 1/(2*ls**2)
    kern = 0
    
    for m in range(len(x1['trip_dict']['labs'])):
        x1_lab = x1['trip_dict']['labs'][m]

        for n in range(len(x2['trip_dict']['labs'])):
            x2_lab = x2['trip_dict']['labs'][n]

            # check triplet type
            if x1_lab==x2_lab:
                # loop over tripets of the same type
                typ = x1['trip_dict']['typs'][m]

                # loop over triplets in environment 1
                for p in range(len(x1['trip_dict']['dists'][m])):
                    # set distances
                    ri1 = x1['trip_dict']['dists'][m][p][0]
                    ri2 = x1['trip_dict']['dists'][m][p][1]
                    ri3 = x1['trip_dict']['dists'][m][p][2]

                    # set coordinates
                    ci1 = x1['trip_dict'][d1][m][p][0]
                    ci2 = x1['trip_dict'][d1][m][p][1]
                    ci3 = x1['trip_dict'][d1][m][p][2]

                    # loop over triplets in environment 2
                    for q in range(len(x2['trip_dict']['dists'][n])):
                        # set distances
                        rj1 = x2['trip_dict']['dists'][n][q][0]
                        rj2 = x2['trip_dict']['dists'][n][q][1]
                        rj3 = x2['trip_dict']['dists'][n][q][2]

                        # set coordinates
                        cj1 = x2['trip_dict'][d2][n][q][0]
                        cj2 = x2['trip_dict'][d2][n][q][1]
                        cj3 = x2['trip_dict'][d2][n][q][2]

                        # add to kernel
                        kern+=k3(typ,ri1,ri2,ri3,ci1,ci2,ci3,\
                         rj1,rj2,rj3,cj1,cj2,cj3,\
                         d,e,f)
                        
    return kern

#### GP helper functions.

In [7]:
# get 3Nx3N noiseless kernel matrix
# assume all 3 force components are known for each configuration
# X is assumed to be a list of environments
def get_K(X,sig,ls,noise,kern_func):
    ds = ['xrel','yrel','zrel']
    
    # initialize matrix
    size = len(X)*3
    K = np.zeros([size, size])
    
    # calculate elements
    for m in range(size):
        x1 = X[int(math.floor(m/3))]
        d1 = ds[m%3]
        for n in range(m,size):
            x2 = X[int(math.floor(n/3))]
            d2 = ds[n%3]
           
            # calculate kernel
            cov = kern_func(x1, x2, d1, d2, sig, ls)
            K[m,n] = cov
            K[n,m] = cov
    # perform cholesky decomposition
    L = np.linalg.cholesky(K+noise**2*np.eye(size))
    
    return K, L

# get row of covariances
def get_cov_row(x1, d1, m, size, X, sig, ls):
    ds = ['xrel','yrel','zrel']
    covs = []
    for n in range(m,size):
        x2 = X[int(math.floor(n/3))]
        d2 = ds[n%3]

        # calculate kernel
        cov = two_body(x1, x2, d1, d2, sig, ls)
        covs.append(cov)
        
    return covs

# get covariance matrix with multiple processors
def get_K_par(X,sig,ls,noise,pool):
    ds = ['xrel','yrel','zrel']
    # initialize matrix
    size = len(X)*3
    K = np.zeros([size, size])

    # calculate elements
    results = []
    for m in range(size):
        x1 = X[int(math.floor(m/3))]
        d1 = ds[m%3]

        results.append(pool.apply_async(get_cov_row, \
            args=(x1, d1, m, size, X, sig, ls)))
        
    print('reconstructing...')
    # construct covariance matrix
    for m in range(size):
        time1 = time.time()
        res_cur = results[m].get()
        time2 = time.time()
        print(time2-time1)
        for n in range(m,size):
            K[m,n]=res_cur[n-m]
            K[n,m]=res_cur[n-m]

    # perform cholesky decomposition
    L = np.linalg.cholesky(K+noise**2*np.eye(size))
    
    return K,L,results

# get kernel vector
def get_kv(X,x,d1,sig,ls,kern_func):
    ds = ['xrel','yrel','zrel']
    size = len(X)*3
    kv=np.zeros([size,1])
    for m in range(size):
        x2 = X[int(math.floor(m/3))]
        d2 = ds[m%3]
        kv[m]=kern_func(x,x2,d1,d2,sig,ls)
        
    return kv

# get alpha
def get_alpha(K,L,y): 
    # get alpha
    ts1 = sp.linalg.solve_triangular(L,y,lower=True)
    alpha = sp.linalg.solve_triangular(L.transpose(),ts1)
            
    return alpha

# get likelihood
def get_like(K,L,y,alpha): 
    # get log marginal likelihood
    like = -(1/2)*np.matmul(y.transpose(),alpha)-\
            np.sum(np.log(np.diagonal(L)))-\
            np.log(2*np.pi)*K.shape[1]/2
            
    return like

# get likelihood as a function of hyperparameters
def like_hyp(hyp,X,y,kern_func):
    # unpack hyperparameters
    sig = hyp[0]
    ls = hyp[1]
    noise = hyp[2]
    
    # calculate likelihood
    K, L = get_K(X,sig,ls,noise,kern_func)
    alpha = get_alpha(K,L,y)
    like = get_like(K,L,y,alpha)
    
#     # print
#     print('sig: '+str(sig))
#     print('ls: '+str(ls))
#     print('noise: '+str(noise))
#     print('log like: '+str(like))
    
    return like

# get minus likelihood as a function of hyperparameters
def minus_like_hyp(hyp,X,y,kern_func):
    like = like_hyp(hyp,X,y,kern_func)
    minus_like = -like
    return minus_like

# make GP prediction with SE kernel
def GP_pred(X,K,L,alpha,sig,ls,xt,d,kern_func):
    # get kernel vector
    kv = get_kv(X,xt,d,sig,ls,kernel)
    
    # get predictive mean
    f = np.matmul(kv.transpose(),alpha)
    
    # get predictive variance
    v = sp.linalg.solve_triangular(L,kv,lower=True)
    self_kern = kern_func(xt, xt, d, d, sig, ls)
    var = self_kern - np.matmul(v.transpose(),v)
    
    return f, var

# convert list of triplets to column vector
def fc_conv(fcs):
    comp_len = len(fcs)*3
    comps = []
    for n in range(comp_len):
        fc_ind = int(math.floor(n/3))
        d = n%3
        comps.append(fcs[fc_ind][d])
    
    return np.array(comps).reshape(comp_len,1)

### Test 3-body kernel.

In [16]:
outfile ='/Users/jonpvandermause/Research/GP/Datasets/SiC_MD/sic_md.out'
Si_MD_Parsed = parse_qe_pwscf_md_output(outfile)

# set crystal structure
dim = 3
alat = 4.344404578
unit_cell = [[0.0, alat/2, alat/2], [alat/2, 0.0, alat/2], \
                    [alat/2, alat/2, 0.0]] # fcc primitive cell
unit_pos = [['Si',[0,0,0]],['Si',[alat/4, alat/4, alat/4]]]
brav_mat = np.array([[0.0, alat/2, alat/2], [alat/2, 0.0, alat/2], \
                    [alat/2, alat/2, 0.0]])*dim
brav_inv = np.linalg.inv(brav_mat)

# bravais vectors
vec1 = brav_mat[:,0].reshape(3,1)
vec2 = brav_mat[:,1].reshape(3,1)
vec3 = brav_mat[:,2].reshape(3,1)

# build force field from single snapshot
cutoff = 4.5
pos = Si_MD_Parsed[1]['positions']
typs = Si_MD_Parsed[1]['elements']
fcs = fc_conv(Si_MD_Parsed[2]['forces'])

In [17]:
atom = 0
test1 = get_env_struc(pos, typs, 0, brav_mat, brav_inv, vec1, vec2, vec3, cutoff)
test2 = get_env_struc(pos, typs, 1, brav_mat, brav_inv, vec1, vec2, vec3, cutoff)
x1 = test1
x2 = test2

In [10]:
envs = get_envs(pos, typs, brav_mat, brav_inv, vec1, vec2, vec3, cutoff)

In [13]:
sig=1
ls=1
noise=0.01
kern_func=two_body
time1=time.time()
test1 = get_K(envs,sig,ls,noise,kern_func)
time2 = time.time()
print(time2-time1)

7.373988151550293


In [11]:
pool_test = mp.Pool(processes=2)

Process ForkPoolWorker-1:
Process ForkPoolWorker-2:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/jonpvandermause/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/jonpvandermause/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/jonpvandermause/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/jonpvandermause/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/jonpvandermause/anaconda3/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/Users/jonpvandermause/anaconda3/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/Users/jonpvandermause/anaconda3/lib/python3.6/multiprocessing/queues.py

In [28]:
sig = 0.02084984
ls = 0.33739727
noise = 0.00100585
kern_func=three_body
time1=time.time()
K,L,results = get_K_par(envs,sig,ls,noise,pool_test)
time2 = time.time()
print(time2-time1)

reconstructing...
0.5906219482421875
0.6120519638061523
0.6034982204437256
0.5994119644165039
0.5989737510681152
0.5961740016937256
0.610220193862915
0.6373817920684814
0.6722331047058105
0.6561939716339111
0.7042429447174072


KeyboardInterrupt: 

### Testing ground.

In [93]:
ri1=1
ri2=2
ri3=3
ci1=4
ci2=5
ci3=6
rj1=7
rj2=8
rj3=9
cj1=1
cj2=2
cj3=3

ri = np.array([[ri1,ri1,ri1],[ri2,ri2,ri2],[ri3,ri3,ri3]])
ci = np.array([[ci1,ci1,ci1],[ci2,ci2,ci2],[ci3,ci3,ci3]])
rj = np.array([[rj1,rj2,rj3],[rj1,rj2,rj3],[rj1,rj2,rj3]])
cj = np.array([[cj1,cj2,cj3],[cj1,cj2,cj3],[cj1,cj2,cj3]])
d=1
e=1
f=1
risum=1
rjsum=1


times = []
for n in range(1000000):
    time1 = time.time()
    test=k3dt1_mat(ri,ci,rj,cj,d,e,f,risum,rjsum)
    time2 = time.time()
    time_diff = time2 - time1
    times.append(time_diff)

print(test)
np.mean(times)

-1.49475096056726e+46


2.80629563331604e-05

In [183]:
ri1=3
ri2=1
ri3=4
ci1=1
ci2=5
ci3=9
rj1=2
rj2=6
rj3=5
cj1=1
cj2=2
cj3=3
d=1
e=1
f=1
risum=1
rjsum=1


times = []
for n in range(100000):
    time1 = time.time()
    test=k3dt1_fast(ri1,ri2,ri3,ci1,ci2,ci3,\
             rj1,rj2,rj3,cj1,cj2,cj3,\
             d,e,f)
    time2 = time.time()
    time_diff = time2 - time1
    times.append(time_diff)

print(test)
np.mean(times)

-0.03300086826354655


4.4444513320922855e-06