In [1]:
import qml 

In [2]:
from glob import glob
import numpy as np

In [3]:
database_xyzs = sorted(glob("../qm7/*.xyz"))

In [4]:
database_mols = [qml.Compound(x) for x in database_xyzs]

In [5]:
def cutoff_func(R_ij, central_cutoff=4.8, central_decay=1):
    if R_ij <= (central_cutoff - central_decay):
        func = 1.
    elif ((central_cutoff - central_decay) < R_ij) and (R_ij <= (central_cutoff + central_decay)):
        func = 0.5 * (1. + np.cos((np.pi * R_ij - central_cutoff + central_decay)/central_decay))
    else:
        func = 0.
    return func

In [6]:
def get_atomic_CM(mol, max_natoms=28, central_cutoff=4.8, central_decay=1):
    ncharges, coords = get_ncharges_coords(mol)
    size = int((max_natoms + 1)*max_natoms / 2)
    rep = np.zeros((len(ncharges), size))
    
    # central atom loop
    for k in range(len(ncharges)):
        M = np.zeros((len(ncharges), len(ncharges)))
        for i in range(len(ncharges)):
            R_ik = np.linalg.norm(coords[i]-coords[k])
           # print('R_ik', R_ik)
            f_ik = cutoff_func(R_ik, central_cutoff=central_cutoff,
                              central_decay=central_decay)
            for j in range(len(ncharges)):
                if i <=j:
                    if i == j:
                        M[i,j] = 0.5 * ncharges[i]**2.4 * f_ik**2
                        M[j,i] = M[i,j]

                    else:
                        R_jk = np.linalg.norm(coords[j]-coords[k])
                      #  print('R_jk', R_jk)
                        f_jk = cutoff_func(R_jk, central_cutoff=central_cutoff,
                                          central_decay=central_decay)
                        R_ij = np.linalg.norm(coords[i]-coords[j])
                      #  print('R_ij', R_ij)
                        f_ij = cutoff_func(R_ij, central_cutoff=central_cutoff,
                                          central_decay=central_decay)
                        M[i,j] = (ncharges[i]*ncharges[j]/R_ij)*f_ik*f_jk*f_ij
                        M[j,i] = M[i,j]


        # concat upper triangular and diagonal
        upper_triang = M[np.triu_indices(len(M))]
        s_upper_triang = np.sort(upper_triang)[::-1]
        
        # pad to full size
        n_zeros = size - len(s_upper_triang)
        zeros = np.zeros(n_zeros)
        rep[k] = np.concatenate((s_upper_triang, zeros))

    return ncharges, rep

In [7]:
def get_ncharges_coords(mol):
    ncharges = mol.nuclear_charges
    heavy_ints = [i for i,x in enumerate(ncharges) if x!=1]
    heavy_ncharges = [ncharges[i] for i in heavy_ints]
    coords = mol.coordinates
    heavy_coords = [coords[i] for i in heavy_ints]
    return heavy_ncharges, heavy_coords

In [8]:
# pad size is based on largest target 

In [9]:
database_ncharges = []
database_reps = []
for mol in database_mols:
    ncharge, rep = get_atomic_CM(mol)
    database_ncharges.append(ncharge)
    database_reps.append(rep)

In [10]:
database_reps = np.array(database_reps)

  database_reps = np.array(database_reps)


In [11]:
database_labels = [t.split("/")[-1].split(".xyz")[0] for t in database_xyzs]

In [12]:
database_labels = np.array(database_labels)

In [13]:
database_ncharges = np.array(database_ncharges)

  database_ncharges = np.array(database_ncharges)


In [14]:
np.savez("../representations/database_aCM.npz", 
         database_labels=database_labels, 
         database_reps=database_reps,
        database_ncharges=database_ncharges)