In [3]:
import numpy as np
import numpy.linalg as lin

In [6]:
# Element symbols in order of atomic number (H = 1 ... U = 92). Parsed once when
# the cell runs instead of on every getZ call.
_ELEMENT_SYMBOLS = """
    H   He
    Li  Be  B   C   N   O   F   Ne
    Na  Mg  Al  Si  P   S   Cl  Ar
    K   Ca  Sc  Ti  V   Cr  Mn  Fe  Co  Ni  Cu  Zn  Ga  Ge  As  Se  Br  Kr
    Rb  Sr  Y   Zr  Nb  Mo  Tc  Ru  Rh  Pd  Ag  Cd  In  Sn  Sb  Te  I   Xe
    Cs  Ba  La  Ce  Pr  Nd  Pm  Sm  Eu  Gd  Tb  Dy  Ho  Er  Tm  Yb
    Lu  Hf  Ta  W   Re  Os  Ir  Pt  Au  Hg  Tl  Pb  Bi  Po  At  Rn
    Fr  Ra  Ac  Th  Pa  U
""".split()

# Symbol -> atomic number lookup (constant-time instead of a list scan).
_ATOMIC_NUMBERS = {symbol: number
                   for number, symbol in enumerate(_ELEMENT_SYMBOLS, start=1)}


def getZ(label:str) -> int:
    """
    Return the atomic number of the element with symbol `label`.

    label: element symbol, matched exactly (case-sensitive), e.g. "H", "Cl".
           Symbols from H (1) through U (92) are known.

    Return: atomic number as an int (1-based position in the periodic table).
    Raises: ValueError if `label` is not one of the known symbols.
    """
    try:
        return _ATOMIC_NUMBERS[label]
    except (KeyError, TypeError):
        # Keep ValueError (what list.index raised before) but name the symbol.
        raise ValueError("unknown element symbol: {!r}".format(label)) from None

In [4]:
def importQM7(structure_file:str, energy_file:str):
    """
    Read molecular structures and energies from two text files.

    The structure file is parsed as consecutive records:
      - a line holding exactly one token, the number of atoms n,
      - one line that is skipped (never parsed),
      - n lines of the form "<symbol> <x> <y> <z>".
    Lines outside a record that do not consist of exactly one token (for
    example blank separator lines) are ignored.

    The energy file contributes one value per non-blank line: the first
    whitespace-separated token, converted to float.

    Return: Z, R, E
    Z: list of 1D float arrays containing the atomic numbers of each molecule
    R: list of 2D arrays of shape (n_atoms, 3) containing atomic positions
    E: list of floats, one per non-blank line of the energy file
    """
    # Context managers make sure both files are closed again.
    with open(structure_file, 'r') as handle:
        structures = handle.readlines()

    Z = []
    R = []

    line = 0
    while line < len(structures):
        x = structures[line].split()

        # Only a single-token line starts a molecule record.
        if len(x) != 1:
            line += 1
            continue

        n_atoms = int(x[0])
        first_atom = line + 2  # skip the count line and the line after it

        Zs   = np.zeros(n_atoms)
        xyzs = np.zeros((n_atoms, 3))

        # Go through every atom in the molecule:
        for atom_index in range(n_atoms):
            fields = structures[first_atom + atom_index].split()
            Zs  [atom_index] = getZ(fields[0])
            xyzs[atom_index] = [float(val) for val in fields[1:]]

        Z.append(Zs)
        R.append(xyzs)

        # Jump past the record so that its skipped line and atom lines are
        # never mistaken for the header of another molecule.
        line = first_atom + n_atoms

    # Blank lines (e.g. trailing newlines) carry no energy and are skipped.
    with open(energy_file, 'r') as handle:
        E = [float(row.split()[0]) for row in handle if row.strip()]

    return Z, R, E

In [10]:
def coulomb_eigenvalues(Z, R, n_max, fname="ev.txt"):
    """
    Compute the Coulomb-matrix eigenvalue descriptor of every molecule.

    For a molecule with atomic numbers Z_i and positions R_i the matrix is
        M[i][i] = 0.5 * Z_i**2.4
        M[i][j] = Z_i * Z_j / |R_i - R_j|      (i != j)
    Its eigenvalues are ordered by decreasing absolute value and padded on the
    right with zeros up to length n_max.

    Z: sequence of 1D arrays of atomic numbers, one entry per molecule
    R: sequence of 2D arrays of shape (n_atoms, 3) with atomic positions,
       in the same order as Z
    n_max: length of each descriptor row; must be at least the largest
           number of atoms found in Z
    fname: file the descriptor table is written to with np.savetxt
           (default "ev.txt"); pass None to skip writing

    Return: 2D array of shape (len(Z), n_max), one descriptor per row
    Raises: ValueError if a molecule has more than n_max atoms
    """
    n_mols = len(Z)
    # Rows start as zeros, so short molecules are zero-padded automatically.
    coulomb_eVs = np.zeros((n_mols, n_max))

    for k in range(n_mols):
        charges = np.asarray(Z[k], dtype=float)
        coords  = np.asarray(R[k], dtype=float)
        n_atoms = len(charges)

        if n_atoms > n_max:
            raise ValueError(
                "molecule {} has {} atoms but n_max is {}".format(k, n_atoms, n_max))
        if n_atoms == 0:
            continue  # nothing to diagonalize; the row stays all zeros

        # All pairwise distances |R_i - R_j| at once via broadcasting.
        dist = lin.norm(coords[:, None, :] - coords[None, :, :], axis=-1)
        # The diagonal is overwritten below; use 1 there to avoid 0/0.
        np.fill_diagonal(dist, 1.0)

        # Off-diagonal: Z_i Z_j / r_ij (first power of the distance).
        M = np.outer(charges, charges) / dist
        np.fill_diagonal(M, 0.5 * charges**2.4)

        # M is real and symmetric, so the symmetric solver applies and is
        # guaranteed to return real eigenvalues.
        eigenValues = lin.eigvalsh(M)
        order = np.argsort(-np.abs(eigenValues), kind="stable")
        coulomb_eVs[k, :n_atoms] = eigenValues[order]

    if fname is not None:
        np.savetxt(fname=fname, X=coulomb_eVs, delimiter=" ", newline="\n")
    return coulomb_eVs

In [11]:
# Read the molecular structures from "qm7.txt" and the energies from "PBE0.txt".
Z, R, E = importQM7(structure_file="qm7.txt", energy_file="PBE0.txt")

# Descriptor width: the atom count of the largest molecule in the data set.
n_max = max(len(atoms) for atoms in Z)

# One row of zero-padded Coulomb-matrix eigenvalues per molecule.
e = coulomb_eigenvalues(Z, R, n_max)