In [1]:
import qml 

In [2]:
import os
from glob import glob

import numpy as np

In [3]:
# Paths of all target geometries, in lexicographic order so later lists line up.
target_xyzs = glob("targets/*.xyz")
target_xyzs.sort()

In [4]:
# Paths of all database geometries, in lexicographic order so later lists line up.
database_xyzs = glob("qm7/*.xyz")
database_xyzs.sort()

In [5]:
# Build one qml.Compound per target file, preserving the order of target_xyzs.
target_mols = list(map(qml.Compound, target_xyzs))

In [6]:
# Build one qml.Compound per database file, preserving the order of database_xyzs.
database_mols = list(map(qml.Compound, database_xyzs))

In [7]:
def get_CM(mol):
    """Build the Coulomb matrix of a molecule.

    The matrix is defined as

        CM[i, i] = 0.5 * Z_i ** 2.4
        CM[i, j] = Z_i * Z_j / |R_i - R_j|    (i != j)

    where ``Z`` are the nuclear charges and ``R`` the atomic coordinates.

    Parameters
    ----------
    mol : object
        Anything exposing ``nuclear_charges`` (one value per atom) and
        ``coordinates`` (one position vector per atom), in the same atom order.

    Returns
    -------
    ncharges
        ``mol.nuclear_charges``, returned unchanged.
    CM : numpy.ndarray
        Float array of shape ``(n_atoms, n_atoms)``.
    """
    ncharges = mol.nuclear_charges
    coords = np.asarray(mol.coordinates, dtype=float)
    n_atoms = len(coords)
    if n_atoms == 0:
        # Nothing to pair up; keep the original's empty-matrix result.
        return ncharges, np.zeros((0, 0))

    charges = np.asarray(ncharges, dtype=float)

    # All pairwise inter-atomic distances in one shot via broadcasting.
    distances = np.linalg.norm(coords[:, None, :] - coords[None, :, :], axis=-1)
    # Self-distances are zero; use a placeholder so the division below is
    # well-defined. The diagonal is overwritten right after.
    np.fill_diagonal(distances, 1.0)

    CM = np.outer(charges, charges) / distances
    np.fill_diagonal(CM, 0.5 * charges ** 2.4)

    return ncharges, CM

In [8]:
# Scratch check: take the first target molecule so get_CM can be tried on a
# single input. `mol` is reused as the loop variable in the cells below.
mol = target_mols[0]

In [9]:
# Scratch check: run get_CM once on the single molecule selected above.
# `CM` is rebound by the loops in the following cells.
ncharges, CM = get_CM(mol)

In [10]:
# Nuclear charges and Coulomb matrix of every target molecule, collected in the
# same order as target_mols.
target_ncharges, target_CMs = [], []
for mol in target_mols:
    ncharge, CM = get_CM(mol)
    target_ncharges += [ncharge]
    target_CMs += [CM]

In [11]:
# Nuclear charges and Coulomb matrix of every database molecule, collected in
# the same order as database_mols.
database_ncharges, database_CMs = [], []
for mol in database_mols:
    ncharge, CM = get_CM(mol)
    database_ncharges += [ncharge]
    database_CMs += [CM]

In [12]:
# get_CM returns one (n_atoms, n_atoms) matrix per molecule, so this list is
# ragged whenever the molecules differ in size. NumPy only builds an array from
# ragged input when dtype=object is given explicitly: older releases emit a
# VisibleDeprecationWarning without it, NumPy >= 1.24 raises ValueError.
target_CMs = np.array(target_CMs, dtype=object)

  """Entry point for launching an IPython kernel.


In [13]:
# get_CM returns one (n_atoms, n_atoms) matrix per molecule, so this list is
# ragged whenever the molecules differ in size. NumPy only builds an array from
# ragged input when dtype=object is given explicitly: older releases emit a
# VisibleDeprecationWarning without it, NumPy >= 1.24 raises ValueError.
database_CMs = np.array(database_CMs, dtype=object)

  """Entry point for launching an IPython kernel.


In [14]:
# Label = file name without directory or extension ("targets/abc.xyz" -> "abc").
# os.path is used instead of splitting on "/" so this also works where glob
# returns paths joined with the platform separator (e.g. "\\" on Windows), and
# so only the trailing extension is stripped.
target_labels = [os.path.splitext(os.path.basename(path))[0] for path in target_xyzs]

In [15]:
# Label = file name without directory or extension ("qm7/abc.xyz" -> "abc").
# os.path is used instead of splitting on "/" so this also works where glob
# returns paths joined with the platform separator (e.g. "\\" on Windows), and
# so only the trailing extension is stripped.
database_labels = [os.path.splitext(os.path.basename(path))[0] for path in database_xyzs]

In [16]:
# Convert the list of label strings to a NumPy array so it can be stored with np.savez.
target_labels = np.array(target_labels)

In [17]:
# Convert the list of label strings to a NumPy array so it can be stored with np.savez.
database_labels = np.array(database_labels)

In [18]:
# Each entry is one molecule's nuclear_charges (one value per atom), so the
# list is ragged whenever the molecules differ in size. dtype=object must be
# explicit: without it NumPy warns on older releases and raises ValueError on
# NumPy >= 1.24.
target_ncharges = np.array(target_ncharges, dtype=object)

  """Entry point for launching an IPython kernel.


In [19]:
# Each entry is one molecule's nuclear_charges (one value per atom), so the
# list is ragged whenever the molecules differ in size. dtype=object must be
# explicit: without it NumPy warns on older releases and raises ValueError on
# NumPy >= 1.24.
database_ncharges = np.array(database_ncharges, dtype=object)

  """Entry point for launching an IPython kernel.


In [20]:
# Persist labels, Coulomb matrices and nuclear charges for both molecule sets
# into a single archive; each keyword becomes the name of one stored array.
np.savez(
    "data.npz",
    target_labels=target_labels,
    target_CMs=target_CMs,
    target_ncharges=target_ncharges,
    database_labels=database_labels,
    database_CMs=database_CMs,
    database_ncharges=database_ncharges,
)

In [21]:
# Re-open the archive written by the np.savez call earlier in this notebook to
# verify its contents. allow_pickle=True lets NumPy unpickle object-dtype
# members; only use it on files you trust, since unpickling can execute code.
x = np.load("data.npz", allow_pickle=True)

In [22]:
# Show the names of the arrays stored in the archive.
x.files

['target_labels',
 'target_CMs',
 'target_ncharges',
 'database_labels',
 'database_CMs',
 'database_ncharges']