In [15]:
import qml 

In [16]:
from glob import glob
import numpy as np

In [17]:
from rdkit import Chem

In [18]:
# Sorted paths of the .xyz files found under ../targets.
# NOTE(review): target_xyzs is not referenced by any later cell visible here —
# confirm it is still needed.
target_xyzs = sorted(glob("../targets/*.xyz"))

In [19]:
def read_sdf(sdf):
    """Return the full text of the file at path ``sdf``, minus trailing whitespace.

    Parameters
    ----------
    sdf : str
        Path of the file to read (opened in text mode).

    Returns
    -------
    str
        File contents with trailing whitespace (including newlines) removed;
        leading whitespace is preserved.
    """
    with open(sdf, "r") as handle:
        contents = handle.read()
    return contents.rstrip()

In [20]:
def get_ncharges_coords(sdf):
    """Extract nuclear charges and atom coordinates from a mol block.

    Parameters
    ----------
    sdf : str
        Text of a mol block (the file *contents*, not a path), passed straight
        to ``Chem.MolFromMolBlock``.

    Returns
    -------
    ncharges : list of int
        Atomic number of every atom in the parsed molecule, in atom order.
    coords : numpy.ndarray
        Positions taken from the molecule's conformer.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``sdf`` into a molecule.
    """
    # Parsed with RDKit's default options.
    # NOTE(review): per the RDKit docs ``removeHs`` defaults to True, so
    # explicit hydrogens are presumably dropped here — confirm this is intended.
    mol = Chem.MolFromMolBlock(sdf)
    if mol is None:
        # MolFromMolBlock signals a parse failure by returning None; fail with
        # a clear message instead of an AttributeError further down.
        raise ValueError("RDKit could not parse the supplied mol block")
    ncharges = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
    conf = mol.GetConformer()
    coords = np.asarray(conf.GetPositions())
    return ncharges, coords

In [21]:
# Collect the target .sdf paths, order them, and display the list
target_files = glob("../targets/*.sdf")
target_files.sort()
target_files

['../targets/qm9.sdf', '../targets/vitc.sdf', '../targets/vitd.sdf']

In [22]:
# Load the text of every target file, in the same order as target_files
target_sdfs = list(map(read_sdf, target_files))

In [23]:
# One (ncharges, coords) pair per target mol block
conf_data = list(map(get_ncharges_coords, target_sdfs))

In [24]:
# Transpose the per-target (ncharges, coords) pairs into two parallel tuples
ncharges_list, coords_list = zip(*conf_data)

In [25]:
# mbtypes separate to each target

In [26]:
# The many-body types depend only on the fixed element list, not on the
# individual target, so build them once instead of once per molecule.
slatm_mbtypes = qml.representations.get_slatm_mbtypes([[6, 7, 8, 16]])

# One global (local=False) SLATM vector per target, stacked into a single array
target_reps = np.array(
    [
        np.array(
            qml.representations.generate_slatm(
                coords, ncharges, mbtypes=slatm_mbtypes, local=False
            )
        )
        for ncharges, coords in zip(ncharges_list, coords_list)
    ]
)

In [27]:
# Shape of the representation computed for the target at index 2
target_reps[2].shape

(3110,)

In [28]:
# Label each target by its file stem, e.g. "../targets/vitc.sdf" -> "vitc".
# The labels must come from the file paths (target_files), not from the file
# contents (target_sdfs), and the files carry the ".sdf" extension.
target_labels = [t.split("/")[-1].split(".sdf")[0] for t in target_files]

In [29]:
# Persist labels, representations and nuclear charges for the targets.
# The per-target charge lists can differ in length, so they are stored as an
# explicit object array: recent NumPy versions refuse to build an array from a
# ragged sequence implicitly. Reading this entry back requires
# np.load(..., allow_pickle=True).
np.savez(
    "../representations/target_SLATM_global_data.npz",
    target_labels=target_labels,
    target_reps=target_reps,
    target_ncharges=np.array(ncharges_list, dtype=object),
)