In [1]:
import qml 

In [2]:
from glob import glob
import numpy as np

In [3]:
from rdkit import Chem

In [4]:
def read_sdf(sdf):
    with open(sdf, "r") as f:
        txt = f.read().rstrip()
    return txt

In [5]:
def get_ncharges_coords(sdf):
    mol = Chem.MolFromMolBlock(sdf)
    # rdkit molobj
    ncharges = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
    conf = mol.GetConformer()
    coords = np.asarray(conf.GetPositions())
    return ncharges, coords

In [6]:
target_sdfs = sorted(glob("../targets/*.sdf"), reverse=True)
target_sdfs

['../targets/qm9.sdf', '../targets/penicillin.sdf']

In [7]:
qm9_amons_files = sorted(glob("../amons-qm9/*.sdf"))

In [8]:
qm9_amons_sdfs = [read_sdf(x) for x in qm9_amons_files]

In [9]:
conf_data = [get_ncharges_coords(x) for x in qm9_amons_sdfs]

In [10]:
ncharges_list, coords_list = zip(*conf_data)

In [11]:
ncharges_list

([8],
 [6, 6],
 [6, 7],
 [8, 6],
 [6, 8, 8],
 [6, 8, 7],
 [6, 6, 7, 7],
 [8, 6, 6, 7],
 [8, 6, 6, 6],
 [6, 7, 6, 8],
 [6, 7, 6, 8, 8],
 [6, 8, 8, 7, 6],
 [8, 6, 6, 7, 7, 6],
 [8, 6, 6, 7, 6, 8],
 [8, 6, 6, 7, 6, 8, 8],
 [6, 7, 6, 8, 8, 7, 6])

In [12]:
qm9_ncharges = [8, 6, 6, 7, 6, 8, 8, 7, 6]

In [13]:
mbtypes = qml.representations.get_slatm_mbtypes([qm9_ncharges])

In [14]:
qm9_reps = [np.array(qml.representations.generate_slatm(coords_list[i], ncharges_list[i], mbtypes,
                                              local=False)) for i in 
       range(len(ncharges_list))]

In [15]:
qm9_reps = np.array(qm9_reps)

In [16]:
qm9_reps[0].shape

(3121,)

In [17]:
qm9_amons_labels = [t.split("/")[-1].split(".sdf")[0] for t in qm9_amons_files]

In [18]:
penicillin_amons_files = sorted(glob("../amons-penicillin/*.sdf"))

In [19]:
penicillin_amons_sdfs = [read_sdf(x) for x in penicillin_amons_files]

In [20]:
p_conf_data = [get_ncharges_coords(x) for x in penicillin_amons_sdfs]

In [21]:
p_ncharges_list, p_coords_list = zip(*p_conf_data)

In [22]:
p_ncharges = [16, 8, 8, 8, 8, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6]

In [23]:
p_ncharges

[16, 8, 8, 8, 8, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6]

In [24]:
mbtypes_p = qml.representations.get_slatm_mbtypes([p_ncharges])

In [25]:
p_reps = [np.array(qml.representations.generate_slatm(p_coords_list[i], p_ncharges_list[i], mbtypes_p,
                                              local=False)) for i in 
       range(len(p_ncharges_list))]

In [26]:
p_reps = np.array(p_reps)

In [27]:
p_reps[0].shape

(5670,)

In [28]:
p_amons_labels = [t.split("/")[-1].split(".sdf")[0] for t in penicillin_amons_files]

In [29]:
# np save 

In [31]:
np.savez("../representations/amons_SLATM_global_data.npz", 
         qm9_amons_labels=qm9_amons_labels,
         qm9_amons_ncharges=ncharges_list,
         qm9_amons_reps=qm9_reps,
         penicillin_amons_labels=p_amons_labels,
         penicillin_amons_ncharges=p_ncharges_list,
         penicillin_amons_reps=p_reps
        )

  return array(a, dtype, copy=False, order=order, subok=True)


In [30]:
ncharges_list

([8],
 [6, 6],
 [6, 7],
 [8, 6],
 [6, 8, 8],
 [6, 8, 7],
 [6, 6, 7, 7],
 [8, 6, 6, 7],
 [8, 6, 6, 6],
 [6, 7, 6, 8],
 [6, 7, 6, 8, 8],
 [6, 8, 8, 7, 6],
 [8, 6, 6, 7, 7, 6],
 [8, 6, 6, 7, 6, 8],
 [8, 6, 6, 7, 6, 8, 8],
 [6, 7, 6, 8, 8, 7, 6])