In [1]:
import qml 

In [2]:
from glob import glob
import numpy as np

In [3]:
from rdkit import Chem

In [4]:
def read_sdf(sdf):
    """Return the text of the file at path ``sdf`` with trailing whitespace removed."""
    with open(sdf, "r") as handle:
        contents = handle.read()
    # Only the tail is stripped; leading whitespace in the header is kept.
    return contents.rstrip()

In [5]:
def get_ncharges_coords(sdf, verbose=True):
    """Extract nuclear charges and 3D coordinates from a mol block.

    Parameters
    ----------
    sdf : str
        Text of a mol/SDF block (the contents, not a file path), e.g. the
        string returned by ``read_sdf``.
    verbose : bool, optional
        When True (the default, which keeps the original behaviour) the list
        of atomic numbers is printed for every molecule. Pass False to keep
        notebook output short.

    Returns
    -------
    ncharges : list of int
        Atomic number of each atom in the parsed RDKit molecule.
    coords : numpy.ndarray
        Positions of the molecule's conformer as returned by
        ``GetPositions()``, in the same atom order as ``ncharges``.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``sdf`` (``MolFromMolBlock`` returns None).

    Notes
    -----
    ``Chem.MolFromMolBlock`` is called with RDKit's default arguments.
    NOTE(review): those defaults strip hydrogens (``removeHs=True``), so the
    result contains heavy atoms only; an ``AddHs`` call was previously
    commented out here -- confirm that heavy-atom-only output is intended.
    """
    mol = Chem.MolFromMolBlock(sdf)
    if mol is None:
        # Fail with a clear message instead of an AttributeError on None below.
        raise ValueError("RDKit could not parse the supplied mol block")
    ncharges = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
    if verbose:
        print(ncharges)
    coords = np.asarray(mol.GetConformer().GetPositions())
    return ncharges, coords

In [6]:
# Collect the target-molecule SDF paths in sorted order, then display them.
target_sdfs = glob("../targets/*.sdf")
target_sdfs.sort()
target_sdfs

['../targets/qm9.sdf', '../targets/vitc.sdf', '../targets/vitd.sdf']

In [7]:
# Paths of all QM9 amon SDF files, in sorted order.
qm9_amons_files = glob("../amons-qm9/*.sdf")
qm9_amons_files.sort()

In [8]:
# Load the text of every QM9 amon file, keeping the order of qm9_amons_files.
qm9_amons_sdfs = list(map(read_sdf, qm9_amons_files))

In [9]:
# Parse each QM9 amon mol block into an (ncharges, coords) tuple.
conf_data = list(map(get_ncharges_coords, qm9_amons_sdfs))

[8]
[6, 6]
[6, 7]
[8, 6]
[6, 8, 8]
[6, 8, 7]
[6, 6, 7, 7]
[8, 6, 6, 7]
[8, 6, 6, 6]
[6, 7, 6, 8]
[6, 7, 6, 8, 8]
[6, 8, 8, 7, 6]
[8, 6, 6, 7, 7, 6]
[8, 6, 6, 7, 6, 8]
[8, 6, 6, 7, 6, 8, 8]
[6, 7, 6, 8, 8, 7, 6]


In [10]:
# Split the (ncharges, coords) pairs of the QM9 amons into two parallel tuples.
ncharges_list, coords_list = zip(*conf_data)

In [11]:
# Keep the QM9 nuclear charges under a dataset-specific name: ncharges_list
# is reassigned further down when the next amon set is processed.
qm9_ncharges = ncharges_list

In [12]:
# Many-body types for SLATM, built from the element set Z = 6, 7, 8, 16.
# This single mbtypes list is passed to every generate_slatm call below.
mbtypes = qml.representations.get_slatm_mbtypes([[6,7,8,16]])

In [13]:
# Display the generated one-, two- and three-body types.
mbtypes

[[8],
 [16],
 [6],
 [7],
 [8, 8],
 [16, 16],
 [6, 6],
 [7, 7],
 (8, 16),
 (8, 6),
 (8, 7),
 (16, 6),
 (16, 7),
 (6, 7),
 [8, 16, 6],
 [8, 6, 16],
 [16, 8, 6],
 [8, 16, 7],
 [8, 7, 16],
 [16, 8, 7],
 [8, 6, 7],
 [8, 7, 6],
 [6, 8, 7],
 [16, 6, 7],
 [16, 7, 6],
 [6, 16, 7]]

In [14]:
# One local (local=True) SLATM representation array per QM9 amon.
qm9_reps = [
    np.array(
        qml.representations.generate_slatm(xyz, charges, mbtypes, local=True)
    )
    for charges, xyz in zip(ncharges_list, coords_list)
]

In [15]:
# The per-molecule arrays have different numbers of rows, so the list is
# ragged. Request dtype=object explicitly: relying on NumPy to infer it emits
# a VisibleDeprecationWarning on older NumPy and raises ValueError on newer
# releases.
qm9_reps = np.array(qm9_reps, dtype=object)

  """Entry point for launching an IPython kernel.


In [16]:
# Shape of the first QM9 amon's representation
# (presumably (n_atoms, n_features) for local SLATM -- TODO confirm).
qm9_reps[0].shape

(1, 3110)

In [17]:
# Label of each QM9 amon: the file name with the directory part and
# everything from the first ".sdf" onwards removed.
qm9_amons_labels = [
    sdf_path.rpartition("/")[2].partition(".sdf")[0]
    for sdf_path in qm9_amons_files
]

In [26]:
# Paths of all vitc amon SDF files, in sorted order.
vitc_amons_files = glob("../amons-vitc/*.sdf")
vitc_amons_files.sort()

In [28]:
# Load the text of every vitc amon file, keeping the order of vitc_amons_files.
vitc_amons_sdfs = list(map(read_sdf, vitc_amons_files))

In [29]:
# Parse each vitc amon mol block into an (ncharges, coords) tuple.
conf_data = list(map(get_ncharges_coords, vitc_amons_sdfs))

[6]
[8]
[6, 6]
[8, 6]
[6, 6]
[8, 6]
[8, 6, 8]
[6, 6, 8]
[6, 6, 6]
[6, 6, 6]
[6, 6, 8]
[6, 6, 8]
[6, 8, 6, 8]
[6, 8, 6, 8]
[8, 6, 6, 8]
[8, 6, 6, 6]
[6, 8, 6, 6]
[6, 6, 8, 6]
[6, 8, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 8]
[8, 6, 6, 6]
[8, 6, 8, 6]
[8, 6, 6, 8, 6]
[8, 6, 6, 6, 8]
[8, 6, 6, 6, 8]
[6, 6, 8, 6, 6]
[6, 6, 8, 6, 8]
[6, 6, 8, 6, 8]
[8, 6, 6, 8, 6]
[6, 6, 6, 8, 6]
[6, 6, 6, 6, 8]
[6, 6, 6, 6, 6]
[8, 6, 8, 6, 6]
[8, 6, 6, 8, 6, 8]
[8, 6, 6, 8, 6, 8]
[6, 6, 8, 6, 6, 8]
[8, 6, 8, 6, 6, 8]
[8, 6, 8, 6, 6, 6]
[6, 6, 8, 6, 8, 6]
[6, 6, 6, 8, 6, 6]
[6, 6, 8, 6, 6, 6]
[6, 6, 6, 8, 6, 8]
[6, 6, 6, 6, 8, 6]
[6, 6, 6, 6, 6, 8]
[8, 6, 8, 6, 6, 6]
[6, 6, 6, 8, 6, 6, 8]
[6, 6, 8, 6, 8, 6, 8]
[8, 6, 8, 6, 6, 6, 8]
[8, 6, 8, 6, 6, 8, 6]
[8, 6, 8, 6, 6, 6, 8]
[6, 6, 6, 8, 6, 8, 6]
[6, 6, 8, 6, 8, 6, 6]
[6, 6, 8, 6, 6, 8, 6]
[6, 6, 8, 6, 6, 6, 8]
[8, 6, 8, 6, 6, 8, 6]
[8, 6, 8, 6, 6, 6, 6]
[6, 6, 6, 6, 8, 6, 8]
[8, 6, 8, 6, 6, 8, 6, 8]
[6, 6, 8, 6, 8, 6, 6, 8]
[6, 6, 6, 8, 6, 8, 6, 8]
[8, 6, 8, 6, 6, 8,

In [30]:
# Inspect the (ncharges, coords) tuples parsed from the vitc amons.
# NOTE(review): this displays the whole list; slicing (e.g. conf_data[:3])
# would keep the notebook output short.
conf_data

[([6], array([[-0.299 , -0.3443,  0.546 ]])),
 ([8], array([[ 0.7028, -1.3555,  0.379 ]])),
 ([6, 6],
  array([[ 0.4138,  0.954 ,  0.2268],
         [ 1.6926,  0.6749, -0.0402]])),
 ([8, 6],
  array([[ 2.9115, -1.393 , -0.169 ],
         [ 1.8813, -0.7636,  0.0364]])),
 ([6, 6],
  array([[-0.3101, -0.3482,  0.5375],
         [-1.471 , -0.7497, -0.3442]])),
 ([8, 6],
  array([[ 0.6964, -1.3489,  0.3801],
         [-0.293 , -0.3498,  0.5452]])),
 ([8, 6, 8],
  array([[ 2.9079, -1.3866, -0.1686],
         [ 1.8766, -0.7741,  0.0384],
         [ 0.7073, -1.3489,  0.3773]])),
 ([6, 6, 8],
  array([[ 0.4355,  0.9555,  0.2216],
         [ 1.6916,  0.5944, -0.0319],
         [ 2.6527,  1.5114, -0.3449]])),
 ([6, 6, 6],
  array([[ 0.4893,  0.8129,  0.2257],
         [ 1.7912,  0.7067, -0.0669],
         [-0.3904, -0.3445,  0.5673]])),
 ([6, 6, 6],
  array([[-0.3254, -0.3427,  0.5382],
         [-1.4748, -0.7639, -0.3618],
         [-2.7618, -0.0404, -0.0035]])),
 ([6, 6, 8],
  array([[-1.4872, 

In [31]:
# Split the (ncharges, coords) pairs of the vitc amons into two parallel tuples.
ncharges_list, coords_list = zip(*conf_data)

In [32]:
# Keep the vitc nuclear charges under a dataset-specific name: ncharges_list
# is reassigned further down when the next amon set is processed.
vitc_ncharges = ncharges_list

In [33]:
# One local (local=True) SLATM representation array per vitc amon.
vitc_reps = [
    np.array(
        qml.representations.generate_slatm(xyz, charges, mbtypes, local=True)
    )
    for charges, xyz in zip(ncharges_list, coords_list)
]

In [34]:
# The per-molecule arrays have different numbers of rows, so the list is
# ragged. Request dtype=object explicitly: relying on NumPy to infer it emits
# a VisibleDeprecationWarning on older NumPy and raises ValueError on newer
# releases.
vitc_reps = np.array(vitc_reps, dtype=object)

  """Entry point for launching an IPython kernel.


In [35]:
# Label of each vitc amon: the file name with the directory part and
# everything from the first ".sdf" onwards removed.
vitc_amons_labels = [
    sdf_path.rpartition("/")[2].partition(".sdf")[0]
    for sdf_path in vitc_amons_files
]

In [36]:
# Paths of all vitd amon SDF files, in sorted order.
vitd_amons_files = glob("../amons-vitd/*.sdf")
vitd_amons_files.sort()

In [37]:
# Load the text of every vitd amon file, keeping the order of vitd_amons_files.
vitd_amons_sdfs = list(map(read_sdf, vitd_amons_files))

In [38]:
# Parse each vitd amon mol block into an (ncharges, coords) tuple.
conf_data = list(map(get_ncharges_coords, vitd_amons_sdfs))

[6]
[8]
[6, 6]
[6, 6]
[8, 6]
[6, 6, 6]
[6, 6, 6]
[8, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[8, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[6, 6, 6, 6]
[8, 6, 6, 6]
[6, 6, 6, 6, 6]
[8, 6, 6, 6, 6]
[8, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6, 6, 6]
[6, 6, 6,

In [39]:
# Split the (ncharges, coords) pairs of the vitd amons into two parallel tuples.
ncharges_list, coords_list = zip(*conf_data)

In [40]:
# Keep the vitd nuclear charges under a dataset-specific name; they are
# written to the .npz archive at the end of the notebook.
vitd_ncharges = ncharges_list

In [41]:
# One local (local=True) SLATM representation array per vitd amon.
vitd_reps = [
    np.array(
        qml.representations.generate_slatm(xyz, charges, mbtypes, local=True)
    )
    for charges, xyz in zip(ncharges_list, coords_list)
]

In [42]:
# The per-molecule arrays have different numbers of rows, so the list is
# ragged. Request dtype=object explicitly: relying on NumPy to infer it emits
# a VisibleDeprecationWarning on older NumPy and raises ValueError on newer
# releases.
vitd_reps = np.array(vitd_reps, dtype=object)

  """Entry point for launching an IPython kernel.


In [43]:
# Label of each vitd amon: the file name with the directory part and
# everything from the first ".sdf" onwards removed.
vitd_amons_labels = [
    sdf_path.rpartition("/")[2].partition(".sdf")[0]
    for sdf_path in vitd_amons_files
]

In [34]:
# Save the amon labels, nuclear charges and SLATM representations of all three sets to one .npz file

In [44]:
# Write labels, nuclear charges and SLATM representations of the three amon
# sets into a single archive.
#
# The *_ncharges values are tuples of lists with different lengths. Convert
# them to object arrays explicitly: letting np.savez infer this emits a
# VisibleDeprecationWarning on older NumPy and raises ValueError on newer
# releases.
#
# Object arrays are stored pickled, so the archive has to be read back with
# np.load(path, allow_pickle=True).
np.savez("../representations/amons_SLATM_data.npz",
         vitd_amons_labels=vitd_amons_labels,
         vitc_amons_labels=vitc_amons_labels,
         qm9_amons_labels=qm9_amons_labels,
         vitd_amons_ncharges=np.array(vitd_ncharges, dtype=object),
         vitc_amons_ncharges=np.array(vitc_ncharges, dtype=object),
         qm9_amons_ncharges=np.array(qm9_ncharges, dtype=object),
         vitd_amons_reps=vitd_reps,
         vitc_amons_reps=vitc_reps,
         qm9_amons_reps=qm9_reps)

  return array(a, dtype, copy=False, order=order, subok=True)


In [45]:
# Shape of the first vitd amon's representation
# (presumably (n_atoms, n_features) for local SLATM -- TODO confirm).
vitd_reps[0].shape

(1, 3110)