In [1]:
import qml 

In [2]:
from glob import glob
import numpy as np

In [3]:
from rdkit import Chem

In [4]:
# Collect the XYZ files of the target molecules in deterministic (sorted) order.
# NOTE(review): target_xyzs is not referenced by any later cell visible here.
target_xyzs = glob("../targets/*.xyz")
target_xyzs.sort()

In [5]:
def read_sdf(sdf):
    """Return the text of the file at path ``sdf`` with trailing whitespace removed.

    Parameters
    ----------
    sdf : str
        Path of the file to read (opened in text mode).

    Returns
    -------
    str
        The complete file contents, right-stripped of whitespace/newlines.
    """
    handle = open(sdf, "r")
    try:
        contents = handle.read()
    finally:
        # Always release the file handle, even if reading fails.
        handle.close()
    return contents.rstrip()

In [6]:
def get_ncharges_coords(sdf, remove_hs=True):
    """Extract nuclear charges and 3D coordinates from a mol block string.

    Parameters
    ----------
    sdf : str
        The *contents* of a mol/SDF block (not a file path); it is handed
        directly to ``Chem.MolFromMolBlock``.
    remove_hs : bool, optional
        Forwarded to RDKit as ``removeHs``. The default (``True``) matches
        RDKit's own default, so hydrogens are dropped and only heavy atoms
        are returned. Pass ``False`` to keep the hydrogens in the output.

    Returns
    -------
    ncharges : list of int
        Atomic number of every atom in the parsed molecule.
    coords : numpy.ndarray
        Atom positions taken from the molecule's conformer, in the same
        atom order as ``ncharges``.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``sdf`` (``MolFromMolBlock`` returns ``None``).
    """
    mol = Chem.MolFromMolBlock(sdf, removeHs=remove_hs)
    if mol is None:
        # Without this guard the failure shows up later as an opaque
        # AttributeError on ``None``.
        raise ValueError("RDKit could not parse the supplied mol block")

    ncharges = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
    coords = np.asarray(mol.GetConformer().GetPositions())
    return ncharges, coords

In [7]:
# Paths of the target molecules' SDF files, sorted so the ordering is reproducible.
target_files = glob("../targets/*.sdf")
target_files.sort()
target_files

['../targets/qm9.sdf', '../targets/vitc.sdf', '../targets/vitd.sdf']

In [8]:
# Load the raw text of every target SDF file (same order as target_files).
target_sdfs = list(map(read_sdf, target_files))

In [9]:
# Parse each mol block into an (ncharges, coords) pair.
conf_data = list(map(get_ncharges_coords, target_sdfs))

In [10]:
# Transpose the list of (ncharges, coords) pairs into two parallel tuples:
# one holding the atomic-number lists, one holding the coordinate arrays.
ncharges_list, coords_list = zip(*conf_data)

In [11]:
# Number of atoms returned for each target molecule.
sizes = list(map(len, ncharges_list))
sizes

[9, 12, 28]

In [12]:
# Sorted set of distinct atomic numbers present in each target molecule.
elements_list = list(map(np.unique, ncharges_list))
elements_list

[array([6, 7, 8]), array([6, 8]), array([6, 8])]

In [13]:
# Build one fixed-length vector per target: generate the FCHL-ACSF
# representation for the molecule and sum it over axis 0
# (presumably the atom axis -- confirm against the qml documentation).
# The element list and cutoff are fixed here; they presumably have to match
# whatever other representations these vectors are compared against.
target_reps = np.array([
    np.array(
        qml.representations.generate_fchl_acsf(
            ncharges,
            coords,
            elements=[1, 6, 7, 8, 16],
            rcut=4.8,
        )
    ).sum(axis=0)
    for ncharges, coords in zip(ncharges_list, coords_list)
])

In [14]:
# Sanity check: shape of the summed representation vector of the first target.
target_reps[0].shape

(720,)

In [16]:
# One label per target, derived from its file name without directory or
# extension (e.g. "../targets/qm9.sdf" -> "qm9").
# The labels must come from the file *paths* (target_files), not from
# target_sdfs, which holds the file contents; the files carry the ".sdf" suffix.
target_labels = [path.split("/")[-1].split(".sdf")[0] for path in target_files]

In [17]:
# The targets have different atom counts, so ncharges_list is ragged.
# Letting NumPy convert it implicitly triggers a VisibleDeprecationWarning on
# older releases and a ValueError on NumPy >= 1.24, so build the object array
# explicitly. Loading this entry back requires np.load(..., allow_pickle=True).
target_ncharges = np.array(ncharges_list, dtype=object)

# Persist labels, summed representations and atomic numbers in one archive.
np.savez(
    "../representations/target_FCHL_4.8_global_data.npz",
    target_labels=target_labels,
    target_reps=target_reps,
    target_ncharges=target_ncharges,
)

  return array(a, dtype, copy=False, order=order, subok=True)
