In [1]:
import qml 

In [2]:
from glob import glob
import numpy as np

In [3]:
from rdkit import Chem

In [4]:
def read_sdf(sdf):
    """Return the text of the file at path ``sdf`` with trailing whitespace removed.

    The file is opened in text mode; leading whitespace and interior
    newlines are left untouched, only the tail end is stripped.
    """
    with open(sdf, "r") as handle:
        return handle.read().rstrip()

In [5]:
def get_ncharges_coords(sdf, remove_hs=True):
    """Extract atomic numbers and coordinates from a MOL block.

    Parameters
    ----------
    sdf : str
        Text of a MOL block (for example the string returned by ``read_sdf``).
    remove_hs : bool, optional
        Forwarded to RDKit as ``removeHs``. The default ``True`` keeps
        RDKit's own default, under which hydrogens are not part of the
        returned molecule; pass ``False`` to keep them.

    Returns
    -------
    ncharges : list of int
        Atomic number of every atom in the parsed molecule.
    coords : numpy.ndarray
        Atom positions taken from the molecule's conformer, in the same
        order as ``ncharges``.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``sdf`` into a molecule.
    """
    mol = Chem.MolFromMolBlock(sdf, removeHs=remove_hs)
    # RDKit signals a parse failure by returning None rather than raising;
    # fail here with a clear message instead of an AttributeError below.
    if mol is None:
        raise ValueError("RDKit could not parse the supplied MOL block")

    ncharges = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
    coords = np.asarray(mol.GetConformer().GetPositions())
    return ncharges, coords

In [6]:
# Collect the target SDF paths in a stable, alphabetical order and show them.
target_sdfs = glob("../targets/*.sdf")
target_sdfs.sort()
target_sdfs

['../targets/qm9.sdf', '../targets/vitc.sdf', '../targets/vitd.sdf']

In [7]:
# Paths of all QM9 amon SDF files, sorted so later lists share a fixed order.
qm9_amons_files = glob("../amons-qm9/*.sdf")
qm9_amons_files.sort()

In [8]:
# Load the text of every QM9 amon file, in the same order as the path list.
qm9_amons_sdfs = list(map(read_sdf, qm9_amons_files))

In [9]:
# One (ncharges, coords) pair per QM9 amon.
conf_data = list(map(get_ncharges_coords, qm9_amons_sdfs))

In [10]:
# Transpose the list of (ncharges, coords) pairs into two parallel tuples.
ncharges_list, coords_list = zip(*conf_data)

In [11]:
# Keep a QM9-specific handle on the charges: `ncharges_list` is rebound to
# the vitc amon data further down, while this name is what gets saved.
qm9_ncharges = ncharges_list

In [19]:
# Sorted, de-duplicated atomic numbers occurring anywhere in the QM9 amons.
elements = list(np.unique(np.concatenate(qm9_ncharges)))

In [20]:
# Display the unique atomic numbers found in the QM9 amons.
elements

[6, 7, 8]

In [21]:
# One fixed-length vector per molecule: compute the FCHL-ACSF representation
# and sum it over its first axis (presumably the per-atom axis; confirm
# against the qml documentation).
qm9_reps = [
    np.array(
        qml.representations.generate_fchl_acsf(charges, xyz, elements=elements)
    ).sum(axis=0)
    for charges, xyz in zip(ncharges_list, coords_list)
]

In [22]:
# Convert the list of per-molecule vectors into one NumPy array.
qm9_reps = np.array(qm9_reps)

In [23]:
# Sanity check: shape of the first molecule's representation vector.
qm9_reps[0].shape

(312,)

In [24]:
# Label each amon by its file name: drop the directory part, then keep
# everything before the first ".sdf".
qm9_amons_labels = [
    path.rsplit("/", 1)[-1].partition(".sdf")[0] for path in qm9_amons_files
]

In [25]:
# Paths of all vitc amon SDF files, in alphabetical order.
vitc_amons_files = glob("../amons-vitc/*.sdf")
vitc_amons_files.sort()

In [26]:
# Load the text of every vitc amon file, in the same order as the path list.
vitc_amons_sdfs = list(map(read_sdf, vitc_amons_files))

In [27]:
# One (ncharges, coords) pair per vitc amon. This reuses the name
# `conf_data`, replacing the QM9 pairs built earlier.
conf_data = list(map(get_ncharges_coords, vitc_amons_sdfs))

In [28]:
# Transpose the vitc (ncharges, coords) pairs into two parallel tuples.
# NOTE(review): this rebinds `ncharges_list` / `coords_list`, which held the
# QM9 amon data above; any cell run after this one sees the vitc values.
ncharges_list, coords_list = zip(*conf_data)

In [34]:
# Save the QM9 amon labels, nuclear charges and representations to an .npz archive

In [30]:
# Persist the QM9 amon labels, nuclear charges and representations together
# in one .npz archive.
#
# The per-molecule charge lists can differ in length. np.savez converts every
# value to an array, and NumPy >= 1.24 raises ValueError when asked to build
# an array from such ragged input implicitly. Packing the lists into an
# explicit 1-D object array (one list per molecule) works on any NumPy
# version. Object arrays are pickled inside the archive, so reading this
# entry back requires np.load(..., allow_pickle=True).
qm9_ncharges_obj = np.empty(len(qm9_ncharges), dtype=object)
for idx, charges in enumerate(qm9_ncharges):
    qm9_ncharges_obj[idx] = list(charges)

np.savez("../representations/amons_FCHL_global_data.npz",
         qm9_amons_labels=qm9_amons_labels,
         qm9_amons_ncharges=qm9_ncharges_obj,
         qm9_amons_reps=qm9_reps)