In [1]:
import os
from glob import glob

import numpy as np

In [2]:
from rdkit import Chem

In [3]:
# Collect the target .xyz paths and put them in a deterministic (sorted) order.
target_xyzs = glob("../targets/*.xyz")
target_xyzs.sort()

In [4]:
# Display the .xyz paths found by the glob above.
target_xyzs

['../targets/qm9_0.xyz', '../targets/vitc.xyz', '../targets/vitd.xyz']

In [5]:
def read_sdf(sdf):
    """Return the text of the file at path ``sdf`` with trailing whitespace stripped."""
    with open(sdf, "r") as handle:
        contents = handle.read()
    # Only the right-hand side is stripped; leading whitespace is preserved.
    return contents.rstrip()

In [6]:
def get_ncharges_coords(sdf):
    """Parse a MOL block and return its atomic numbers and atom coordinates.

    Parameters
    ----------
    sdf : str
        Text of a MOL block (the file contents, not a path).

    Returns
    -------
    ncharges : list of int
        Atomic number of every atom of the hydrogen-completed molecule.
    coords : numpy.ndarray
        Conformer positions, one row per atom, in the same atom order as
        ``ncharges``.

    Raises
    ------
    ValueError
        If RDKit cannot parse the MOL block.
    """
    mol = Chem.MolFromMolBlock(sdf)
    if mol is None:
        # MolFromMolBlock signals a parse failure by returning None; fail
        # here with a clear message instead of an opaque error inside AddHs.
        raise ValueError("RDKit could not parse the provided MOL block")
    # MolFromMolBlock strips explicit hydrogens by default (removeHs=True),
    # so AddHs re-creates them. Without addCoords=True the re-added hydrogens
    # are all placed at the origin, corrupting the returned coordinates.
    # With it, RDKit positions them from the heavy-atom geometry.
    mol = Chem.AddHs(mol, addCoords=True)
    ncharges = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
    coords = np.asarray(mol.GetConformer().GetPositions())
    return ncharges, coords

In [7]:
# Collect the target .sdf paths in sorted order, then display them.
target_files = glob("../targets/*.sdf")
target_files.sort()
target_files

['../targets/qm9.sdf', '../targets/vitc.sdf', '../targets/vitd.sdf']

In [8]:
# Read every target file into memory as text, keeping the order of target_files.
target_sdfs = list(map(read_sdf, target_files))

In [9]:
# One (ncharges, coords) pair per target, in the same order as target_sdfs.
conf_data = list(map(get_ncharges_coords, target_sdfs))

In [10]:
# Transpose the per-target (ncharges, coords) pairs into two parallel tuples.
ncharges_list, coords_list = zip(*conf_data)

In [11]:
# Label each target by its file name without directory or extension.
# os.path handles platform-specific separators and strips only the final
# extension, unlike splitting on "/" and on the first ".sdf" substring.
target_labels = [os.path.splitext(os.path.basename(t))[0] for t in target_files]

In [12]:
# Display the labels derived from the .sdf file names.
target_labels

['qm9', 'vitc', 'vitd']

In [13]:
# Load the saved SPAHM representation (.npy) of each target.

In [14]:
# Array stored for the QM9 target (presumably its SPAHM representation, per the file name).
qm9_rep = np.load("../representations/X_SPAHM_QM9_target.npy")

In [15]:
# Array stored for the vitc target (presumably its SPAHM representation, per the file name).
vitc_rep = np.load("../representations/X_SPAHM_vitc_target.npy")

In [16]:
# Array stored for the vitd target (presumably its SPAHM representation, per the file name).
vitd_rep = np.load("../representations/X_SPAHM_vitd_target.npy")

In [17]:
# The per-target representations apparently differ in shape (NumPy emitted a
# ragged-array warning for the implicit conversion), so they cannot be stacked
# into a regular 2-D array. Build a 1-D object array explicitly: implicit
# ragged-array creation is an error in NumPy >= 1.24. Filling element by
# element keeps one array per slot even if the shapes happen to match.
target_reps = np.empty(3, dtype=object)
for idx, rep in enumerate((qm9_rep, vitc_rep, vitd_rep)):
    target_reps[idx] = rep

  """Entry point for launching an IPython kernel.


In [18]:
# Inspect the shape of the vitd representation.
vitd_rep.shape

(107,)

In [19]:
# The targets apparently have different atom counts, so the per-target
# atomic-number lists are ragged. Pack them into an explicit 1-D object array
# (one list per target): np.savez would otherwise convert the tuple
# implicitly, which warns on older NumPy and raises on NumPy >= 1.24.
target_ncharges = np.empty(len(ncharges_list), dtype=object)
for idx, ncharges in enumerate(ncharges_list):
    target_ncharges[idx] = ncharges

# Object arrays are pickled inside the archive, so readers must call
# np.load(..., allow_pickle=True).
np.savez("../representations/target_SPAHM_global_data.npz", 
         target_labels=target_labels, 
         target_reps=target_reps, 
         target_ncharges=target_ncharges,)

  return array(a, dtype, copy=False, order=order, subok=True)
