In [None]:
!pip install rdkit

Collecting rdkit
  Downloading rdkit-2024.9.5-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.0 kB)
Downloading rdkit-2024.9.5-cp311-cp311-manylinux_2_28_x86_64.whl (34.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.3/34.3 MB[0m [31m51.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit
Successfully installed rdkit-2024.9.5


In [None]:



import os
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem



#configuration: placeholder locations, edit these before running
GEOMETRIES_PATH = "directory!"          # stored geometries file read by np.load
SDF_DIR = "directory!"                  # folder that receives the SDF output
SDF_FILENAME = "acetone_water_all.sdf"  # single file holding every geometry

#read the stored geometries
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects from the file; only point this at files from a trusted source.
geometries_numpy = np.load(GEOMETRIES_PATH, allow_pickle=True)

#make sure the save directory exists, then build the output SDF path
sdf_dir = SDF_DIR
os.makedirs(sdf_dir, exist_ok=True)
output_sdf = os.path.join(sdf_dir, SDF_FILENAME)

#fixed connectivity of the acetone-water complex as (atom i, atom j, bond type)
#atom index layout implied by the entries below:
#   0 = O1 (carbonyl oxygen), 1 = C1, 2 = C2, 3 = C3,
#   4-6 = hydrogens on C2, 7-9 = hydrogens on C3,
#   10 = O2 (water oxygen), 11-12 = water hydrogens
_SINGLE = Chem.BondType.SINGLE
_DOUBLE = Chem.BondType.DOUBLE

ACETONE_WATER_BONDS = [
    #acetone carbon skeleton and carbonyl
    (1, 2, _SINGLE),    # C1 - C2
    (1, 3, _SINGLE),    # C1 - C3
    (1, 0, _DOUBLE),    # C1 = O1
    #first methyl group
    (2, 4, _SINGLE),    # C2 - H1
    (2, 5, _SINGLE),    # C2 - H2
    (2, 6, _SINGLE),    # C2 - H3
    #second methyl group
    (3, 7, _SINGLE),    # C3 - H4
    (3, 8, _SINGLE),    # C3 - H5
    (3, 9, _SINGLE),    # C3 - H6
    #water molecule
    (10, 11, _SINGLE),  # O2 - H7 (Water)
    (10, 12, _SINGLE),  # O2 - H8 (Water)
]


#convert numpy to RDKit molecule
def numpy_to_mol(geometry, mol_name):
    """Build an RDKit molecule with one conformer from a single geometry.

    The connectivity is not perceived from the coordinates; it is taken
    verbatim from the module-level ``ACETONE_WATER_BONDS`` table, so the atom
    order in ``geometry`` must match the indices used there.

    Parameters
    ----------
    geometry : array-like, shape (n_atoms, 4)
        One row per atom: the element symbol in column 0 followed by the
        x, y, z coordinates in columns 1-3.
    mol_name : str
        Value stored in the molecule's ``_Name`` property.

    Returns
    -------
    Chem.RWMol
        Molecule holding the atoms, the fixed bonds and one conformer with
        the given coordinates.

    Raises
    ------
    ValueError
        If ``geometry`` is not a 2-D array with exactly four columns, or if
        it has fewer atoms than the bond table refers to.
    """
    #accept nested lists as well as arrays, and validate before touching RDKit
    geometry = np.asarray(geometry)
    if geometry.ndim != 2 or geometry.shape[1] != 4:
        raise ValueError(
            "geometry must have shape (n_atoms, 4): symbol, x, y, z; "
            f"got shape {geometry.shape}"
        )

    #extract atom types and coordinates
    atoms = geometry[:, 0].astype(str)
    coords = geometry[:, 1:].astype(float)

    #every atom index used by the fixed bond table must exist in this geometry
    highest_index = max((max(i, j) for i, j, _ in ACETONE_WATER_BONDS), default=-1)
    if highest_index >= len(atoms):
        raise ValueError(
            f"geometry has {len(atoms)} atoms but the bond table refers to "
            f"atom index {highest_index}"
        )

    #create RDKit molecule
    mol = Chem.RWMol()

    #add atoms (in input order, so RDKit atom indices equal row indices)
    for atom in atoms:
        mol.AddAtom(Chem.Atom(atom))

    #add fixed bonds
    for (i, j, bond_type) in ACETONE_WATER_BONDS:
        mol.AddBond(i, j, bond_type)

    #create conformer and set coordinates
    conf = Chem.Conformer(len(atoms))
    for i, (x, y, z) in enumerate(coords):
        conf.SetAtomPosition(i, (x, y, z))

    mol.AddConformer(conf)

    #set molecule name
    mol.SetProp("_Name", mol_name)

    return mol

#process all geometries and write them to a single SDF file
#the context manager closes (and flushes) the writer even if converting one
#of the geometries raises part-way through the loop
with Chem.SDWriter(output_sdf) as writer:
    for idx, geometry in enumerate(geometries_numpy):
        mol = numpy_to_mol(geometry, mol_name=f"Complex_{idx+1}")  # names are 1-based
        writer.write(mol)

print(f"saved all {len(geometries_numpy)} geometries into {output_sdf}")


Mounted at /content/drive
✅ Saved all 499 geometries into /content/drive/My Drive/acetone_water_all.sdf
