In [1]:
#Author: samusmaster210
#This notebook reads SMILES strings, builds 3D structures with RDKit, optimizes them with UFF, and writes each one to an .xyz file

In [2]:
import os
import csv
import glob

from rdkit import Chem
from rdkit.Chem import MolFromSmiles, MolToSmiles, rdmolops, rdDistGeom, AllChem 

In [3]:
# Load the SMILES database for the receptor to process.
receptor = 'GCR'
# Read through a context manager so the file handle is closed as soon as the
# contents are in memory, instead of staying open for the life of the kernel.
with open(f"{receptor}_V_SMILES.smi") as smiles_database:
    # Entries are separated by any whitespace; every token is treated as one SMILES string.
    molecules = smiles_database.read().split()
# Transform the list of SMILES strings into a list of RDKit molecule objects.
# Entries may be None (the conversion loop further down checks for this).
mol_list = [Chem.MolFromSmiles(smiles) for smiles in molecules]
length = len(mol_list)

In [4]:
# Settings and helper for the conformer-generation / UFF-optimization loop below.
# max_iterations is the iteration cap passed to the UFF optimizer.
max_iterations = 10000

def try_conformer_generation(molecule):
    """Try to embed a 3D conformer into ``molecule``.

    ETKDGv3 parameters are tried first; ETKDG parameters are only built and
    tried when the first embedding attempt returns -1.

    Args:
        molecule: RDKit molecule to embed; it is passed directly to
            ``AllChem.EmbedMolecule``.

    Returns:
        bool: True as soon as one embedding call returns a value other than
        -1, False when both attempts return -1.
    """
    parameter_factories = (AllChem.ETKDGv3, AllChem.ETKDG)
    # any() short-circuits, so the fallback parameters are never constructed
    # once an earlier attempt has succeeded.
    return any(
        AllChem.EmbedMolecule(molecule, make_parameters()) != -1
        for make_parameters in parameter_factories
    )


# For every parsed molecule: add hydrogens, embed a conformer, UFF-optimize it,
# and write the resulting coordinates to "<receptor>_<zero-padded index>.xyz".
for idx, molecule in enumerate(mol_list):
    # Entries that could not be parsed are reported and skipped.
    if molecule is None:
        print(f"Molecule at index {idx}: {molecule} is invalid.")
        continue

    Chem.SanitizeMol(molecule)
    molecule_with_hs = Chem.AddHs(molecule)

    embedded = try_conformer_generation(molecule_with_hs)
    if not embedded:
        print(f"Failed to generate conformer for molecule at index {idx}.")
        continue

    # Any non-zero status from the optimizer means this molecule is skipped.
    uff_status = AllChem.UFFOptimizeMolecule(molecule_with_hs, maxIters=max_iterations)
    if uff_status != 0:
        print(f"UFF optimization failed for molecule at index {idx}.")
        continue

    # The index is zero-padded to at least four digits in the file name.
    xyz_path = f"{receptor}_{idx:04d}.xyz"

    with open(xyz_path, "w") as xyz_file:
        # Header: atom count followed by an empty comment line.
        xyz_file.write(f"{molecule_with_hs.GetNumAtoms()}\n\n")

        # Coordinates come from the hydrogen-bearing, optimized molecule.
        conformer = molecule_with_hs.GetConformer()
        for atom in molecule_with_hs.GetAtoms():
            coords = conformer.GetAtomPosition(atom.GetIdx())
            xyz_file.write(
                f"{atom.GetSymbol()} {coords.x:>13.6f} {coords.y:>13.6f} {coords.z:>13.6f}\n"
            )

        xyz_file.write("\n")