In [1]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from openbabel import openbabel
import os

In [2]:
# Read the compound table from CSV into `df`, which later cells iterate over.
csv_path = "aganitha_data/bbio_mols.csv"
df = pd.read_csv(csv_path)

# Show the first few rows, then the (rows, columns) shape as a quick sanity check.
display(df.head())
print(df.shape)

Unnamed: 0,Name,SMILES
0,AMG650,CC1=CC(=NC(=N1)N2CCC(CC2)(F)F)NC(=O)C3=C(C=C(C...
1,BB_35,c1(c(ccc(c1)NS(CCO)(=O)=O)C(Nc1cccc(c1)N1CCC(C...
2,BB_40,c12ccc(cc1C1(CN2C(c2cc(ccc2)S(NC(C)(C)C)(=O)=O...
3,BB_36,c1(c(ncc(n1)NC(CO)(C)C)C(Nc1cccc(n1)S(NC(C)(C)...
4,BB_37,c12c(cc(cc1N1CCC3(CC1)CC3)NS(CCO)(=O)=O)ncnc2N...


(23, 2)


In [3]:
def convert_smiles_to_pdbqt(smiles, output_pdbqt_file, random_seed=None):
    """Generate a 3D structure from a SMILES string and write it as PDBQT.

    The molecule is parsed with RDKit, explicit hydrogens are added, a single
    conformer is embedded with ETKDG, and the resulting PDB block is handed to
    Open Babel (in memory) for conversion to PDBQT.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule to convert.
    output_pdbqt_file : str
        Path of the PDBQT file to write.
    random_seed : int, optional
        Seed for the conformer embedding. ``None`` (default) leaves RDKit's
        default seeding untouched, so results may differ between runs.

    Raises
    ------
    ValueError
        If ``smiles`` is not a non-empty string or RDKit cannot parse it.
    RuntimeError
        If no 3D conformer could be embedded.
    IOError
        If Open Babel cannot parse the PDB block or cannot write the output.
    """
    # Guard against NaN/None cells coming from a DataFrame and blank strings.
    if not isinstance(smiles, str) or not smiles.strip():
        raise ValueError(f"SMILES must be a non-empty string, got: {smiles!r}")

    # MolFromSmiles signals a parse failure by returning None, not by raising.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Could not parse SMILES: {smiles!r}")
    mol = Chem.AddHs(mol)

    # Convert SMILES to 3D structure
    params = AllChem.ETKDG()
    if random_seed is not None:
        params.randomSeed = random_seed
    # EmbedMolecule returns -1 when it fails; without this check the PDB block
    # below would be written without meaningful 3D coordinates.
    if AllChem.EmbedMolecule(mol, params) == -1:
        # Retry once from random starting coordinates before giving up.
        params.useRandomCoords = True
        if AllChem.EmbedMolecule(mol, params) == -1:
            raise RuntimeError(f"3D embedding failed for SMILES: {smiles!r}")

    # Convert RDKit Mol to PDB format (kept in memory: no intermediate .pdb
    # file, so nothing can be left behind on error and the output path can
    # never be mistaken for a temp file and deleted).
    pdb_block = Chem.MolToPDBBlock(mol)

    # Convert PDB to PDBQT using Open Babel
    obConversion = openbabel.OBConversion()
    obConversion.SetInAndOutFormats("pdb", "pdbqt")
    ob_mol = openbabel.OBMol()
    if not obConversion.ReadString(ob_mol, pdb_block):
        raise IOError(f"Open Babel could not parse the PDB block for SMILES: {smiles!r}")

    if not obConversion.WriteFile(ob_mol, output_pdbqt_file):
        raise IOError(f"Could not write file: {output_pdbqt_file}")

In [4]:
# Destination directory for the generated ligand files; created if missing.
output_folder = "aganitha_data/bbio_mols_pdbqt"
os.makedirs(output_folder, exist_ok=True)

# Walk the Name and SMILES columns in lockstep and write one
# "<Name>.pdbqt" file per compound into the output folder.
for compound_name, smiles in zip(df["Name"], df["SMILES"]):
    output_pdbqt_file = os.path.join(output_folder, f"{compound_name}.pdbqt")
    convert_smiles_to_pdbqt(smiles, output_pdbqt_file)

# ANSI escape codes: bold + blue for the message, bright red for the folder name.
print(f"\033[1m\033[34mSMILES to PDBQT Conversion Completed and saved in folder: \033[91m{output_folder}\033[0m")

[1m[34mSMILES to PDBQT Conversion Completed and saved in folder: [91maganitha_data/bbio_mols_pdbqt[0m
