<a href="https://colab.research.google.com/github/porekhov/drug_design_2024/blob/main/rdkit_formats.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title **Install Conda Colab and rdkit**

%%capture
!pip install -q condacolab
import condacolab
condacolab.install()
!conda install -c conda-forge rdkit -y
!pip install py3Dmol

In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from IPython.display import display
from rdkit.Chem.Draw.MolDrawing import MolDrawing, DrawingOptions
from sklearn.decomposition import PCA

In [None]:
# RDKit docs: https://rdkit.readthedocs.io/en/latest/index.html

smiles = 'O=C(C)Oc1ccccc1C(=O)O'

# create a molecule object from the SMILES string
mol = Chem.MolFromSmiles(smiles)

# a molecule object can also be created from a mol file:
#Chem.MolFromMolFile('molecule.mol')

# show the structural formula (drawn before hydrogens are added)
display(Draw.MolToImage(mol))

# add explicit hydrogens, generate 3D coordinates and relax them with UFF
mol = Chem.AddHs(mol)
# EmbedMolecule returns the id of the new conformer, or -1 if embedding failed;
# fail here with a clear message instead of inside the optimizer
if AllChem.EmbedMolecule(mol, randomSeed = 12345) < 0:
    raise RuntimeError('3D embedding failed for ' + smiles)
AllChem.UFFOptimizeMolecule(mol)

# build the mol block once and reuse it for both outputs
mol_block = Chem.MolToMolBlock(mol)
print(mol_block)
# print mol block to a file; the with-block guarantees the handle is flushed
# and closed before the next cell reads 'aspirin.mol'
with open('aspirin.mol', 'w') as mol_file:
    print(mol_block, file=mol_file)

In [None]:
import py3Dmol

# render the aspirin structure written to 'aspirin.mol' as a stick model
view = py3Dmol.view(width=400, height=300)
# read the file inside a with-block so the handle is closed right away
with open('aspirin.mol', 'r') as mol_file:
    view.addModel(mol_file.read(), 'mol')
# model -1 selects the most recently added model
view.setStyle({'model': -1}, {"stick": {}})
view.zoomTo()
view.setBackgroundColor('white')
view.show()

In [None]:
# read every record of 'library.sdf' (the file must already be present in the
# working directory; it is not created by this notebook)
mols = Chem.SDMolSupplier('library.sdf')
print('Number of molecules in the SDF file:', len(mols))

for mol in mols:
    # the supplier yields None for records RDKit cannot parse; skip them
    # instead of failing with AttributeError on mol.GetNumAtoms()
    if mol is None:
        continue
    print(mol.GetNumAtoms())
    print(Chem.MolToSmiles(mol))
    display(Draw.MolToImage(mol))

In [None]:
# SMILES used for the conformer examples below (one stereocentre is specified)
ibu_smi = 'CC(C)Cc1ccc(cc1)[C@@H](C)C(=O)O'

# parse the SMILES and add explicit hydrogens in a single step
ibu_mol = Chem.AddHs(Chem.MolFromSmiles(ibu_smi))

# draw the structural formula (hydrogens included)
ibu_img = Draw.MolToImage(ibu_mol)
display(ibu_img)

In [None]:
# generate multiple (up to 10) conformers for ibu_mol;
# conformers closer than pruneRmsThresh to an already kept one are discarded,
# so fewer than numConfs ids may come back.
# randomSeed makes the embedding reproducible across runs (same seed as the
# single-conformer aspirin example).
conf_ids = list(AllChem.EmbedMultipleConfs(ibu_mol,
                                           clearConfs=True,
                                           numConfs=10,
                                           pruneRmsThresh=0.1,
                                           randomSeed=12345))

# optimize structure of each conformer with the UFF force field
for conf_id in conf_ids:
    AllChem.UFFOptimizeMolecule(ibu_mol, confId=conf_id)

# create a pattern (benzene ring) to align
patt = Chem.MolFromSmarts('c1ccccc1')
patt

In [None]:
# class-level switch: ask drawings that consult DrawingOptions to label atoms
# with their indices
DrawingOptions.includeAtomNumbers=True

# atom indices of ibu_mol that map onto the benzene-ring pattern
match = ibu_mol.GetSubstructMatch(patt)
print(match)

# last expression of the cell: display the molecule
ibu_mol

In [None]:
# align molecules using the selected pattern (benzene ring)
AllChem.AlignMolConformers(ibu_mol, atomIds=match)

# write an sdf file with all conformations
writer = Chem.SDWriter('ibu.sdf')

for conf_id in conf_ids:
    writer.write(ibu_mol, confId=conf_id)
writer.close()

In [None]:
# show conformers from sdf using py3Dmol:
# open the sdf, iterate over the conformations and
# show each molecule with a different carbon color

view = py3Dmol.view(width=400, height=300)
view.setBackgroundColor('white')
colors=('cyanCarbon','redCarbon','blueCarbon')

suppl = Chem.SDMolSupplier('ibu.sdf')

for idx, mol in enumerate(suppl):
    # records RDKit cannot parse come back empty and are skipped
    if mol:
        mol_block = Chem.MolToMolBlock(mol)
        view.addModel(mol_block, 'sdf', {'index': idx, 'keepH': True, 'doAssembly': True})
        # style the model that was just added (-1 = most recently added);
        # selecting by the enumerate index would target the wrong model as
        # soon as one record had been skipped
        view.setStyle({'model': -1}, {'stick': {'colorscheme':colors[idx%len(colors)]}})

view.zoomTo()
view.show()

In [None]:
# load the file with smiles and -log(IC50) of a series of kinase Aurora C inhibitors
!wget http://mscbio2025.csb.pitt.edu/files/er.smi

In [None]:
from rdkit.Chem import rdFingerprintGenerator
import numpy as np
import matplotlib.pyplot as plt
# convert SMILES -> Morgan fingerprints (PCA and plotting follow in the next cell)
smiles_list = []
activities = []
fps = []
# number of lines whose SMILES RDKit could not parse
n_skipped = 0

# create a generator for fingerprints
mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2,fpSize=2048)

with open("er.smi", "r") as file:
    for line in file:
        parts = line.strip().split()
        # ignore blank or incomplete lines
        if len(parts) < 2:
            continue
        # smiles is the first string in line
        smiles = parts[0]
        # activity follows the smiles
        activity = float(parts[1])
        # create molecule object from smiles
        mol = Chem.MolFromSmiles(smiles)
        # MolFromSmiles returns None for invalid SMILES; skip the line
        # before anything is appended so the three lists stay aligned
        if mol is None:
            n_skipped += 1
            continue
        smiles_list.append(smiles)
        activities.append(activity)
        # create a fingerprint from a molecule and store it as a list of bits
        fp = mfpgen.GetFingerprint(mol)
        fps.append(fp.ToList())

if n_skipped:
    print('Skipped', n_skipped, 'lines with unparsable SMILES')

# one row per molecule, one column per fingerprint bit
fps = np.array(fps)
print(fps.shape)
fps

In [None]:
# project the fingerprint matrix onto its first two principal components;
# random_state fixes the result when scikit-learn picks a randomized SVD solver,
# so the plot is the same on every run
pca = PCA(n_components = 2, random_state = 12345)
pca_prj = pca.fit_transform(fps)

# scatter plot of the projection, one point per molecule, colored by activity
fig, ax = plt.subplots()
sc = ax.scatter(pca_prj[:, 0], pca_prj[:, 1], c = activities, cmap='viridis')
fig.colorbar(sc, ax=ax, label='Activity')
# same scale on both axes so distances in the projection are not distorted
ax.set_aspect('equal', adjustable='box')
ax.set_xlabel('PCA 1')
ax.set_ylabel('PCA 2')
ax.set_title('PCA of Morgan Fingerprints Colored by Activity')
fig.tight_layout()
plt.show()