# Aligning molecules in RDKit

In [1]:
import os, sys
import glob
import py3Dmol
import numpy as np

# functions used in this notebook are now in scripts/utils.py
from scripts.utils import *

2023.03.3


## Example 1 - a simple alignment

This example demonstrates how to align molecular structures using the [RDKit library](https://www.rdkit.org/).

* Structures used in this example correspond to a few selected conformers of a macrocycle.
* This example works with the `sdf` molecular file format. See the scripts in the `scripts` directory for how to convert between `xyz` and `sdf`.

Play with `scripts/utils.py` functions and test different RDKit options to optimize the visualization for your needs.

In [2]:
# Select the input files with molecular geometries (this example uses the sdf format).
# glob returns paths in arbitrary order, so sort them to keep the generated
# "mol_<n>" names stable between runs and machines.
inps = sorted(glob.glob('coordinates/m1*.sdf'))

# Store the molecules in a dictionary keyed by a generated name, so that each
# structure can be tracked by its key (the "name" variable below).
moldict = {}
for idx, inp in enumerate(inps):
    mol = Chem.MolFromMolFile(inp)
    if mol is None:
        # MolFromMolFile returns None when RDKit cannot parse the file; fail here,
        # with the offending path, instead of passing None on to the alignment step.
        raise ValueError(f"RDKit could not parse molecule file: {inp}")
    name = "mol_" + str(idx)
    moldict[name] = mol

In [3]:
# To align the molecules, two choices have to be made.

# 1. The "reference": the structure against which all the others are aligned.
# Here we arbitrarily pick one of the available structures (this file also
# matches the 'coordinates/m1*.sdf' pattern used to collect the inputs).
ref_inp = 'coordinates/m1_h2o_in.sdf'
ref_mol = Chem.MolFromMolFile(ref_inp)
if ref_mol is None:
    # MolFromMolFile returns None when RDKit cannot parse the file.
    raise ValueError(f"RDKit could not parse reference file: {ref_inp}")

# 2. The "core": the part of the molecule that should be aligned best (RMSD-wise)
# across all the structures. It is given in SMILES format.
# NOTE(review): mol_smiles looks like the SMILES of the whole macrocycle; it is not
# used by the cells visible here - confirm whether it is still needed.
mol_smiles = 'O=C1NCCNC(=O)c2nc(C(=O)NCCNC(=O)c3nc1ccc3)ccc2'
core_smiles = 'n1ccccc1'

In [4]:
# Last step: align the structures in moldict against ref_mol using the chosen core.
# align_and_show comes from scripts/utils.py; presumably it returns a py3Dmol view,
# which is then rendered with show() - confirm against the helper's definition.
p = align_and_show(
    moldict,
    core_smiles,
    ref_mol,
)
p.show()