In [None]:
from hydra import compose, initialize
from omegaconf import OmegaConf
from rdkit import Chem
from rdkit.Chem import rdDetermineBonds, AllChem
from rdkit.Chem.Draw import IPythonConsole
# Turn on the 3D display option of RDKit's IPython integration.
IPythonConsole.ipython_3d = True
import pandas as pd

from strain_relief import compute_strain

In [None]:
! pip install py3Dmol
import py3Dmol

## Running StrainRelief 

In [None]:
# First, let's generate some example poses with 3D coordinates.
smiles = ["CCO", "CCN", "CCC"]
poses = []

for s in smiles:
    mol = Chem.MolFromSmiles(s)
    mol = Chem.AddHs(mol)
    # A fixed seed makes the embedded coordinates (and therefore the strain
    # values computed later) reproducible across kernel restarts.
    # EmbedMolecule returns -1 when no conformer could be generated; fail
    # loudly instead of appending a molecule without 3D coordinates.
    if AllChem.EmbedMolecule(mol, randomSeed=42) == -1:
        raise RuntimeError(f"RDKit could not embed a 3D conformer for {s!r}")
    mol.SetProp("smiles", s)
    poses.append(mol)

# Note: to run StrainRelief your molecules must either have specified
# bonds or they must be able to be passed through RDKit's rdDetermineBonds
# function. This is needed for the conformer enumeration.

In [None]:
# Next, build the run configuration with Hydra's compose API.
# The MMFF94s experiment is selected so that runs only take a few seconds.
with initialize(config_path="../src/strain_relief/hydra_config", version_base="1.1"):
    cfg = compose(overrides=["experiment=mmff94s"], config_name="default")

# Show the composed configuration as YAML.
print(OmegaConf.to_yaml(cfg))

StrainRelief is run either via the `compute_strain` function or via the command line with the `strain-relief` command. The following three examples demonstrate different ways of running the tool, all giving the same output.

In [None]:
# EXAMPLE 1

# The minimal requirement to run StrainRelief is a list of RDKit Mols with 3D poses and a run configuration.
# If ids are not given then they will be generated.
results = compute_strain(mols=poses, ids=None, cfg=cfg)
results.head()

In [None]:
# EXAMPLE 2

# StrainRelief also accepts a dataframe instead of a list of mols. The frame
# needs a "mol_bytes" column (RDKit Mol objects stored as bytes) and an "id" column.
# Any additional columns, such as "smiles" here, are also returned.

# One record per pose: the serialised mol plus its properties; the row index
# is then turned into the "id" column.
df = pd.DataFrame(
    [{"mol_bytes": pose.ToBinary(), **pose.GetPropsAsDict()} for pose in poses]
).reset_index(drop=False, names='id')

results2 = compute_strain(df, cfg)
results2.head()

In [None]:
# EXAMPLE 3

# Finally, StrainRelief can be run from the command line by specifying an input parquet path.
# Here the dataframe from Example 2 is written to disk, the `strain-relief` command is run on it
# with the same mmff94s experiment, and the output parquet is read back in.
# There are more example scripts in StrainRelief/examples/
df.to_parquet("../data/tutorial_example.parquet")
! strain-relief io.input.parquet_path=../data/tutorial_example.parquet io.output.parquet_path=../data/tutorial_output.parquet experiment=mmff94s

results3 = pd.read_parquet("../data/tutorial_output.parquet")
results3.head()

## Examining the Output

In [None]:
# Run StrainRelief from the command line on the example input parquet
# (mmff94s experiment) and load the output parquet it writes for inspection.
! strain-relief io.input.parquet_path=../data/example_ligboundconf_input.parquet io.output.parquet_path=../data/example_ligboundconf_output.parquet experiment=mmff94s
lig = pd.read_parquet("../data/example_ligboundconf_output.parquet")
lig.head()

The `lig` dataframe contains all input columns (in this case `id`, `mol_bytes` and `some_property`) and all calculated columns:
- `formal_charge` (int): RDKit's formal charge
- `local_min_mol` (bytes): the coordinates of the local minimum
- `local_min_e` (float): the energy of the local minimum (in kcal/mol)
- `global_min_mol` (bytes): the coordinates of the global minimum
- `global_min_e` (float): the energy of the global minimum (in kcal/mol)
- `ligand_strain` (float): difference between local and global minima
- `passes_strain_filter` (bool): whether `ligand_strain` is lower than the config threshold
- `nconfs_converged` (int): the number of conformers that converged when searching for the global minimum

Let's have a look at the three poses from ligand 3Q4_3QD0_A_370.

In [None]:
# Rebuild RDKit molecules from the serialised bytes stored for entry 0:
# the input pose, its local minimum and the global minimum, in that order.
docked, local_min, global_min = (
    Chem.Mol(lig[column][0])
    for column in ("mol_bytes", "local_min_mol", "global_min_mol")
)

In [None]:
# DetermineBonds assumes a neutral molecule unless told otherwise (charge=0 by
# default), which fails or mis-assigns bond orders for charged ligands. Pass the
# formal charge reported in the output frame for this entry instead.
# int() turns the pandas/numpy integer into a plain Python int for RDKit.
charge = int(lig.formal_charge[0])
for pose in (docked, local_min, global_min):
    rdDetermineBonds.DetermineBonds(pose, charge=charge)

In [None]:
# Draw the input pose (rebuilt from `mol_bytes`) in 3D.
IPythonConsole.drawMol3D(docked)

In [None]:
# Draw the local-minimum pose (rebuilt from `local_min_mol`) in 3D.
IPythonConsole.drawMol3D(local_min)

In [None]:
# Draw the global-minimum pose (rebuilt from `global_min_mol`) in 3D.
IPythonConsole.drawMol3D(global_min)

The original and local minimum conformers look very similar to the eye. This is because local minimisation uses a loose convergence criterion and serves only to clean up any high-energy artifacts left by docking. The global minimum is noticeably different, with all aromatic rings having relaxed into a similar plane.

You may again want to convert your results back into an sdf. You can do this with the function below:

Hopefully you now have a good grasp on how to run the StrainRelief tool! I hope you find it as useful as we have.

Ewan