# Stable species conformer search
Leverage ETKDG for stochastic conformer generation

Use this as a base for ML conformer generation

The idea is to have a modular, swappable method for each step of the workflow; at present each step is hardcoded. The steps (with candidate methods) are:
- initial conformer embedding (ETKDG, GeoMol)
- optimization/energy (MMFF, UFF, GFN-FF, GFN2-xTB)
- pruning (torsion fingerprints, RMSD)
- convergence metrics (conformational entropy/partition function)

In [1]:
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
from rdmc import OpenBabelFF, RDKitFF, optimize_mol
from rdmc.mol import RDKitMol
from rdmc.view import interactive_conformer_viewer
import numpy as np
import copy


T = 298  # K; temperature used in the Boltzmann weights of the conformational-entropy metric
R = 0.0019872  # kcal/(K*mol); gas constant, so energies divided by R*T are assumed to be in kcal/mol

In [111]:
def rad_angle_compare(x, y):
    """Return the absolute difference between two angles, in degrees.

    The difference is wrapped through ``arctan2(sin, cos)`` so that angles on
    either side of the periodic boundary (e.g. 359 deg and 1 deg) compare as
    close; the result therefore lies in [0, 180].

    Args:
        x: angle(s) in radians (scalar or NumPy array).
        y: angle(s) in radians (scalar or NumPy array).

    Returns:
        Absolute wrapped difference in degrees.
    """
    delta = x - y
    return np.abs(np.arctan2(np.sin(delta), np.cos(delta))) * 180 / np.pi


def torsion_list_compare(c1_ts, c2_ts):
    """Compare two torsion lists element by element.

    Args:
        c1_ts: torsion angles of the first conformer, in radians.
        c2_ts: torsion angles of the second conformer, in radians.

    Returns:
        A list with the wrapped absolute difference (degrees) of each pair;
        pairing stops at the shorter input, as with ``zip``.
    """
    return [rad_angle_compare(t1, t2) for t1, t2 in zip(c1_ts, c2_ts)]


class StochasticConformerGenerator:
    """Iteratively embed, optimize, and prune conformers until a metric converges.

    Every iteration embeds a fresh batch of conformers, optimizes them with a
    force field, discards those whose torsions duplicate a conformer that is
    already kept, and records a conformational-entropy metric that is used as
    the stopping criterion.

    Args:
        smiles: SMILES string of the species.
        min_iters: number of leading iterations for which the convergence
            check is skipped.
        max_iters: maximum number of iterations performed per call.
    """

    def __init__(self, smiles, min_iters=5, max_iters=10):
        super().__init__()

        self.smiles = smiles
        self.mol = RDKitMol.FromSmiles(smiles)
        self.current_mols = []
        self.unique_mols = []  # conformer records kept so far (list of dicts)
        self.torsions_list = self.get_torsions_list()
        self.iter = 0  # running iteration counter, persists across calls
        self.metric = []  # metric value recorded after each iteration
        self.min_iters = min_iters
        self.max_iters = max_iters

    def get_torsions_list(self):
        """Return the torsional modes of the molecule.

        A separate molecule is built from the SMILES and given a null
        conformer so that ``GetTorsionalModes`` can be queried without
        touching ``self.mol``.
        """
        mol = RDKitMol.FromSmiles(self.smiles)
        mol.EmbedNullConformer()
        return mol.GetConformer().GetTorsionalModes()

    def calculate_torsions(self, mol):
        """Build one record per conformer of ``mol``.

        Args:
            mol: molecule whose conformers are to be described.

        Returns:
            A list of dicts with keys ``conf_id`` (offset by the number of
            conformers already kept), ``torsions`` (degrees, one per entry of
            ``self.torsions_list``), ``positions``, ``conf`` (a copy of the
            conformer) and ``energy`` (``nan`` until it is computed).
        """
        records = []
        id_offset = len(self.unique_mols)
        for c_id in range(mol.GetNumConformers()):
            conf = copy.copy(mol.GetConformer(c_id))
            records.append({"conf_id": id_offset + c_id,
                            "torsions": [conf.GetTorsionDeg(t) for t in self.torsions_list],
                            "positions": conf.GetPositions(),
                            "conf": conf,
                            "energy": np.nan})
        return records

    def embed_initial_conformers(self, n_conformers):
        """Embed ``n_conformers`` conformers on a copy of ``self.mol`` and return it."""
        embedded_mol = self.mol.Copy()
        embedded_mol.EmbedMultipleConfs(n_conformers)
        return embedded_mol

    def optimize_mols(self, mol):
        """Optimize every conformer of a copy of ``mol`` and return the optimized molecule."""
        # ff for now
        ff = RDKitFF()
        ff.setup(mol.Copy())
        ff.optimize_confs()
        return ff.get_optimized_mol()

    def calculate_energy(self, mol, unique_mols):
        """Fill in the energy of every record that does not have one yet.

        Records whose ``energy`` is already a number are left untouched so
        that energies from previous iterations are not recomputed.

        Args:
            mol: molecule holding the conformers referenced by ``conf_id``.
            unique_mols: conformer records; updated in place.

        Returns:
            The same ``unique_mols`` list.
        """
        # ff for now
        ff = RDKitFF()
        for record in unique_mols:
            if np.isnan(record["energy"]):
                ff.setup(mol.Copy(), conf_id=record["conf_id"])
                # NOTE(review): units are whatever RDKitFF.get_energy() returns;
                # calculate_metric divides by R in kcal/(K*mol), so kcal/mol is
                # assumed -- TODO confirm against the rdmc documentation.
                record["energy"] = ff.get_energy()
        return unique_mols

    def calculate_metric(self, unique_mols):
        """Return the conformational entropy of the kept conformers.

        Boltzmann probabilities are computed at the module-level temperature
        ``T`` with gas constant ``R``; degeneracy is ignored for now.
        Energies are shifted by their minimum before exponentiation: this
        leaves the probabilities unchanged but prevents overflow/underflow
        (and the resulting ``nan``) for energies of large magnitude.

        Args:
            unique_mols: conformer records with an ``energy`` entry.

        Returns:
            The entropy ``-R * sum(p * ln p)``; ``0.0`` for an empty list.
        """
        energies = np.array([c["energy"] for c in unique_mols], dtype=float)
        if energies.size == 0:
            return 0.0

        weights = np.exp(-(energies - energies.min()) / (R * T))
        prob = weights / weights.sum()
        # p * ln(p) -> 0 as p -> 0; skip exact zeros to avoid 0 * -inf = nan
        populated = prob > 0
        return -R * np.sum(prob[populated] * np.log(prob[populated]))

    def check_metric(self, threshold, window=5):
        """Tell whether the metric has stabilised over the last ``window`` values.

        Args:
            threshold: maximum allowed relative spread ``(max - min) / min``.
            window: number of most recent metric values to inspect.

        Returns:
            ``True`` if the relative spread is at most ``threshold``. When the
            smallest value in the window is zero (e.g. a single conformer has
            zero entropy) the relative spread is undefined, so the metric is
            considered converged only if every value in the window is zero.
            ``False`` if no metric has been recorded yet.
        """
        recent = self.metric[-window:]
        if len(recent) == 0:
            return False

        min_metric = np.min(recent)
        max_metric = np.max(recent)
        spread = max_metric - min_metric
        if min_metric == 0:
            return bool(spread == 0)
        return bool(spread / min_metric <= threshold)

    @staticmethod
    def _select_unique_indices(torsions_deg, n_existing, chk1_threshold, chk2_threshold):
        """Return the row indices of ``torsions_deg`` that survive torsion pruning.

        The first ``n_existing`` rows are always kept. Each later row is kept
        only if, against every row kept so far, its mean torsion difference is
        at least ``chk1_threshold`` and its maximum torsion difference is at
        least ``chk2_threshold`` (degrees, periodic). Rows are compared only
        against kept rows, so a discarded conformer cannot cause further
        conformers to be discarded.
        """
        torsions_rad = (np.asarray(torsions_deg, dtype=float) % 360) * np.pi / 180
        n_confs = len(torsions_rad)
        n_torsions = torsions_rad.shape[1] if torsions_rad.ndim == 2 else 0

        kept = list(range(min(n_existing, n_confs)))
        for i in range(len(kept), n_confs):
            if not kept:
                # nothing to compare against: the first conformer is unique
                kept.append(i)
                continue
            if n_torsions == 0:
                # no torsions to tell conformers apart: treat as duplicate
                continue

            delta = torsions_rad[kept] - torsions_rad[i]
            diff_deg = np.abs(np.arctan2(np.sin(delta), np.cos(delta))) * 180 / np.pi
            chk1 = (diff_deg.mean(axis=1) < chk1_threshold).any()
            chk2 = (diff_deg.max(axis=1) < chk2_threshold).any()
            if not (chk1 or chk2):
                kept.append(i)
        return kept

    def prune(self, current_mols, chk1_threshold=10, chk2_threshold=20):
        """Merge new conformers into the kept set, dropping torsion duplicates.

        Args:
            current_mols: records of the newly generated conformers.
            chk1_threshold: a conformer is a duplicate if its mean torsion
                difference to a kept conformer is below this value (degrees).
            chk2_threshold: a conformer is a duplicate if its maximum torsion
                difference to a kept conformer is below this value (degrees).

        Returns:
            ``(updated_mol, unique_mols)``: a copy of ``self.mol`` carrying the
            kept conformers, and the kept records with ``conf_id`` renumbered
            to match their position (records are updated in place).
        """
        # torsion-based pruning for now
        mols_list = self.unique_mols + current_mols
        kept_ids = self._select_unique_indices(
            [c["torsions"] for c in mols_list],
            len(self.unique_mols),
            chk1_threshold,
            chk2_threshold,
        )

        # update mols
        unique_mols = [mols_list[i] for i in kept_ids]
        updated_mol = self.mol.Copy()
        for idx, record in enumerate(unique_mols):
            updated_mol._mol.AddConformer(record["conf"].ToConformer(), assignId=True)
            record["conf_id"] = idx
        return updated_mol, unique_mols

    def __call__(self, n_conformers_per_iter):
        """Run the generate/optimize/prune loop.

        Args:
            n_conformers_per_iter: number of conformers embedded per iteration.

        Returns:
            The list of unique conformer records, returned either when the
            metric converges (checked once ``min_iters`` iterations have
            passed) or after ``max_iters`` iterations.
        """
        print(f"Generating conformers for {self.smiles}")
        unique_mols = self.unique_mols  # defined even if max_iters == 0
        for it in range(self.max_iters):
            self.iter += 1

            print(f"\nIteration {self.iter}: embedding initial guesses...")
            initial_mol = self.embed_initial_conformers(n_conformers_per_iter)

            print(f"Iteration {self.iter}: optimizing initial guesses...")
            opt_mol = self.optimize_mols(initial_mol)

            print(f"Iteration {self.iter}: pruning conformers...")
            current_mols = self.calculate_torsions(opt_mol)

            # unique_mols first used here; return mols bc don't want to recalculate energies
            updated_mol, unique_mols = self.prune(current_mols)
            unique_mols = self.calculate_energy(updated_mol, unique_mols)
            metric = self.calculate_metric(unique_mols)

            self.metric.append(metric)
            self.unique_mols = unique_mols

            if it < self.min_iters:
                continue

            if self.check_metric(threshold=0.01):
                print(f"Iteration {self.iter}: stop criteria reached")
                return unique_mols

        print(f"Iteration {self.iter}: max iterations reached")
        return unique_mols

In [133]:
# Run the stochastic conformer search on hexane, embedding a fixed batch per iteration.
n_conformers_per_iter = 20
scg = StochasticConformerGenerator(smiles="CCCCCC", max_iters=100)
unique_conformers = scg(n_conformers_per_iter)
print(len(unique_conformers))

Generating conformers for CCCCCC

Iteration 1: embedding initial guesses...
Iteration 1: optimizing initial guesses...
Iteration 1: pruning conformers...

Iteration 2: embedding initial guesses...
Iteration 2: optimizing initial guesses...
Iteration 2: pruning conformers...

Iteration 3: embedding initial guesses...
Iteration 3: optimizing initial guesses...
Iteration 3: pruning conformers...

Iteration 4: embedding initial guesses...
Iteration 4: optimizing initial guesses...
Iteration 4: pruning conformers...

Iteration 5: embedding initial guesses...
Iteration 5: optimizing initial guesses...
Iteration 5: pruning conformers...

Iteration 6: embedding initial guesses...
Iteration 6: optimizing initial guesses...
Iteration 6: pruning conformers...

Iteration 7: embedding initial guesses...
Iteration 7: optimizing initial guesses...
Iteration 7: pruning conformers...

Iteration 8: embedding initial guesses...
Iteration 8: optimizing initial guesses...
Iteration 8: pruning conformers...

In [132]:
# Attach every unique conformer to a fresh copy of the molecule for viewing.
final_mol = scg.mol.Copy()
for conformer_record in unique_conformers:
    final_mol._mol.AddConformer(conformer_record["conf"].ToConformer(), assignId=True)

In [129]:
# Browse the conformers stored on final_mol; the rendered widget exposes a `confId` slider.
interactive_conformer_viewer(final_mol, viewer_size=(800, 800), atom_index=True)

interactive(children=(IntSlider(value=0, description='confId', max=74), Output()), _dom_classes=('widget-inter…

<function rdmc.view.interactive_conformer_viewer.<locals>.<lambda>(confId)>