# Stable species conformer search
Leverage ETKDG for stochastic conformer generation

Use this as a base for ML conformer generation

The idea is to have a modular, swappable method for each step of the workflow; at the moment the choices for each step are hardcoded. The steps (with example methods) are:
- initial conformer embedding (ETKDG, GeoMol)
- optimization/energy (MMFF, UFF, GFN-FF, GFN2-xTB)
- pruning (torsion fingerprints, CREGEN)
- convergence metrics (conformational entropy/partition function)

In [1]:
from rdmc.conformer_generation.embedders import *
from rdmc.conformer_generation.optimizers import *
from rdmc.conformer_generation.pruners import *
from rdmc.conformer_generation.metrics import *
from rdmc.conformer_generation.generators import StochasticConformerGenerator

from rdmc.view import mol_viewer, interactive_conformer_viewer, conformer_viewer

# Physical constants / unit conversion (none of these are referenced in the cells visible here).
T = 298  # temperature, K
R = 0.0019872  # gas constant, kcal/(K*mol)
HARTREE_TO_KCAL_MOL = 627.503  # multiply an energy in Hartree by this factor to get kcal/mol

In [4]:
from rdkit import Chem

# Input molecule as an atom-mapped SMILES: every atom (explicit hydrogens included) carries a
# map number 1-16, and atom 2 is a defined stereocenter. Passed to the generator as `smiles=`.
smi = "[C:1]([C@@:2]([O:3][H:12])([C:4]([N:5]([C:6](=[O:7])[H:16])[H:15])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]"

In [5]:
# Workflow components. Only `embedder` is handed to the generator below; `optimizer`,
# `pruner` and `metric` are constructed here but never passed on.
# NOTE(review): presumably config="normal" picks its own optimizer/pruner/metric — confirm
# whether these three objects were meant to be passed as keyword arguments as well.
embedder = ETKDGEmbedder(track_stats=True)
# Alternative embedder (GeoMol), left disabled:
# embedder = GeoMolEmbedder("../rdmc/external/GeoMol/trained_models/both/", dataset="drugs", track_stats=True, temp_schedule="none")
optimizer = XTBOptimizer()
pruner = TorsionPruner(max_chk_threshold=30)
metric = SCGMetric(metric="entropy", window=5, threshold=0.005)
n_conformers_per_iter = 100  # argument of the scg(...) call below

# Build the generator for `smi` using the "normal" config, the embedder above and min_iters=5.
scg = StochasticConformerGenerator(
    smiles=smi,
    config="normal",
    embedder=embedder,
    min_iters=5
)

# Run the generator, then report the number of returned entries and the most recent
# value in the generator's metric history.
unique_mol_data = scg(n_conformers_per_iter)
print(len(unique_mol_data), scg.metric.metric_history[-1])

2022/04/01 03:17:42 PM | StochasticConformerGenerator | INFO: Config specified: using default settings for normal config
2022/04/01 03:17:42 PM | StochasticConformerGenerator | INFO: Generating conformers for [C:1]([C@@:2]([O:3][H:12])([C:4]([N:5]([C:6](=[O:7])[H:16])[H:15])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]
2022/04/01 03:17:42 PM | StochasticConformerGenerator | INFO: 
Iteration 1: embedding 100 initial guesses...
2022/04/01 03:17:42 PM | StochasticConformerGenerator | INFO: Iteration 1: optimizing initial guesses...
2022/04/01 03:17:45 PM | StochasticConformerGenerator | INFO: Iteration 1: pruning conformers...
2022/04/01 03:17:45 PM | StochasticConformerGenerator | INFO: Iteration 1: kept 9 unique conformers
2022/04/01 03:17:45 PM | StochasticConformerGenerator | INFO: 
Iteration 2: embedding 100 initial guesses...
2022/04/01 03:17:46 PM | StochasticConformerGenerator | INFO: Iteration 2: optimizing initial guesses...
2022/04/01 03:17:49 PM | StochasticConformerGenerator | I

2022/04/01 03:18:45 PM | StochasticConformerGenerator | INFO: Iteration 20: optimizing initial guesses...
2022/04/01 03:18:48 PM | StochasticConformerGenerator | INFO: Iteration 20: pruning conformers...
2022/04/01 03:18:48 PM | StochasticConformerGenerator | INFO: Iteration 20: kept 66 unique conformers
2022/04/01 03:18:48 PM | StochasticConformerGenerator | INFO: 
Iteration 21: embedding 100 initial guesses...
2022/04/01 03:18:48 PM | StochasticConformerGenerator | INFO: Iteration 21: optimizing initial guesses...
2022/04/01 03:18:51 PM | StochasticConformerGenerator | INFO: Iteration 21: pruning conformers...
2022/04/01 03:18:51 PM | StochasticConformerGenerator | INFO: Iteration 21: kept 65 unique conformers
2022/04/01 03:18:51 PM | StochasticConformerGenerator | INFO: 
Iteration 22: embedding 100 initial guesses...
2022/04/01 03:18:51 PM | StochasticConformerGenerator | INFO: Iteration 22: optimizing initial guesses...
2022/04/01 03:18:54 PM | StochasticConformerGenerator | INFO: 

9 0.008233205857910033


In [6]:
# Build `mol` from the generator output and hand it to the interactive conformer viewer.
# The viewer call is the cell's last expression, so its return value is what gets displayed.
# NOTE(review): dict_to_mol is not imported by name in the visible cells; presumably it
# arrives through one of the wildcard imports — confirm.
mol = dict_to_mol(unique_mol_data)
interactive_conformer_viewer(mol)

interactive(children=(IntSlider(value=0, description='confId', max=8), Output()), _dom_classes=('widget-intera…

<function rdmc.view.interactive_conformer_viewer.<locals>.<lambda>(confId)>

In [None]:
# SMILES strings (one molecule per entry) that the code below writes to smiles.csv.
test_smiles = [
    "C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O",
    "CC[C@@H]1CCCC[C@@H]1C",
    "CCCCCCCCCC",
    "CC1=CCCC(C1/C=C/C(=O)C)(C)C",
    "C[C@H]1CC[C@H](CC2=C1CC[C@@H]2C)C(C)(C)O",
    "N[C@@H](Cc1ccccc1)C(=O)N[C@@H](Cc2ccccc2)C(O)=O",
    "C1COCC(=O)N1C2=CC=C(C=C2)N3C[C@@H](OC3=O)CNC(=O)C4=CC=C(S4)Cl",
    "CCCCCOC(=O)NC1=NC(=O)N(C=C1F)[C@H]2[C@@H]([C@@H]([C@H](O2)C)O)O",
    "CCC(CC)O[C@@H]1C=C(C[C@@H]([C@H]1NC(=O)C)N)C(=O)OCC",
    "C[C@]12CC[C@@H](CC1=CC[C@@H]3[C@@H]2CC[C@]4([C@H]3CC=C4C5=CN=CC=C5)C)O",
    "C1CCN(C1)CCOC2=C3COC/C=C/COCC4=CC(=CC=C4)C5=NC(=NC=C5)NC(=C3)C=C2",
    "C[C@@H]1OC[C@@H]2[C@@H](O1)[C@@H]([C@H]([C@@H](O2)O[C@H]3[C@H]4COC(=O)[C@@H]4[C@@H](C5=CC6=C(C=C35)OCO6)C7=CC(=C(C(=C7)OC)O)OC)O)O",
    "CC(C)C[C@@H](C1=C(C(=C(C(=C1O)C=O)O)C=O)O)[C@]2(CC[C@@H]3[C@@H]2[C@H]4[C@H](C4(C)C)CC[C@@]3(C)O)C",
    "C[C@H](N)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)O",
    "CC1=C2[C@H](C(=O)[C@@]3([C@H](C[C@@H]4[C@]([C@H]3[C@@H]([C@@](C2(C)C)(C[C@@H]1OC(=O)[C@@H]([C@H](C5=CC=CC=C5)NC(=O)C6=CC=CC=C6)O)O)OC(=O)C7=CC=CC=C7)(CO4)OC(=O)C)O)C)OC(=O)C",
    "C[C@@H]1C[C@H]2CC[C@H](O2)[C@@H](C(=O)O[C@H](C[C@@H]3CC[C@@H](O3)[C@H](C(=O)O[C@@H](C[C@H]4CC[C@H](O4)[C@@H](C(=O)O[C@H](C[C@@H]5CC[C@@H](O5)[C@H](C(=O)O1)C)C)C)C)C)C)C",
    "CC[C@@]1(C[C@H]2C[C@@](C3=C(CCN(C2)C1)C4=CC=CC=C4N3)(C5=C(C=C6C(=C5)[C@]78CCN9[C@H]7[C@@](C=CC9)([C@H]([C@@]([C@@H]8N6C)(C(=O)OC)O)OC(=O)C)CC)OC)C(=O)OC)O",
    "CC(C)[C@@H](C(=O)N1CC2(CC2)C[C@H]1C3=NC=C(N3)C4=CC5=C(C=C4)C6=C(C5(F)F)C=C(C=C6)C7=CC8=C(C=C7)N=C(N8)[C@@H]9[C@H]1CC[C@H](C1)N9C(=O)[C@H](C(C)C)NC(=O)OC)NC(=O)OC",
    "C[C@@H]1CC[C@H]2C[C@@H](/C(=C/C=C/C=C/[C@H](C[C@H](C(=O)[C@@H]([C@@H](/C(=C/[C@H](C(=O)C[C@H](OC(=O)[C@@H]3CCCCN3C(=O)C(=O)[C@@]1(O2)O)[C@H](C)C[C@@H]4CC[C@H]([C@@H](C4)OC)OCCO)C)/C)O)OC)C)C)/C)OC",
    "C[C@H]1[C@H]([C@@](C[C@@H](O1)O[C@@H]2[C@H]([C@@H]([C@H](O[C@H]2OC3=C4C=C5C=C3OC6=C(C=C(C=C6)[C@H]([C@H](C(=O)N[C@H](C(=O)N[C@H]5C(=O)N[C@@H]7C8=CC(=C(C=C8)O)C9=C(C=C(C=C9O)O)[C@H](NC(=O)[C@H]([C@@H](C1=CC(=C(O4)C=C1)Cl)O)NC7=O)C(=O)O)CC(=O)N)NC(=O)[C@@H](CC(C)C)NC)O)Cl)CO)O)O)(C)N)O"
]

import csv

# Write the SMILES list to disk, one SMILES per line.
# Each SMILES is written as its own single-column row. The previous form used "\n" as the
# *field* delimiter of one giant row, which left the file with "\n" between entries but a
# stray "\r\n" (the csv default row terminator) after the last one.
# newline="" is what the csv module asks for on the file object so that the platform does
# not translate line endings a second time; lineterminator="\n" keeps every line uniform.
with open("./../rdmc/conformer_exps/organic_conf_gen/smiles.csv", "w", newline="") as f:
    writer = csv.writer(f, lineterminator="\n")
    writer.writerows([smiles] for smiles in test_smiles)