# Example of setting up an optimization pipeline based on coordinates

A more explicit example for when you want fall-back calculations for failed geometry optimizations.


In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import collections
import functools
import logging
import sys

In [None]:
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem.Draw import MolsToGridImage

In [None]:
try:
    import ppqm
except ModuleNotFoundError:
    import pathlib

    cwd = pathlib.Path().resolve().parent
    sys.path.append(str(cwd))
    import ppqm

In [None]:
import rmsd

In [None]:
from ppqm import chembridge, xtb
from ppqm import jupyter as ppqm_jupyter

## Set logging level

In [None]:
# Progress bars are switched off for this notebook run
SHOW_PROGRESS = False

# Send log records to stdout so they show up in the cell output
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# Per-library verbosity: xtb is the most talkative, ppqm stays at INFO
logging.getLogger("xtb").setLevel(logging.DEBUG)
logging.getLogger("ppqm").setLevel(logging.INFO)

In [None]:
# Logger for this notebook; optimize_molobj uses it to warn about
# unconverged or strongly displaced conformers
_logger = logging.getLogger(__name__)

## Settings

In [None]:
# Passed as `n_cores` to optimize_molobj (via `kwargs` further down)
N_CORES = 2

## Define xtb options and functions

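`optimize_molobj` optimizes every conformer of a molecule in parallel. For each conformer, `optimize_axyzc` runs xtb with the requested calculation options and, if the optimization does not converge, retries with fall-back settings. `optimize_acxyz` is a small wrapper that only reorders the arguments so the function can be used in the parallel mapping.

TODO: Document the individual options in more detail.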


In [None]:
def optimize_molobj(
    molobj,
    show_progress=True,
    n_cores=1,
    scr=None,
    calculation_options=None,
    xtb_options=None,
    rmsd_threshold=3.0,
):
    """
    Optimize all the conformers in molobj.

    Every conformer is optimized independently with ``optimize_acxyz``; the
    work is distributed with ``ppqm.misc.func_parallel``. Conformers for which
    no coordinates or no energy come back are dropped with a warning.

    :param molobj: Molecule whose conformers should be optimized.
    :param show_progress: Forwarded to ``ppqm.misc.func_parallel``.
    :param n_cores: Upper bound on parallel workers; capped at the number of
        conformers.
    :param scr: Optional scratch directory, forwarded to the xtb call as the
        ``scr`` option.
    :param calculation_options: Calculation options forwarded to
        ``optimize_acxyz``. Not modified.
    :param xtb_options: Keyword options for the xtb call. A ``n_cores`` entry
        is ignored. The dict passed in is not modified.
    :param rmsd_threshold: Check if molecule moved away from local minima. A
        warning is logged when the RMSD between the original and the optimized
        coordinates exceeds this value; the conformer is still kept.
    :return: Tuple ``(molobj_prime, energies)``: a copy of ``molobj`` holding
        the successfully optimized conformers and a numpy array with one
        energy per kept conformer, in the same order.
    """
    n_atoms = molobj.GetNumAtoms()

    # NOTE(review): copy_molobj presumably returns the molecule without its
    # conformers, so that only optimized conformers end up in the result
    # -- confirm against ppqm.chembridge.
    molobj_prime = chembridge.copy_molobj(molobj)

    coordinates_list = [
        np.asarray(conformer.GetPositions()) for conformer in molobj.GetConformers()
    ]

    # Nothing to optimize; also avoids asking for zero parallel workers
    if not coordinates_list:
        return molobj_prime, np.asarray([])

    atoms, _, charge = chembridge.get_axyzc(molobj, atomfmt=str)

    # Work on copies so the caller's dictionaries are never mutated.
    # `n_cores` is dropped from the xtb options: the parallelism here is over
    # conformers, not inside a single xtb call.
    calculation_options = dict(calculation_options or {})
    xtb_options = {
        key: value for key, value in (xtb_options or {}).items() if key != "n_cores"
    }

    if scr:
        xtb_options["scr"] = scr

    n_procs = max(1, min(n_cores, len(coordinates_list)))

    func = functools.partial(
        optimize_acxyz,
        atoms,
        charge,
        calculation_options=calculation_options,
        xtb_options=xtb_options,
    )

    results = ppqm.misc.func_parallel(
        func,
        coordinates_list,
        n_cores=n_procs,
        show_progress=show_progress,
        title="Optimize",
    )

    energies = []

    # Results are matched to the inputs by position (assumes func_parallel
    # preserves input order).
    for idx, (coord, energy) in enumerate(results):

        # if conformer is unconverged, ignore
        if coord is None or energy is None:
            _logger.warning(f"conformer {idx} unconverged")
            continue

        # if conformer has changed a lot, warn.
        # Compare against the coordinates that were actually submitted; the
        # positional index is not guaranteed to be a valid conformer id.
        displacement = rmsd.kabsch_rmsd(coord, coordinates_list[idx])
        if displacement > rmsd_threshold:
            _logger.warning(f"conformer {idx} has large displacement")

        # Molecule is converged, add to new molobj
        conformer = Chem.Conformer(n_atoms)
        chembridge.conformer_set_coordinates(conformer, coord)
        molobj_prime.AddConformer(conformer, assignId=True)
        energies.append(energy)

    return molobj_prime, np.asarray(energies)

In [None]:
def optimize_acxyz(atoms, charge, coordinates, **kwargs):
    """
    Adapter around ``optimize_axyzc`` for parallel mapping.

    Takes the arguments in the order ``(atoms, charge, coordinates)`` so that
    ``atoms`` and ``charge`` can be bound up front and only the coordinates
    vary per call. All keyword arguments are passed through unchanged.
    """
    result = optimize_axyzc(atoms, coordinates, charge, **kwargs)
    return result

In [None]:
def optimize_axyzc(atoms, coordinates, charge, calculation_options=None, xtb_options=None) -> tuple:
    """
    Optimize conformer with fast quantum chemistry

    Fall-back ladder; each step only runs if the previous one did not converge:

    1. optimize with the caller's ``calculation_options`` (``cycles: 100``)
    2. pre-optimize with ``gfn: 0`` (``cycles: 200``), then repeat step 1
       starting from the pre-optimized geometry
    3. optimize the latest geometry with ``opt: lax``; whatever geometry this
       produces is returned, converged or not

    The hard-coded ``opt``/``cycles``/``gfn`` values of each step take
    precedence over the same keys in ``calculation_options``.

    :param atoms: Atoms in string format.
    :param coordinates: Starting coordinates of the conformer.
    :param charge: Charge, forwarded to xtb.
    :param calculation_options: Extra calculation options merged into every
        step. Not modified.
    :param xtb_options: Keyword arguments forwarded to
        ``xtb.get_properties_from_axyzc``. Not modified.
    :return: Tuple ``(coordinates, energy)``, or ``(None, None)`` when no
        geometry could be obtained.
    """

    assert isinstance(atoms[0], str), "func requires atoms in string format"

    calculation_options = calculation_options or {}
    xtb_options = xtb_options or {}

    # Caller options first, step-specific keys last so the latter win
    options_default = {**calculation_options, "opt": None, "cycles": 100}
    options_fast = {**calculation_options, "gfn": 0, "opt": None, "cycles": 200}
    options_lax = {**calculation_options, "opt": "lax"}

    def _run(start_coordinates, options):
        """Run one xtb calculation from the given starting coordinates."""
        return xtb.get_properties_from_axyzc(
            atoms, start_coordinates, charge, options=options, **xtb_options
        )

    def _value(properties, key):
        """Return properties[key], or None if the result is empty or lacks key."""
        if not properties or key not in properties:
            return None
        return properties[key]

    def _is_converged(properties):
        """True only when the result exists and is flagged as converged."""
        return bool(_value(properties, xtb.COLUMN_CONVERGED))

    # Step 1: start calculating with the defined options
    properties = _run(coordinates, options_default)

    if _is_converged(properties):
        return properties[xtb.COLUMN_COORD], properties[xtb.COLUMN_ENERGY]

    # Step 2: pre-optimize, then retry the default options from that geometry
    fast_coordinates = _value(_run(coordinates, options_fast), xtb.COLUMN_COORD)

    if fast_coordinates is None:
        return None, None

    properties = _run(fast_coordinates, options_default)

    if _is_converged(properties):
        return properties[xtb.COLUMN_COORD], properties[xtb.COLUMN_ENERGY]

    fast_coordinates = _value(properties, xtb.COLUMN_COORD)

    if fast_coordinates is None:
        return None, None

    # Step 3: last resort, lax optimization from the latest geometry
    properties = _run(fast_coordinates, options_lax)
    lax_coordinates = _value(properties, xtb.COLUMN_COORD)

    if lax_coordinates is None:
        return None, None

    # A missing energy is reported as None
    return lax_coordinates, _value(properties, xtb.COLUMN_ENERGY)

## Define a molecule you like

In [None]:
# SMILES string of the example molecule; the trailing comment gives its ChEMBL id
smiles = "O=C(OCCN1CCN(C(c2ccc(F)cc2)c2ccc(F)cc2)CC1)C12C[C@H]3C[C@@H](C1)C[C@@H](C2)C3"  # CHEMBL1191174
# Parse the SMILES into an RDKit molecule object
molobj = Chem.MolFromSmiles(smiles)

In [None]:
# Bare expression as the last line of the cell: the notebook displays the molecule
molobj

In [None]:
# With 3D coordinates: replace molobj with the result of ppqm's conformer generation
molobj = ppqm.tasks.generate_conformers(molobj)

## Let's optimize a molobj

In [None]:
# Calculation options handed to optimize_molobj (via `kwargs`)
calculation_options = dict(gfn=2, alpb="water")

In [None]:
# Options for the xtb call itself; `cmd` is set to the string "xtb"
xtb_options = dict(cmd="xtb")

In [None]:
# Keyword arguments for optimize_molobj, collected from the settings defined above
kwargs = dict(
    n_cores=N_CORES,
    scr="_tmp_directory_",
    xtb_options=xtb_options,
    calculation_options=calculation_options,
    show_progress=SHOW_PROGRESS,
)

In [None]:
# optimize_molobj returns a tuple (optimized molecule, energies); unpack it so
# that `molobj_prime` is the molecule itself and not the whole tuple
molobj_prime, energies = optimize_molobj(molobj, **kwargs)