# Example of XTB on a ChEMBL compound

Starting from a ChEMBL SMILES string:

- Generate 3D conformers with RDKit
- Optimize the conformers with XTB
- Update the molecule with new conformers
- View the conformers
- Calculate Fukui coefficients for the conformers


In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import logging
import sys

In [None]:
from rdkit import Chem
from rdkit.Chem.Draw import MolsToGridImage

In [None]:
# Prefer an importable ppqm; if the import fails, add the directory above the
# current working directory to sys.path and try again.
try:
    import ppqm
except ModuleNotFoundError:
    import pathlib

    # NOTE: despite its name, `cwd` holds the *parent* of the resolved
    # current working directory.
    cwd = pathlib.Path().resolve().parent
    # Make that directory importable before retrying the import.
    sys.path.append(str(cwd))
    import ppqm

In [None]:
from ppqm import jupyter as ppqm_jupyter

In [None]:
import pandas as pd

# Notebook-wide pandas display settings: show at most 60 rows and 60 columns,
# and render floats with a thousands separator and two decimals.
pd.set_option("display.max_rows", 60)
pd.set_option("display.max_columns", 60)
pd.set_option("display.float_format", "{:,.2f}".format)

## Set logging level

In [None]:
# Progress-bar switch; it is forwarded to the calculator options further down.
show_progress = False

# Root logger: INFO and above, written to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# Per-logger verbosity: ppqm at INFO, xtb at DEBUG.
for _logger_name, _level in (("ppqm", logging.INFO), ("xtb", logging.DEBUG)):
    logging.getLogger(_logger_name).setLevel(_level)

## Settings

In [None]:
# Core count handed to the xTB calculator through its "n_cores" option.
N_CORES = 8

## Define a molecule you like

In [None]:
# SMILES of the compound to study; replace it with any molecule you like.
smiles = "Cc1cc(NCCO)nc(-c2ccc(Br)cc2)n1"  # CHEMBL1956589
molobj = Chem.MolFromSmiles(smiles)

# RDKit reports an unparsable SMILES by returning None instead of raising, so
# stop here with a clear message rather than failing in a later cell.
if molobj is None:
    raise ValueError(f"RDKit could not parse SMILES: {smiles!r}")

In [None]:
# Bare expression: the notebook shows the molecule as the cell output.
molobj

## Get some 3D conformers (RDKit)

In [None]:
# Rebind molobj to the result of ppqm's conformer generation (default
# arguments). Presumably the same molecule with 3D conformers attached --
# confirm against ppqm.tasks.generate_conformers.
molobj = ppqm.tasks.generate_conformers(molobj)

In [None]:
# Number of conformers currently stored on the molecule.
molobj.GetNumConformers()

## Set xTB Settings

In [None]:
# Keyword arguments used to construct the xTB calculator.
xtb_options = dict(
    scr="./_tmp_directory_",  # scratch directory the calculations run in
    cmd="xtb",  # xtb binary executable / command
    n_cores=N_CORES,  # number of cores to use
    show_progress=show_progress,  # progress bar during calculation
)

In [None]:
# Build the xTB calculator, unpacking xtb_options as keyword arguments.
calc = ppqm.xtb.XtbCalculator(**xtb_options)

In [None]:
# Bare expression: the notebook prints the calculator's representation.
calc

## Calculate some properties

We can now define calculation options and get XTB properties for each conformer in the molobj.

The options for XTB follow the `--arg` command-line documentation on the xtb website: https://xtb-docs.readthedocs.io/en/latest/commandline.html


In [None]:
# Geometry optimisation in water: GFN level 2, ALPB solvent "h2o", and the
# "opt" option, which carries no value (None).
optimize_options = dict(gfn=2, alpb="h2o", opt=None)

Optimize molobj and set resulting coordinates. The `.calculate` function will return a `List[Dict]` with a property dictionary for each conformer.

In [None]:
# Run xTB with the optimisation options on every conformer of molobj; the
# result holds one property dictionary per conformer.
optimize_results = calc.calculate(molobj, optimize_options)

In [None]:
# Inspect the result: container type, per-conformer entry type, and the
# property names available for the first conformer.
print(
    type(optimize_results),
    type(optimize_results[0]),
    optimize_results[0].keys(),
    sep="\n",
)

In [None]:
# Tabulate the per-conformer property dictionaries (one row per entry).
df_optimize = pd.DataFrame(optimize_results)

In [None]:
# Bare expression: the notebook renders the table as the cell output.
df_optimize

In [None]:
# Print each conformer's energy and write the coordinates from the result
# back onto the conformer of molobj with the same index.
for idx, prop in enumerate(optimize_results):

    energy = prop["scc_energy"]  # AU
    energy *= ppqm.units.hartree_to_kcalmol  # convert Hartree -> kcal/mol
    coord = prop["coord"]

    print(idx, energy)
    # The position in the result list is used as the conformer id.
    ppqm.chembridge.molobj_set_coordinates(molobj, coord, confid=idx)

## Use nglviewer to see your molecules

In [None]:
# View molobj in the notebook through ppqm's Jupyter helper.
ppqm_jupyter.show_molobj(molobj)

## Use the updated coordinates to calculate Fukui properties

Again, a list of properties (one for each conformer) is returned.


In [None]:
# Calculate fukui coefficients
fukui_options = {
    "gfn": 2,
    "alpb": "h2o",
    "vfukui": None,
}

# Per-solvent settings (GFN level 2, ALPB implicit solvent). The following
# cells pass water_options, chloroform_options and octanol_options to
# calc.calculate, so they must be defined here or those cells stop with a
# NameError.
water_options = {"gfn": 2, "alpb": "h2o"}
chloroform_options = {"gfn": 2, "alpb": "chcl3"}
octanol_options = {"gfn": 2, "alpb": "octanol"}

In [None]:
# xTB properties for every conformer of molobj using the water settings.
water_results = calc.calculate(molobj, water_options)

In [None]:
# xTB properties for every conformer of molobj using the chloroform settings.
chloroform_results = calc.calculate(molobj, chloroform_options)

In [None]:
# xTB properties for every conformer of molobj using the octanol settings.
octanol_results = calc.calculate(molobj, octanol_options)

In [None]:
# Start from an empty frame; the per-solvent columns are assigned afterwards.
df_solvents = pd.DataFrame()

In [None]:
# Water: SCC energy of each conformer, converted from Hartree to kcal/mol.
df_solvents["water_energy"] = [
    props["scc_energy"] * ppqm.units.hartree_to_kcalmol
    for props in water_results
]

In [None]:
# Chloroform: SCC energy of each conformer, converted from Hartree to kcal/mol.
df_solvents["chloroform_energy"] = [
    props["scc_energy"] * ppqm.units.hartree_to_kcalmol
    for props in chloroform_results
]

In [None]:
# Octanol: SCC energy of each conformer, converted from Hartree to kcal/mol.
df_solvents["octanol_energy"] = [
    props["scc_energy"] * ppqm.units.hartree_to_kcalmol
    for props in octanol_results
]

In [None]:
# Boltzmann weights of the conformers computed from the water energies.
df_solvents["water_weight"] = ppqm.chembridge.get_boltzmann_weights(
    df_solvents["water_energy"].to_numpy()
)

In [None]:
# Boltzmann weights of the conformers computed from the octanol energies.
df_solvents["octanol_weight"] = ppqm.chembridge.get_boltzmann_weights(
    df_solvents["octanol_energy"].to_numpy()
)

In [None]:
# Boltzmann weights of the conformers computed from the chloroform energies.
df_solvents["chloroform_weight"] = ppqm.chembridge.get_boltzmann_weights(
    df_solvents["chloroform_energy"].to_numpy()
)

In [None]:
# Bare expression: the notebook renders the per-solvent table as the cell output.
df_solvents