# Find TS conformers and calculate energy on the fly
This uses RDKit as the backend to generate the TS conformers and Psi4 to calculate their energies.

Note: if the Jupyter notebook crashes, it is highly likely that `openbabel` and `rdkit` <br>
were compiled against different dependencies (assuming you are using conda builds).<br>
You can either build your own versions (which solves the problem) or change the import order<br>
(which may cause some methods of the package imported later to malfunction).<br>

## 1. Generate conformers
Necessary packages

In [None]:
import os
import multiprocessing
from copy import deepcopy
from itertools import combinations, product
from typing import List, Optional, Union

import numpy as np
import psi4
from joblib import Parallel, delayed
from rdkit import Chem

from acs.backend.rdk import RDKitConf, RDKitTS
from acs.converter.geom import (xyz_str_to_xyz_dict,
                                xyz_dict_to_xyz_str,
                                xyz_dict_to_xyz_file)
import pybel

%load_ext autoreload
%load_ext autotime
%autoreload 2

In [None]:
def get_separable_angle_list(samplings: Union[list, tuple],
                             from_angles: Optional[Union[list, tuple]] = None):
    """
    Get the list of angles to sample for each input dimension (torsion).

    For each dimension the sampling can be given as
    - a number ``n``: ``n`` angles evenly spaced over 360 degrees are sampled,
      starting from the initial angle. ``0`` means the dimension is not
      changed (only the initial angle is returned);
    - a list / tuple / np.ndarray: explicit angle offsets added to the
      initial angle.

    Examples:
    [[120, 240,], 4, 0] => [[120., 240.],
                            [0., 90., 180., 270.],
                            [0.]]

    Args:
        samplings (Union[list, tuple]): An array of sampling information.
                  For each element, it can be either a sequence or a number.
        from_angles (Union[list, tuple]): An array of initial angles, one for
                    each dimension. If not set (``None`` or empty), angles
                    will begin at zeros.

    Returns:
        list: A list with one list of sampled angles (in degrees, wrapped into
              the 0 - 360 range) per dimension.

    Raises:
        ValueError: If a numeric sampling is not a whole number.
        TypeError: If a sampling is neither a number nor a sequence.
    """
    # ``is None`` instead of truthiness so that numpy arrays are accepted
    if from_angles is None or len(from_angles) == 0:
        from_angles = len(samplings) * [0.]

    angle_list = []
    for ind, sampling in enumerate(samplings):
        if isinstance(sampling, (int, float)):
            # Only a number is provided: it is the number of sampling points
            n_points = int(sampling)
            if n_points != sampling:
                raise ValueError(f'The number of sampling points must be a '
                                 f'whole number, got {sampling}')
            if n_points == 0:
                # Does not change
                offsets = np.array([0])
            else:
                # Keep an integer step when 360 is divisible by n_points;
                # otherwise use true division so the points stay evenly
                # spaced over the full circle (floor division would not be).
                step = 360 // n_points if 360 % n_points == 0 \
                    else 360 / n_points
                offsets = np.arange(n_points) * step
        elif isinstance(sampling, (list, tuple, np.ndarray)):
            offsets = np.array(sampling)
        else:
            raise TypeError(f'Each sampling must be a number or a sequence '
                            f'of angles, got {type(sampling).__name__}')

        angles = from_angles[ind] + offsets

        # Set to 0 - 360 range. Positive multiples of 360 are kept at 360
        # (instead of 0), which is what the element-wise loop used to produce.
        wrapped = np.mod(angles, 360)
        wrapped[(wrapped == 0) & (angles > 0)] = 360

        angle_list.append(wrapped.tolist())
    return angle_list


def conformers_by_change_torsions(conf: 'RDKitConf',
                                  angle_mesh,
                                  bookkeep: dict,
                                  torsions=None,
                                  on_the_fly_check=False):
    """
    Generate conformers by rotating the angles of the torsions. The result will be saved into
    ``bookkeep``. An on-the-fly check can be applied, which identifies the conformers with colliding
    atoms.

    Each entry ``bookkeep[ind]`` is a dict with the keys ``'angles'`` (the torsion angles of the
    conformer), ``'coords'`` (``conf.GetPositions()`` as nested lists) and ``'colliding_atoms'``
    (the result of ``conf.HasCollidingAtoms()``, or ``None`` when the check is disabled).

    Args:
        conf (RDkitConf): A RDKit Conformer to be used. Its geometry is modified in place.
        angle_mesh (iterable): An iterable contains the angle_list for conformers to be generated from.
        bookkeep (dict): A dictionary to save the coords.
        torsions (list): A list of four-atom-index lists indicating the torsional modes to change.
                         If not given, all torsional modes of ``conf`` are changed.
        on_the_fly_check (bool): Whether to check colliding atoms on the fly.

    Raises:
        ValueError: If a torsion in ``torsions`` is not one of the torsional modes of ``conf``.
    """
    def _record(ind, angles):
        # Save the current geometry of ``conf`` under index ``ind``
        bookkeep[ind] = {
            'angles': angles,
            'coords': conf.GetPositions().tolist(),
            'colliding_atoms': conf.HasCollidingAtoms() if on_the_fly_check else None,
        }

    if not torsions:
        # Kept from the original implementation; the returned value is not used here
        torsions = conf.GetTorsionalModes()
        for ind, angles in enumerate(angle_mesh):
            conf.SetAllTorsionsDeg(angles)
            _record(ind, angles)
        return

    all_torsions = conf.GetTorsionalModes()
    try:
        changing_torsions_index = [all_torsions.index(tor) for tor in torsions]
    except ValueError as err:
        raise ValueError('Every torsion in `torsions` must be one of the torsional '
                         'modes of the conformer') from err

    # Running record of all torsion angles; only the changed ones are updated
    current_angles = list(conf.GetAllTorsionsDeg())

    for ind, angles in enumerate(angle_mesh):
        for tor_index, angle, tor in zip(changing_torsions_index, angles, torsions):
            conf.SetTorsionDeg(tor, angle)
            current_angles[tor_index] = angle

        # Store a copy: storing the running list itself would make every
        # entry alias the same object and report the last angles only.
        _record(ind, list(current_angles))

## 1.1 Perceive a molecule from xyz

In [None]:
######################################
# INPUT
# Geometry of the transition state guess, one atom per line:
# element symbol followed by its x, y, z coordinates
# (presumably in Angstrom - TODO confirm).
# The atom-count and title lines of an xyz file are NOT included here.
xyz_str="""C         -3.17496       -0.17991       -0.27762
C         -1.71619       -0.65488       -0.23342
C         -0.83302        0.16122        0.70571
C         -0.24541        1.35767        0.26412
O          1.71387        1.10090       -0.13057
C          2.17723       -0.05127        0.10421
O          1.53478       -0.96812        0.70655
C          3.57705       -0.37007       -0.37395
H         -1.28788       -0.62276       -1.24200
H         -1.68914       -1.70716        0.06862
H          0.29977       -0.47894        0.79368
H         -1.13396        0.15959        1.75554
H         -0.33819        1.68464       -0.76466
H          0.03184        2.13363        0.96485
H         -3.24382        0.85341       -0.63149
H         -3.77690       -0.80466       -0.94410
H         -3.63023       -0.21866        0.71666
H          3.51379       -1.05719       -1.22231
H          4.09384        0.53834       -0.68122
H          4.12754       -0.87579        0.42140
"""
######################################

## 1.1 Perceive TS

### 1.1.1 Pybel (openbabel) is used to roughly perceive the molecular connectivity

In [None]:
# Build a complete xyz-format text: atom count, an empty title line, then the atom lines
xyz_file = f"{len(xyz_str.splitlines())}\n\n{xyz_str}"
# Parse the same geometry into a dict; its 'coords' entry is used below
xyz_dict = xyz_str_to_xyz_dict(xyz_str)
# Let pybel (openbabel) read the xyz text
pybel_mol = pybel.readstring('xyz', xyz_file)

# Create the RDKitTS object from the underlying openbabel molecule
rdkitts = RDKitTS.FromOBMol(pybel_mol.OBMol)

# Embed a conformer, then overwrite its positions with the input coordinates
rdkitts.EmbedConformer()
conf = rdkitts.GetConformer()
conf.SetPositions(xyz_dict['coords'])

### 1.1.2 Set the missing bonds existing in the TS. 
You can either set them manually or use the distance matrix and the derived covalent matrix to infer which bonds are missing.

In [None]:
######################################
# INPUT
# bonds: pairs of atom indexes to be added as bonds; leave empty to infer them below
# threshold: end value of the multiplier scan applied to the covalent matrix
bonds = []
threshold = 1.6
######################################
print('Current connectivity perception:')
display(rdkitts)
if not bonds:
    # Keep only the upper triangle of the distance matrix (the rest is set to zero)
    dist_mat = np.triu(rdkitts.GetDistanceMatrix())
    covl_mat = rdkitts.GetCovalentMatrix()

    # Scan multipliers from 1.1 in steps of 0.1 and report the atom pairs whose
    # distance is shorter than ``multiplier * covl_mat`` entry.
    # NOTE(review): with a float step np.arange may or may not include the end
    # value ``threshold`` - confirm the intended upper bound.
    # NOTE(review): the loop does not stop at the first hit, so ``bonds`` ends up
    # holding the pairs found with the LAST multiplier - confirm this is intended.
    for multiplier in np.arange(1.1, threshold, 0.1):
        atom1s, atom2s = np.where((dist_mat - multiplier * covl_mat) < 0)
        bonds = [(int(atom1s[i]), int(atom2s[i])) for i in range(len(atom1s))]
        if bonds:
            print(f"Possible transition state bonds: {bonds} (with threshold = {multiplier:.1f})")

### 1.1.3 Overwrite the RDKitTS with new bonding info

In [None]:
# Convert to an RDKit molecule so that bonds can be added
rw_mol = rdkitts.ToRDMol()

# Add each bond (a pair of atom indexes) as a single bond
for bond in bonds:
    rw_mol.AddBond(*bond, Chem.BondType.SINGLE)
    
# Rebuild the RDKitTS object from the modified molecule; this replaces ``rdkitts``
rdkitts = rdkitts.FromRDMol(rw_mol)
print('Modified Molecule:')
display(rdkitts)

## 1.2 Use RDKit to generate conformers

### 1.2.1 Initialize a conformer instance

In [None]:
# Create a Conformer instance and set the initial xyz.
# ``rdkitts`` was rebuilt in the previous step, so a conformer is embedded again
# and its positions are overwritten with the input coordinates.
rdkitts.EmbedConformer()
conf = rdkitts.GetConformer()
conf.SetPositions(xyz_dict['coords'])

### 1.2.2 Get the torsional mode and the original angles

In [None]:
# You can set the correct (all) torsions here; otherwise they are taken from
# ``rdkitts.GetTorsionalModes()``.
######################################
# INPUT
torsions = None
######################################
if not torsions:
    torsions = rdkitts.GetTorsionalModes()
    print(f'RDKit perceived torsions: {torsions}')

# Register the torsions on the conformer and record the starting dihedral angles,
# which are used as the origin of the angle sampling in the following cells
conf.SetTorsionalModes(torsions)
num_torsions = len(torsions)
original_angles = conf.GetAllTorsionsDeg()
print('Torsions highlighted in the molecule:')
display(rdkitts)
print(f'The original dihedral angles is: {original_angles}')

### 1.2.3 Generate conformers according to the angle mesh

Example 1: Sample the angles `0, 120, 240` (relative to the original angle) for each torsion <br>
For reference, a species with 7 heavy atoms and 5 rotors costs ~20 ms on an Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz

In [None]:
# Three evenly spaced points per torsion;
# equivalent to sampling = [[0., 120., 240.]] * len(torsions)
sampling = [3] * len(torsions)

In [None]:
# Angles to sample for each torsion, offset from the original dihedral angles
angles_list = get_separable_angle_list(sampling,
                                       original_angles)
# Cartesian product of the per-torsion angle lists (a lazy iterator)
angle_mesh = product(*angles_list)

# Save to bookkeep
bookkeep = {}
conformers_by_change_torsions(conf,
                              angle_mesh,
                              bookkeep=bookkeep,
                              torsions=torsions,
                              on_the_fly_check=True)

Example 2: Sample 45 x 45 points for each pair of torsions <br>
For reference, a species with 7 heavy atoms and 5 rotors costs 1.4 s on an Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz

In [None]:
# Number of sampling points along each scanned torsion
n_point_each_torsion = 45
# Number of torsions scanned simultaneously (2 => pairs of torsions)
n_dimension = 2

In [None]:
# One bookkeep dict per combination of torsions
bookkeeps = []

for torsion_pair in combinations(torsions, n_dimension):
    # Reset the geometry
    conf.SetPositions(xyz_dict['coords'])
    # Get angles
    # Torsions in the current combination are sampled; the others get 0,
    # i.e. they are kept at their original angle
    sampling = [n_point_each_torsion if tor in torsion_pair else 0
                for tor in torsions]
    angles_list = get_separable_angle_list(sampling,
                                           original_angles)
    angle_mesh = product(*angles_list)
    # Generate conformers
    # NOTE: ``bookkeep`` is rebound on every iteration; after the loop it refers
    # to the result of the last combination
    bookkeep = {}
    conformers_by_change_torsions(conf,
                                  angle_mesh,
                                  bookkeep=bookkeep,
                                  torsions=torsions,
                                  on_the_fly_check=True)
    bookkeeps.append(bookkeep)

### [OPTIONAL] Check colliding atoms of conformers
If you ran the generation with the on-the-fly check enabled, you can inspect which conformers have colliding atoms.

In [None]:
# Select the bookkeep whose conformers should be inspected
check_bookkeep = bookkeep  # Example 1
# check_bookkeep = bookkeeps[0]  # Example 2

In [None]:
# Indexes of the conformers flagged as having colliding atoms
inds = [ind for ind, value in check_bookkeep.items()
        if value['colliding_atoms']]
# Report against the bookkeep that was actually checked (``check_bookkeep``),
# which is not necessarily the global ``bookkeep``
print(f"\n{len(inds)} out of {len(check_bookkeep)} conformers may have colliding atoms")

### [OPTIONAL] Align Conformers
You can also align the molecules stored in the `bookkeep`. The RMSD of each conformer (with the initial conformer as the reference) is stored in `rmsd_list`.

In [None]:
# Select the bookkeep whose conformers should be aligned
to_align = bookkeep  # Example 1
# to_align = bookkeeps[0]  # Example 2

In [None]:
from rdkit import Chem
# Import the submodule explicitly instead of relying on ``Chem.rdMolAlign``
# having been loaded as a side effect of another import
from rdkit.Chem import rdMolAlign

# One embedded conformer per geometry stored in ``to_align``
rdkitts.EmbedMultipleConfs(len(to_align))
confs = rdkitts.GetAllConformers()
torsions_values = []
# Iterate over the selected bookkeep (``to_align``), not the global ``bookkeep``
for i, value in to_align.items():
    confs[i].SetPositions(value['coords'])
    torsion_value = np.array(confs[i].GetAllTorsionsDeg())
    torsions_values.append(torsion_value)
rd_mol = rdkitts.ToRDMol()

# ``rmsd_list`` is filled in place by the alignment
rmsd_list = []
rdMolAlign.AlignMolConformers(rd_mol, maxIters=50, RMSlist=rmsd_list)

View the alignment with py3Dmol; this may take some time.

In [None]:
import py3Dmol

view = py3Dmol.view(width=600, height=400)
# Add one model per aligned conformer; the count comes from ``to_align``
# (the bookkeep that was aligned), not from the global ``bookkeep``
for i in range(len(to_align)):
    view.addModel(Chem.MolToMolBlock(rd_mol, confId=i), 'sdf')
view.setStyle({'stick':{'colorscheme':'cyanCarbon'}})
view.render()

## 2. Calculate Psi4 on the notebook

This section is only for testing, not for actual production tasks.
From experience, for conformer search purposes it is better to use `n_threads_each_calculation = 1` and as many workers (`n_worker`) as possible.

In [None]:
# How many threads to use as worker
n_worker = 8  # -1 to use all threads
# Total memory budget in MB, shared evenly by all workers
total_memory_mb = 12000
# Resolve the actual number of workers so that the memory per calculation stays
# positive when a negative ``n_worker`` is used (joblib convention:
# n_cpus + 1 + n_jobs workers for negative n_jobs, i.e. -1 => all CPUs)
if n_worker > 0:
    n_effective_worker = n_worker
elif n_worker < 0:
    n_effective_worker = max(1, multiprocessing.cpu_count() + 1 + n_worker)
else:
    raise ValueError('n_worker must not be 0')
# Memory (MB) assigned to each Psi4 calculation
n_memory_each_calculation = total_memory_mb / n_effective_worker
n_threads_each_calculation = 1
# Psi4 ``reference`` option and the method/basis passed to ``psi4.energy``
reference = 'uhf'
level_of_theory = 'b3lyp/def2-svp'

In [None]:
def geom_producer(bookkeep, xyz_dict):
    """
    Yield the geometry of every conformer stored in ``bookkeep``.

    Args:
        bookkeep (dict): Maps a conformer index to a dict holding at least
                         the key ``'coords'``.
        xyz_dict (dict): A template xyz dict. It is NOT modified; for every
                         conformer a copy with the conformer's coordinates
                         is converted by ``xyz_dict_to_xyz_file``.

    Yields:
        tuple: ``(ind, xyz_file)`` with the conformer index and the result
               of ``xyz_dict_to_xyz_file``.
    """
    for ind, conf in bookkeep.items():
        # Work on a shallow copy so the caller's ``xyz_dict`` keeps its own
        # coordinates (it is reused as the reference geometry elsewhere)
        conf_xyz_dict = dict(xyz_dict)
        conf_xyz_dict['coords'] = conf['coords']
        yield (ind, xyz_dict_to_xyz_file(conf_xyz_dict))
        
def get_psi4_dftenergy(ind, xyz_file):
    """
    Run a single-point Psi4 energy calculation for one geometry.

    The memory, reference, thread count and level of theory are read from the
    module-level settings ``n_memory_each_calculation``, ``reference``,
    ``n_threads_each_calculation`` and ``level_of_theory``.

    Args:
        ind: The index of the conformer; returned unchanged.
        xyz_file: The geometry passed to ``psi4.geometry``.

    Returns:
        tuple: ``(ind, energy)``. If the calculation raises, the exception is
               printed and ``1e4`` is returned as the energy.
    """
    memory_spec = f'{n_memory_each_calculation} MB'
    psi4.set_memory(memory_spec)
    psi4.set_options({'reference': reference})
    try:
        psi4.geometry(xyz_file)
        psi4.set_num_threads(n_threads_each_calculation)
        energy = psi4.energy(level_of_theory)
    except Exception as err:
        # Best effort: report the failure and flag it with a placeholder energy
        print(err)
        return (ind, 1e4)
    return (ind, energy)

In [None]:
# Run one Psi4 energy calculation per conformer stored in ``bookkeep`` using
# ``n_worker`` joblib workers; ``result`` collects the values returned by
# ``get_psi4_dftenergy``, i.e. (index, energy) tuples.
# NOTE(review): ``bookkeep`` is whatever the name was last bound to (Example 1,
# or the last combination of Example 2) - confirm before running.
result = Parallel(n_jobs=n_worker, verbose=100) \
         (delayed(get_psi4_dftenergy)(*data) for data in geom_producer(bookkeep, xyz_dict))