# Applying BespokeFit to the mom-terphenyl system

This notebook generates an OpenFF force field for the MOM-terphenyl system. We use mBuild to construct a short 3-mer of the polymer and assign its partial charges with the AM1-BCC method using OpenEye. Then, using the BespokeFit workflow, we assign torsions/angles. BespokeFit uses QM calculations on fragments present in our molecule to assign new, more accurate torsion potentials.

In [1]:
try:
    from openmm import app
except ImportError:
    from simtk.openmm import app

from openff.toolkit.topology import FrozenMolecule, Molecule, Topology
from openff.toolkit.typing.engines.smirnoff import ForceField
import pdb
import os
import openbabel
import mbuild as mb
import numpy as np
import subprocess
from mbuild.lib.recipes.polymer import Polymer
import rdkit

ModuleNotFoundError: No module named 'openbabel'

In [2]:
# Build the three mBuild compounds used by the polymer recipe from SMILES.
# `comp` is the repeat unit (named "MOM"); per its SMILES it carries a methoxy
# group, an aldehyde (C=O) and a chiral amine ([C@@H](C)N) on a terphenyl core.
comp = mb.load('COc3ccc(c1cc(C=O)ccc1)c(c2cc([C@@H](C)N)ccc2)c3', smiles = True, name="MOM")
# End caps attached later with `add_end_groups`: methanol ("head") and
# tert-butyl formate ("tail").
cap_o = mb.load('CO', smiles = True)
cap_n = mb.load('CC(C)(C)OC=O', smiles = True)

In [3]:
# Print the particle index of every hydrogen in the monomer so the hydrogens
# to be replaced during polymerisation (the `indices` given to
# `Polymer.add_monomer`) can be identified.
# Iterate `comp.particles()` explicitly -- the same accessor the atom-renaming
# cell uses on `chain` -- instead of relying on implicit integer indexing of
# the Compound, which rebuilds the particle list for every index.
for i, atom in enumerate(comp.particles()):
    if atom.name == "H":
        print(i, atom)

25 <H pos=([-0.3091  0.5277  0.1875]), 1 bonds, id: 139969154988400>
26 <H pos=([-0.341   0.5484  0.0094]), 1 bonds, id: 139969154988640>
27 <H pos=([-0.2264  0.6584  0.0947]), 1 bonds, id: 139969154988880>
28 <H pos=([-0.3814  0.3151  0.0825]), 1 bonds, id: 139969154989120>
29 <H pos=([-0.3927  0.0728  0.0592]), 1 bonds, id: 139969154989360>
30 <H pos=([-0.3478 -0.0973 -0.1398]), 1 bonds, id: 139969154989600>
31 <H pos=([-0.3983 -0.4721 -0.182 ]), 1 bonds, id: 139969154989840>
32 <H pos=([-0.2203 -0.488  -0.0091]), 1 bonds, id: 139969154989888>
33 <H pos=([-0.0599 -0.3833  0.1459]), 1 bonds, id: 139969155006768>
34 <H pos=([-0.042  -0.1379  0.1607]), 1 bonds, id: 139969155007008>
35 <H pos=([0.1786 0.1162 0.1312]), 1 bonds, id: 139969155007248>
36 <H pos=([0.402  0.0708 0.1773]), 1 bonds, id: 139969155007488>
37 <H pos=([ 0.5421 -0.1239  0.2313]), 1 bonds, id: 139969155007728>
38 <H pos=([ 0.3686 -0.1715  0.2228]), 1 bonds, id: 139969155007968>
39 <H pos=([ 0.4807 -0.2138  0.0875]), 1

In [4]:
# Render the monomer (with ports shown) and restyle two atoms as grey sticks
# with black spheres so they stand out in the viewer.
# NOTE(review): serials 42 and 32 look like the 1-based counterparts of the
# particle indices 41 and 31 passed to `add_monomer` -- confirm.
style = {
    "stick": {"radius": 0.2, "color": "grey"},
    "sphere": {"scale": 0.3, "color": "black"},
}
view = comp.visualize(show_ports=True)
for serial in (42, 32):
    view.setStyle({'model': -1, 'serial': serial}, style)
# Bare last expression so the viewer is displayed as the cell output.
view

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


<py3Dmol.view at 0x7f4d1bc61640>

In [6]:
# Assemble a capped trimer with mBuild's Polymer recipe.
chain = Polymer()

# Repeat unit: particles 31 and 41 of `comp` are the bonding sites; with
# replace=True they are swapped out when neighbouring units are joined.
chain.add_monomer(
    compound=comp,
    indices=[31, 41],
    separation=.15,
    replace=True,
    # orientation = [[0,-1,0],[1,0,0]]
)

# End caps: the last particle (index -1) of each cap compound is the bonding
# site; `cap_o` goes on the "head" end and `cap_n` on the "tail" end.
for cap_compound, cap_label in ((cap_o, "head"), (cap_n, "tail")):
    chain.add_end_groups(
        compound=cap_compound,
        index=-1,
        separation=0.15,
        label=cap_label,
        duplicate=False,
    )

# Three repeats of the single monomer type 'A'.
chain.build(n=3, sequence='A')

In [7]:
# Set residue labels for polymer: everything under the "monomer" label is
# named "TRI" and everything under the "Compound" label is named "CAP".
residue_names = {"monomer": "TRI", "Compound": "CAP"}
for label_key, residue_name in residue_names.items():
    for labelled in chain.labels[label_key]:
        labelled.name = residue_name


In [8]:
# Rename atoms using mbuild interface: every particle gets its base name plus
# a running per-name counter (first "C" -> "C1", second "C" -> "C2", ...),
# numbered in `chain.particles()` order.
counts = {}
for particle in chain.particles():
    # Drop any numeric suffix left by a previous run of this cell so that
    # re-running it is idempotent ("C1" stays "C1" instead of becoming "C11").
    atom_name = particle.name.rstrip("0123456789")
    counts[atom_name] = counts.get(atom_name, 0) + 1
    particle.name = atom_name + str(counts[atom_name])

In [9]:
# Write to PDB
# `residues` lists the names assigned to the chain's labels earlier
# ("TRI" for monomers, "CAP" for end groups); an existing file with this
# name is overwritten.
chain.save("mom_trimer_mbuild.pdb", overwrite = True, residues = ["TRI", "CAP"])

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


# File conversion between `mbuild` and OpenFF

Ideally, structures generated from `mbuild` could be imported directly into the OpenFF workflow; however, they cannot. We have to recover connectivity information and write the structures out to `.mol` and `.pdb` files to feed into the OpenFF force field assignment workflow.

In [5]:
# Load into RDKit to assign bond information in PDB file
# `import rdkit` on its own does not load the `Chem` subpackage, so import it
# explicitly rather than relying on another library having loaded it first.
from rdkit import Chem

rdmol = Chem.MolFromPDBFile("mom_trimer_mbuild.pdb", removeHs=False)
if rdmol is None:
    # RDKit reports a parse failure by returning None instead of raising;
    # fail here with a clear message rather than on the attribute access below.
    raise ValueError("RDKit could not parse 'mom_trimer_mbuild.pdb'")

for atom in rdmol.GetAtoms():
    # Clear the hetero-atom flag on every atom's PDB residue info before the
    # structure is written back out.
    ri = atom.GetPDBResidueInfo()
    ri.SetIsHeteroAtom(False)

# Hydrogens were kept on load (removeHs=False), so they are written too.
Chem.MolToPDBFile(rdmol, "mom_trimer_mbuild_bonds.pdb")

In [6]:
# OpenBabel to convert PDB to Mol format
obConversion = openbabel.OBConversion()
obConversion.SetInAndOutFormats("pdb", "mol")
mol = openbabel.OBMol()
# ReadFile reports failure through its boolean return value; without this
# check a failed read would silently produce an empty .mol file.
if not obConversion.ReadFile(mol, "mom_trimer_mbuild_bonds.pdb"):
    raise IOError("OpenBabel could not read 'mom_trimer_mbuild_bonds.pdb'")
# Kept as the last expression so the write status is shown as the cell output.
obConversion.WriteFile(mol, "mom_trimer_mbuild.mol")



True

# OpenFF Parameter Assignment

Now that we have our input `.mol` and `.pdb` files we can assign bonded and non-bonded parameters using OpenFF.

In [15]:
# Load in with OpenFF
# Reads the .mol file written by the OpenBabel conversion cell; `tri_mom` is
# later used as the unique molecule for the topology and for charge assignment.
tri_mom = Molecule.from_file("mom_trimer_mbuild.mol")

In [16]:
# Read the bonded PDB (written by the RDKit cell) with OpenMM and keep its
# topology for conversion to an OpenFF topology.
pdbfile = app.PDBFile("mom_trimer_mbuild_bonds.pdb")
omm_topology = pdbfile.topology
# Bare expression: displays the topology summary as the cell output.
omm_topology

<Topology; 1 chains, 5 residues, 153 atoms, 161 bonds>

In [17]:
# Convert the OpenMM topology to an OpenFF topology, supplying `tri_mom` as
# the only unique molecule to match against.
off_topology = Topology.from_openmm(
    omm_topology, unique_molecules=[tri_mom]
)

In [7]:
# AM1-BCC charges are cached in an SDF file: they are computed and written
# only when the file does not exist yet, otherwise the charged molecule is
# read back from disk.
# Modified OpenFF to increase maxAtoms for AM1BCC method to 500 atoms
if os.path.exists('terphenyl_mom_trimer_charges.sdf'):
    # Reuse the previously charged molecule.
    tri_mom = Molecule.from_file('terphenyl_mom_trimer_charges.sdf')
else:
    tri_mom.assign_partial_charges(partial_charge_method="am1bcc")
    tri_mom.to_file('terphenyl_mom_trimer_charges.sdf', file_format='sdf')

# Submit to BespokeFit Executor

The BespokeFit workflow needs an SDF file with charges to find fragments and assign a bespoke set of force-field parameters for chemical systems. Make sure an instance of the BespokeFit executor is running in another terminal, launched with:

```
BEFLOW_OPTIMIZER_KEEP_FILES=True openff-bespoke executor launch \
    --n-fragmenter-workers 1                                    \
    --n-optimizer-workers  1                                    \
    --n-qc-compute-workers 1                                    \
    --qc-compute-n-cores   1                                    \
    --qc-compute-max-mem   1.5                                  \
    --directory            bespoke-executor
```

These flags determine how BespokeFit distributes processes to perform the fit. Setting `BEFLOW_OPTIMIZER_KEEP_FILES=True` allows us to look at output files from the fit.

In [19]:
# Submit the charged trimer SDF to the running BespokeFit executor using the
# "default" workflow.
# NOTE(review): the three --default-qc-spec values are presumably
# program / method / basis (xtb, gfn2xtb, none) -- confirm with
# `openff-bespoke executor submit --help`.
!BEFLOW_OPTIMIZER_KEEP_FILES=True openff-bespoke executor submit \
     --file                 "terphenyl_mom_trimer_charges.sdf" \
     --workflow             "default"                            \
     --default-qc-spec      xtb gfn2xtb none


[92m──────────────────────────────── [0mOpenFF Bespoke[92m ────────────────────────────────[0m

[1;36m1[0m. preparing the bespoke workflow                                               
                                                                                
[?25l[32m⠋[0m loading the molecules
[1A[2K[1m[[0m[32m✓[0m[1m][0m [1;34m1[0m molecules found
[2Kbuilding fitting schemas [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [35m100%[0m [36m0:00:00[0m
[1A[2K[1m[[0m[32m✓[0m[1m][0m fitting schemas generated
                                                                                
[1;36m2[0m. submitting the workflow                                                      
                                                                                
[2Ksubmitting tasks [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [35m100%[0m [36m0:00:00[0m
[1A[2K[1m[[0m[32m✓[0m[1m][0m the following workflows were 

In [14]:
# List the optimizations known to the executor, with their ID and status.
!openff-bespoke executor list


[92m──────────────────────────────── [0mOpenFF Bespoke[92m ────────────────────────────────[0m

The following optimizations were found:
┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃[1m [0m[1mID[0m[1m [0m┃[1m [0m[1mSMILES                                                       [0m[1m [0m┃[1m [0m[1mSTATUS [0m[1m [0m┃
┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
│ 1  │ C[C@H](c1cccc(c1)c2cc(ccc2c3cccc(c3)C(=O)N[C@H](C)c4cccc(c4)c │ [32msuccess[0m │
│    │ 5cc(ccc5c6cccc(c6)C(=O)OC)OC)OC)NC(=O)c7cccc(c7)c8ccc(cc8c9cc │         │
│    │ cc(c9)[C@@H](C)NC(=O)OC(C)(C)C)OC                             │         │
└────┴───────────────────────────────────────────────────────────────┴─────────┘


In [13]:
# Follow the progress of optimization 1 through its fragmentation,
# QC-generation and parameter-optimization stages.
!openff-bespoke executor watch --id 1


[92m──────────────────────────────── [0mOpenFF Bespoke[92m ────────────────────────────────[0m

[2K[32m⠸[0m fragmenting the moleculele
[1A[2K[1m[[0m[32m✓[0m[1m][0m fragmentation successful
[2K[32m⠸[0m generating bespoke QC datata
[1A[2K[1m[[0m[32m✓[0m[1m][0m qc-generation successful
[2K[32m⠸[0m optimizing the parametersrs
[1A[2K[1m[[0m[32m✓[0m[1m][0m optimization successful


In [15]:
# Retrieve the finished fit for optimization 1: the results are saved to the
# --output JSON file and the bespoke force field to the --force-field OFFXML file.
!openff-bespoke executor retrieve --id 1 --output "mom_trimer_bespoke_fit.json" --force-field "openff-2.0.0_bespoke_mom_trimer.offxml"


[92m──────────────────────────────── [0mOpenFF Bespoke[92m ────────────────────────────────[0m

[1m[[0m[32m✓[0m[1m][0m the bespoke fit is finished
                                                                                
outputs have been saved to [95mmom_trimer_bespoke_fit.json[0m                          
                                                                                
                                                                                
the bespoke force field has been saved to [95mopenff-[0m[1;95m2.0[0m[95m.0_bespoke_mom_trimer.offxml[0m
                                                                                


In [10]:
import yaml
from openff.toolkit.topology import Molecule
from utils import display_in_grid, draw_molecule

# Draw the charged trimer (without explicit hydrogens) in a grid using the
# helpers imported from the local `utils` module.
molecules = [tri_mom]
display_in_grid([draw_molecule(mol, explicit_hydrogens=False) for mol in molecules])

GridBox(children=(Output(),), layout=Layout(grid_template_columns='repeat(auto-fill, 315px)'))