In [1]:
import numpy as np
import subprocess as sp
from openeye.oechem import *
from openforcefield.typing.engines.smirnoff import *
from networkx.algorithms import isomorphism

# Load molecules into a list of `OEMol`s

I'm going to start by loading the host and guest SYBYL-formatted `mol2` files into a list of `OEMol`s. I created these SYBYL-formatted files from files containing AM1-BCC charges and GAFF v1.7 Lennard-Jones and bonded parameters:

```
antechamber -i bcd.mol2 -fi mol2 -o bcd-sybyl.mol2 -fo mol2 -at sybyl
antechamber -i ben.mol2 -fi mol2 -o ben-sybyl.mol2 -fo mol2 -at sybyl -dr n 
# Disable `acdoctor` to handle carboxylate group
```

I think it should be possible to read in GAFF-formatted files directly, using a specific forcefield flavor, but I don't see the relevant flavor listed in the OpenEye [documentation](https://docs.eyesopen.com/toolkits/python/oechemtk/molreadwrite.html#section-molreadwrite-flavoredinputandoutput). A search for "GAFF" comes up empty (except for one blog post). If I try to load the GAFF files without any special flavor, many atoms come back with atom type "Du", which I surmise to be a dummy atom type. I believe this can be remedied by running the files through `OETriposAtomNames`, but that is one more thing that could go wrong, so I believe it is more straightforward to simply start with standard atom names.

In [144]:
def load_mol2(filename, name=None, add_tripos=True):
    """Read the first molecule from a molecule file and return it as an `OEMol`.

    Parameters
    ----------
    filename : str
        Path of the file to read (the notebook passes SYBYL-typed `mol2` files).
    name : str, optional
        Title to set on the molecule. The title is left untouched when falsy.
    add_tripos : bool, optional
        If True, run `OETriposAtomNames` on the molecule before returning it.

    Returns
    -------
    OEMol
        An independent copy of the first molecule in the file. Any further
        molecules in the file are not read.

    Raises
    ------
    IOError
        If `filename` cannot be opened for reading.
    ValueError
        If the file opens but contains no molecules.
    """
    ifs = oemolistream()
    if not ifs.open(filename):
        # Fail here with the real cause instead of printing and then hitting
        # an opaque IndexError further down.
        raise IOError(f'Unable to open {filename} for reading...')
    try:
        for mol in ifs.GetOEMols():
            if add_tripos:
                OETriposAtomNames(mol)
            if name:
                mol.SetTitle(name)
            # Copy the molecule so the returned object does not depend on the
            # stream, which is closed on the way out.
            return OEMol(mol)
    finally:
        ifs.close()
    raise ValueError(f'No molecules found in {filename}')

In [145]:
# Load the host from its SYBYL-typed mol2 file and title it 'bcd'.
# add_tripos=False keeps the atom names already stored in the file.
host = load_mol2('original/bcd-sybyl.mol2', name='bcd', add_tripos=False)

In [146]:
# Load the guest from its SYBYL-typed mol2 file and title it 'ben'.
# add_tripos=False keeps the atom names already stored in the file.
guest = load_mol2('original/ben-sybyl.mol2', name='ben', add_tripos=False)

In [147]:
# Working list of molecules, starting with the host and the guest.
molecules = [host, guest]

Now, I'm going to add water and ions to the molecules list.

In [148]:
def add_waters_and_ions():
    """Build the ion and water molecules from SMILES.

    Returns
    -------
    list
        The results of `process_smiles` (called with its default options)
        for '[Na+]', '[Cl-]' and 'O', in that order.
    """
    return [process_smiles(smiles) for smiles in ('[Na+]', '[Cl-]', 'O')]

def process_smiles(string, name=None, add_hydrogens=True, add_tripos=True, charge=True):
    """Build an `OEMol` from a SMILES string.

    Parameters
    ----------
    string : str
        SMILES string to parse.
    name : str, optional
        Title to set on the molecule. The title is left untouched when falsy.
    add_hydrogens : bool, optional
        If True, call `OEAddExplicitHydrogens` on the molecule.
    add_tripos : bool, optional
        If True, call `OETriposAtomNames` on the molecule.
    charge : bool, optional
        If True, set each atom's partial charge to its formal charge.

    Returns
    -------
    OEMol
        The parsed molecule. This function does not assign coordinates.

    Raises
    ------
    ValueError
        If `string` cannot be parsed as SMILES.
    """
    mol = OEMol()
    # OESmilesToMol reports failure through its return value; surface it
    # instead of silently continuing with an empty or partial molecule.
    if not OESmilesToMol(mol, string):
        raise ValueError(f'Unable to parse SMILES string {string!r}')
    if add_hydrogens:
        OEAddExplicitHydrogens(mol)
    if add_tripos:
        OETriposAtomNames(mol)
    if charge:
        # NOTE(review): this copies formal charges, so a neutral molecule such
        # as water ('O') ends up with all-zero partial charges. Confirm that
        # real charges are assigned elsewhere before these are used.
        for atom in mol.GetAtoms():
            atom.SetPartialCharge(atom.GetFormalCharge())
    if name:
        mol.SetTitle(name)
    return mol

In [149]:
# Build the Na+, Cl- and water molecules from SMILES.
water_and_ions = add_waters_and_ions()

In [150]:
# Rebuild the list from its inputs instead of extending it in place, so that
# re-running this cell does not append the water and ions a second time.
molecules = [host, guest] + water_and_ions

At this point, the molecules have been converted to `OEMol`s and they have charges, but the coordinates are not set. We could use a docking procedure like [this](https://github.com/openforcefield/openforcefield/blob/master/examples/host_guest_simulation/smirnoff_host_guest.ipynb) to set the positions of the host and guest, or we could take the coordinates from an existing fully solvated system. I'm going to attempt the latter approach, because in general, we are not going to want to use a docking calculation to provide the coordinates for intermediate attach-pull-release windows, where the host and guest are not closely bound to each other.

# Read in (existing) coordinates to set the positions of the `OEMol`s