## Using SMIRNOFF with Amber on BRD4:inhibitor complexes: Exporting parameterized complexes to Amber, Gromacs, and CHARMM

This example applies SMIRNOFF-format parameters to BRD4 inhibitors from the [living review on binding free energy benchmark systems](https://www.annualreviews.org/doi/abs/10.1146/annurev-biophys-070816-033654) by Mobley and Gilson. The BRD4 system comes from the [accompanying GitHub repository](https://github.com/MobleyLab/benchmarksets/tree/master/input_files/BRD4).

This example uses [ParmEd](http://parmed.github.io) to take a protein-ligand system parameterized with an alternate force field, and replace the force field used for the ligand with an OpenFF force field. This example is meant to illustrate how to apply parameters to a single ligand, but it's also easy to process many ligands.

In [1]:
# Retrieve protein and ligand files for BRD4 and a docked inhibitor from the benchmark systems GitHub repository
# https://github.com/MobleyLab/benchmarksets
import requests
repo_url = 'https://raw.githubusercontent.com/MobleyLab/benchmarksets/master/input_files/'
# Local filename -> remote URL of the file to download
sources = {
    'system.prmtop' : repo_url + 'BRD4/prmtop-coords/BRD4-1.prmtop',
    'system.crd'   : repo_url + 'BRD4/prmtop-coords/BRD4-1.crds',
    'ligand.sdf'   : repo_url + 'BRD4/sdf/ligand-1.sdf',
    'system.pdb'   : repo_url + 'BRD4/prmtop-coords/BRD4-1.pdb',
    'ligand.pdb'   : repo_url + 'BRD4/pdb/ligand-1.pdb'
}
for filename, url in sources.items():
    r = requests.get(url)
    # Fail loudly on an HTTP error instead of silently saving an error page
    # as if it were a valid input file
    r.raise_for_status()
    # The context manager guarantees the file is flushed and closed
    with open(filename, 'w') as outfile:
        outfile.write(r.text)

In [2]:
# Read the downloaded AMBER topology (prmtop) and coordinate files into a
# ParmEd AmberParm object
import parmed
in_prmtop = 'system.prmtop'
in_crd = 'system.crd'
structure = parmed.amber.AmberParm( in_prmtop, in_crd )


In [3]:
# Split the system into its unique molecules; each entry of `pieces` is a
# (Structure, list-of-molecule-numbers) pair
pieces = structure.split()

In [4]:
# Show the documentation of split() to see how its return value is laid out
help(pieces[0][0].split)

Help on method split in module parmed.structure:

split() method of parmed.amber._amberparm.AmberParm instance
    Split the current Structure into separate Structure instances for each
    unique molecule. A molecule is defined as all atoms connected by a graph
    of covalent bonds.
    
    Returns
    -------
    [structs, counts] : list of (:class:`Structure`, list) tuples
        List of all molecules in the order that they appear first in the
        parent structure accompanied by the list of the molecule numbers
        in which that molecule appears in the Structure



We use both a PDB file and an SDF file for the ligand.

In [19]:
# Create an OpenFF Molecule object from the ligand SDF file
from openforcefield.topology import Molecule, Topology
ligand_off_molecule = Molecule('ligand.sdf')
# Optional step (currently disabled): give all atoms unique names so we can export to GROMACS
#for idx, atom in enumerate(ligand_off_molecule.atoms):
#    atom.name = f'{atom.element.symbol}{idx+6}'
# Load the SMIRNOFF-format Parsley force field
from openforcefield.typing.engines.smirnoff import ForceField
force_field = ForceField('openff_unconstrained-1.0.0.offxml')

# Parametrize the ligand: convert the Molecule into a Topology and build an
# OpenMM System from it with the force field loaded above
ligand_system = force_field.create_openmm_system(ligand_off_molecule.to_topology())

In [20]:
# Read in the coordinates of the ligand from the PDB file.
# Only the disabled alternative below uses this object; the active code path
# takes its coordinates from the split AMBER system instead.
from simtk.openmm.app import PDBFile
ligand_pdbfile = PDBFile('ligand.pdb')

# Convert OpenMM System object containing ligand parameters into a ParmEd Structure.
import parmed

# Alternative 1 (disabled): it should be able to work this way -- though this
# begins with a PDB file which is not as elegant
#ligand_structure = parmed.openmm.load_topology(ligand_pdbfile.topology,
#                                                ligand_system,
#                                                xyz=ligand_pdbfile.positions)

# Alternative 2 (active): it should also be able to work this way, which does
# not use a PDB file. The topology comes from the OpenFF molecule and the
# coordinates from the second molecule of the split AMBER system.
# NOTE(review): assumes pieces[1][0] is the ligand and that its atom order
# matches ligand.sdf -- confirm
ligand_off_topology = Topology.from_molecules([ligand_off_molecule])
ligand_structure = parmed.openmm.load_topology(ligand_off_topology.to_openmm(),
                                                ligand_system,
                                                xyz=pieces[1][0].positions)

In [21]:
# Write the ligand-only Structure out as temporary files: first the
# prmtop/inpcrd pair, then the gro/top pair. Existing files are overwritten.
for tmp_name in ('tmp.prmtop', 'tmp.inpcrd', 'tmp.gro', 'tmp.top'):
    ligand_structure.save(tmp_name, overwrite=True)

In [22]:
from openforcefield.topology import Molecule, Topology
from openforcefield.typing.engines.smirnoff import ForceField

# Sanity check: the ligand loaded from the SDF file and the ligand extracted
# from the prmtop file should contain the same number of atoms.

# Number of atoms in the ligand loaded directly
# (to also check element order, print atom.element for each atom)
n_atoms_new = sum(1 for _ in ligand_off_molecule.atoms)

# Second entry of the split system: the molecule plus the list of molecule
# numbers it occurs at
parmed_lig, n_copies = pieces[1]

# Number of atoms in the ligand taken from the prmtop file
n_atoms = sum(1 for _ in parmed_lig)

print(f'there are {n_atoms_new} atoms in the new ligand')
# Report (without raising) when the two atom counts disagree
if n_atoms_new != n_atoms:
    print("Error: Number of atoms in input ligand doesn't match number extracted from prmtop file.")

there are 26 atoms in the new ligand


In [27]:
# Combine receptor and ligand structures
import copy


def _print_type_summary(heading, struct):
    """Print `heading`, then the atom type names found in `struct` and how many
    distinct type names, epsilon values and sigma values its atoms carry."""
    print(heading)
    type_names = {atom.atom_type.name for atom in struct}
    print("Unique atom names:", type_names)
    print("Number of unique atom names:", len(type_names))
    print("Number of unique epsilons:", len({atom.epsilon for atom in struct}))
    print("Number of unique sigmas:", len({atom.sigma for atom in struct}))


# Earlier attempts at combining the protein and the ligand, kept for reference (disabled):
#protein_struct = copy.copy(pieces[0][0])
#protein_struct.remake_parm()
#complex_structure = ligand_structure + protein_struct
#
#df = pieces[0][0].to_dataframe()
#idx_list = [atom.idx for atom in pieces[0][0].atoms]
#new_struct = structure[idx_list]
#new_struct.prune_empty_terms()
#new_struct.remake_parm()
#new_struct.recalculate_LJ()

# Start from an empty Structure and append the first molecule of the split
# system (labelled as the protein in the printed summary)
complex_structure = parmed.Structure()
complex_structure += pieces[0][0]
_print_type_summary("BEFORE SYSTEM COMBINATION (just protein)", complex_structure)
print()

_print_type_summary("BEFORE SYSTEM COMBINATION (just ligand)", ligand_structure)
print()

# Append the re-parameterized ligand and summarize the combined system
complex_structure += ligand_structure
_print_type_summary("AFTER SYSTEM COMBINATION (protein+ligand)", complex_structure)

# List the attributes available on the combined Structure
print(dir(complex_structure))
#print(len(new_struct.LJ_types))
#dir(pieces[0][0])


#Ultimately we'd like to combine the protein and ligand and the water, which would probably be something like this:
#complex_structure = pieces[0][0] + ligand_structure + pieces[2][0]*len(pieces[2][1])

BEFORE SYSTEM COMBINATION (just protein)
Unique atom names: {'CB', 'H', 'S', 'C*', 'SH', 'CN', 'H5', 'C', 'N3', 'O2', 'CC', 'NB', 'O', 'HO', 'HS', 'CA', '2C', 'CO', 'H1', 'H4', 'CT', 'OH', 'CX', 'HC', 'C8', 'N2', 'HP', 'HA', 'NA', 'CR', 'CW', '3C', 'N'}
Number of unique atom names: 33
Number of unique epsilons: 14
Number of unique sigmas: 14

BEFORE SYSTEM COMBINATION (just ligand)
Unique atom names: {'C1', 'C2', 'H3', 'N1', 'H2', 'H1'}
Number of unique atom names: 6
Number of unique epsilons: 5
Number of unique sigmas: 5

AFTER SYSTEM COMBINATION (protein+ligand)
Unique atom names: {'C2', 'CB', 'H', 'S', 'C*', 'SH', 'CN', 'H3', 'H5', 'N1', 'C', 'H2', 'N3', 'O2', 'CC', 'NB', 'O', 'HO', 'HS', 'CA', '2C', 'CO', 'H1', 'H4', 'C1', 'CT', 'OH', 'CX', 'HC', 'C8', 'N2', 'HP', 'HA', 'NA', 'CR', 'CW', '3C', 'N'}
Number of unique atom names: 38
Number of unique epsilons: 19
Number of unique sigmas: 19
['ANGLE_FORCE_GROUP', 'BOND_FORCE_GROUP', 'CMAP_FORCE_GROUP', 'DIHEDRAL_FORCE_GROUP', 'IMPROPER_

In [12]:
# For debug purposes we could try just this
#complex_structure = ligand_structure

# Export the combined Structure: first the AMBER files (prmtop, inpcrd),
# then the Gromacs files (gro, top). Existing files are overwritten.
for output_name in ('complex.prmtop', 'complex.inpcrd', 'complex.gro', 'complex.top'):
    complex_structure.save(output_name, overwrite=True)

In [38]:
from simtk.openmm import app, unit, LangevinIntegrator
import numpy as np
from parmed.openmm import NetCDFReporter

# Give the combined Structure the box vectors of the structure read from the
# AMBER files
complex_structure.box_vectors = structure.box_vectors

# Settings for the OpenMM System: PME with a 9 angstrom cutoff, HBonds
# constraints and rigid water
system_options = {
    'nonbondedMethod': app.PME,
    'nonbondedCutoff': 9*unit.angstrom,
    'constraints': app.HBonds,
    'rigidWater': True,
}
system = complex_structure.createSystem(**system_options)


In [39]:
# Langevin integrator; arguments are temperature, friction coefficient and step size
integrator = LangevinIntegrator(
    300*unit.kelvin,
    1/unit.picosecond,
    0.002*unit.picoseconds,
)
simulation = app.Simulation(complex_structure.topology, system, integrator)

# The box is about 75 angstroms per side, so add (30, 30, 30) to center the protein
centering_shift = np.array([30, 30, 30])*unit.angstrom
simulation.context.setPositions(complex_structure.positions + centering_shift)

# Record the trajectory to a NetCDF file (second argument is the report interval)
nc_reporter = NetCDFReporter('trajectory.nc', 10)
simulation.reporters.append(nc_reporter)

In [40]:
# Energy-minimize the system, then read the minimized positions back from the context
simulation.minimizeEnergy()
minimized_coords = simulation.context.getState(getPositions=True).getPositions()

In [41]:
# Assign initial velocities corresponding to 300 K, then run 100 integration steps
simulation.context.setVelocitiesToTemperature(300*unit.kelvin)
simulation.step(100)

