In [None]:
# ------------------------------------------------

import os

import DTC_compchem_practical

#source /etc/os-release && echo $PRETTY_NAME
with open('/etc/os-release') as fh:
  for line in fh:
    if line and line[0] != '#':
      os.environ[line.split('=')[0]] = line.split('=')[1]
print(f'Running {os.environ["PRETTY_NAME"]}') # on {os.environ["HOST"]} as {os.environ["USER"]}')

# ------------------------------------------------
from IPython.display import display, clear_output
import time, sys
tick = time.time()
# Install mamba without resetting the kernel alla condacolab
!wget -qnc https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh
!bash Mambaforge-Linux-x86_64.sh -bfp /usr/local
sys.path.append('/usr/local/lib/python3.10/site-packages')
!mamba config --set auto_update_conda false
!mamba install -y -c omnia -c conda-forge openmm openmmtools openff-toolkit openmmforcefields pdbfixer
!pip install -q py3Dmol
!pip install -q rdkit
!pip install -q simtk
!pip install -q plotly
!pip install -q git+https://github.com/matteoferla/DTC-compchem-practical.git
tock = time.time()
clear_output()
print(f'Installation time: {tock - tick}')

# Let's look at the basics of openMM

In [None]:
import copy
from pathlib import Path

import mdtraj as md
import numpy as np
import numpy.typing as npt
import openmm as mm
import openmm.app as mma
import openmm.unit as mmu
import openmmtools as mmt
import requests
from IPython.display import clear_output, display
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
# nomenclature idea copied from FEGrow:
from openff.toolkit.topology import Molecule as OFFMolecule
from openff.toolkit.topology import Topology as OFFTopology
from openmmforcefields.generators import SMIRNOFFTemplateGenerator, GAFFTemplateGenerator

import DTC_compchem_practical as dtc

In [None]:
# First we need to read in a molecule
# We will use that from tutorial 1

import io
from importlib import resources

# Read the PDB file shipped inside the DTC_compchem_practical package.
# `importlib.resources` is the standard-library replacement for the deprecated `pkg_resources`.
template_block: str = (
    resources.files('DTC_compchem_practical') / 'data' / 'mac1-stripped.pdb'
).read_text(encoding='utf-8')

# Write it to disk so that it can be opened by filename
with open('template.pdb', 'w') as fh:
  fh.write(template_block)

pdb = mma.PDBFile('template.pdb')

# Alt way, without the intermediate file:
# pdb = mma.PDBFile(io.StringIO(template_block))

Both PDBFile (the IO for PDB files) and Modeller (the builder) have `.topology` and `.positions` attributes.

> What do they look like and what are they describing? (remember `dir` and `type`)
(An OpenMM Quantity is like a Pint Quantity: it has a value and a unit. `_value` holds the underlying values without the unit, e.g. a numpy array.)

👾👾👾

### Picking a forcefield

A page on the web says this `forcefield = mma.ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')`

But we will use this as we will use implicit water: `forcefield = mma.ForceField('amber14-all.xml', 'implicit/gbn2.xml')`

> What is the difference? And why can't we use vacuum? I read on Reddit water is an intersubjective construct...
👾👾👾

If you mix solvents, you have mixed solvent MD (good for guessing how hydrophobics and polars will interact). A Martini model is a coarse grain model, which —007 joke redacted as it was too terrible.

If we were to use a TIP3P water box, we would need to add water:

`modeller.addSolvent(forcefield, model='tip3p', padding=1.0*mmu.nanometers, ionicStrength=0.15*mmu.molar)`

In [None]:
forcefield = mma.ForceField('amber14-all.xml', 'implicit/gbn2.xml')

# Create system
# This class represents a molecular system. It stores the topology, the list of forces, and the list of particles.
# But not their positions!
# NB. This call is expected to fail at this point: the failure is what this cell demonstrates,
# so it is caught and displayed rather than left to abort a "Run All".
try:
  system: mm.System = forcefield.createSystem(pdb.topology)
except ValueError as error:
  print('Oh no! The system has a problem!')
  print(error)

> What did we do wrong? Take a guess!
👾👾👾

In [None]:
# Spot on: we forgot to add hydrogen atoms!
# A Modeller holds an editable copy of the topology and positions
modeller = mma.Modeller(pdb.topology, pdb.positions)
modeller.addHydrogens()
forcefield = mma.ForceField('amber14-all.xml', 'implicit/gbn2.xml')
system: mm.System = forcefield.createSystem(modeller.topology)
print('This system has the following forces:')
force: mm.Force
for force in system.getForces():
  print(force.__class__.__name__)

# Create an integrator as if you were to do MD...
# (the units module is imported as `mmu` in this notebook)
temperature = (25+273.15) * mmu.kelvin  # this is actual temperature for entropy not MCMC
friction_coefficient = 1 / mmu.picosecond
timestep = 2 * mmu.femtosecond
integrator: mm.Integrator = mm.LangevinIntegrator(temperature, friction_coefficient, timestep)
# Create a context
# A Context stores the complete state of a simulation.
context = mm.Context(system, integrator)

# Set the positions
context.setPositions(modeller.positions)

# Compute the energy
# A State object records a snapshot of the current state of a simulation at a point in time.
state: mm.State = context.getState(getEnergy=True)
print(f'The system has {state.getPotentialEnergy().value_in_unit(mmu.kilocalorie_per_mole):.1f} kcal/mol in potential energy')
print(f'The system has {state.getKineticEnergy().value_in_unit(mmu.kilocalorie_per_mole):.1f} kcal/mol in kinitic energy')


> What is this Integrator thing? (cf. http://docs.openmm.org/latest/userguide/theory/04_integrators.html)

👾👾👾

> What are these forces? (cf. http://docs.openmm.org/latest/userguide/theory/02_standard_forces.html and theory page in repo)

👾👾👾

In the above, we skipped the `Platform` object. A Platform defines an implementation of all the kernels needed to perform some calculation. But we are not going to deal with CUDA drivers & co.


> 12,000 kcal/mol is a lot of energy. What is going on?

👾👾👾

Bingo. We did not energy minimise the structure for our forcefield, so its energy is all over the place.
Coot and similar tools for crystallography are good but they do cause energy losses for the perfect position.

### energy minimise

`mm.LocalEnergyMinimizer.minimize(context)` is one way to do it.

The other way does not use the context but a new object

```python
# Create a Simulation object
simulation = mma.Simulation(modeller.topology, system, integrator)
simulation.context.setPositions(modeller.positions)
simulation.minimizeEnergy()
```

Obviously we don't have all day, so we won't, and will use one prepared earlier (`mac1-stripped.min.pdb`).
But do energy minimise your templates!

In [None]:
# Get the energy of this protein!

👾👾👾

In [None]:
# Now let's do a short simulation of 5 steps
# NOTE(review): `system` must describe exactly the atoms in `pdb` (hydrogens included);
# confirm that `pdb` and `system` were built from the same structure before running this.
integrator: mm.Integrator = mm.LangevinIntegrator(temperature, friction_coefficient, timestep)
# A Simulation bundles topology, system and integrator, and creates its own context
simulation = mma.Simulation(pdb.topology, system, integrator)
simulation.context.setPositions(pdb.positions)
simulation.minimizeEnergy()
simulation.step(5)

# Read the energies back from the simulation's context
state: mm.State = simulation.context.getState(getEnergy=True)
print(f'The system has {state.getPotentialEnergy().value_in_unit(mmu.kilocalorie_per_mole):.1f} kcal/mol in potential energy')
print(f'The system has {state.getKineticEnergy().value_in_unit(mmu.kilocalorie_per_mole):.1f} kcal/mol in kinitic energy')

Now, let's get back to basics.

# Lennard-Jones

> We saw the Lennard-Jones potential. What does it do and look like?

👾👾👾

In [None]:
# Build the Lennard-Jones pair test system from openmmtools and inspect it:
# list its forces and atoms, then compute its energies and the distance between the atoms.
LJpair: mmt.testsystems.TestSystem = mmt.testsystems.LennardJonesPair()
# NB. this rebinds the notebook-level name `system` to the test system's System
system: mm.System = LJpair.system

print('This system has the following forces:')
force: mm.Force
for force in LJpair.system.getForces():
  print(force.__class__.__name__)

print('This system has the following atoms:')
atom: mma.topology.Atom
for atom in LJpair.topology.atoms():
  print(atom.index, atom.element.name)

# To calculate the energy we need some things set up first
# even if we aren't integrating the system in time
# Create an integrator (simple Langevin integrator in this example)
temperature = (25+273.15) * mmu.kelvin  # this is actual temperature for entropy not MCMC
friction_coefficient = 1 / mmu.picosecond
timestep = 2 * mmu.femtosecond
integrator: mm.Integrator = mm.LangevinIntegrator(temperature, friction_coefficient, timestep)
# The context is created from the test system's System and given the test system's positions
context = mm.Context(LJpair.system, integrator)
context.setPositions(LJpair.positions)

# Compute the energy
state: mm.State = context.getState(getEnergy=True)
# first line printed: potential energy; second line printed: kinetic energy
print(f'The system has {state.getPotentialEnergy().value_in_unit(mmu.kilocalorie_per_mole):.1f} kcal/mol')
print(f'The system has {state.getKineticEnergy().value_in_unit(mmu.kilocalorie_per_mole):.1f} kcal/mol')

# `_value` is the unit-less payload of the positions Quantity.
# NOTE(review): the scan in the following cell edits `position` in place and then passes
# `LJpair.positions` to the context, so it appears to rely on this being the same array object.
position: npt.NDArray[np.float32] = LJpair.positions._value
# Euclidean distance between atom 0 and atom 1, in whatever unit `LJpair.positions` is stored in
distance: np.float32 = np.linalg.norm(position[0,:] - position[1,:])
# NOTE(review): the Å label assumes the positions are stored in ångström — TODO confirm
print(f'the distance of the two atoms ins {distance:.2f} Å')

> Why is the kinetic energy zero? Even if we _set_ an integrator in Langevin dynamics?

👾👾👾

In [None]:
# Let's do a simple experiment to test this:
# slide the second atom along x and record the potential energy at each step

from typing import Dict
import pandas as pd
import plotly.express as px

scores: Dict[float, float] = {}

for x_coordinate in np.arange(0., 10., 0.01):
  # `position` is edited in place; the positions handed to the context are `LJpair.positions`
  position[1,0] = x_coordinate
  context.setPositions(LJpair.positions)  # it can accept Quantity or numpy
  state: mm.State = context.getState(getEnergy=True)
  energy: mmu.Quantity = state.getPotentialEnergy()
  scores[float(x_coordinate)] = float(energy.value_in_unit(mmu.kilocalorie_per_mole))

# One row per scanned value: the x coordinate that was set and the resulting potential energy
curve = pd.DataFrame({'distance': list(scores.keys()), 'potential': list(scores.values())})
fig = px.scatter(curve, x='distance', y='potential')
# clip the y axis to the range of interest
fig.update_layout(yaxis_range=[-20,20])
fig.show()

In [None]:
# Let's get back to our protein and its hit

# Fetch 'QRU.mol' as text, then parse that MOL block into an RDKit molecule
hit_block: str = dtc.get_data('QRU.mol')
hit: Chem.Mol = Chem.MolFromMolBlock(hit_block)

# But something is wrong!
hit_view = dtc.get_mols_view(whiteCarbon=hit)
hit_view.show()

> What did we forget?

👾👾👾

In [None]:
# Yes, hydrogens!
# Add explicit hydrogens to the hit, then look at the result

hit_h = AllChem.AddHs(hit)

hit_h_view = dtc.get_mols_view(whiteCarbon=hit_h)
hit_h_view.show()

Oops. We did it wrong: `AllChem.AddHs` did not add coordinates for the new hydrogens.
`help(AllChem.AddHs)` will say what it is...

In [None]:
# Redo the hydrogen addition, this time asking RDKit to place the new atoms in 3D:
# without `addCoords=True` the added hydrogens carry no meaningful coordinates,
# which would then be copied into the OpenMM positions below.
hit_h = AllChem.AddHs(hit, addCoords=True)
# Hit as an OpenFF object
off_hit = OFFMolecule.from_rdkit(hit_h, allow_undefined_stereo=True)
# Now as an OpenMM Modeller one
hit_topo: mma.Topology = OFFTopology.from_molecules([off_hit]).to_openmm()
hit_pos: mmu.Quantity = off_hit.conformers[0].to_openmm()
omm_hit = mma.Modeller(hit_topo, hit_pos)
# If we hadn't added the hydrogen in RDKit `modeller.addHydrogens()` will have worked too as topology will be with bond order.
# Start the complex from the protein, then append the hit's atoms and positions
# NOTE(review): assumes `pdb` already contains hydrogens at this point — confirm
holo = mma.Modeller(pdb.topology, pdb.positions)
holo.add(omm_hit.topology, omm_hit.positions)

## What about the forcefield?
It has bond order, but how does it behave?
It needs to be parameterised for the forcefield. In Amber and co, these files were XML and you see those around.
Here we can use the Amber GAFF or Smirnoff amongst others. First let's look at GAFF2 FF.

With openmmforcefields we can do `gaff = GAFFTemplateGenerator(molecules=off_hit, forcefield='gaff-2.11')`, but it takes a lot of faff to get the atom types out. Instead I ran the following ancient command line to convert the molecule:
`antechamber -i hQRU.mol -fi mdl -o QRU.mol2 -fo mol2 -at gaff2 -nc -1 -rn QRU -c gas` (antechamber is the prep tool for Amber forcefields). The output file is the following:

  @<TRIPOS>MOLECULE
  QRU
     49    52     1     0     0
  SMALL
  gas


  @<TRIPOS>ATOM
        1 C1         -49.7390   -29.0190     6.5240 ca         1 QRU       0.033935
        2 C2         -49.8310   -29.1070     4.1400 ca         1 QRU      -0.047811
        3 C3         -50.4970   -29.0720     5.3590 ca         1 QRU      -0.039894
        4 C4         -48.4450   -29.0970     4.1170 ca         1 QRU       0.037172
        5 C5         -47.7820   -29.1950     2.7490 c          1 QRU       0.243385
        6 C6         -45.6900   -29.6890     1.4750 c5         1 QRU       0.056647
        7 C7         -45.5240   -28.3580     0.7540 ca         1 QRU      -0.021214
        8 C8         -46.4620   -27.6160     0.0280 ca         1 QRU      -0.056682
        9 C9         -51.7210   -29.1020     8.1160 c          1 QRU       0.302439
       10 C10        -44.1990   -31.1670     2.8010 c          1 QRU       0.047014
       11 C11        -44.2750   -30.1910     1.6370 c5         1 QRU       0.025300
       12 C12        -43.4330   -28.9220     1.8000 c5         1 QRU      -0.014732
       13 C13        -44.2260   -27.9000     0.9460 ca         1 QRU      -0.012122
       14 C14        -43.8630   -26.6710     0.3850 ca         1 QRU       0.074792
       15 C15        -44.8040   -25.9500    -0.3380 ca         1 QRU      -0.031772
       16 C16        -46.0890   -26.4110    -0.5260 ca         1 QRU      -0.059311
       17 C17        -53.3120   -29.2480    10.0250 cx         1 QRU       0.024027
       18 C18        -53.3800   -29.1550    11.5440 cx         1 QRU      -0.032870
       19 C19        -53.6510   -30.4910    10.8480 cx         1 QRU      -0.032870
       20 C20        -48.3570   -29.0240     6.5000 ca         1 QRU      -0.039894
       21 C21        -47.6950   -29.0760     5.2880 ca         1 QRU      -0.047811
       22 N1         -46.3610   -29.5050     2.7430 ns         1 QRU      -0.303729
       23 N2         -50.3150   -29.0460     7.8380 ns         1 QRU      -0.269209
       24 N3         -51.9570   -29.2160     9.5120 ns         1 QRU      -0.296183
       25 O1         -43.7810   -30.7670     3.9190 o          1 QRU      -0.549767
       26 O2         -42.5780   -26.1380     0.5300 oh         1 QRU      -0.360650
       27 O3         -52.5830   -29.0310     7.3190 o          1 QRU      -0.253965
       28 O4         -48.4100   -29.1260     1.7430 o          1 QRU      -0.271935
       29 O5         -44.5460   -32.3730     2.6250 o          1 QRU      -0.549767
       30 H1         -50.4000   -29.1420     3.1990 ha         1 QRU       0.063171
       31 H2         -51.5960   -29.0860     5.4020 ha         1 QRU       0.064288
       32 H3         -46.3160   -30.4080     0.9260 h1         1 QRU       0.056249
       33 H4         -47.4880   -27.9910    -0.0990 ha         1 QRU       0.062650
       34 H5         -43.8980   -30.7720     0.7820 hc         1 QRU       0.039121
       35 H6         -43.3810   -28.6090     2.8530 hc         1 QRU       0.032462
       36 H7         -42.3840   -29.0410     1.4900 hc         1 QRU       0.032462
       37 H8         -44.5150   -24.9820    -0.7740 ha         1 QRU       0.065046
       38 H9         -46.8100   -25.8230    -1.1130 ha         1 QRU       0.062380
       39 H10        -53.7890   -28.7270     9.1820 h1         1 QRU       0.050420
       40 H11        -52.4580   -29.0160    12.1280 hc         1 QRU       0.028620
       41 H12        -53.9290   -28.5260    12.2610 hc         1 QRU       0.028620
       42 H13        -54.6700   -30.9040    10.8270 hc         1 QRU       0.028620
       43 H14        -53.2410   -31.5090    10.9280 hc         1 QRU       0.028620
       44 H15        -47.7870   -28.9870     7.4400 ha         1 QRU       0.064288
       45 H16        -46.5960   -29.1000     5.2500 ha         1 QRU       0.063171
       46 H17        -45.8410   -29.5920     3.6280 hn         1 QRU       0.150365
       47 H18        -49.6710   -29.0230     8.6410 hn         1 QRU       0.157352
       48 H19        -51.1610   -29.2760    10.1630 hn         1 QRU       0.151314
       49 H20        -42.3550   -25.2650     0.1200 ho         1 QRU       0.218261
  @<TRIPOS>BOND
       1     1     3 ar
       2     2     3 ar
       3     2     4 ar
       4     4     5 1
       5     6     7 1
       6     7     8 ar
       7     6    11 1
       8    11    10 1
       9    11    12 1
      10     7    13 ar
      11    12    13 1
      12    13    14 ar
      13    14    15 ar
      14     8    16 ar
      15    15    16 ar
      16    17    18 1
      17    17    19 1
      18    18    19 1
      19     1    20 ar
      20     4    21 ar
      21    20    21 ar
      22     5    22 1
      23     6    22 1
      24     1    23 1
      25     9    23 1
      26     9    24 1
      27    17    24 1
      28    10    25 1
      29    14    26 1
      30     9    27 2
      31     5    28 2
      32    10    29 1
      33     2    30 1
      34     3    31 1
      35     6    32 1
      36     8    33 1
      37    11    34 1
      38    12    35 1
      39    12    36 1
      40    15    37 1
      41    16    38 1
      42    17    39 1
      43    18    40 1
      44    18    41 1
      45    19    42 1
      46    19    43 1
      47    20    44 1
      48    21    45 1
      49    22    46 1
      50    23    47 1
      51    24    48 1
      52    26    49 1
  @<TRIPOS>SUBSTRUCTURE
       1 QRU         1 TEMP              0 ****  ****    0 ROOT

In the Atoms block, we have index, name, x, y, z, atom type, residue number, residue name (technically chemical component identifier) and partial charge.
NB. In molecular mechanics, "atom type" has a different meaning than in most deep learning chemistry models, where it is used as a synonym for element symbol.

> What atom types can you see (cf. https://ambermd.org/antechamber/gaff.html#atomtype or paper for GAFF2)?

👾👾👾

> Why use atom types instead of element symbols?

👾👾👾

> Does an `aromatic sp2 C` form trans or cis isomers? What about `aliphatic sp2 C`? Is the hydrogen - heavy atom bond the same length?

👾👾👾

In [None]:
# Having looked at the classics, we will move into the modern era
# The template generator parameterises the hit on the fly when the forcefield meets it
smirnoff = SMIRNOFFTemplateGenerator(molecules=off_hit)
# Same protein forcefield and implicit solvent model as used earlier in this notebook
forcefield_names = ('amber14-all.xml', 'implicit/gbn2.xml')
forcefield = mma.ForceField(*forcefield_names)
forcefield.registerTemplateGenerator(smirnoff.generator)

# Now the molecule will be recognised and we can get the energy of the complex
system: mm.System = forcefield.createSystem(holo.topology)
integrator: mm.Integrator = mm.LangevinIntegrator(temperature, friction_coefficient, timestep)
context = mm.Context(system, integrator)
context.setPositions(holo.positions)
state: mm.State = context.getState(getEnergy=True)
print(f'The system has {state.getPotentialEnergy().value_in_unit(mmu.kilocalorie_per_mole):.1f} kcal/mol in potential energy')
print(f'The system has {state.getKineticEnergy().value_in_unit(mmu.kilocalorie_per_mole):.1f} kcal/mol in kinitic energy')

This is just the internal energy. In reality to get Gibbs free energy we need entropy.
This requires a simulation in both bound and unbound form. See FEP or dynamic undocking.