# Computing relative binding free energies with an alchemical approach

In [15]:
import numpy as np
from itertools import product
from atom.functions import make_alchemical_system, compute_mcs_VF2
from dltoolbox import make_psf_from_topology
import openmm as mm
import openmm.unit as unit
import openmm.app as app
from openmm import XmlSerializer
import xml.etree.ElementTree as ET
import pandas as pd
import pickle
import mdtraj
import argparse
import os
from sys import exit
import time
import tqdm
from FastMBAR import FastMBAR

In [2]:
# Names of the two ligands being compared; they are only used to build file
# paths and the title line of the exported xyz file.
liga_name = "ejm_55"
ligb_name = "ejm_50"

# Read each ligand's OpenMM topology from its water-phase Amber prmtop file.
liga_topology, ligb_topology = (
    app.AmberPrmtopFile(f"./structures/water_phase/{name}.prmtop").topology
    for name in (liga_name, ligb_name)
)

# Mapping between the two ligand topologies. The system-building cell reads it
# as a dict: keys are used for ligand A and the mapped values for ligand B.
# NOTE(review): presumably a maximum-common-substructure atom mapping, with
# `timeout` in seconds -- confirm against atom.functions.compute_mcs_VF2.
mcs = compute_mcs_VF2(liga_topology, ligb_topology, timeout=5)

In [3]:
# Alchemical schedule. Every entry is [(elec_a, vdw_a), (elec_b, vdw_b)], the
# order in which the later cells unpack it. All values are written as exact
# literals (no arithmetic) because they are also formatted into file names.

# Stage 1: only the first ligand's elec value changes (1.0 -> 0.0).
_stage_elec_a = [[(elec, 1.0), (0.0, 0.0)] for elec in (1.0, 0.5, 0.0)]

# Stage 2: both elec values stay at 0.0 while the two vdw values are exchanged.
_stage_vdw_swap = [
    [(0.0, vdw_a), (0.0, vdw_b)]
    for vdw_a, vdw_b in (
        (0.9, 0.1),
        (0.7, 0.3),
        (0.5, 0.5),
        (0.3, 0.7),
        (0.1, 0.9),
    )
]

# Stage 3: only the second ligand's elec value changes (0.0 -> 1.0).
_stage_elec_b = [[(0.0, 0.0), (elec, 1.0)] for elec in (0.0, 0.5, 1.0)]

lambdas_list = _stage_elec_a + _stage_vdw_swap + _stage_elec_b

In [4]:
# Options passed unchanged to every createSystem call in this cell, so the
# environment and both ligands are always built with the same settings.
create_system_kwargs = dict(
    nonbondedMethod=app.PME,
    nonbondedCutoff=1.0 * unit.nanometer,
    constraints=app.HBonds,
    switchDistance=0.9 * unit.nanometer,
)

for phase in ["water", "protein"]:
    structure_dir = f"./structures/{phase}_phase"
    output_dir = f"./output/{phase}_phase"

    # --- environment (everything described by env.prmtop / env.pdb) ---------
    envi_prmtop = app.AmberPrmtopFile(f"{structure_dir}/env.prmtop")
    envi_coor = app.PDBFile(f"{structure_dir}/env.pdb").getPositions()
    envi_system = envi_prmtop.createSystem(**create_system_kwargs)
    envi_top = envi_prmtop.topology
    # Strip units: from here on the environment coordinates are a plain
    # float array in nanometers.
    envi_coor = np.array(envi_coor.value_in_unit(unit.nanometer))

    # --- ligands -------------------------------------------------------------
    # Both ligand prmtop files are loaded with the environment's periodic box
    # vectors so all three systems share the same box.
    box_vectors = envi_top.getPeriodicBoxVectors()

    liga_prmtop = app.AmberPrmtopFile(
        f"{structure_dir}/{liga_name}.prmtop", box_vectors
    )
    liga_top = liga_prmtop.topology
    liga_system = liga_prmtop.createSystem(**create_system_kwargs)

    ligb_prmtop = app.AmberPrmtopFile(
        f"{structure_dir}/{ligb_name}.prmtop", box_vectors
    )
    ligb_top = ligb_prmtop.topology
    ligb_system = ligb_prmtop.createSystem(**create_system_kwargs)

    # First (only) frame of each pre-aligned ligand xyz file.
    liga_coor = mdtraj.load_xyz(
        f"{structure_dir}/{liga_name}_aligned.xyz",
        top=mdtraj.Topology.from_openmm(liga_top),
    ).xyz[0]
    ligb_coor = mdtraj.load_xyz(
        f"{structure_dir}/{ligb_name}_aligned.xyz",
        top=mdtraj.Topology.from_openmm(ligb_top),
    ).xyz[0]
    ligs_coor = [liga_coor, ligb_coor]

    # make_alchemical_system works on the XML form of each System, parsed
    # into ElementTree elements.
    envi_et = ET.fromstring(XmlSerializer.serializeSystem(envi_system))
    liga_et = ET.fromstring(XmlSerializer.serializeSystem(liga_system))
    ligb_et = ET.fromstring(XmlSerializer.serializeSystem(ligb_system))
    ligs_et = [liga_et, ligb_et]

    # Mapped atoms, listed in matching order for ligand A and ligand B.
    liga_common_atoms = list(mcs.keys())
    ligb_common_atoms = [mcs[i] for i in liga_common_atoms]
    ligs_common_atoms = [liga_common_atoms, ligb_common_atoms]

    # --- outputs -------------------------------------------------------------
    os.makedirs(output_dir, exist_ok=True)
    with open(f"{output_dir}/lambdas.pkl", "wb") as f:
        pickle.dump(lambdas_list, f)

    # Created once here instead of once per lambda window.
    os.makedirs(f"{output_dir}/sys", exist_ok=True)

    # Reset per phase so a stale topology from the previous phase can never
    # be written out if lambdas_list happens to be empty.
    top, coor = None, None

    for lambdas in lambdas_list:
        print(lambdas)
        system_xml, top, coor = make_alchemical_system(
            ligs_et,
            [liga_top, ligb_top],
            ligs_common_atoms,
            ligs_coor,
            lambdas,
            envi_et,
            envi_top,
            envi_coor,
        )

        # One serialized System per lambda window; the file name encodes the
        # four lambda values with two decimals.
        (elec0, vdw0), (elec1, vdw1) = lambdas
        lambdas_str = f"{elec0:.2f}_{vdw0:.2f}_{elec1:.2f}_{vdw1:.2f}"
        tree = ET.ElementTree(system_xml)
        ET.indent(tree.getroot())
        tree.write(
            f"{output_dir}/sys/{lambdas_str}.xml",
            xml_declaration=True,
            method="xml",
            encoding="utf-8",
        )

    # The files below have lambda-independent paths. The original cell rewrote
    # them on every lambda iteration, so only the last iteration's data ever
    # survived; writing them once after the loop leaves the same files on disk.
    if top is not None:
        with open(f"{output_dir}/system.xyz", "w") as f:
            f.write(f"{len(coor)}\n")
            f.write(f"{liga_name}_{ligb_name}\n")
            for atom, xyz in zip(top.atoms(), coor):
                # NOTE(review): factor 10 presumably converts nm -> Angstrom;
                # confirm the units returned by make_alchemical_system.
                xyz = xyz * 10
                f.write(
                    f"{atom.element.symbol} {xyz[0]:.5f} {xyz[1]:.5f} {xyz[2]:.5f}\n"
                )

        # Unit-less positions are passed here, scaled by the same factor 10
        # as the xyz export above.
        app.PDBFile.writeFile(
            top,
            coor * 10,
            f"{output_dir}/system.pdb",
            keepIds=True,
        )

        with open(f"{output_dir}/topology.pkl", "wb") as file_handle:
            pickle.dump(top, file_handle)

        make_psf_from_topology(top, f"{output_dir}/topology.psf")
        


[(1.0, 1.0), (0.0, 0.0)]



single_target_shortest_path_length will return a dict instead of
an iterator in version 3.5


[(0.5, 1.0), (0.0, 0.0)]
[(0.0, 1.0), (0.0, 0.0)]
[(0.0, 0.9), (0.0, 0.1)]
[(0.0, 0.7), (0.0, 0.3)]
[(0.0, 0.5), (0.0, 0.5)]
[(0.0, 0.3), (0.0, 0.7)]
[(0.0, 0.1), (0.0, 0.9)]
[(0.0, 0.0), (0.0, 1.0)]
[(0.0, 0.0), (0.5, 1.0)]
[(0.0, 0.0), (1.0, 1.0)]
[(1.0, 1.0), (0.0, 0.0)]
[(0.5, 1.0), (0.0, 0.0)]
[(0.0, 1.0), (0.0, 0.0)]
[(0.0, 0.9), (0.0, 0.1)]
[(0.0, 0.7), (0.0, 0.3)]
[(0.0, 0.5), (0.0, 0.5)]
[(0.0, 0.3), (0.0, 0.7)]
[(0.0, 0.1), (0.0, 0.9)]
[(0.0, 0.0), (0.0, 1.0)]
[(0.0, 0.0), (0.5, 1.0)]
[(0.0, 0.0), (1.0, 1.0)]


In [5]:
# Run one short MD simulation per (phase, lambda window) and store its
# trajectory and a final checkpoint under ./output/{phase}_phase/traj/.
for phase, lambdas in product(['water', 'protein'], lambdas_list):
    # Same "elec0_vdw0_elec1_vdw1" tag (two decimals each) used for the
    # serialized system files.
    lambdas_str = "_".join(f"{value:.2f}" for pair in lambdas for value in pair)
    phase_dir = f"./output/{phase}_phase"
    print(f"Running simulation for {phase} phase and lambdas {lambdas_str}", flush=True)

    # Rebuild the alchemical System for this window from its XML file and
    # add a Monte Carlo barostat (1 atm, 298.15 K) to it.
    with open(f"{phase_dir}/sys/{lambdas_str}.xml", "r") as xml_file:
        system = mm.XmlSerializer.deserialize(xml_file.read())
    system.addForce(mm.MonteCarloBarostat(1 * unit.atmospheres, 298.15 * unit.kelvin))

    # Topology and starting coordinates written by the system-building cell.
    with open(f"{phase_dir}/topology.pkl", "rb") as pkl_file:
        topology = pickle.load(pkl_file)
    pdb = app.PDBFile(f"{phase_dir}/system.pdb")

    # Langevin dynamics: 298.15 K, 1/ps friction, 2 fs time step.
    integrator = mm.LangevinMiddleIntegrator(
        298.15 * unit.kelvin, 1.0 / unit.picosecond, 0.002 * unit.picoseconds
    )
    platform = mm.Platform.getPlatformByName("CUDA")
    simulation = app.Simulation(topology, system, integrator, platform)
    simulation.context.setPositions(pdb.positions)

    print("Minimizing energy", flush=True)
    simulation.minimizeEnergy()

    # 1_000 steps of equilibration (2 ps); nothing is recorded yet because the
    # reporter is attached afterwards.
    # A longer alternative was left commented out: simulation.step(100_000)
    print("Equilibrating", flush=True)
    simulation.step(1_000)

    # Record one frame every 1_000 steps during the production run.
    traj_dir = f"{phase_dir}/traj"
    os.makedirs(traj_dir, exist_ok=True)
    simulation.reporters.append(
        app.DCDReporter(f"{traj_dir}/{lambdas_str}.dcd", 1_000, append=False)
    )

    # 100_000 production steps (200 ps, i.e. 100 saved frames).
    # A longer alternative was left commented out: simulation.step(12_500_000)
    print("Running simulation", flush=True)
    start_time = time.time()
    simulation.step(100_000)

    simulation.saveCheckpoint(f"{traj_dir}/{lambdas_str}.chk")

    print(f"Simulation finished in {time.time() - start_time:.2f} seconds", flush=True)

Running simulation for water phase and lambdas 1.00_1.00_0.00_0.00
Minimizing energy
Equilibrating
Running simulation
Simulation finished in 23.06 seconds
Running simulation for water phase and lambdas 0.50_1.00_0.00_0.00
Minimizing energy
Equilibrating
Running simulation
Simulation finished in 23.15 seconds
Running simulation for water phase and lambdas 0.00_1.00_0.00_0.00
Minimizing energy
Equilibrating
Running simulation
Simulation finished in 23.54 seconds
Running simulation for water phase and lambdas 0.00_0.90_0.00_0.10
Minimizing energy
Equilibrating
Running simulation
Simulation finished in 23.21 seconds
Running simulation for water phase and lambdas 0.00_0.70_0.00_0.30
Minimizing energy
Equilibrating
Running simulation
Simulation finished in 23.65 seconds
Running simulation for water phase and lambdas 0.00_0.50_0.00_0.50
Minimizing energy
Equilibrating
Running simulation
Simulation finished in 23.29 seconds
Running simulation for water phase and lambdas 0.00_0.30_0.00_0.70
Min

In [9]:
def compute_energy(phase, lambdas, sampled_lambdas=None):
    """Re-evaluate every saved trajectory with the Hamiltonian of one lambda window.

    The System serialized for ``lambdas`` is loaded once. Each frame of each
    trajectory in ``./output/{phase}_phase/traj/`` is then pushed into an
    OpenMM context (positions and periodic box vectors) and its potential
    energy is divided by kbT at 298.15 K.

    Parameters
    ----------
    phase : str
        Phase name used to build the ``./output/{phase}_phase`` paths
        (the notebook uses "water" and "protein").
    lambdas : sequence
        ``[(elec0, vdw0), (elec1, vdw1)]`` selecting which serialized System
        is used to evaluate the energies.
    sampled_lambdas : list, optional
        Lambda windows whose trajectories are evaluated, in output row order.
        Defaults to the notebook-level ``lambdas_list``.

    Returns
    -------
    numpy.ndarray
        Reduced potentials with one row per entry of ``sampled_lambdas`` and
        one column per trajectory frame. This is a regular 2-D array only if
        every trajectory has the same number of frames.
    """
    if sampled_lambdas is None:
        # Backward-compatible default: the schedule defined earlier in the notebook.
        sampled_lambdas = lambdas_list

    (elec0, vdw0), (elec1, vdw1) = lambdas
    lambdas_str = f"{elec0:.2f}_{vdw0:.2f}_{elec1:.2f}_{vdw1:.2f}"
    phase_dir = f"./output/{phase}_phase"

    ## deserialize the system
    with open(f"{phase_dir}/sys/{lambdas_str}.xml", "r") as f:
        system = mm.XmlSerializer.deserialize(f.read())

    # NOTE: pickle.load can execute arbitrary code; only load files written by
    # this notebook.
    with open(f"{phase_dir}/topology.pkl", "rb") as f:
        openmm_topology = pickle.load(f)
    # mdtraj needs its own topology type to read the DCD files; the OpenMM
    # topology is kept for app.Simulation.
    mdtraj_topology = mdtraj.Topology.from_openmm(openmm_topology)

    # The integrator is never stepped; it is only needed to create the context.
    integrator = mm.LangevinMiddleIntegrator(
        298.15 * unit.kelvin, 1.0 / unit.picosecond, 0.002 * unit.picoseconds
    )
    kbT = 298.15 * unit.kelvin * unit.BOLTZMANN_CONSTANT_kB * unit.AVOGADRO_CONSTANT_NA

    platform = mm.Platform.getPlatformByName("CPU")
    simulation = app.Simulation(openmm_topology, system, integrator, platform)

    start_time = time.time()

    ## load trajectories
    reduced_u = []
    # Distinct loop names so the `lambdas` argument (the Hamiltonian being
    # evaluated) is not overwritten by the trajectory being read.
    for traj_lambdas in sampled_lambdas:
        (traj_elec0, traj_vdw0), (traj_elec1, traj_vdw1) = traj_lambdas
        lambdas_str_traj = (
            f"{traj_elec0:.2f}_{traj_vdw0:.2f}_{traj_elec1:.2f}_{traj_vdw1:.2f}"
        )
        traj = mdtraj.load_dcd(
            f"{phase_dir}/traj/{lambdas_str_traj}.dcd",
            top=mdtraj_topology,
            stride=1,
        )

        traj_reduced_u = []
        for xyz, unit_cell_vectors in zip(traj.xyz, traj.unitcell_vectors):
            simulation.context.setPositions(xyz)
            # The box changes from frame to frame, so it is set per frame.
            simulation.context.setPeriodicBoxVectors(*unit_cell_vectors)
            u = simulation.context.getState(getEnergy=True).getPotentialEnergy() / kbT
            traj_reduced_u.append(u)
        reduced_u.append(traj_reduced_u)

    reduced_u = np.array(reduced_u)

    print(f"Time taken: {time.time() - start_time}", flush=True)

    return reduced_u

In [11]:
# For every (phase, lambda window): evaluate all trajectories of that phase
# with that window's system and pickle the resulting array to
# ./output/{phase}_phase/reduced_potentials/{elec0}_{vdw0}_{elec1}_{vdw1}.pkl
for phase, lambdas in product(['water', 'protein'], lambdas_list):
    print(f"Computing energies for {phase} phase and lambdas {lambdas}", flush=True)
    u = compute_energy(phase, lambdas)

    reduced_dir = f"./output/{phase}_phase/reduced_potentials"
    os.makedirs(reduced_dir, exist_ok=True)

    # Four lambda values, two decimals each, joined by underscores.
    lambdas_str = "_".join(f"{value:.2f}" for pair in lambdas for value in pair)
    with open(f"{reduced_dir}/{lambdas_str}.pkl", "wb") as pkl_file:
        pickle.dump(u, pkl_file)

Computing energies for water phase and lambdas [(1.0, 1.0), (0.0, 0.0)]
Computed energies for 1.00_1.00_0.00_0.00
Computed energies for 0.50_1.00_0.00_0.00
Computed energies for 0.00_1.00_0.00_0.00
Computed energies for 0.00_0.90_0.00_0.10
Computed energies for 0.00_0.70_0.00_0.30
Computed energies for 0.00_0.50_0.00_0.50
Computed energies for 0.00_0.30_0.00_0.70
Computed energies for 0.00_0.10_0.00_0.90
Computed energies for 0.00_0.00_0.00_1.00
Computed energies for 0.00_0.00_0.50_1.00
Computed energies for 0.00_0.00_1.00_1.00
Time taken: 44.98479080200195
Computing energies for water phase and lambdas [(0.5, 1.0), (0.0, 0.0)]
Computed energies for 1.00_1.00_0.00_0.00
Computed energies for 0.50_1.00_0.00_0.00
Computed energies for 0.00_1.00_0.00_0.00
Computed energies for 0.00_0.90_0.00_0.10
Computed energies for 0.00_0.70_0.00_0.30
Computed energies for 0.00_0.50_0.00_0.50
Computed energies for 0.00_0.30_0.00_0.70
Computed energies for 0.00_0.10_0.00_0.90
Computed energies for 0.00_0

In [25]:
## read potential energy

# kbT at 298.15 K as a plain float in kcal/mol; it converts the dimensionless
# MBAR free energies below into kcal/mol.
kbT = (
    298.15 * unit.kelvin * unit.BOLTZMANN_CONSTANT_kB * unit.AVOGADRO_CONSTANT_NA
).value_in_unit(unit.kilocalorie_per_mole)


def _load_flat_reduced_potentials(phase, lambdas):
    """Load one window's pickled reduced potentials and flatten them to 1-D."""
    (elec0, vdw0), (elec1, vdw1) = lambdas
    lambdas_str = f"{elec0:.2f}_{vdw0:.2f}_{elec1:.2f}_{vdw1:.2f}"
    with open(
        f"./output/{phase}_phase/reduced_potentials/{lambdas_str}.pkl",
        "rb",
    ) as f:
        return pickle.load(f).reshape(-1)


deltaF = {}
for phase in ["water", "protein"]:
    # One row per lambda window; each row holds that window's energies for all
    # frames of all trajectories, concatenated.
    us = np.array(
        [_load_flat_reduced_potentials(phase, lambdas) for lambdas in lambdas_list]
    )

    # Every window is assigned the same number of samples: total / windows.
    n_states, n_samples_total = us.shape
    num_conf = np.full(n_states, n_samples_total // n_states)

    fastmbar = FastMBAR(us, num_conf, verbose=True, method="L-BFGS-B")

    # Entry [0, -1] of DeltaF (first vs. last window), scaled to kcal/mol.
    # NOTE(review): presumably F[last] - F[first]; confirm the sign convention
    # in the FastMBAR documentation.
    deltaF[phase] = fastmbar.DeltaF[0, -1].item() * kbT

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =           10     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  3.80788D+04    |proj g|=  1.58422D-01

At iterate    1    f=  3.80776D+04    |proj g|=  7.61257D-02

At iterate    2    f=  3.80755D+04    |proj g|=  7.88632D-02

At iterate    3    f=  3.80710D+04    |proj g|=  9.07229D-02

At iterate    4    f=  3.80640D+04    |proj g|=  9.09091D-02

At iterate    5    f=  3.80638D+04    |proj g|=  1.02706D-01

At iterate    6    f=  3.80636D+04    |proj g|=  1.30909D-01

At iterate    7    f=  3.80631D+04    |proj g|=  9.09091D-02

At iterate    8    f=  3.80618D+04    |proj g|=  1.36179D-01

At iterate    9    f=  3.80609D+04    |proj g|=  1.19421D-01

At iterate   10    f=  3.80605D+04    |proj g|=  9.56204D-02

At iterate   11    f=  3.80603D+04    |proj g|=  8.84397D-02

At iterate   12    f=  3.80600D+04    |proj g|=  8.58838D-02

At iterate   13    f=  3.8

 This problem is unconstrained.
 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =           10     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  3.10728D+05    |proj g|=  1.49601D-01

At iterate    1    f=  3.10728D+05    |proj g|=  1.33000D-01

At iterate    2    f=  3.10726D+05    |proj g|=  8.44995D-02

At iterate    3    f=  3.10723D+05    |proj g|=  8.96321D-02

At iterate    4    f=  3.10713D+05    |proj g|=  1.20753D-01

At iterate    5    f=  3.10713D+05    |proj g|=  1.49063D-01

At iterate    6    f=  3.10712D+05    |proj g|=  9.09091D-02

At iterate    7    f=  3.10711D+05    |proj g|=  1.67054D-01

At iterate    8    f=  3.10711D+05    |proj g|=  1.43710D-01

At iterate    9    f=  3.10711D+05    |proj g|=  9.09091D-02

At iterate   10    f=  3.10709D+05    |proj g|=  9.07550D-02

At iterate   11    f=  3.10709D+05    |proj g|=  9.04509D-02

At iterate   12    f=  3.10708D+05    |proj g|=  2.26355D-01

At iterate   13    f=  3.1

In [28]:
# Difference between the protein-phase and water-phase free energies stored in
# deltaF (both already in kcal/mol).
dF_protein, dF_water = deltaF['protein'], deltaF['water']
ddF = dF_protein - dF_water
print(f"ddG: {ddF:.2f} kcal/mol")

ddG: 0.98 kcal/mol


In [30]:
# Display the current value of kbT; the MBAR analysis cell leaves it as a
# plain float in kcal/mol (298.15 K).
kbT

0.592484949713764