# Core Imports

In [1]:
# Generic Imports
import re
from functools import partial, cached_property
from collections import defaultdict
from itertools import combinations, chain
from ast import literal_eval

# Numeric imports
import pandas as pd
import numpy as np

# File I/O
from pathlib import Path
import csv, json, openpyxl

# Logging
from tqdm import tqdm as tqdm_text
from tqdm.notebook import tqdm as tqdm_notebook

# Typing and Subclassing
from typing import Any, Callable, ClassVar, Generator, Iterable, Optional, Union
from dataclasses import dataclass, field
from abc import ABC, abstractmethod, abstractproperty

# Cheminformatics
from rdkit import Chem
from rdkit.Chem import rdChemReactions

from openff.toolkit import ForceField
from openff.toolkit.topology import Topology, Molecule

from openforcefields.openforcefields import get_forcefield_dirs_paths
OPENFF_DIR = Path(get_forcefield_dirs_paths()[0])

# File and chemistry type definitions

In [2]:
topo_dir = Path('Topologies')
topo_dir.mkdir(exist_ok=True)

# lammps_dir = Path('LAMMPS')
lammps_dir = Path('LAMMPS')
lammps_dir.mkdir(exist_ok=True)

omm_dir = Path('OpenMM')
omm_dir.mkdir(exist_ok=True)

In [3]:
# defining reacting functional groups
reaction_pairs = {
    'NIPU' : ('cyclocarbonate', 'amine'),
    'urethane' : ('isocyanate', 'hydroxyl')
}
# chemistries = ('urethane', 'NIPU')
chemistries = [i for i in reaction_pairs.keys()]

# Creating OpenMM and LAMMPS systems

## Generating Interchange dict

In [4]:
# specify forcefield
# ff_name = 'openff-2.0.0.offxml'
ff_name = 'openff_unconstrained-2.0.0.offxml'
ff_path = OPENFF_DIR / ff_name
forcefield = ForceField(ff_path)

# Interchange generation
success_ics = defaultdict(defaultdict)
failed_ics  = defaultdict(list)

for chemistry in chemistries:
    chem_dir = topo_dir / chemistry
    
    progress = tqdm_notebook([path for path in chem_dir.iterdir()]) # unpack into list for progress bar
    for sdf_path in progress:
        mol_name = sdf_path.stem
        progress.set_postfix_str(f'{chemistry} : {mol_name}')

        try:
            offmol = Molecule.from_file(sdf_path, allow_undefined_stereo=True)
            offtop = Topology.from_molecules(offmol) 
            ic = forcefield.create_interchange(offtop, charge_from_molecules=[offmol])
            success_ics[chemistry][mol_name] = ic
        except Exception as e:
            print(e)
            failed_ics[e.__class__.__name__].append(sdf_path)

for err_name, err_list in failed_ics.items():
    for sdf_path in err_list:
        sdf_path.unlink() # delete dud files

  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/61 [00:00<?, ?it/s]

Unable to read molecule from file: Topologies/urethane/urethane_38.sdf
BondHandler was not able to find parameters for the following valence terms:

- Topology indices (476, 2064): names and elements ( H), ( H), 
- Topology indices (504, 2144): names and elements ( H), ( H), 
- Topology indices (620, 2456): names and elements ( H), ( H), 
- Topology indices (525, 2185): names and elements ( H), ( H), 
- Topology indices (536, 2215): names and elements ( H), ( H), 
- Topology indices (184, 1236): names and elements ( H), ( H), 
- Topology indices (616, 2452): names and elements ( H), ( H), 
- Topology indices (647, 2522): names and elements ( H), ( H), 
- Topology indices (964, 3425): names and elements ( H), ( H), 
- Topology indices (806, 2988): names and elements ( H), ( H), 
- Topology indices (157, 1163): names and elements ( H), ( H), 
- Topology indices (641, 2515): names and elements ( H), ( H), 
- Topology indices (727, 2759): names and elements ( H), ( H), 
- Topology indices 

Problematic atoms are:
Atom atomic num: 7, name: , idx: 38, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 35, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 39, aromatic: True, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 60, aromatic: False, chiral: False
Atom atomic num: 7, name: , idx: 105, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 102, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 106, aromatic: True, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 127, aromatic: False, chiral: False
Atom atomic num: 7, name: , idx: 172, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 169, aromatic: False, chiral: False
bond order: 1, chiral: False to atom

Unable to read molecule from file: Topologies/urethane/urethane_22.sdf
Unable to read molecule from file: Topologies/urethane/urethane_45.sdf
BondHandler was not able to find parameters for the following valence terms:

- Topology indices (260, 1454): names and elements ( H), ( H), 
- Topology indices (484, 2077): names and elements ( H), ( H), 
- Topology indices (129, 1074): names and elements ( H), ( H), 
- Topology indices (914, 3284): names and elements ( H), ( H), 
- Topology indices (297, 1543): names and elements ( H), ( H), 
- Topology indices (639, 2500): names and elements ( H), ( H), 
- Topology indices (537, 2214): names and elements ( H), ( H), 
- Topology indices (431, 1912): names and elements ( H), ( H), 
- Topology indices (517, 2156): names and elements ( H), ( H), 
- Topology indices (158, 1162): names and elements ( H), ( H), 
- Topology indices (910, 3271): names and elements ( H), ( H), 
- Topology indices (590, 2378): names and elements ( H), ( H), 
- Topology i

Problematic atoms are:
Atom atomic num: 7, name: , idx: 23, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 22, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 24, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 31, aromatic: True, chiral: False
Atom atomic num: 7, name: , idx: 69, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 68, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 70, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 77, aromatic: True, chiral: False
Atom atomic num: 7, name: , idx: 115, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 114, aromatic: False, chiral: False
bond order: 1, chiral: False to atom ato

Unable to read molecule from file: Topologies/urethane/urethane_27.sdf
Unable to read molecule from file: Topologies/urethane/urethane_0.sdf


Problematic atoms are:
Atom atomic num: 7, name: , idx: 34, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 33, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 35, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 42, aromatic: True, chiral: False
Atom atomic num: 7, name: , idx: 91, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 90, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 92, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 99, aromatic: True, chiral: False
Atom atomic num: 7, name: , idx: 148, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 147, aromatic: False, chiral: False
bond order: 1, chiral: False to atom ato

Unable to read molecule from file: Topologies/urethane/urethane_30.sdf
Unable to read molecule from file: Topologies/urethane/urethane_11.sdf


## Defining utility functions

In [5]:
from openmm import XmlSerializer
from openmm import System, Context, State
from openmm import Integrator, Force
from openmm.app import Simulation
from openmm.unit import nanometer

from openff.interchange import Interchange
from openff.units import unit as offunit


DEFAULT_STATE_PARAMS : dict[str, bool] = {
    'getPositions'  : True,
    'getVelocities' : True,
    'getForces'     : True,
    'getEnergy'     : True,
    'getParameters' : True,
    'getParameterDerivatives' : False,
    'getIntegratorParameters' : False
}


def serialize_state_and_sys(sim : Simulation, out_dir : Path, out_name : str, state_params : dict[str, bool]=DEFAULT_STATE_PARAMS) -> None:
    '''For saving State and System info of a Simulation to disc'''
    sim_dict = {
        'system' : sim.system,
        'state' : sim.context.getState(**state_params)
    }
    
    for affix, save_data in sim_dict.items():
        save_path = out_dir / f'{out_name}_{affix}.xml'
        save_path.touch()

        with save_path.open('w') as file:
            file.write( XmlSerializer.serialize(save_data) )

def apply_state_to_context(state : State, context : Context) -> None:
    '''For applying saved State data to an existing OpenMM Simulation'''
    context.setPeriodicBoxVectors(*state.getPeriodicBoxVectors())
    context.setPositions(state.getPositions())
    context.setVelocities(state.getVelocities())
    context.setTime(state.getTime())

    context.reinitialize(preserveState=True)    

def load_openmm_system(sys_path : Path, extra_forces : Optional[Union[Force, Iterable[Force]]]=None, sep_force_grps : bool=True, remove_constrs : bool=False) -> System:
    '''Load and configure a serialized OpenMM system, with optional additional parameters'''
    assert(sys_path.suffix == '.xml')
    with sys_path.open('r') as file:
        ommsys = XmlSerializer.deserialize(file.read())

    if extra_forces: # deliberately sparse to handle both Nonetype and empty list
        for force in extra_forces: 
            ommsys.addForce(force)

    if sep_force_grps:
        for i, force in enumerate(ommsys.getForces()):
            force.setForceGroup(i)

    if remove_constrs:
        for i in range(ommsys.getNumConstraints())[::-1]: # need to remove in reverse order to avoid having prior constraints "fall back down"
            ommsys.removeConstraint(i)

    return ommsys

def create_simulation2(interchange : Interchange, integrator : Integrator, forces : Optional[Iterable[Force]]=None,
                        sep_force_grps : bool=True, remove_constrs : bool=True, combine_nonbonded_forces : bool=True) -> Simulation:
    '''Specifies configuration for an OpenMM Simulation - Interchange load alows many routes for creation'''
    openmm_sys = interchange.to_openmm(combine_nonbonded_forces=combine_nonbonded_forces) 
    openmm_top = interchange.topology.to_openmm()
    openmm_pos = interchange.positions.m_as(offunit.nanometer) * nanometer

    if forces: # deliberately sparse to handle both Nonetype and empty list
        for force in forces: 
            openmm_sys.addForce(force)

    if sep_force_grps:
        for i, force in enumerate(openmm_sys.getForces()):
            force.setForceGroup(i)

    if remove_constrs:
        for i in range(openmm_sys.getNumConstraints())[::-1]: # need to remove in reverse order to avoid having prior constraints "fall back down"
            openmm_sys.removeConstraint(i)

    simulation = Simulation(openmm_top, openmm_sys, integrator)
    simulation.context.setPositions(openmm_pos)

    return simulation

## Manually create OpenMM sims from Interchange

In [6]:
# specifying simulation and ensemble parameters
from openff.interchange.components.mdconfig import MDConfig

from openmm.app import Simulation
from openmm import NonbondedForce, CustomNonbondedForce
from openmm import MonteCarloBarostat, LangevinMiddleIntegrator

from openmm.unit import atmosphere, kelvin, nanometer
from openmm.unit import femtosecond, picosecond
from openff.units import unit as offunit

# Box sizes
BOX_VECS = np.eye(3) * 10 * nanometer

# Long-range parameters
# CUTOFF = 2.0 * nanometer
CUTOFF = 0.9 * nanometer
# CUTOFF_METHOD = NonbondedForce.NoCutoff
# CUTOFF_METHOD = NonbondedForce.CutoffNonPeriodic
CUTOFF_METHOD = NonbondedForce.CutoffPeriodic

DISPERSION = True
SWITCHING  = False

# Thermodynamic/integrator parameters
T = 300*kelvin
P = 1*atmosphere

timestep = 2*femtosecond
friction = 1*picosecond**-1

# ======================================

force_names = (
    'vdW',
    'Electrostatic',
    'vdW 1-4',
    'Electrostatic 1-4',
    'Dihedral',
    'Angle',
    'Bond'
)

# looping over all urethanes
omm_sims = defaultdict(defaultdict)
for chemistry, ic_dict in success_ics.items():
    lmp_chem_dir = lammps_dir / chemistry
    lmp_chem_dir.mkdir(exist_ok=True)
    
    omm_chem_dir = omm_dir/ chemistry
    omm_chem_dir.mkdir(exist_ok=True)

    progress = tqdm_notebook(ic_dict.items())
    for mol_name, interchange in progress:
        progress.set_postfix_str(f'{chemistry} : {mol_name}')
        
    # creating OpenMM Simulation
        progress.set_description('Building OpenMM Simulation')
        # specifying thermo/baro to determine ensemble
        integrator = LangevinMiddleIntegrator(T, friction, timestep)
        # extra_forces = [MonteCarloBarostat(P, T, baro_freq)]
        extra_forces = None

        # loading OpenMM sim components from Interchange
        interchange.box = BOX_VECS
        omm_top = interchange.topology.to_openmm()
        omm_sys = interchange.to_openmm(combine_nonbonded_forces=False)
        omm_pos = interchange.positions.m_as(offunit.nanometer)

        ## Setting box vectors for periodic forces
        omm_top.setPeriodicBoxVectors(BOX_VECS)
        omm_sys.setDefaultPeriodicBoxVectors(*BOX_VECS)

        # configuring bound Force objects
        if extra_forces:
            for force in extra_forces:
                omm_sys.addForce(force)

        ## number all forces into separate force groups for separability
        for i, force in enumerate(omm_sys.getForces()):
            force.setForceGroup(i)

        ## Add labels to default forces
        for force, name in zip(omm_sys.getForces(), force_names):
            force.setName(name)

        ## reconfiguring non-bonded forces
        ### Custom nonbonded
        # nonbond_custom = omm_sys.getForce(0)
        # assert(isinstance(nonbond_custom, CustomNonbondedForce))

        # nonbond_custom.setCutoffDistance(CUTOFF)
        # nonbond_custom.setUseSwitchingFunction(SWITCHING)
        # nonbond_custom.setNonbondedMethod(CUTOFF_METHOD)
        # nonbond_custom.setUseLongRangeCorrection(DISPERSION)
 
        # ### Default nonbonded
        # nonbond = omm_sys.getForce(1)
        # assert(isinstance(nonbond, NonbondedForce))

        # nonbond.setCutoffDistance(CUTOFF)
        # nonbond.setNonbondedMethod(CUTOFF_METHOD)
        # nonbond.setUseSwitchingFunction(SWITCHING)
        # nonbond.setUseDispersionCorrection(DISPERSION)

        # create OpenMM Simulation
        sim = Simulation(omm_top, omm_sys, integrator)
        sim.context.setPositions(omm_pos)
        omm_sims[chemistry][mol_name] = sim

    # saving OpenMM files
        progress.set_description('Generating OpenMM files')
        omm_mol_dir = omm_chem_dir / mol_name
        omm_mol_dir.mkdir(exist_ok=True)

        sdf_out_path = omm_mol_dir / f'{mol_name}_topology.sdf'
        sdf_out_path.touch()

        for mol in interchange.topology.molecules: # use OpenFF format for saving Molecules (much more convenient to work with)
            mol.to_file(str(sdf_out_path), file_format=sdf_out_path.suffix[1:])
        serialize_state_and_sys(sim, out_dir=omm_mol_dir, out_name=mol_name)

    # saving LAMMPS files
        progress.set_description('Generating LAMMPS files')
        lmp_mol_dir = lmp_chem_dir / mol_name
        lmp_mol_dir.mkdir(exist_ok=True)

        lmp_path = lmp_mol_dir / f'{mol_name}.lammps'
        lmp_in_path = lmp_mol_dir / f'{mol_name}.in'

        ### creating .lmp file
        lmp = interchange.to_lammps(lmp_path)
        mdc = MDConfig.from_interchange(interchange)
        mdc.write_lammps_input(lmp_in_path)

        ### creating .in file, replacing input file with .lmp from above
        with lmp_in_path.open('r') as in_file:
            in_file_block = in_file.read()

        in_file_block = in_file_block.replace('out.lmp', str(lmp_path))

        with lmp_in_path.open('w') as in_file:
            in_file.write(in_file_block)

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

# Evaluating LAMMPS energies

In [7]:
ENERGY_EVAL_INP = Path('in.urethane') # path to 

E_MAP = {
    'ebond'  : 'Bond',
    'eangle' : 'Angle',
    'edihed' : 'Proper Torsion',
    'eimp'   : 'Improper Torsion',
    'ecoul'  : 'Coulomb Short',
    'elong'  : 'Coulomb Long',
    'evdwl'  : 'vdW',
    'etail'  : 'Dispersion',
    'epair'  : 'Nonbonded',
    'pe'     : 'Potential',
    'ke'     : 'Kinetic',
    'etotal' : 'Total'
}

CELL_KW = ( # keywords for probing unit cell sizes and angles
    'cella',
    'cellb',
    'cellc',
    'cellalpha',
    'cellbeta',
    'cellgamma',
)

In [8]:
def get_calc_lmp_energies(lmp_block : str) -> tuple[str, list[str]]:
    '''Read which thermodynamic energy contributions will be calculated from a LAMMPS input file block'''
    ENERGY_CONTRIB_REGEX = re.compile(r'^thermo_style\s(?P<thermo_style>\b\w*?\b)\s(?P<calc_energies>.*$)')

    for line in lmp_block.split('\n'):
        if (match := re.search(ENERGY_CONTRIB_REGEX, line)):
            groups = match.groupdict()
            return groups['thermo_style'], groups['calc_energies'].split(' ')
    else:
        raise ValueError('No thermo_style energy commands found in input file')

In [9]:
import lammps
from IPython.display import clear_output


records = {}
cell_sizes = {}
for subdir in lammps_dir.iterdir():
    if subdir.is_dir():
        chemistry = subdir.name
        for mol_dir in subdir.iterdir():
            mol_name = mol_dir.stem
            lammps_file = mol_dir / f'{mol_name}.lammps'
            lammps_in   = mol_dir / f'{mol_name}.in'
            
            # craete LAMMPS wrapper and execute input calc
            with lammps.lammps() as lmp: # need to create new lammps() object instance for each run
                # lmp.commands_string( ENERGY_EVAL_STR.replace('$INP_FILE', str(lammps_file)) )
                # lmp.file(str(ENERGY_EVAL_INP))
                lmp.file(str(lammps_in)) # read input file and calculate energies

                ## Getting energies
                with lammps_in.open('r') as in_file:
                    thermo_style, calc_energies = get_calc_lmp_energies(in_file.read())

                energies = {
                    E_MAP[contrib] : lmp.get_thermo(contrib)
                        for contrib in calc_energies
                }

                ## Getting unit cell dimensions
                cell_params = {
                    cp : lmp.get_thermo(cp)
                        for cp in CELL_KW
                }

            # reformatting energies
            energies = {
                f'{contrib} (kcal/mol)' : energy # add units to labels
                    for contrib, energy in energies.items()
            }
            
            # save records for Pandas DataFrames
            records[(chemistry, mol_name)] = energies
            cell_sizes[(chemistry, mol_name)] = cell_params
            clear_output() # wipe lengthy LAMMPS printouts

In [10]:
lmp_table = pd.DataFrame.from_dict(records, 'index')
lmp_table.index.names  = ['Chemistry', 'Molecule'] # ensure index labels are labelled consistently
lmp_table.sort_values('Molecule', inplace=True)
lmp_table.to_csv(lammps_dir/f'{lammps_dir.name}_PEs.csv')

# Evaluating OpenMM energies

## Loading simulations from file

In [11]:
skip = True

# parameters
sep_force_grps : bool = True
remove_constrs : bool = False

# iterate over serialized directory tree and load
if not skip:
    omm_sims = defaultdict(defaultdict)
    for subdir in omm_dir.iterdir():
        if subdir.is_dir():
            chemistry = subdir.name
            for mol_dir in subdir.iterdir():
                mol_name = mol_dir.name

                state_file = mol_dir / f'{mol_name}_state.xml'
                sys_file   = mol_dir / f'{mol_name}_system.xml'
                top_file   = mol_dir / f'{mol_name}_topology.sdf'

                offmol = Molecule.from_file(top_file)
                offtop = Topology.from_molecules(offmol)
                
                integrator = LangevinMiddleIntegrator(T, friction, timestep)
                # extra_forces = [MonteCarloBarostat(P, T, baro_freq)]
                extra_forces = None

                # load and configure System
                omm_top = offtop.to_openmm()
                omm_sys = load_openmm_system(
                    sys_file,
                    extra_forces=extra_forces,
                    sep_force_grps=sep_force_grps,
                    remove_constrs=remove_constrs
                )

                # putting it all together into a Simulation
                sim = Simulation(
                    topology=omm_top,
                    system=omm_sys,
                    integrator=integrator,
                    state=state_file
                )
                omm_sims[chemistry][mol_name] = sim

## Evaluating starting structure energies

In [12]:
from openmm.unit import kilojoule_per_mole, kilocalorie_per_mole

NULL_ENERGY = 0.0*kilojoule_per_mole
PRECISION : int = 4

data_dicts = []
for chemistry, mol_dict in omm_sims.items():
    progress = tqdm_notebook(mol_dict.items())
    for mol_name, sim in progress:
        progress.set_postfix_str(f'{chemistry} : {mol_name}')
        
        # extract total and component energies from OpenMM force groups
        data_dict = {
            'Chemistry' : chemistry,
            'Molecule'  : mol_name
        }
        omm_energies = {}

        ## Total Potential
        overall_state = sim.context.getState(getEnergy=True) # get total potential energy
        PE = overall_state.getPotentialEnergy()
        omm_energies['Potential'] = PE

        ## Total Kinetic (to verify no integration is being done)
        KE = overall_state.getKineticEnergy()
        omm_energies['Kinetic'] = KE
        assert(KE == NULL_ENERGY)

        ## Individual force contributions
        for i, force in enumerate(sim.system.getForces()):
            state = sim.context.getState(getEnergy=True, groups={i})
            omm_energies[force.getName()] = state.getPotentialEnergy()

        # reformat to desired units and precision
        omm_energies_kcal = {}
        for contrib_name, energy_kj in omm_energies.items():
            energy_kcal = energy_kj.in_units_of(kilocalorie_per_mole)
            omm_energies_kcal[f'{contrib_name} ({energy_kcal.unit.get_symbol()})'] = round(energy_kcal._value, PRECISION)

        # compile data
        data_dict = {**data_dict, **omm_energies_kcal}
        data_dicts.append(data_dict)

omm_table = pd.DataFrame.from_records(data_dicts)
omm_table.sort_values('Molecule', inplace=True)
omm_table.set_index(['Chemistry', 'Molecule'], inplace=True)
omm_table.to_csv(omm_dir / f'{omm_dir.name}_PEs.csv')

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

# Comparing energies

## Loading energy tables and comparing contributions

In [13]:
pd.options.display.float_format = '{:.4f}'.format # disable scientific notation

@dataclass
class TableFormats:
    table_key : str
    sum_terms : dict[str, list[str]]
    del_terms : list[str]

omm_formats = TableFormats(
    table_key = omm_dir.stem,
    sum_terms = {
        'vdW (kcal/mol)' : ['vdW (kcal/mol)', 'vdW 1-4 (kcal/mol)'],
        'Coulomb (kcal/mol)' : ['Electrostatic (kcal/mol)', 'Electrostatic 1-4 (kcal/mol)']
    },
    del_terms = ['Kinetic (kcal/mol)']
)

lmp_formats = TableFormats(
    table_key = lammps_dir.stem,
    sum_terms = {
        'vdW (kcal/mol)' : ['vdW (kcal/mol)', 'Dispersion (kcal/mol)'],
        'Dihedral (kcal/mol)' : ['Proper Torsion (kcal/mol)', 'Improper Torsion (kcal/mol)'],
        'Coulomb (kcal/mol)' : ['Coulomb Short (kcal/mol)', 'Coulomb Long (kcal/mol)']
    },
    del_terms = ['Nonbonded (kcal/mol)']
)

# apply reformatting to respective tables
for fmt in (omm_formats, lmp_formats):
    table_in_path  = Path(fmt.table_key) / f'{fmt.table_key}_PEs.csv'
    table_out_path = Path(fmt.table_key) / f'{fmt.table_key}_PEs_processed.csv'
    table = pd.read_csv(table_in_path, index_col=(0, 1)).sort_index(axis=1)

    # combine selected terms
    for combined_contrib, contribs in fmt.sum_terms.items():
        new_term = sum(
            table[contrib]
                for contrib in contribs
        ) # merge contributions into a single new named term
        table.drop(columns=contribs, inplace=True) # clear contributions
        table[combined_contrib] = new_term # done after drop to ensure name clashes don;t result in extra deletion
    
    # delete redundant terms
    for del_contrib in fmt.del_terms:
        table.drop(columns=[del_contrib], inplace=True) # clear contributions

    globals()[f'{fmt.table_key.lower()}_table'] = table
    table.to_csv(table_out_path)

In [14]:
openmm_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Angle (kcal/mol),Bond (kcal/mol),Dihedral (kcal/mol),Potential (kcal/mol),vdW (kcal/mol),Coulomb (kcal/mol)
Chemistry,Molecule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
NIPU,NIPU_10,1464.979,1177.0361,315.7242,43627.066,41702.7698,-1033.4404
NIPU,NIPU_11,940.1827,1039.7645,276.4011,3418594.7646,3416824.3483,-485.4893
NIPU,NIPU_12,936.3028,1033.4167,300.9877,2129.6208,698.2564,-839.3429
NIPU,NIPU_2,1725.102,1050.6371,390.4701,64613163.8853,64611236.6278,-1235.0015
NIPU,NIPU_8,994.7223,939.9698,312.5088,2994.6808,1173.0706,-425.5897
NIPU,NIPU_9,953.5693,1030.6256,281.7358,576201.7948,574282.4734,-346.5743
urethane,urethane_1,725.4003,841.3459,156.821,3775.9851,3169.3338,-1116.916
urethane,urethane_10,1070.3106,633.0817,465.9121,1843.4328,501.4488,-827.3203
urethane,urethane_12,741.1011,936.4742,210.015,17941.8348,17671.0111,-1616.7657
urethane,urethane_13,575.8502,717.7118,112.4079,1784.6714,1117.694,-738.9924


In [15]:
lammps_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Angle (kcal/mol),Bond (kcal/mol),Potential (kcal/mol),vdW (kcal/mol),Dihedral (kcal/mol),Coulomb (kcal/mol)
Chemistry,Molecule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
NIPU,NIPU_10,1464.9796,1177.0406,43620.7571,41700.6257,315.7242,-1038.0411
NIPU,NIPU_11,940.1828,1039.7616,3418654.3262,3416884.9484,276.4013,-487.1453
NIPU,NIPU_12,936.3032,1033.4171,2125.6462,697.5299,300.9878,-842.7875
NIPU,NIPU_2,1725.1022,1050.6351,64615673.9763,64613747.1921,390.4702,-1240.092
NIPU,NIPU_8,994.7213,939.9715,2992.1983,1172.4854,312.5088,-427.6951
NIPU,NIPU_9,953.5694,1030.6327,576177.7428,574263.9738,281.7357,-352.3755
urethane,urethane_0,4417.842,745.0806,3378.4396,1753.1228,465.7065,-4004.2918
urethane,urethane_1,725.4,841.3442,3773.1845,3168.7484,156.8209,-1119.2968
urethane,urethane_10,1070.3101,633.0824,1843.6074,500.7948,465.912,-826.6392
urethane,urethane_12,741.1011,936.4751,17936.8852,17670.2312,210.015,-1621.1304


In [16]:
diff = openmm_table - lammps_table
diff

Unnamed: 0_level_0,Unnamed: 1_level_0,Angle (kcal/mol),Bond (kcal/mol),Coulomb (kcal/mol),Dihedral (kcal/mol),Potential (kcal/mol),vdW (kcal/mol)
Chemistry,Molecule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
NIPU,NIPU_10,-0.0006,-0.0045,4.6007,0.0,6.3089,2.1441
NIPU,NIPU_11,-0.0001,0.0029,1.656,-0.0002,-59.5616,-60.6001
NIPU,NIPU_12,-0.0004,-0.0004,3.4446,-0.0001,3.9746,0.7265
NIPU,NIPU_2,-0.0002,0.002,5.0905,-0.0001,-2510.091,-2510.5643
NIPU,NIPU_8,0.001,-0.0017,2.1054,0.0,2.4825,0.5852
NIPU,NIPU_9,-0.0001,-0.0071,5.8012,0.0001,24.052,18.4996
urethane,urethane_0,,,,,,
urethane,urethane_1,0.0003,0.0017,2.3808,0.0001,2.8006,0.5854
urethane,urethane_10,0.0005,-0.0007,-0.6811,0.0001,-0.1746,0.654
urethane,urethane_12,0.0,-0.0009,4.3647,0.0,4.9496,0.7799


In [17]:
common_cols = ['Angle (kcal/mol)', 'Bond (kcal/mol)']# 'Torsion (kcal/mol)']

omm_redux = omm_table.drop(columns=common_cols)
lmp_redux = lmp_table.drop(columns=common_cols)

In [18]:
omm_table[common_cols] - lmp_table[common_cols]

Unnamed: 0_level_0,Unnamed: 1_level_0,Angle (kcal/mol),Bond (kcal/mol)
Chemistry,Molecule,Unnamed: 2_level_1,Unnamed: 3_level_1
NIPU,NIPU_10,-0.0006,-0.0045
NIPU,NIPU_11,-0.0001,0.0029
NIPU,NIPU_12,-0.0004,-0.0004
NIPU,NIPU_2,-0.0002,0.002
NIPU,NIPU_8,0.001,-0.0017
NIPU,NIPU_9,-0.0001,-0.0071
urethane,urethane_0,,
urethane,urethane_1,0.0003,0.0017
urethane,urethane_10,0.0005,-0.0007
urethane,urethane_12,0.0,-0.0009


In [19]:
omm_redux

Unnamed: 0_level_0,Unnamed: 1_level_0,Potential (kcal/mol),Kinetic (kcal/mol),vdW (kcal/mol),Electrostatic (kcal/mol),vdW 1-4 (kcal/mol),Electrostatic 1-4 (kcal/mol),Dihedral (kcal/mol)
Chemistry,Molecule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
NIPU,NIPU_10,43627.066,0.0,41182.9644,-1183.6075,519.8054,150.1671,315.7242
NIPU,NIPU_11,3418594.7646,0.0,3416642.236,-1075.3657,182.1123,589.8764,276.4011
NIPU,NIPU_12,2129.6208,0.0,267.975,-1017.9113,430.2814,178.5684,300.9877
NIPU,NIPU_2,64613163.8853,0.0,64610767.8459,-610.544,468.7819,-624.4575,390.4701
NIPU,NIPU_8,2994.6808,0.0,729.5067,-212.1175,443.5639,-213.4722,312.5088
NIPU,NIPU_9,576201.7948,0.0,573791.4602,-618.6374,491.0132,272.0631,281.7358
urethane,urethane_1,3775.9851,0.0,2200.4732,785.794,968.8606,-1902.71,156.821
urethane,urethane_10,1843.4328,0.0,236.2629,299.7661,265.1859,-1127.0864,465.9121
urethane,urethane_12,17941.8348,0.0,16953.681,276.3731,717.3301,-1893.1388,210.015
urethane,urethane_13,1784.6714,0.0,625.7336,-809.0914,491.9604,70.099,112.4079


## Evaluating energies with drivers

In [20]:
from openff.interchange.drivers.openmm import get_openmm_energies, _get_openmm_energies
from openff.interchange.drivers.lammps import get_lammps_energies, _get_lammps_energies,  _find_lammps_executable
from openff.units.openmm import to_openmm as openff_units_to_openmm

In [21]:
{
    contrib : openff_units_to_openmm(value).in_units_of(kilocalorie_per_mole)
        for contrib, value in get_openmm_energies(interchange, detailed=True, combine_nonbonded_forces=False).energies.items()
}

{'vdW': Quantity(value=23988746576.956284, unit=kilocalorie/mole),
 'Electrostatics': Quantity(value=110.10667610613562, unit=kilocalorie/mole),
 'vdW 1-4': Quantity(value=473.62292642905527, unit=kilocalorie/mole),
 'Electrostatics 1-4': Quantity(value=-686.2929735580487, unit=kilocalorie/mole),
 'Torsion': Quantity(value=187.7549812559763, unit=kilocalorie/mole),
 'Angle': Quantity(value=977.0855728882693, unit=kilocalorie/mole),
 'Bond': Quantity(value=628.290289497926, unit=kilocalorie/mole)}

In [22]:
get_lammps_energies(interchange).energies

{'Bond': 628.29029 <Unit('kilocalorie_per_mole')>,
 'Angle': 977.08558 <Unit('kilocalorie_per_mole')>,
 'Torsion': 187.75496789099998 <Unit('kilocalorie_per_mole')>,
 'vdW': 23988754999.810043 <Unit('kilocalorie_per_mole')>,
 'Electrostatics': -570.5814300000001 <Unit('kilocalorie_per_mole')>}

## Comparing ParmEd energy decomposition to native OpenMM force-group-based decomposition

In [23]:
import parmed
from openmm.openmm import Force

NULL_ENERGY = 0.0*kilojoule_per_mole

sim = omm_sims['urethane']['urethane_41']
# assign and initialize unique force groups for simulation
for i, force in enumerate(sim.system.getForces()):
    force.setForceGroup(i)
    # print(force.getName(), force.getForceGroup())
sim.context.reinitialize(preserveState=True) # need to reinitialize to get force labelling changes to "stick"

# energies from OpenMM force groups
print('\nOpenMM:')
print('='*30)
omm_energies = {}

## extract total energies for state
overall_state = sim.context.getState(getEnergy=True) # get total potential energy
PE = overall_state.getPotentialEnergy()
omm_energies['Total Potential Energy'] = PE

KE = overall_state.getKineticEnergy()
assert(KE == NULL_ENERGY)

for i, force in enumerate(sim.system.getForces()):
    state = sim.context.getState(getEnergy=True, groups={i})
    force_name = force.getName().removesuffix('Force')
    pe = state.getPotentialEnergy()

    omm_energies[force_name] = pe
    print(f'{force_name} : {pe}')

## converting name to match with ParmEd for comparison
namemap = {
    'Nonbonded' : 'bond',
    'PeriodicTorsion' : 'angle',
    'HarmonicAngle' : 'dihedral',
    'HarmonicBond' : 'urey_bradley',
    'Total Potential Energy' : 'total'
}
compat_omm_energies = {
    namemap[contrib] : energy
        for contrib, energy in omm_energies.items()
}

total = sum(omm_energies.values(), start=NULL_ENERGY) # need "seed" to have Quantity datatype to sum
print(f'{general.GREEK_UPPER["delta"]}E_contrib: ', PE - total)

# ParmEd energy decomposition
print('\nParmEd:')
print('='*30)
parm_energies = {}
parm_struct = parmed.openmm.load_topology(sim.topology, sim.system)
for contrib, energy_val in parmed.openmm.energy_decomposition(parm_struct, sim.context).items():
    parm_energies[contrib] = energy = energy_val*kilocalorie_per_mole # assign proper units
    print(contrib, energy.in_units_of(kilojoule_per_mole))


OpenMM:
vdW : 616436.7693361759 kJ/mol
Electrostatic : 681.4612459126311 kJ/mol
vdW 1-4 : 3828.738037109375 kJ/mol
Electrostatic 1-4 : -2975.945068359375 kJ/mol
Dihedral : 1372.698486328125 kJ/mol
Angle : 7416.69921875 kJ/mol
Bond : 2840.6552734375 kJ/mol


KeyError: 'vdW'