# Core Imports

In [1]:
# Custom Imports
from polysaccharide import general
from polysaccharide.general import optional_in_place
from polysaccharide.extratypes import ResidueSmarts

from polysaccharide.molutils import reactions
from polysaccharide.molutils.rdmol.rdtypes import *
from polysaccharide.molutils.rdmol import rdcompare, rdconvert, rdkdraw, rdcompare, rdprops, rdbond, rdlabels

from polysaccharide.polymer import monomer as monoutils
from polysaccharide.polymer.monomer import MonomerInfo
from polysaccharide.polymer.management import PolymerManager

from polysaccharide.polymer import building
import mbuild as mb

# Generic Imports
import re
from functools import partial, cached_property
from collections import defaultdict
from itertools import combinations, chain
from ast import literal_eval

# Numeric imports
import pandas as pd
import numpy as np

# File I/O
from pathlib import Path
import csv, json, openpyxl

# Typing and Subclassing
from typing import Any, Callable, ClassVar, Generator, Iterable, Optional, Union
from dataclasses import dataclass, field
from abc import ABC, abstractmethod, abstractproperty
from openmm.unit import Unit, Quantity

# Cheminformatics
from rdkit import Chem
from rdkit.Chem import rdChemReactions

from openff.toolkit import ForceField
from openff.toolkit.topology import Topology, Molecule

# Static Paths
# Relative directory names (no directory is created or checked for existence here)
RAW_DATA_PATH  = Path('raw_monomer_data')
PROC_DATA_PATH = Path('processed_monomer_data')
RXN_FILES_PATH = Path('rxn_smarts')
MONO_INFO_DIR  = Path('monomer_files')

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


# File and chemistry type definitions

In [2]:
# Top-level working directories, all relative to the current working directory
pdb_path    = Path('pdb_files')
coll_path   = Path('Collections')
lammps_path = Path('LAMMPS')
omm_path    = Path('OpenMM')

# create whichever of the directories do not exist yet (existing ones are left alone)
for _work_dir in (pdb_path, coll_path, lammps_path, omm_path):
    _work_dir.mkdir(exist_ok=True)

In [3]:
# Pairs of reacting functional groups, keyed by the name of the chemistry they define
reaction_pairs = {
    'NIPU' : ('cyclocarbonate', 'amine'),
    'urethane' : ('isocyanate', 'hydroxyl')
}
# names of all chemistries as a list, in the insertion order of reaction_pairs
chemistries = list(reaction_pairs)

# Collating urethanes into collections and generating Interchange files

## Saving topologies to single location

In [None]:
from tqdm.notebook import tqdm
from polysaccharide.charging.application import MolCharger


# Copy the charged structure of every polymer in every chemistry into Topologies/<chemistry>/<mol_name>.sdf
chg_method = 'Espaloma_AM1BCC' # used as key into MolCharger.subclass_registry and into each polymer's charged structure files

topo_dir = Path('Topologies')
topo_dir.mkdir(exist_ok=True)

# failed_topo[chemistry][exception class name] -> names of the molecules which raised that exception
failed_topo  = defaultdict(lambda : defaultdict(list))
for chemistry in chemistries:
    chem_path = coll_path / chemistry # collection the polymers are read from
    chem_dir = topo_dir / chemistry   # directory the charged SDF files are written to
    chem_dir.mkdir(exist_ok=True)

    mgr = PolymerManager(chem_path)
    for mol_name, polymer in ( progress := tqdm(mgr.polymers.items()) ):
        progress.set_postfix_str(f'{chemistry} : {mol_name}')
        try:
            chgr = MolCharger.subclass_registry[chg_method]()
            polymer.assert_charges_for(chgr, strict=True, return_cmol=False)

            # NOTE(review): sdf_path is looked up here but not used in the rest of this cell
            sdf_path = polymer.structure_files_chgd[chg_method]
            cmol = polymer.charged_offmol_from_sdf(chg_method)
            cmol.to_file(str(chem_dir / f'{mol_name}.sdf'), file_format='SDF')

        except AttributeError as a:
            # AttributeErrors are only printed; they are NOT recorded in failed_topo
            print(a)

        except Exception as e:
            # any other failure is printed and filed under its exception class name
            print(e)
            failed_topo[chemistry][e.__class__.__name__].append(mol_name)

### Generating Interchange dict

In [None]:
from polysaccharide import OPENFF_DIR
from openmm.unit import nanometer

# specify forcefield (loaded from an .offxml file inside OPENFF_DIR)
# ff_name = 'openff-2.0.0.offxml'
ff_name = 'openff_unconstrained-2.0.0.offxml'
ff_path = OPENFF_DIR / ff_name
forcefield = ForceField(ff_path)

# Interchange generation
success_ics = defaultdict(defaultdict) # success_ics[chemistry][mol_name] -> Interchange
failed_ics  = defaultdict(list)        # failed_ics[exception class name] -> paths of the SDF files which failed

for chemistry in chemistries:
    chem_dir = topo_dir / chemistry
    progress = tqdm([path for path in chem_dir.iterdir()]) # unpack into list for progress bar
    for sdf_path in progress:
        mol_name = sdf_path.stem
        progress.set_postfix_str(f'{chemistry} : {mol_name}')

        # NOTE(review): loading happens outside the try block, so a file which fails to load stops the whole loop
        offmol = Molecule.from_file(sdf_path)
        offtop = Topology.from_molecules(offmol) 

        try:
            # charges are taken from the loaded molecule via charge_from_molecules
            ic = forcefield.create_interchange(offtop, charge_from_molecules=[offmol])
            success_ics[chemistry][mol_name] = ic
        except Exception as e:
            print(e)
            failed_ics[e.__class__.__name__].append(sdf_path)

# remove from disc every SDF file for which Interchange creation raised
for err_name, err_list in failed_ics.items():
    for sdf_path in err_list:
        sdf_path.unlink() # delete dud files

# Running OpenMM simulations

## Defining utility functions

In [26]:
from openmm import XmlSerializer
from openmm import System, Context, State
from openmm import Integrator, Force
from openmm.app import Simulation
from openmm.unit import nanometer

from openff.interchange import Interchange
from openff.units import unit as offunit

from polysaccharide.simulation.ensemble import EnsembleSimulationFactory
from polysaccharide.simulation.records import SimulationParameters

# Default keyword arguments forwarded to Context.getState() when serializing a Simulation
DEFAULT_STATE_PARAMS : dict[str, bool] = dict(
    getPositions=True,
    getVelocities=True,
    getForces=True,
    getEnergy=True,
    getParameters=True,
    getParameterDerivatives=False,
    getIntegratorParameters=False,
)


def serialize_state_and_sys(sim : Simulation, out_dir : Path, out_name : str, state_params : dict[str, bool]=DEFAULT_STATE_PARAMS) -> None:
    '''Write the System and the current State of a Simulation to disc as XML

    Two files are produced inside out_dir : "<out_name>_system.xml" and "<out_name>_state.xml"
    The entries of state_params are forwarded as keyword arguments to Context.getState()'''
    serializables = (
        ('system', sim.system),
        ('state' , sim.context.getState(**state_params)),
    )

    for label, omm_obj in serializables:
        xml_path = out_dir / f'{out_name}_{label}.xml'
        xml_path.touch()

        with xml_path.open('w') as xml_file:
            xml_file.write(XmlSerializer.serialize(omm_obj))

def apply_state_to_context(state : State, context : Context) -> None:
    '''Copy the periodic box vectors, positions, velocities, and time held by a State onto an existing Context,
    then reinitialize that Context with its state preserved'''
    box_vectors = state.getPeriodicBoxVectors()
    context.setPeriodicBoxVectors(*box_vectors)

    positions = state.getPositions()
    context.setPositions(positions)

    velocities = state.getVelocities()
    context.setVelocities(velocities)

    sim_time = state.getTime()
    context.setTime(sim_time)

    context.reinitialize(preserveState=True)

def load_openmm_system(sys_path : Path, extra_forces : Optional[Union[Force, Iterable[Force]]]=None, sep_force_grps : bool=True, remove_constrs : bool=False) -> System:
    '''Load and configure a serialized OpenMM system, with optional additional parameters

    sys_path       : path to the XML file holding the serialized System (must have the ".xml" suffix)
    extra_forces   : a single Force or an iterable of Forces to add to the System; None or an empty iterable adds nothing
    sep_force_grps : if True, every Force is assigned a force group equal to its index in the System
    remove_constrs : if True, every constraint is removed from the System

    Returns the configured System'''
    assert sys_path.suffix == '.xml', f'Expected an .xml file, got "{sys_path}"'
    with sys_path.open('r') as file:
        ommsys = XmlSerializer.deserialize(file.read())

    if isinstance(extra_forces, Force): # the signature permits a lone Force, which cannot be iterated over directly
        extra_forces = [extra_forces]

    if extra_forces: # deliberately sparse to handle both Nonetype and empty list
        for force in extra_forces:
            ommsys.addForce(force)

    if sep_force_grps:
        for i, force in enumerate(ommsys.getForces()):
            force.setForceGroup(i)

    if remove_constrs:
        for i in reversed(range(ommsys.getNumConstraints())): # need to remove in reverse order to avoid having prior constraints "fall back down"
            ommsys.removeConstraint(i)

    return ommsys

def create_simulation2(interchange : Interchange, integrator : Integrator, forces : Optional[Iterable[Force]]=None,
                        sep_force_grps : bool=True, remove_constrs : bool=True, combine_nonbonded_forces : bool=True) -> Simulation:
    '''Build an OpenMM Simulation from an Interchange and an Integrator

    forces                   : optional extra Forces to add to the System (None or an empty iterable adds nothing)
    sep_force_grps           : if True, every Force is assigned a force group equal to its index in the System
    remove_constrs           : if True, every constraint is removed from the System
    combine_nonbonded_forces : forwarded unchanged to Interchange.to_openmm()

    The positions held by the Interchange are applied to the Context of the returned Simulation'''
    system    = interchange.to_openmm(combine_nonbonded_forces=combine_nonbonded_forces)
    topology  = interchange.topology.to_openmm()
    positions = interchange.positions.m_as(offunit.nanometer) * nanometer

    for extra_force in (forces or ()): # falls back to an empty tuple for both None and empty containers
        system.addForce(extra_force)

    if sep_force_grps:
        for group_idx, sys_force in enumerate(system.getForces()):
            sys_force.setForceGroup(group_idx)

    if remove_constrs:
        # walk the constraint indices from last to first so that each removal leaves the lower indices untouched
        for constr_idx in reversed(range(system.getNumConstraints())):
            system.removeConstraint(constr_idx)

    sim = Simulation(topology, system, integrator)
    sim.context.setPositions(positions)

    return sim

def openmm_sim_from_files(top_file : Path, sys_file : Path, state_file : Path, sim_params : SimulationParameters, sep_force_grps : bool=True, remove_constrs : bool=False, box_vectors : Optional[np.ndarray]=None) -> Simulation:
    '''Load an OpenMM simulation object from serialized Topology, System, and State

    top_file       : molecule file from which the Topology is built
    sys_file       : XML file holding the serialized System
    state_file     : XML file holding the serialized State
    sim_params     : simulation parameters; its "ensemble" attribute selects the EnsembleSimulationFactory subclass
    sep_force_grps : forwarded to load_openmm_system()
    remove_constrs : forwarded to load_openmm_system()
    box_vectors    : if provided, assigned to the Topology as its box vectors before conversion to OpenMM

    Returns the assembled Simulation'''
    offmol = Molecule.from_file(top_file)
    offtop = Topology.from_molecules(offmol)
    if box_vectors is not None:
        offtop.box_vectors = box_vectors
    ommtop = offtop.to_openmm()

    # define ensemble-specific forces and Integrator
    ens_fac = EnsembleSimulationFactory.subclass_registry[sim_params.ensemble.upper()]()
    integrator = ens_fac.integrator(sim_params)
    forces     = ens_fac.forces(    sim_params)

    # load and configure System
    ommsys = load_openmm_system(sys_file, extra_forces=forces, sep_force_grps=sep_force_grps, remove_constrs=remove_constrs)

    # putting it all together into a Simulation
    return Simulation(
        topology=ommtop,
        system=ommsys,
        integrator=integrator,
        state=str(state_file) # Simulation documents this argument as an XML file name or a State; a pathlib.Path is neither, so pass a plain string
    )

# load and apply State
# with state_file.open('r') as file:
#     ommstate = XmlSerializer.deserialize(file.read())
# apply_state_to_context(ommstate, sim.context)

## Manually create OpenMM sims from Interchange

In [27]:
# specifying simulation and ensemble parameters
from openmm.app import Simulation
from openmm import NonbondedForce, CustomNonbondedForce

from openmm.unit import kilojoule_per_mole, kilocalorie_per_mole
from openff.units import unit as offunit

from shutil import copyfile
from copy import deepcopy

from polysaccharide import filetree
from polysaccharide.simulation.records import SimulationParameters
from polysaccharide.simulation.ensemble import EnsembleSimulationFactory


# Cutoff settings
BOX_VECS = np.eye(3) * 10 * nanometer # diagonal box vectors, 10 nm along each axis

CUTOFF = 2.0 * nanometer
CUTOFF_METHOD = NonbondedForce.CutoffPeriodic # applied to both the custom and the default nonbonded force below
# CUTOFF_METHOD = NonbondedForce.CutoffNonPeriodic
# CUTOFF_METHOD = NonbondedForce.NoCutoff

DISPERSION = False # passed to setUseDispersionCorrection() on the default NonbondedForce
SWITCHING = False  # passed to setUseSwitchingFunction() on both nonbonded forces

omm_dir = Path('OpenMM_newest')
omm_dir.mkdir(exist_ok=True)

# selecting simulation parameters and ensemble
sp_path = Path('debug_sim_NVT.json')
sim_params = SimulationParameters.from_file(sp_path)
ens_fac = EnsembleSimulationFactory.subclass_registry[sim_params.ensemble.upper()]()

# names assigned to the Forces of each System, indexed by the position of the Force within the System
# NOTE(review): assumes the System holds exactly these forces in this order - only positions 0 and 1 are checked (by the isinstance asserts below)
force_names = (
    'vdW pairwise',
    'Electrostatic',
    '1-4 LJ',
    '1-4 Coulomb',
    'Torsion',
    'Angle',
    'Bond'
)

# looping over every chemistry and molecule for which an Interchange was created
omm_sims = defaultdict(defaultdict)
for chemistry, ic_dict in success_ics.items():
    chem_dir = topo_dir / chemistry
    chem_out = omm_dir / chemistry
    chem_out.mkdir(exist_ok=True)

    for mol_name, interchange in (progress := tqdm(ic_dict.items())):
        mol_dir = chem_out / mol_name
        mol_dir.mkdir(exist_ok=True)
        progress.set_postfix_str(f'{chemistry} : {mol_name}')
        
        # loading topology
        interchange.topology.box_vectors = BOX_VECS
        omm_top = interchange.topology.to_openmm()

        # setting up system
        omm_sys = interchange.to_openmm(combine_nonbonded_forces=False)
        omm_pos = interchange.positions.m_as(offunit.nanometer) # bare magnitudes in nanometers (no OpenMM unit attached)

        ## loading integrator and forces by ensemble
        integrator = ens_fac.integrator(sim_params)
        forces = ens_fac.forces(sim_params)

        if forces:
            for force in forces:
                omm_sys.addForce(force)

        ## number all forces into separate force groups for separability
        for i, force in enumerate(omm_sys.getForces()):
            force.setForceGroup(i)
            force.setName(force_names[i]) # NOTE(review): raises IndexError if the System holds more Forces than force_names has entries (e.g. if the ensemble adds forces above)

        ## reconfiguring non-bonded forces
        ### Custom nonbonded
        nonbond_custom = omm_sys.getForce(0)
        assert(isinstance(nonbond_custom, CustomNonbondedForce))

        nonbond_custom.setCutoffDistance(CUTOFF)
        nonbond_custom.setUseSwitchingFunction(SWITCHING)
        nonbond_custom.setNonbondedMethod(CUTOFF_METHOD)
 
        ### Default nonbonded
        nonbond = omm_sys.getForce(1)
        assert(isinstance(nonbond, NonbondedForce))

        nonbond.setCutoffDistance(CUTOFF)
        nonbond.setNonbondedMethod(CUTOFF_METHOD)
        nonbond.setUseSwitchingFunction(SWITCHING)
        nonbond.setUseDispersionCorrection(DISPERSION)

        # create and register simulation
        sim = Simulation(omm_top, omm_sys, integrator)
        sim.context.setPositions(omm_pos)
        omm_sims[chemistry][mol_name] = sim

        # serialize Topology, System, and State for reloading
        sdf_out_path = mol_dir / f'{mol_name}_topology.sdf'
        sdf_out_path.touch()
        for mol in interchange.topology.molecules:
            # NOTE(review): every molecule is written to the same path - presumably only the last survives if the topology holds several; confirm
            mol.to_file(str(sdf_out_path), file_format='SDF')
        serialize_state_and_sys(sim, out_dir=mol_dir, out_name=mol_name)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

In [28]:
# read back cutoff distance, switching-function flag, and nonbonded method from the NonbondedForce currently bound to "nonbond"
nonbond.getCutoffDistance(), nonbond.getUseSwitchingFunction(), nonbond.getNonbondedMethod()

(Quantity(value=2.0, unit=nanometer), False, 2)

In [29]:
# read back the same three settings from the CustomNonbondedForce currently bound to "nonbond_custom"
nonbond_custom.getCutoffDistance(), nonbond_custom.getUseSwitchingFunction(), nonbond_custom.getNonbondedMethod()

(Quantity(value=2.0, unit=nanometer), False, 2)

## Serialize OpenMM simulations to file

In [None]:
# specifying simulation and ensemble parameters
from openmm.unit import kilojoule_per_mole, kilocalorie_per_mole
from shutil import copyfile
from copy import deepcopy

from polysaccharide import filetree
from polysaccharide.simulation.records import SimulationParameters
from polysaccharide.simulation.ensemble import EnsembleSimulationFactory


# Build a Simulation for every successfully-created Interchange via the ensemble factory and serialize
# its Topology (SDF), System (XML), and State (XML) under OpenMM/<chemistry>/<mol_name>/
omm_dir = Path('OpenMM')
omm_dir.mkdir(exist_ok=True)

# selecting simulation parameters and ensemble
sp_path = Path('debug_sim_NVT.json')
sim_params = SimulationParameters.from_file(sp_path)
ens_fac = EnsembleSimulationFactory.subclass_registry[sim_params.ensemble.upper()]()

# looping over every chemistry and molecule for which an Interchange was created
omm_sims = defaultdict(defaultdict) # omm_sims[chemistry][mol_name] -> Simulation
for chemistry, ic_dict in success_ics.items():
    mgr = PolymerManager(coll_path / chemistry)
    chem_dir = omm_dir / chemistry
    chem_dir.mkdir(exist_ok=True)

    for mol_name, interchange in ic_dict.items():
        # creating directories
        mol_dir = chem_dir / mol_name
        mol_dir.mkdir(exist_ok=True)

        # locating the charged structure file of the polymer
        polymer = mgr.polymers[mol_name]
        sdf_path = polymer.structure_files_chgd[chg_method]

        # create and register simulation
        sim = ens_fac.create_simulation(interchange, sim_params)
        omm_sims[chemistry][mol_name] = sim

        # serialize Topology, System, and State for reloading
        sdf_out_path = mol_dir / f'{mol_name}_topology.sdf'
        copyfile(sdf_path, sdf_out_path)
        serialize_state_and_sys(sim, out_dir=mol_dir, out_name=mol_name)

## Loading simulations from file

In [None]:
from openmm import NonbondedForce

# NOTE(review): CUTOFF and BOX_VECS are (re)defined here but not used in the rest of this cell;
# in particular box_vectors is not passed to openmm_sim_from_files() - confirm whether that is intended
CUTOFF = 2.0 * nanometer
BOX_VECS = np.eye(3) * 10 * nanometer

# parameters
sep_force_grps : bool = True
remove_constrs : bool = False

# paths
# omm_dir = Path('OpenMM')
omm_dir = Path('OpenMM_unconstrained')
sp_path = Path('debug_sim_NVT.json')

# iterate over serialized directory tree and load
sim_params = SimulationParameters.from_file(sp_path)

omm_sims = defaultdict(defaultdict) # omm_sims[chemistry][mol_name] -> Simulation
for subdir in omm_dir.iterdir():
    if subdir.is_dir(): # each subdirectory name is taken as a chemistry name
        chemistry = subdir.name
        for mol_dir in subdir.iterdir():
            mol_name = mol_dir.name # every entry is treated as a molecule directory (no is_dir() check at this level)

            # file names follow the scheme written by the serialization cells: <mol_name>_{state,system}.xml and <mol_name>_topology.sdf
            state_file = mol_dir / f'{mol_name}_state.xml'
            sys_file   = mol_dir / f'{mol_name}_system.xml'
            top_file   = mol_dir / f'{mol_name}_topology.sdf'

            sim = openmm_sim_from_files(
                top_file,
                sys_file,
                state_file,
                sim_params,
                sep_force_grps=sep_force_grps,
                remove_constrs=remove_constrs,
            )

            omm_sims[chemistry][mol_name] = sim

## Evaluating starting structure energies

In [30]:
from openmm.unit import kilojoule_per_mole, kilocalorie_per_mole

# NOTE(review): repl_table is defined but not used in the rest of this cell
repl_table = {
    'PeriodicTorsion' : 'Torsion',
    'HarmonicAngle' : 'Angle',
    'HarmonicBond' : 'Bond'
}

NULL_ENERGY = 0.0*kilojoule_per_mole
PRECISION : int = 4 # number of decimals the tabulated energies are rounded to

data_dicts = [] # one record (dict) per molecule
for chemistry, mol_dict in omm_sims.items():
    for mol_name, sim in (progress := tqdm(mol_dict.items())):
        progress.set_postfix_str(f'{chemistry} : {mol_name}')
        
        # extract total and component energies from OpenMM force groups
        data_dict = {
            'Chemistry' : chemistry,
            'Molecule'  : mol_name
        }
        omm_energies = {}

        overall_state = sim.context.getState(getEnergy=True) # get total potential energy
        PE = overall_state.getPotentialEnergy()
        omm_energies['Potential'] = PE

        KE = overall_state.getKineticEnergy()
        omm_energies['Kinetic'] = KE
        assert(KE == NULL_ENERGY) # the starting structure is expected to carry no kinetic energy

        # one energy query per Force; relies on Force i having been assigned to force group i
        for i, force in enumerate(sim.system.getForces()):
            state = sim.context.getState(getEnergy=True, groups={i})
            omm_energies[force.getName()] = state.getPotentialEnergy()

        # reformat : convert to kcal/mol, label each column with the unit symbol, and round
        omm_energies_kcal = {}
        for contrib_name, energy_kj in omm_energies.items():
            energy_kcal = energy_kj.in_units_of(kilocalorie_per_mole)
            # NOTE(review): reads the private Quantity._value attribute to obtain the bare magnitude
            omm_energies_kcal[f'{contrib_name} ({energy_kcal.unit.get_symbol()})'] = round(energy_kcal._value, PRECISION)

        # compiling data
        data_dict = {**data_dict, **omm_energies_kcal}
        data_dicts.append(data_dict)

# collate the records into a table indexed by (Chemistry, Molecule)
omm_table = pd.DataFrame.from_records(data_dicts)
omm_table.sort_values('Molecule', inplace=True)
omm_table.set_index(['Chemistry', 'Molecule'], inplace=True)
# omm_table.to_csv(omm_dir / f'{omm_dir.name}_PEs.csv', index=False)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

In [31]:
# display the OpenMM energy table with its columns ordered by label
omm_table.sort_index(axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,1-4 Coulomb (kcal/mol),1-4 LJ (kcal/mol),Angle (kcal/mol),Bond (kcal/mol),Electrostatic (kcal/mol),Kinetic (kcal/mol),Potential (kcal/mol),Torsion (kcal/mol),vdW pairwise (kcal/mol)
Chemistry,Molecule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
NIPU,NIPU_11,205.6854,447.5183,924.444,936.1703,-701.3613,0.0,26118.23,307.5251,23998.25
NIPU,NIPU_2,-715.4404,464.7511,1731.2338,1054.8592,-629.1521,0.0,16015.83,403.6802,13705.9
urethane,urethane_0,-6900.4149,621.6685,4417.8422,745.0805,2031.9201,0.0,2508.959,465.7066,1127.156
urethane,urethane_1,-2090.4597,623.4489,731.2801,591.9922,569.9974,0.0,2308.894,180.7761,1701.86
urethane,urethane_10,-1430.2346,835.3169,1165.3718,585.7152,283.0706,0.0,2611.793,350.8385,821.7148
urethane,urethane_14,-992.3145,525.7142,1333.2802,606.283,153.5438,0.0,3354.385,268.5547,1459.323
urethane,urethane_15,-310.113,366.583,814.8633,581.8832,-308.66,0.0,20924.9,269.8025,19510.54
urethane,urethane_17,-7030.1549,549.6743,1743.4136,660.9872,2615.6606,0.0,-338.7565,340.6219,781.0413
urethane,urethane_2,-1186.9846,509.9049,1287.5645,604.7534,400.494,0.0,2270775.0,254.707,2268905.0
urethane,urethane_22,-7357.8457,1014.2406,6848.7392,713.5582,1490.6015,0.0,1013185.0,567.0277,1009909.0


In [32]:
pd.options.display.float_format = '{:.4f}'.format # disable scientific notation

# load the LAMMPS energies, using the first two CSV columns as the row index
lmp_path = Path('LAMMPS/LAMMPS_PEs.csv')
lmp_table = pd.read_csv(lmp_path, index_col=(0,1))
# fold dihedral and improper energies into a single "Torsion" column
lmp_table['Torsion (kcal/mol)'] = lmp_table['Dihedral (kcal/mol)'] + lmp_table['Impropers (kcal/mol)']


# drop the two columns just merged, along with further columns not kept for the comparison
lmp_table.drop(columns=['Dihedral (kcal/mol)', 'Impropers (kcal/mol)'], inplace=True)
lmp_table.drop(columns=['Ranged k-space (kcal/mol)', 'vdW ranged (kcal/mol)', 'Total (kcal/mol)', 'Pairwise (kcal/mol)'], inplace=True)
lmp_table.sort_index(axis=1) # display with columns ordered by label

Unnamed: 0_level_0,Unnamed: 1_level_0,Angle (kcal/mol),Bond (kcal/mol),Coulomb (kcal/mol),Kinetic (kcal/mol),Potential (kcal/mol),Torsion (kcal/mol),vdW pairwise (kcal/mol)
Chemistry,Molecule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
NIPU,NIPU_11,924.4441,936.1614,-817.78,0.0,25136.3025,307.5251,23785.952
NIPU,NIPU_2,1731.2339,1054.8606,-1858.435,0.0,15100.3757,403.6802,13769.0361
urethane,urethane_0,4417.842,745.0806,-4124.393,0.0,3118.7481,465.7065,1614.512
urethane,urethane_1,731.2807,591.9917,-1311.1273,0.0,2400.3527,180.7761,2207.4314
urethane,urethane_10,1165.3723,585.7113,-1147.8505,0.0,2519.8958,350.8386,1565.8241
urethane,urethane_14,1333.2805,606.2828,-789.8303,0.0,3295.9871,268.5545,1877.6996
urethane,urethane_15,814.8639,581.8827,-650.7598,0.0,15954.7666,269.8025,14938.9774
urethane,urethane_17,1743.4138,660.9857,-3544.3699,0.0,449.3406,340.6218,1248.6892
urethane,urethane_2,1287.5646,604.7534,-724.7406,0.0,793373.6718,254.707,791951.3873
urethane,urethane_22,6848.74,713.5584,-5443.9864,0.0,918533.5556,567.0277,915848.216


In [33]:
# columns which are dropped from both the OpenMM and the LAMMPS table (and compared directly in a separate cell)
common_cols = ['Angle (kcal/mol)', 'Bond (kcal/mol)', 'Torsion (kcal/mol)', 'Kinetic (kcal/mol)']

# what remains of each table without those columns
omm_redux = omm_table.drop(columns=common_cols)
lmp_redux = lmp_table.drop(columns=common_cols)

In [34]:
# OpenMM minus LAMMPS, restricted to the columns listed in common_cols
omm_table[common_cols] - lmp_table[common_cols]

Unnamed: 0_level_0,Unnamed: 1_level_0,Angle (kcal/mol),Bond (kcal/mol),Torsion (kcal/mol),Kinetic (kcal/mol)
Chemistry,Molecule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NIPU,NIPU_11,-0.0001,0.0089,0.0,0.0
NIPU,NIPU_2,-0.0001,-0.0014,-0.0,0.0
urethane,urethane_0,0.0002,-0.0001,0.0001,0.0
urethane,urethane_1,-0.0006,0.0005,-0.0,0.0
urethane,urethane_10,-0.0005,0.0039,-0.0001,0.0
urethane,urethane_14,-0.0003,0.0002,0.0002,0.0
urethane,urethane_15,-0.0006,0.0005,0.0,0.0
urethane,urethane_17,-0.0002,0.0015,0.0001,0.0
urethane,urethane_2,-0.0001,0.0,0.0,0.0
urethane,urethane_22,-0.0008,-0.0002,0.0,0.0


In [35]:
# OpenMM minus LAMMPS for the pairwise vdW and total potential energy columns
(omm_redux[['vdW pairwise (kcal/mol)', 'Potential (kcal/mol)']] - lmp_redux[['vdW pairwise (kcal/mol)', 'Potential (kcal/mol)']])

Unnamed: 0_level_0,Unnamed: 1_level_0,vdW pairwise (kcal/mol),Potential (kcal/mol)
Chemistry,Molecule,Unnamed: 2_level_1,Unnamed: 3_level_1
NIPU,NIPU_11,212.2953,981.9238
NIPU,NIPU_2,-63.1358,915.4572
urethane,urethane_0,-487.3564,-609.7891
urethane,urethane_1,-505.5718,-91.4582
urethane,urethane_10,-744.1093,91.8974
urethane,urethane_14,-418.3764,58.3976
urethane,urethane_15,4571.5644,4970.1328
urethane,urethane_17,-467.6479,-788.0971
urethane,urethane_2,1476953.9372,1477401.6336
urethane,urethane_22,94060.6265,94651.458


In [36]:
# display the three remaining OpenMM columns which are not compared against LAMMPS in the cells above
omm_redux[['Electrostatic (kcal/mol)', '1-4 LJ (kcal/mol)', '1-4 Coulomb (kcal/mol)']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Electrostatic (kcal/mol),1-4 LJ (kcal/mol),1-4 Coulomb (kcal/mol)
Chemistry,Molecule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NIPU,NIPU_11,-701.3613,447.5183,205.6854
NIPU,NIPU_2,-629.1521,464.7511,-715.4404
urethane,urethane_0,2031.9201,621.6685,-6900.4149
urethane,urethane_1,569.9974,623.4489,-2090.4597
urethane,urethane_10,283.0706,835.3169,-1430.2346
urethane,urethane_14,153.5438,525.7142,-992.3145
urethane,urethane_15,-308.66,366.583,-310.113
urethane,urethane_17,2615.6606,549.6743,-7030.1549
urethane,urethane_2,400.494,509.9049,-1186.9846
urethane,urethane_22,1490.6015,1014.2406,-7357.8457


## Comparing energies from constrained and unconstrained FFs

In [None]:
# load one energy table per platform from <platform>/<platform>_PEs.csv, using the first two CSV columns as the row index
e_tables = {
    platform : pd.read_csv(Path(platform) / f'{platform}_PEs.csv', index_col=(0, 1))
        for platform in ('LAMMPS', 'OpenMM', 'OpenMM_unconstrained')
}

In [None]:
# columns shown for each remaining table
cols = [
    'Nonbonded (kcal/mol)',
    'Bond (kcal/mol)',
    'Angle (kcal/mol)',
    'Torsion (kcal/mol)',
    'Potential (kcal/mol)',
    'Kinetic (kcal/mol)',
]

# discard the 'OpenMM' table; the default keeps this cell re-runnable once the entry has already been removed
e_tables.pop('OpenMM', None)
for src, tab in e_tables.items():
    print(src)
    display(tab[cols])

In [None]:
# element-wise difference between the unconstrained OpenMM table and the LAMMPS table
diff = e_tables['OpenMM_unconstrained'] - e_tables['LAMMPS']
diff # display

In [None]:
# subtract the LAMMPS improper energies from the torsion column of the OpenMM-minus-LAMMPS difference
diff['Torsion (kcal/mol)'] - e_tables['LAMMPS']['Impropers (kcal/mol)']

In [None]:
# select three urethane rows by their (Chemistry, Molecule) index
# NOTE(review): df_unconstr is not assigned anywhere in the visible notebook - confirm where it comes from
df_unconstr.loc[[('urethane', 'urethane_0'), ('urethane', 'urethane_1'), ('urethane', 'urethane_41')]]

In [None]:
# rebind "nonbond" to the first Force of whichever Simulation "sim" currently refers to, and show its cutoff
nonbond = sim.system.getForce(0)
nonbond.getCutoffDistance()

In [None]:
# show the name of the Force currently bound to "nonbond"
nonbond.getName()

In [None]:
# print the cutoff distance of the first Force of every registered Simulation
data_dicts = [] # NOTE(review): reset here but not filled in this cell
for chemistry, mol_dict in omm_sims.items():
    print(chemistry)
    for mol_name, sim in mol_dict.items():
        print(mol_name)
        nonbond = sim.system.getForce(0)
        assert(nonbond.getName() == 'NonbondedForce') # the first Force must be named 'NonbondedForce'
        print(nonbond.getCutoffDistance())

In [None]:
import matplotlib.pyplot as plt

# histogram of the 'HarmonicBond (kcal/mol)' column of e_diff, with the x-axis clipped to the data range
# NOTE(review): e_diff is not assigned anywhere in the visible notebook - confirm where it comes from
bond_diffs = e_diff['HarmonicBond (kcal/mol)'].to_numpy()
plt.hist(bond_diffs, bins=20)
plt.xlim(bond_diffs.min(), bond_diffs.max())
plt.xlabel(f'{general.GREEK_UPPER["delta"]}E_bond const vs unconst')
plt.ylabel('Count')

## Comparing ParmEd energy decomposition to native OpenMM force-group-based decomposition

In [None]:
import parmed
from openmm.openmm import Force

NULL_ENERGY = 0.0*kilojoule_per_mole

sim = omm_sims['urethane']['urethane_41']
# assign and initialize unique force groups for simulation
for i, force in enumerate(sim.system.getForces()):
    force.setForceGroup(i)
    # print(force.getName(), force.getForceGroup())
sim.context.reinitialize(preserveState=True) # need to reinitialize to get force labelling changes to "stick"

# energies from OpenMM force groups
print('\nOpenMM:')
print('='*30)
omm_energies = {}

## extract total energies for state
overall_state = sim.context.getState(getEnergy=True) # get total potential energy
PE = overall_state.getPotentialEnergy()
omm_energies['Total Potential Energy'] = PE

KE = overall_state.getKineticEnergy()
assert(KE == NULL_ENERGY)

## extract the energy of each individual force group (group i holds exactly Force i after the renumbering above)
for i, force in enumerate(sim.system.getForces()):
    state = sim.context.getState(getEnergy=True, groups={i})
    force_name = force.getName().removesuffix('Force')
    pe = state.getPotentialEnergy()

    omm_energies[force_name] = pe
    print(f'{force_name} : {pe}')

## converting name to match with ParmEd for comparison
## NOTE(review): a Force whose name (minus the 'Force' suffix) is missing from namemap raises KeyError in the comprehension below
namemap = {
    'Nonbonded' : 'bond',
    'PeriodicTorsion' : 'angle',
    'HarmonicAngle' : 'dihedral',
    'HarmonicBond' : 'urey_bradley',
    'Total Potential Energy' : 'total'
}
compat_omm_energies = {
    namemap[contrib] : energy
        for contrib, energy in omm_energies.items()
}

# sum only the per-force contributions : the 'Total Potential Energy' entry must be left out,
# otherwise the total is counted against itself and the difference printed below is meaningless
total = sum(
    (energy for contrib, energy in omm_energies.items() if contrib != 'Total Potential Energy'),
    start=NULL_ENERGY # need "seed" to have Quantity datatype to sum
)
print(f'{general.GREEK_UPPER["delta"]}E_contrib: ', PE - total)

# ParmEd energy decomposition
print('\nParmEd:')
print('='*30)
parm_energies = {}
parm_struct = parmed.openmm.load_topology(sim.topology, sim.system)
for contrib, energy_val in parmed.openmm.energy_decomposition(parm_struct, sim.context).items():
    parm_energies[contrib] = energy = energy_val*kilocalorie_per_mole # assign proper units
    print(contrib, energy.in_units_of(kilojoule_per_mole))

## Minimizing and running single integration step, then evaluating energies from reporter

In [None]:
import re
from openmm.unit import kilojoule_per_mole, kilocalorie_per_mole

from polysaccharide.simulation import preparation
from polysaccharide.simulation.records import SimulationParameters
from polysaccharide.simulation.ensemble import EnsembleSimulationFactory

# lazily matches the text in front of a parenthesised suffix (and any whitespace preceding it), e.g. the label before "(kJ/mole)"
STRIP_BEFORE_PARENS = re.compile(r'(.*?)(?=\s*\(.*\))')
PRECISION = 3 # number of decimals to round reported energies to

omm_dir = Path('OpenMM_no_sim')
omm_dir.mkdir(exist_ok=True)


# selecting simulation parameters and ensemble
sp_path = Path('debug_sim_NVT.json')
sim_params = SimulationParameters.from_file(sp_path)
ens_fac = EnsembleSimulationFactory.subclass_registry[sim_params.ensemble.upper()]()

data_by_mol = [] # one record (dict) per molecule
omm_sims = defaultdict(defaultdict) # NOTE(review): reset here but not filled in this cell
# looping over every chemistry and molecule for which an Interchange was created
for chemistry, ic_dict in success_ics.items():
    chem_dir = omm_dir / chemistry
    chem_dir.mkdir(exist_ok=True)

    for mol_name, interchange in ic_dict.items():
        data_dict = {
            'Chemistry' : chemistry,
            'Molecule'  : mol_name
        }

        # creating directories
        mol_dir = chem_dir / mol_name 
        mol_dir.mkdir(exist_ok=True)

        sim_file_dir = mol_dir / f'{mol_name}_sim'
        sim_file_dir.mkdir(exist_ok=True, parents=True)

        # creating simulation and associated files
        sim = ens_fac.create_simulation(interchange, sim_params)
        sim_paths = preparation.prepare_simulation_paths(output_folder=sim_file_dir, output_name=mol_name, sim_params=sim_params)
        reporters = preparation.prepare_simulation_reporters(sim_paths, sim_params)
        preparation.config_simulation(sim, reporters, checkpoint_path=sim_paths.checkpoint)

        # energy min and single-step integration
        sim.minimizeEnergy()
        sim.step(1)

        # extracting energies
        # NOTE(review): presumably the file at sim_paths.state_data is written by one of the reporters configured above - confirm it is flushed by this point

        state_data = pd.read_csv(sim_paths.state_data)
        energies = {}
        for key in ('Potential Energy (kJ/mole)', 'Kinetic Energy (kJ/mole)'):
            tag = re.search(STRIP_BEFORE_PARENS, key).group(0) # column label without its unit suffix
            E_kj_val = state_data[key][0] # entry with index label 0 of the column
            E_kj = E_kj_val * kilojoule_per_mole
            E_kcal = E_kj.in_units_of(kilocalorie_per_mole)

            # record each energy twice, once per unit, labelled with the unit symbol
            for energy in (E_kj, E_kcal):
                energies[f'{tag} ({energy.unit.get_symbol()})'] = energy._value
        
        data_dict.update(**energies)
        data_by_mol.append(data_dict)

# collate energies into DataFrame        
df = pd.DataFrame.from_records(data_by_mol)
df = df.sort_values('Molecule')

# round energy values to desired precision
round_fn = lambda x : round(x, PRECISION)

for col_name, col in df.items():
    try:
        df[col_name] = col.apply(round_fn) # attempt to round column and replace with rounded values
    except TypeError:
        pass # columns whose values cannot be rounded are left as they are

# save energies to file
energy_file = omm_dir / 'energies_1_step.csv'
df.to_csv(energy_file, index=False)

## Checking for successful residue covers of newly-generated PDB Topologies

In [None]:
chemistry = 'urethane'

mgr = PolymerManager(coll_path / chemistry)
# NOTE(review): failed_interchanges is not assigned anywhere in the visible notebook - confirm where it comes from
mol_names = failed_interchanges[chemistry]['UnmatchedAtomsError']

# non-strict matched molecule for every listed name
offmols = {
    mol_name : mgr.polymers[mol_name].offmol_matched(strict=False)
        for mol_name in mol_names
}

In [None]:
# atom count of every molecule in offmols, inserted in order of increasing atom count
sizes = {
    mol_name : offmol.n_atoms
        for mol_name, offmol in sorted(offmols.items(), key=lambda x : x[1].n_atoms)
}

In [None]:

# NOTE(review): pdir is assigned but not used below, and "offmol" is not assigned in this cell -
# the loop runs on whatever "offmol" was bound by a previously executed cell
pdir = mgr.polymers['urethane_6']
# pdir = mgr.polymers['NIPU_8']
# pdir.offmol_matched(strict=True)

# print the metadata of every atom not flagged as already matched
for atom in offmol.atoms:
    if not atom.metadata['already_matched']:
        print(atom.metadata)

In [None]:
mgr = PolymerManager(coll_path / 'NIPU')

# try to obtain the molecule of every NIPU polymer, collecting the names of those which raise
offmols = {}
unmatched = []
for mol_name, polymer in mgr.polymers.items():
    try:
        offmols[mol_name] = polymer.offmol
    except Exception: # narrowed from a bare except, so KeyboardInterrupt / SystemExit are no longer swallowed
        unmatched.append(mol_name)

In [None]:
# for every molecule collected in "unmatched", print the metadata of each atom not flagged as already matched
for mol_name in unmatched:
    print(mol_name)
    polymer = mgr.polymers[mol_name]
    offmol = polymer.offmol_matched(strict=False)
    
    for atom in offmol.atoms:
        if not atom.metadata['already_matched']:
            print('\t', atom.metadata)

# Experimenting with SDF files

In [None]:
# build a test molecule from SMILES, add hydrogens to it, and attach a float-valued property named 'stuff'
benz = Chem.MolFromSmiles('C1ccccC=1')
benz = Chem.AddHs(benz)
benz.SetDoubleProp('stuff', 3.14)
benz # display

In [None]:
# write the test molecule to file through MolToMolFile and through MolToV3KMolFile
# NOTE(review): the return values are kept as block2k / block3k but not used elsewhere in the visible notebook
block2k = Chem.MolToMolFile(benz, 'test_2k.sdf')
block3k = Chem.MolToV3KMolFile(benz, 'test_3k.sdf')

In [None]:
# same MolToMolFile call as for 'test_2k.sdf', this time with forceV3000=True
block2kforce = Chem.MolToMolFile(benz, 'test_2k_force.sdf', forceV3000=True)

In [None]:
# write the test molecule through an SDWriter with V3000 output forced, printing the flag back as a check
with Chem.SDWriter('test_sdw.sdf') as sdwriter:
    sdwriter.SetForceV3000(True)
    print(sdwriter.GetForceV3000())

    sdwriter.write(benz)

In [None]:
# read every entry of the SDF file without sanitization
with Chem.SDMolSupplier('sdf_testing/test_off_rd.sdf', sanitize=False) as suppl:
    mols = [mol for mol in suppl]

# keep and display the first entry
targ = mols[0]
targ

In [None]:
# convert the RDKit test molecule to an OpenFF Molecule, generate a single conformer, and visualize it with nglview
omol = Molecule.from_rdkit(benz)
omol.generate_conformers(n_conformers=1)
omol.visualize(backend='nglview')

In [None]:
from polysaccharide import filetree
from polysaccharide import TOOLKITS


p = Path('sdf_testing/test_off_rd.sdf')
tkwrap = TOOLKITS['The RDKit'] # toolkit entry passed as toolkit_registry below

# attach a tuple-valued property, then write the molecule to p using the file format named by the suffix of p
omol.properties['series'] = (1,2,3)
omol.to_file(
    general.asstrpath(p),
    file_format=filetree.dotless(p.suffix),
    toolkit_registry=tkwrap
)

In [None]:
# read the molecule back from p with the same file format and toolkit used for writing
omol_load = Molecule.from_file(
    general.asstrpath(p),
    file_format=filetree.dotless(p.suffix),
    toolkit_registry=tkwrap
)