In [1]:
# Custom Imports
from polymer_utils import general, filetree
from polymer_utils import simulation as polysim

from polymer_utils import charging
from polymer_utils.charging.types import AtomIDMap, ResidueChargeMap
from polymer_utils.charging.residues import ChargedResidue

from polymer_utils.representation import PolymerDir, PolymerDirManager
from polymer_utils.representation import LOGGER as polylogger
from polymer_utils.solvation.solvents import WATER_TIP3P
from polymer_utils.logutils import config_mlf_handler

# General Imports
import numpy as np
from datetime import datetime

# Typing and Subclassing
from typing import Any, Callable, ClassVar, Iterable, Optional, Union
from dataclasses import dataclass, field
from abc import ABC, abstractmethod, abstractproperty

# File I/O
from pathlib import Path
import csv, json, pickle
from shutil import copyfile, rmtree

# Logging and Shell
from IPython.display import clear_output
import subprocess
import logging
# logging.basicConfig(level=logging.DEBUG)
logging.basicConfig(level=logging.INFO)
                            
# Cheminformatics
from rdkit import Chem
from rdkit.Chem import rdmolfiles

# Molecular Dynamics
from openff.units import unit
from openff.interchange import Interchange

from openff.toolkit.topology import Topology
from openff.toolkit.topology.molecule import Molecule, Atom
from openff.toolkit.typing.engines.smirnoff import ForceField
from openff.toolkit.typing.engines.smirnoff.parameters import LibraryChargeHandler

from openff.toolkit.utils.exceptions import ConformerGenerationError
from openff.toolkit.utils.toolkits import RDKitToolkitWrapper, OpenEyeToolkitWrapper, AmberToolsToolkitWrapper

from openmm import LangevinMiddleIntegrator, Context
from openmm.vec3 import Vec3
from openmm.app import Simulation, PDBReporter, StateDataReporter

from openmm.unit import picosecond, femtosecond, nanosecond # time
from openmm.unit import nanometer, angstrom # length
from openmm.unit import Unit, kelvin # misc

# Static Paths - root directories (relative paths, so they resolve against the current working directory)
CORE_PATH       = Path('Core')
POLY_PATH       = Path('Polymers')
TEST_PATH       = Path('Polymers_test')
COMPAT_PDB_PATH = Path('compatible_pdbs')

# Locations derived from the root directories above
POLY_PDB_PATH         = COMPAT_PDB_PATH.joinpath('simple_polymers')
SOLVENTS_PATH         = CORE_PATH.joinpath('solvents')
POLYMER_SOLV_TEMPLATE = CORE_PATH.joinpath('inp_templates', 'solv_polymer_template_box.inp')

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


## Configuring and (re)loading polymers, setting solvents, checking validity

In [2]:
# Run toggles read by the next cell:
#   reset      -> purge the managed directories and repopulate them from POLY_PDB_PATH
#   resolvate  -> (re)run solvation of the collection with the desired solvents
#   clear_sims -> purge previously stored simulations
reset, resolvate, clear_sims = True, True, False

# Solvents of interest and the manager wrapping the polymer collection directory
desired_solvents = (WATER_TIP3P,) # alternative: (None,)
mgr = PolymerDirManager(POLY_PATH)

In [3]:
# Rebuild / refresh the managed collection according to the toggles set in the previous cell
if reset:
    mgr.purge_dirs(really=True)
    mgr.populate_collection(source_dir=POLY_PDB_PATH)
if resolvate:
    mgr.solvate_collection(desired_solvents, template_path=POLYMER_SOLV_TEMPLATE, exclusion=1*nanometer)
if clear_sims:
    mgr.purge_sims(really=True)

# Selecting subset of molecules which is suitable for ABE10 charging and subsequent simulation
HARD_POLYMERS = ['vulcanizedrubber', 'polyphenylenesulfone', 'polyethylene'] # pathological or otherwise difficult-to-run polymers that I've encountered
hard_polymers_solv = [
    f'{unsolv_mol}_solv_{solvent.name}'
        for solvent in filter(lambda solv : solv is not None, desired_solvents) # unsolvated (None) entries have no name to append
            for unsolv_mol in HARD_POLYMERS
]
HARD_POLYMERS += hard_polymers_solv # ensure solvated names are also included

def _is_valid_mol(mol_dir) -> bool:
    '''Selection predicate for valid_mols; checks are made in a fixed order and stop at the first failure'''
    if mol_dir.mol_name in HARD_POLYMERS:     # 1) must not be manually excluded
        return False
    if not (0 < mol_dir.n_atoms <= 300):      # 2) must be loadable (i.e. non-zero size) but small enough for AM1BCC (150 is speed limit, 300 is error limit)
        return False
    if not mol_dir.has_monomer_data:          # 3) must have monomer information files
        return False
    return mol_dir.solvent in desired_solvents # 4) must be solvated in one of the specified solvents (could be None)

valid_mols = [mol_dir.mol_name for mol_dir in mgr.mol_dirs_list if _is_valid_mol(mol_dir)]

# display to check that loading has gone as planned (smallest molecules first)
dirs_by_size = sorted(mgr.mol_dirs_list, key=lambda mdir : mdir.n_atoms)
for pdir in dirs_by_size:
    print(pdir)
print(mgr.all_completed_sims)
print(valid_mols)

INFO:polymer_utils.representation:Acquiring structure and monomer files for polyethylmethacrylate from compatible_pdbs/simple_polymers
INFO:polymer_utils.representation:Acquiring structure and monomer files for PEO_PLGA from compatible_pdbs/simple_polymers
INFO:polymer_utils.representation:Acquiring structure and monomer files for bisphenolA from compatible_pdbs/simple_polymers
INFO:polymer_utils.representation:Acquiring structure and monomer files for polymethylketone from compatible_pdbs/simple_polymers
INFO:polymer_utils.representation:Acquiring structure and monomer files for polypyrrole from compatible_pdbs/simple_polymers
INFO:polymer_utils.representation:Acquiring structure and monomer files for polyphenylenesulfone from compatible_pdbs/simple_polymers
INFO:polymer_utils.representation:Acquiring structure and monomer files for naturalrubber from compatible_pdbs/simple_polymers
INFO:polymer_utils.representation:Acquiring structure and monomer files for polythiophene from compatib

PolymerDir(parent_dir=PosixPath('Polymers/polyvinylchloride'), mol_name='polyvinylchloride', solvent=None, exclusion=Quantity(value=1, unit=nanometer), charge_method=None, ff_file=None, monomer_file=PosixPath('Polymers/polyvinylchloride/polyvinylchloride/monomers/polyvinylchloride.json'), monomer_file_chgd=None, structure_file=PosixPath('Polymers/polyvinylchloride/polyvinylchloride/structures/polyvinylchloride.pdb'), structure_files_chgd={}, _off_topology=None, _offmol=None)
PolymerDir(parent_dir=PosixPath('Polymers/polyvinylchloride'), mol_name='polyvinylchloride_solv_water', solvent=Solvent(name='water', formula='H2O', smarts='[#1:1]-[#8:3]-[#1:2]', density=Quantity(value=0.997, unit=gram/(centimeter**3)), MW=Quantity(value=18.015, unit=gram/mole), charges={'1': 0.417, '2': 0.417, '3': -0.834}, structure_file=PosixPath('/home/timber/Documents/Python/openff-workspace/polymer_workspace/polymer_utils/solvation/solvents/WATER_TIP3P/water.pdb'), forcefield_file=PosixPath('/home/timber/Doc

## Charge and sim loop V2

In [4]:
# DEFINE TARGET MOLECULES AND FORCEFIELD
# sample_mols = valid_mols
sample_mols = ['polyvinylchloride_solv_water']#, 'naturalrubber_solv_water', ]
main_ff_xml = CORE_PATH/'force_fields'/'openff_unconstrained-2.0.0.offxml'
averaging_charge_method  = 'ABE10_exact' # 'Espaloma_AM1BCC'
sim_charge_method = 'ABE10_averaged'

# CHARGING / SIM LOOP BEHAVIOR
overwrite_ff_xml     = True # regenerate the force field XML even if one is already registered
overwrite_chg_json   = True # regenerate the charged monomer JSON even if one is already registered
distrib_mono_charges = True # forwarded as-is to charging.averaging.get_averaged_charges

run_sims = True  # when False, the loop only performs charging (see action_str below)
strict   = True  # forwarded as-is to PolymerDir.charge_and_save_molecule
verbose  = False # forwarded as-is to PolymerDir.charge_and_save_molecule

# SIMULATION PARAMETERS 
temperature = 300 * kelvin
friction_coeff = 1/picosecond

sim_time = 0.001 * nanosecond  #5 * nanosecond 
timestep = 1 * femtosecond
num_samples = 100 # 2_000

# AUXILIARY PRE-FLIGHT CALCULATIONS
sample_dirs = {
    mol_name : mgr.mol_dirs[mol_name] # TOSELF : deliberately not using .get() so this raises an easier-to-debug KeyError
        for mol_name in sample_mols
}
action_str = f'Charging{" & simulation" if run_sims else ""}'

if averaging_charge_method == 'ABE10_averaged':
    raise ValueError('Charge averaging must be performed over a non-averaged (i.e. pure) set of charges')

num_steps   = round(sim_time / timestep)
# Clamp to at least one step between samples: when more samples are requested than steps are run
# (e.g. num_samples=2_000 with the 1000 steps above) the rounded quotient would otherwise be 0,
# which is not a meaningful recording interval
record_freq = max(1, round(num_steps / num_samples))
num_mols = len(sample_dirs)
print(num_steps, record_freq)

1000 10


In [5]:
# BEGIN CHARGING / SIM LOOP - Perform charge averaging on all target molecules which don't already have averaged LCs; Load forcefield for those which already do 
# Three log handlers are nested here (whole battery > single polymer > single simulation). Each one is
# detached in a `finally` clause so that a skipped molecule (`continue`) or an unexpected exception cannot
# leave a stale handler attached to the loggers in the live kernel.
main_logger = logging.getLogger(__name__)
loggers = [main_logger, polylogger] #, chg_logger]
main_log_handler = config_mlf_handler(mgr.log_dir/f'Polymer_battery_{general.timestamp_now()}.log', loggers, writemode='a')

try:
    main_logger.info(f'Beginning {action_str} loop...\n')
    for i, (mol_name, mol_dir) in enumerate(sample_dirs.items()):

        # 0) LOAD MOLECULE AND TOPOLOGY, ATTEMPT TO APPLY LIBRARY CHARGES
        start_time = datetime.now()
        main_logger.info(f'Current molecule: "{mol_name}" ({i + 1}/{num_mols})') # +1 converts to more human-readable 1-index for step count
        polymer_log_handler = config_mlf_handler(mol_dir.logs/f'{general.timestamp_now()}.log', loggers, writemode='w') # NOTE : order matters, initial main logger call above should not record to local polymer log
        try:
            if not mol_dir.has_monomer_data:
                raise FileNotFoundError(f'No monomer JSONs found for {mol_name}') # the files are missing, not already present

            # 1) ENSURING AM1-BCC-CHARGED (UNAVERAGED) SDF FILES EXIST - WILL RECHARGE AND REGENERATE IF NONE EXIST
            try:
                cmols = {}
                for chg_method in ('Espaloma_AM1BCC', 'ABE10_exact'):
                    if all(chg_method in reg_dict for reg_dict in (mol_dir.charges, mol_dir.structure_files_chgd)): # if charges and charge Molecule SDFs already exist for the current method
                        main_logger.info(f'(1-precheck) Found existing pure charged molecule for {chg_method}')
                        cmol = charging.application.load_matched_charged_molecule(mol_dir.structure_files_chgd[chg_method])
                    else:
                        main_logger.warning(f'(1-needs gen) No existing pure molecule charges found, recharging via {chg_method}')
                        chgr = charging.application.CHARGER_REGISTRY[chg_method]()
                        cmol, sdf_path = mol_dir.charge_and_save_molecule(chgr, strict=strict, verbose=verbose, chgd_monomers=False, topo_only=True)

                    cmols[chg_method] = cmol
            except ConformerGenerationError:
                main_logger.error('Could not successfully generate conformers\n')
                continue # skip this molecule; the enclosing `finally` still detaches its log handler
            main_logger.info('(1) Acquired all pure charged molecules')

            # 2) CREATE JSON AND SDF WITH AVERAGED CHARGES IF ONE DOES NOT ALREADY EXIST
            avg_method = charging.application.ABE10AverageCharger.TAG
            if all(avg_method in reg_dict for reg_dict in (mol_dir.charges, mol_dir.structure_files_chgd)): # if charges and charge Molecule SDFs already exist for the current method
                main_logger.info('(2-precheck) Found existing monomer-averaged charged molecule')
                cmol_avgd = charging.application.load_matched_charged_molecule(mol_dir.structure_files_chgd[avg_method])
            else:
                main_logger.warning('(2.1-needs gen) No existing monomer-averaged molecule charges found, re-averaging')

                main_logger.info(f'Averaging charges over {mol_dir.mol_name} residues')
                cmol = cmols[averaging_charge_method]
                avgd_res, atom_id_mapping = charging.averaging.get_averaged_charges(cmol, monomer_data=mol_dir.monomer_data, distrib_mono_charges=distrib_mono_charges)
                residue_charges = {res.residue_name : res.charges for res in avgd_res} # loop variable named distinctly from the collection it iterates

                main_logger.info('Generating charged SDF for monomer-averaged charges')
                avg_chgr = charging.application.ABE10AverageCharger()
                avg_chgr.set_residue_charges(residue_charges)
                cmol_avgd, sdf_path = mol_dir.charge_and_save_molecule(avg_chgr, strict=strict, verbose=verbose, chgd_monomers=False, topo_only=True)
                main_logger.info('Monomer-averaged charging completed')

                if (mol_dir.monomer_file_chgd is None) or overwrite_chg_json: # can only reach this branch if a json is present but isn't identified as charged within the PolymerDir
                    main_logger.warning('(2.2-needs gen) Generating new charged JSON monomer file')
                    mol_dir.create_charged_monomer_file(residue_charges)

            cmols[avg_method] = cmol_avgd
            monomer_data = mol_dir.monomer_data_charged # double check that the charged data is in fact loadable
            main_logger.info('(2) Acquired charge-averaged monomer JSON')

            # 3) CREATE FORCE FIELD XML WITH MONOMER-BASED LIBRARY CHARGE ENTRIES
            if (mol_dir.ff_file is None) or overwrite_ff_xml: # can only reach if a charged monomer json already exists
                main_logger.warning('(3-needs gen) Generating new Force Field XML with Library Charges')
                forcefield = mol_dir.create_FF_file(xml_src=main_ff_xml)
            main_logger.info('(3) Acquired Force Field file with Library Charges')

            # 4) RUN OpenMM SIMULATION FOR TARGET MOLECULE
            if run_sims:
                main_logger.info('(4) Preparing simulation')
                output_folder = mol_dir.make_res_dir()
                sim_log_handler = config_mlf_handler(output_folder/f'{mol_dir.mol_name} simulation.log', loggers)
                try:
                    main_logger.info('Loading Topology')
                    off_topology = mol_dir.off_topology
                    box_vectors = mol_dir.box_vectors
                    if box_vectors is not None: # unset (NoneType) box vectors mean a non-periodic simulation, so there is nothing to convert or assign
                        off_topology.box_vectors = box_vectors.in_units_of(nanometer) # set box vector to allow for periodic simulation

                    main_logger.info(f'Loading Molecule (assigning partial charges according to {sim_charge_method})')
                    mol_dir.assign_charges_by_lookup(sim_charge_method)
                    cmol = mol_dir.offmol # caches structure for load on subsequent sessions

                    main_logger.info('Loading Force Field')
                    forcefield = ForceField(mol_dir.ff_file, allow_cosmetic_attributes=True)

                    main_logger.info('Creating Simulation from Interchange')
                    interchange = Interchange.from_smirnoff(force_field=forcefield, topology=off_topology, charge_from_molecules=[cmol]) # generate Interchange with new library charges prior to writing to file
                    integrator  = LangevinMiddleIntegrator(temperature, friction_coeff, timestep)
                    sim = polysim.create_simulation(interchange, integrator)

                    main_logger.info(f'Running {sim_time} OpenMM sim at {temperature} for {num_steps} steps')
                    polysim.run_simulation(sim, output_folder=output_folder, output_name=mol_name, num_steps=num_steps, record_freq=record_freq)
                finally:
                    sim_log_handler.remove_from_loggers(*loggers) # detach even if the simulation fails
                # filetree.startfile(output_folder)

            proc_time = str(datetime.now() - start_time)
            main_logger.info(f'Successfully completed actions on {mol_name} in {proc_time}\n')
            # clear_output() # for Jupyter notebooks only, can freely comment this out
        finally:
            polymer_log_handler.remove_from_loggers(*loggers) # runs on normal completion, on `continue`, and on error

    main_logger.info(f'{action_str} loop completed')
finally:
    main_log_handler.remove_from_loggers(*loggers)
filetree.startfile(mgr.log_dir)

INFO:__main__:Beginning Charging & simulation loop...

INFO:__main__:Current molecule: "polyvinylchloride_solv_water" (1/1)
INFO:polymer_utils.representation:Loading OpenFF Topology with monomer graph match
INFO:polymer_utils.representation:Generating pure charges for polyvinylchloride_solv_water via the "Espaloma_AM1BCC" method
INFO:polymer_utils.representation:Successfully assigned charges via Espaloma_AM1BCC
INFO:polymer_utils.representation:Wrote polyvinylchloride_solv_water Molecule with Espaloma_AM1BCC charges to sdf file
INFO:polymer_utils.representation:Loading OpenFF Topology with monomer graph match
INFO:polymer_utils.representation:Generating pure charges for polyvinylchloride_solv_water via the "ABE10_exact" method
Problematic atoms are:
Atom atomic num: 6, name: , idx: 0, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 1, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 1, name: , idx

<Popen: returncode: None args: ['xdg-open', PosixPath('Polymers/Logs')]>