In [1]:
# Custom Imports
from polysaccharide import analysis, filetree, general, logutils, molutils
from polysaccharide import representation as rep

from polysaccharide import LOGGERS_MASTER
from polysaccharide.representation import Polymer, PolymerManager
from polysaccharide.solvation.solvents import WATER_TIP3P
from polysaccharide.molutils.rdmol import rdkdraw
from polysaccharide.charging.application import CHARGER_REGISTRY, ChargingParameters
from polysaccharide.simulation import SimulationPaths, SimulationParameters
from polysaccharide.analysis import plotprops, trajectory

# Generic Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Typing and Subclassing
from typing import Any, Callable, ClassVar, Iterable, Optional, Union
from dataclasses import dataclass, field
from abc import ABC, abstractmethod, abstractproperty
from openmm.unit import Unit, Quantity

# File I/O
from pathlib import Path
import csv, json, pickle
from shutil import copyfile, rmtree
import importlib.resources as impres

# Logging and Shell
import logging
logging.basicConfig(level=logging.INFO)
                            
# Cheminformatics
from rdkit import Chem
from rdkit.Chem import rdmolfiles

# Molecular Dynamics
from openff.interchange import Interchange
from openff.toolkit import ForceField
from openff.toolkit.topology import Topology
from openff.toolkit.topology.molecule import Molecule, Atom
from openff.toolkit.typing.engines.smirnoff.parameters import LibraryChargeHandler

from openff.units import unit
from openmm.unit import picosecond, femtosecond, nanosecond # time
from openmm.unit import nanometer, angstrom # length
from openmm.unit import kelvin, atmosphere # misc

# Static Paths
# NOTE : all three are relative paths, so they resolve against the kernel's current working directory
RESOURCE_PATH = Path('resources') # root of the charging / simulation parameter templates read further down
COLL_PATH = Path('Collections') # parent directory under which the managed collection is placed
# COMPAT_PDB_PATH = Path('compatible_pdbs')
COMPAT_PDB_PATH = Path('compatible_pdbs_updated') # root of the source polymer sets (a set's structure and monomer directories are derived from it)



## Configuring and (re)loading polymers, setting solvents, checking validity

In [2]:
# Destructive toggles - each one is acted on with `really=True` below, so leave them False unless a rebuild is intended
reset      = False #True
purge_sims = False #True
purge_logs = False #True

# poly_source_path = COMPAT_PDB_PATH / 'simple_polymers'
# poly_source_path = COMPAT_PDB_PATH / 'water_soluble_reduced'
poly_source_path = COMPAT_PDB_PATH / 'water_soluble_large'
# solv_template    = RESOURCE_PATH/'inp_templates'/'solv_polymer_template_box.inp'
# NOTE : importlib.resources.path() returns a context manager rather than a path, so its result cannot be handed on as `template_path`;
# files() yields a path-like object directly (a regular pathlib.Path when the package lives on the filesystem)
solv_template = impres.files('resources.inp_templates') / 'solv_polymer_template_box.inp'
desired_solvents = (WATER_TIP3P,) # (None,)
exclusion = 1.0*nanometer

# Define derived paths and create manager
collection_path  = COLL_PATH / poly_source_path.name
structure_path   = poly_source_path / f'{poly_source_path.name}_structures'
monomer_path     = poly_source_path / f'{poly_source_path.name}_monomers'

mgr = PolymerManager(collection_path)

# Perform manager setup / purge actions
if purge_logs: # NOTE : must be done BEFORE log FileHandler is created, as this will destroy its output as well
    mgr.purge_logs(really=True)

creation_logger = logging.getLogger('polymer_setup')
loggers = [creation_logger, *LOGGERS_MASTER]

with logutils.ProcessLogHandler(filedir=mgr.log_dir, loggers=loggers, proc_name=f'Setup of {mgr.collection_dir.name}', timestamp=True):
    if reset:
        mgr.purge_collection(really=True, purge_logs=False) # Explicitly DON'T purge logs here (will be done prior to entering log loop)

    if purge_sims:
        mgr.purge_sims(really=True)

    if not mgr.polymers: # will be empty if not yet instantiated or if reset prior
        # populate first, then solvate: solvation operates on the collection that was just populated
        mgr.populate_collection(struct_dir=structure_path, monomer_dir=monomer_path)
        mgr.solvate_collection(desired_solvents, template_path=solv_template, exclusion=exclusion)

INFO:8766623464430:Setup of water_soluble_large completed in 0:00:00.000012



## Charge and sim loop V2

In [3]:
# Selecting subset of molecules which is suitable for ABE10 charging and subsequent simulation
HARD_POLYMERS = ['vulcanizedrubber', 'polyphenylenesulfone', 'polyethylene', 'polyphenyleneI'] # pathological or otherwise difficult-to-run polymers that I've encountered
EASY_POLYMERS = ['polyvinylchloride_solv_water', 'polyethylmethacrylate_solv_water'] + ['naturalrubber_solv_water', 'PEO_PLGA_solv_water'] # the quickest-to-charge and -to-simulate polymers in the simple_polymers sample set

# NOTE : this rebinds the `desired_solvents` name that the setup cell used for solvation
desired_solvents = (None,)
# NOTE(review) : despite its name, `whitelisted` is built from membership in HARD_POLYMERS with inclusive=False -
# presumably this EXCLUDES the hard polymers; confirm against rep.filter_factory_by_attr
whitelisted  = rep.filter_factory_by_attr(attr_name='base_mol_name', condition=lambda name : name in HARD_POLYMERS, inclusive=False)
# the lambda looks up `desired_solvents` each time it is called, so rebinding that name later changes what this filter accepts
good_solvent = rep.filter_factory_by_attr(attr_name='solvent', condition=lambda solv : solv in desired_solvents)

# filters : tuple[rep.MolFilter, ...] = (good_solvent, rep.has_monomers, rep.is_AM1_sized)
filters : tuple[rep.MolFilter, ...] = (whitelisted, good_solvent, rep.has_monomers, rep.is_AM1_sized)
# filters : tuple[rep.MolFilter, ...] = (whitelisted, good_solvent, rep.has_monomers) # for large molecules
valid_mols = mgr.filtered_by(filters)

# display to check that loading has gone as planned
print(valid_mols.keys())

dict_keys(['peg_modified'])


In [6]:
# DEFINE TARGET MOLECULES AND FORCEFIELD
# sample_mols = EASY_POLYMERS
# sample_mols = valid_mols
# sample_mols = ['polyvinylchloride_solv_water']
sample_mols = ['pnipam_modified_solv_water']

# the lambda looks up `sample_mols` each time it is called; `to_sample` is also handed to the run_actions decorator in the next cell
to_sample = rep.filter_factory_by_attr(attr_name='mol_name', condition=lambda name : name in sample_mols)
sample_dirs = mgr.filtered_by(to_sample)

# CHARGING PARAMETERS
perform_charging = False #True
chg_params_path = RESOURCE_PATH / 'chg_templates' / 'standard_chg_params.json'
# chg_params_path = RESOURCE_PATH / 'chg_templates' / 'long_chain_chg_params.json'
# chg_params_path = RESOURCE_PATH / 'chg_templates' / 'standard_chg_params_rewrite.json'
chg_params = ChargingParameters.from_file(chg_params_path)

# SIMULATION PARAMETERS
run_sims = True
# sim_param_path = RESOURCE_PATH / 'sim_templates' / 'debug_sim.json'
# sim_param_path = RESOURCE_PATH / 'sim_templates' / 'debug_sim_dcd.json'
sim_param_path = RESOURCE_PATH / 'sim_templates' / 'debug_sim_dcd_ABE_avg.json'
# sim_param_path = RESOURCE_PATH / 'sim_templates' / 'half_standard_sim.json'
# sim_param_path = RESOURCE_PATH / 'sim_templates' / 'standard_sim.json'
sim_params = SimulationParameters.from_file(sim_param_path)

# ANALYSIS PARAMETERS
perform_analysis = True
traj_sample_interval = 1 # interval between consecutive time points for trajectory analysis (needed for longer sims)

# PRE-FLIGHT CALCULATIONS
# the 'ABE10_averaged' charges are computed from the set named by averaging_charge_method, so that set cannot itself be the averaged one
if chg_params.averaging_charge_method == 'ABE10_averaged':
    raise ValueError('Charge averaging must be performed over a non-averaged (i.e. pure) set of charges')

# step label -> whether that step is enabled; the insertion order here fixes the order of the labels in action_str
step_names = {
    'charging' : perform_charging,
    'simulation' : run_sims,
    'analysis' : perform_analysis
}
# human-readable summary of the enabled steps (e.g. 'simulation & analysis'); reused as the process name when logging the run
action_str = ' & '.join([
    step_name
        for step_name, perform_step in step_names.items()
            if perform_step
])

# echo the loaded configuration so that it can be checked before launching the run
print(chg_params)
print(sim_params)
print(sim_params.num_steps, sim_params.record_freq, action_str)
print(sample_dirs.keys())

ChargingParameters(overwrite_ff_xml=False, overwrite_chg_mono=False, base_ff_path=PosixPath('resources/force_fields/openff_constrained-2.0.0.offxml'), charge_methods=['ABE10_exact', 'Espaloma_AM1BCC', 'ABE10_averaged'], averaging_charge_method='ABE10_exact')
SimulationParameters(total_time=Quantity(value=1, unit=picosecond), num_samples=500, charge_method='ABE10_averaged', report_to_pdb=False, reported_state_data={'step': True, 'time': True, 'potentialEnergy': True, 'kineticEnergy': True, 'totalEnergy': True, 'temperature': True, 'volume': True, 'density': True, 'progress': False, 'remainingTime': False, 'speed': True, 'elapsedTime': True}, timestep=Quantity(value=1, unit=femtosecond), temperature=Quantity(value=300, unit=kelvin), pressure=Quantity(value=1, unit=atmosphere), friction_coeff=Quantity(value=1, unit=/picosecond), barostat_freq=1)
1000 2 simulation & analysis
dict_keys(['pnipam_modified_solv_water'])


In [7]:
# Functions for performing primary charging, sim, and analysis actions
def obtain_partial_charges(polymer : Polymer, main_logger : logging.Logger, chg_params : ChargingParameters) -> None:
    '''Ensure a Polymer has charges for every configured charging method, and a force field file

    Parameters
    ----------
    polymer : Polymer
        Polymer to charge; must already have monomer data
    main_logger : logging.Logger
        Logger which receives progress messages
    chg_params : ChargingParameters
        Supplies the charging methods (processed in the order listed), the method whose charges are averaged,
        the base force field path, and the overwrite flags

    Raises
    ------
    FileNotFoundError
        If the polymer has no monomer data
    '''
    # 0) FAIL FAST IF MONOMER INFORMATION IS MISSING
    if not polymer.has_monomer_data:
        raise FileNotFoundError(f'No monomer JSONs found for {polymer.mol_name}') # the files are absent, hence "not found" rather than "exists"

    # 1) ENSURING CHARGES AND RELATED FILES FOR ALL CHARGING METHODS EXIST
    for chg_method in chg_params.charge_methods:
        chgr = CHARGER_REGISTRY[chg_method]()
        if chg_method == 'ABE10_averaged': # !NOTE! - critical that this not be the first entry of chg_params.charge_methods (has nothing to average over from scratch)
            residue_charges = polymer.residue_charges(
                averaging_charge_method=chg_params.averaging_charge_method,
                overwrite_charged_monomer_file=chg_params.overwrite_chg_mono
            )
            chgr.set_residue_charges(residue_charges)
        polymer.assert_charges_for(chgr, return_cmol=False)

    # 2) ENSURING A FORCE FIELD FILE EXISTS
    if (polymer.ff_file is None) or chg_params.overwrite_ff_xml: # can only reach if a charged monomer json already exists
        main_logger.info('Acquiring Force Field file with Library Charges')
        polymer.create_FF_file(xml_src=chg_params.base_ff_path, return_lib_chgs=True) # return value is not needed here; call arguments kept as before

def perform_prop_analysis(polymer : Polymer, main_logger : logging.Logger, traj_sample_interval : int=1) -> None:
    '''Analyze the trajectory of a Polymer's most recent simulation and export the results as CSV files

    Writes "rdfs.csv" (pairwise radial distribution functions) and "time_series.csv" (time series of polymer properties)
    into the newest simulation directory, then records both locations in that simulation's paths file

    Parameters
    ----------
    polymer : Polymer
        Polymer whose simulations are inspected; only the directory equal to polymer.newest_sim_dir is analyzed
    main_logger : logging.Logger
        Logger which receives progress messages
    traj_sample_interval : int, default 1
        Stride used both when loading the trajectory and when selecting the matching time points
    '''
    for sim_dir, sim_paths_file in polymer.simulation_paths.items():
        if sim_dir != polymer.newest_sim_dir: # only the most recent simulation is analyzed; skip the rest BEFORE reading any of their files
            continue

        # acquire files for all information
        sim_paths, sim_params = polymer.load_sim_paths_and_params(sim_dir)
        main_logger.info(f'Found most recent trajectory {sim_paths.trajectory}')
        traj = trajectory.load_traj(sim_paths.trajectory, topo_path=polymer.structure_file, sample_interval=traj_sample_interval, remove_solvent=True)

        # save RDF data
        main_logger.info('Calculating pairwise radial distribution functions')
        rdf_dataframe = trajectory.acquire_rdfs(traj, max_rad=1.0*nanometer)
        rdf_save_path = sim_dir/'rdfs.csv'
        sim_paths.spatial_data = rdf_save_path
        rdf_dataframe.to_csv(rdf_save_path, index=False)

        # save property data
        main_logger.info('Calculating polymer shape properties')
        prop_dataframe = trajectory.acquire_time_props(traj, time_points=sim_params.time_points[::traj_sample_interval]) # same stride as the loaded trajectory
        prop_save_path = sim_dir/'time_series.csv'
        sim_paths.time_data = prop_save_path
        prop_dataframe.to_csv(prop_save_path, index=False)

        sim_paths.to_file(sim_paths_file) # update references to analyzed data files in path file
        main_logger.info('Successfully exported trajectory analysis data')

# MAIN WORKFLOW : LOGGER SETUP, WRAPPED PER-POLYMER FUNCTION, AND LAUNCH
main_logger = logging.getLogger(__name__)
loggers = [main_logger] + list(LOGGERS_MASTER) # this notebook's logger plus the loggers from all modules which produce logging output

@mgr.logging_wrapper(loggers=loggers, proc_name=action_str.capitalize(), filters=to_sample)
def run_actions(polymer : Polymer, main_logger) -> None:
    '''Run each enabled stage (charging, simulation, analysis) on a single Polymer, in that order

    NOTE(review) : the call below passes only the logger, so `polymer` is presumably supplied by
    mgr.logging_wrapper for each polymer accepted by `filters` - confirm against PolymerManager
    '''
    stages = (
        (perform_charging, '(1) Acquiring partial charges', lambda : obtain_partial_charges(polymer, main_logger, chg_params)),
        (run_sims        , '(2) Preparing simulation'     , lambda : polymer.run_simulation(sim_params, ensemble='NPT')),
        (perform_analysis, '(3) Analyzing trajectories'   , lambda : perform_prop_analysis(polymer, main_logger, traj_sample_interval)),
    )

    for is_enabled, banner, stage_action in stages:
        if not is_enabled:
            continue

        main_logger.info(banner)
        stage_action()
        main_logger.info('') # blank entry keeps consecutive stages visually separated in the log

run_actions(main_logger)

INFO:polysaccharide.representation:Current molecule: "pnipam_modified_solv_water" (1/1)
INFO:__main__:(2) Preparing simulation
INFO:polysaccharide.representation:Created new Simulation directory "05-11-2023_at_14-33-51_PM"
INFO:polysaccharide.representation:Loading OpenFF Topology WITH monomer graph match
INFO:polysaccharide.representation:Assigning charges from ABE10_averaged to pnipam_modified_solv_water's OpenFF Molecule
INFO:polysaccharide.representation:Creating SMIRNOFF Interchange for "pnipam_modified_solv_water"
INFO:polysaccharide.simulation:Creating OpenMM Simulation from Interchange
INFO:polysaccharide.simulation:Created NPT Simulation with Langevin Thermostat and Monte Carlo Barostat
INFO:polysaccharide.simulation:Created simulation files at Collections/water_soluble_large/pnipam_modified_solv_water/MD/05-11-2023_at_14-33-51_PM/pnipam_modified_solv_water_sim_paths.json
INFO:polysaccharide.simulation:Saving simulation checkpoint at Collections/water_soluble_large/pnipam_modi