In [None]:
# Custom Imports
from polymer_utils import analysis, general, filetree
from polymer_utils import simulation as polysim

from polymer_utils import charging
from polymer_utils.charging.types import AtomIDMap, ResidueChargeMap
from polymer_utils.charging.residues import ChargedResidue

from polymer_utils.representation import Polymer, PolymerManager
from polymer_utils.representation import LOGGER as polylogger

from polymer_utils.molutils import building
from polymer_utils.molutils.rdmol import rdcompare, fragment

from polymer_utils.solvation.solvents import WATER_TIP3P
from polymer_utils.analysis import plotprops, trajectory
from polymer_utils.logutils import config_mlf_handler, MultiStreamFileHandler
from polymer_utils.graphics import rdkdraw

# General Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
from PIL.Image import Image # for typing
from datetime import datetime
from operator import mul, xor

# Typing and Subclassing
from typing import Any, Callable, ClassVar, Iterable, Optional, Union
from dataclasses import dataclass, field
from abc import ABC, abstractmethod, abstractproperty
from openmm.unit import Unit, Quantity

# File I/O
from pathlib import Path
import csv, json, pickle
from shutil import copyfile, rmtree, move

# Logging and Shell
from IPython.display import clear_output
import subprocess
import logging
# logging.basicConfig(level=logging.DEBUG)
logging.basicConfig(level=logging.INFO)
                            
# Cheminformatics
from rdkit import Chem
from rdkit.Chem import rdmolfiles

# Molecular Dynamics
from openff.units import unit
from openff.interchange import Interchange

from openff.toolkit import ForceField
from openff.toolkit.topology import Topology
from openff.toolkit.topology.molecule import Molecule, Atom
from openff.toolkit.typing.engines.smirnoff.parameters import LibraryChargeHandler

from openmm.openmm import MonteCarloBarostat
from openff.toolkit.utils.exceptions import ConformerGenerationError
from openff.toolkit.utils.toolkits import RDKitToolkitWrapper, OpenEyeToolkitWrapper, AmberToolsToolkitWrapper

from openmm import LangevinMiddleIntegrator, Context
from openmm.vec3 import Vec3
from openmm.app import Simulation, PDBReporter, StateDataReporter

from openmm.unit import picosecond, femtosecond, nanosecond # time
from openmm.unit import nanometer, angstrom # length
from openmm.unit import kelvin, atmosphere # misc

# Static Paths (all relative to the notebook's working directory)
RESOURCE_PATH = Path('resources') # auxiliary resources (e.g. input templates)
COLL_PATH = Path('Collections') # parent directory of PolymerManager collections
COMPAT_PDB_PATH = Path('compatible_pdbs_updated') # source directory of polymer structure/monomer files

## Loading Polymer Manager

In [None]:
# Behavior flags for this cell; each triggers a destructive action on the collection when set to True
reset      = False # purge the entire collection (except logs) before (re)populating
purge_sims = False # delete simulation results in the collection
purge_logs = False # delete logs in the collection

# Static Paths
RESOURCE_PATH = Path('resources')
COLL_PATH = Path('Collections')
COMPAT_PDB_PATH = Path('compatible_pdbs_updated')
# COMPAT_PDB_PATH = Path('compatible_pdbs')

# Paths to structure files
poly_source_path = COMPAT_PDB_PATH / 'water_soluble_polymers'
# poly_source_path = COMPAT_PDB_PATH / 'simple_polymers_updated'
# poly_source_path = COMPAT_PDB_PATH / 'water_soluble_polymers'
solv_template    = RESOURCE_PATH/'inp_templates'/'solv_polymer_template_box.inp' # template passed to solvate_collection below
desired_solvents = (None,) # (WATER_TIP3P,) # 
exclusion = 1.0*nanometer # passed to solvate_collection as "exclusion"

# Define derived paths and create manager
# collection_path  = COLL_PATH / poly_source_path.name
collection_path  = COLL_PATH / f'{poly_source_path.name}_new'
structure_path   = poly_source_path / f'{poly_source_path.name}_structures'
monomer_path     = poly_source_path / f'{poly_source_path.name}_monomers'

mgr = PolymerManager(collection_path)

# Perform manager setup / purge actions
if purge_logs: # NOTE : must be done BEFORE log FileHandler is created, as this will destroy its output as well
    mgr.purge_logs(really=True)

# dedicated logger for this setup cell; its output goes to a timestamped file in the manager's log directory
creation_logger = logging.getLogger('polymer_setup')
logfile_path = mgr.log_dir/f'Setup_{general.timestamp_now()}.log'

# both the setup logger and the polymer representation logger are attached to the same log file for the duration of the block
with MultiStreamFileHandler(logfile_path, loggers=[creation_logger, polylogger], proc_name=f'Creation of collection "{mgr.collection_dir.name}"'):
    if reset:
        mgr.purge_collection(really=True, purge_logs=False) # Explicitly DON'T purge logs here (will be done prior to entering log loop)

    if purge_sims:
        mgr.purge_sims(really=True)

    if not mgr.polymers: # will be empty if not yet instantiated or if reset prior
        mgr.populate_collection(struct_dir=structure_path, monomer_dir=monomer_path)
        mgr.solvate_collection(desired_solvents, template_path=solv_template, exclusion=exclusion)

## Load trajectory and calculate/plot properties

In [None]:
POLY_PATH = Path('Collections/simple_polymers_updated')
SAMPLE_INTERVAL = 5 # stride between trajectory frames that get read (1 reads every frame, 2 every other one, and so on)

mgr = PolymerManager(POLY_PATH)
pdir = mgr.polymers['naturalrubber_solv_water']

# only proceed when the polymer actually has simulation output; use the most recent run
if pdir.chrono_sims:
    sim_dir_to_use = pdir.newest_sim_dir
    print(sim_dir_to_use)
else:
    raise ValueError(f'No Simulation results found for {pdir.mol_name}')

In [None]:
def _first_match(directory : Path, pattern : str) -> Path:
    '''Return the first path in "directory" matching the glob "pattern"
    Raises FileNotFoundError (instead of an uninformative StopIteration) when nothing matches'''
    found = next(directory.glob(pattern), None)
    if found is None:
        raise FileNotFoundError(f'No file matching "{pattern}" found in {directory}')
    return found

# look up and load in relevant simulation data from the chosen directory
sim_params_path = _first_match(sim_dir_to_use, '*_parameters.json')
state_data_path = _first_match(sim_dir_to_use, '*_data.csv')
# NOTE(review): SimulationParameters is not imported in any visible cell of this notebook - confirm where it is defined
sim_params = SimulationParameters.from_file(sim_params_path)
state_data = pd.read_csv(state_data_path)

# load simulation frames into an analyzable trajectory
traj_path = _first_match(sim_dir_to_use, '*_traj.pdb')
traj = trajectory.load_traj(traj_path, topo_path=pdir.structure_file, sample_interval=SAMPLE_INTERVAL, remove_solvent=True)
topo = traj.topology


In [None]:

# save and plot RDF data
rdf_dataframe = trajectory.acquire_rdfs(traj, max_rad=1.0*nanometer)
rdf_dataframe.to_csv(sim_dir_to_use/'rdfs.csv')
rdf_fig, rdf_ax = plotprops.plot_rdfs(rdf_dataframe, scale=15.0)
rdf_fig.suptitle(f'Pairwise Radial Distribution Functions - {pdir.mol_name}')
rdf_fig.savefig(sim_dir_to_use/'RDFs.png', bbox_inches='tight')
plt.close(rdf_fig) # close this specific figure rather than whichever one pyplot considers current

# save and plot property data
# time points are strided by SAMPLE_INTERVAL so they line up with the frames that were actually loaded
prop_dataframe = trajectory.acquire_time_props(traj, properties=analysis.DEFAULT_PROPS, time_points=sim_params.time_points[::SAMPLE_INTERVAL]) 
prop_dataframe.to_csv(sim_dir_to_use/'time_series.csv')
prop_fig, prop_ax = plotprops.plot_time_props(prop_dataframe, scale=18.0)
prop_fig.suptitle(f'Polymer Shape Properties - {pdir.mol_name}')
prop_fig.savefig(sim_dir_to_use/'shape_props.png', bbox_inches='tight')
plt.close(prop_fig)

filetree.startfile(sim_dir_to_use)

## Building reduced structures for Colina Polymers

In [None]:
chain_lim = 180 # passed to build_linear_polymer_limited as max_chain_len

# load source polymers
COMPAT_PDB_PATH = Path('compatible_pdbs_updated')
poly_source_path = COMPAT_PDB_PATH / 'water_soluble_polymers'
collection_path  = COLL_PATH / 'water_soluble_small'

structure_path   = poly_source_path / f'{poly_source_path.name}_structures'
monomer_path     = poly_source_path / f'{poly_source_path.name}_monomers'

mgr = PolymerManager(collection_path)
if not mgr.polymers: 
    mgr.populate_collection(struct_dir=structure_path, monomer_dir=monomer_path) # ensure originals have been loaded

# create output dirs
reduced_dir = COMPAT_PDB_PATH / 'water_soluble_small'
reduced_structures = reduced_dir / f'{reduced_dir.name}_structures'
reduced_monomers   = reduced_dir / f'{reduced_dir.name}_monomers'

reduced_dir.mkdir(       exist_ok=True)
reduced_monomers.mkdir(  exist_ok=True)
reduced_structures.mkdir(exist_ok=True)

# generate new structures
for pdir in mgr.polymers_list:
    print(pdir.mol_name)
    monomers = dict(pdir.monomer_data['monomers']) # work on a copy so the polymer's own monomer data is never mutated

    # decided per polymer: previously the flag was set once outside the loop, so it stayed True for every polymer processed after paam_modified
    reverse = False
    if pdir.mol_name == 'paam_modified':
        monomers.pop('paam_SPECIAL_TERM')
        reverse = True

    chain = building.build_linear_polymer_limited(monomers, max_chain_len=chain_lim, reverse_term_labels=reverse)
    chain.save(str(reduced_structures/f'{pdir.mol_name}.pdb'), overwrite=True)
    copyfile(pdir.monomer_file, reduced_monomers/f'{pdir.mol_name}.json')

## Organizing pdb structure and json monomer files and taking inventories of files present

In [None]:
from shutil import move

PARENT = Path('compatible_pdbs_updated') # dump directory whose subfolders get organized
JSON_SRC = PARENT / 'json_files' # folder holding the monomer JSON files (itself located inside the dump directory)

def organize_struct_mono_files(dump_dir : Path, mono_src : Path):
    '''For organizing structure and monomer files in a dump directory

    For every subdirectory <name> of dump_dir (other than mono_src itself):
      * creates <name>/<name>_structures and <name>/<name>_monomers
      * moves each top-level .pdb file into the structures folder
      * moves the same-stem .json file from mono_src (if one exists) into the monomers folder
      * writes <name>/<name>_inventory.csv listing each species and whether its PDB / JSON was present

    Safe to re-run: an existing inventory is left untouched when no new .pdb files are found'''
    for subdir in sorted(dump_dir.iterdir()): # snapshot the listing up front so the order is deterministic
        if not subdir.is_dir() or (subdir == mono_src): # to encompass the possibility that the json dir is in fact inside the dump folder
            continue

        struct_dir = subdir/f'{subdir.name}_structures'
        mono_dir = subdir/f'{subdir.name}_monomers'

        struct_dir.mkdir(exist_ok=True)
        mono_dir.mkdir(exist_ok=True)

        data_rows = []
        for pdb in sorted(subdir.glob('*.pdb')): # materialize before moving: files are relocated out of the directory being listed
            json_path = mono_src/f'{pdb.stem}.json'
            row_dict = {
                'Species' : pdb.stem,
                'PDB present' : True,
                'JSON present' : json_path.exists()
            }

            move(pdb, struct_dir)
            if row_dict['JSON present']:
                move(json_path, mono_dir)
            data_rows.append(row_dict)

        # on a re-run all PDBs have already been moved, so don't clobber a populated inventory with an empty one
        inventory_path = subdir/f'{subdir.name}_inventory.csv'
        if data_rows or not inventory_path.exists():
            inventory = pd.DataFrame(data_rows)
            inventory.to_csv(inventory_path)

# NOTE : this moves files on disk (PDBs into per-folder structure dirs, JSONs out of JSON_SRC)
organize_struct_mono_files(PARENT, JSON_SRC)

## Testing Polymer building and from-monomer calculations

In [None]:
import copy

mol_name = 'peg_modified' 
# mol_name = 'pnipam_modified' 
# mol_name = 'paam_modified'

pdir = mgr.polymers[mol_name]
offmol = pdir.offmol_matched(strict=False, verbose=False)
rdmol = offmol.to_rdkit()

# Label every atom with its matched residue name (or 'unmatched' if it has none)
for i, atom in enumerate(offmol.atoms):
    rdatom = rdmol.GetAtomWithIdx(i)
    rdatom.SetProp('atomNote', atom.metadata.get('residue_name', 'unmatched'))
print('Matched residues : ', set(atom.metadata.get('residue_name') for atom in offmol.atoms))

# Bounding monomer sizes 
max_mono_size = max(Chem.MolFromSmarts(SMARTS).GetNumAtoms() for SMARTS in pdir.monomer_data['monomers'].values())
print(max_mono_size)

# confirming order match with OpenFF
# NOTE : a for/else without a "break" always executes the else branch, so success is tracked explicitly
# to avoid announcing a match right after mismatches have been reported
ids_match = True
for atom in rdmol.GetAtoms():
    if atom.GetSymbol() != offmol.atoms[atom.GetIdx()].symbol:
        print(f'{atom} mismatched')
        ids_match = False
if ids_match:
    print('IDs match with offmol!')

rdkdraw.set_rdkdraw_size(800, 1/1)
rdmol

In [None]:
# draw each monomer of the current polymer alongside its name, atom count, and SMARTS
rdkdraw.set_rdkdraw_size(300, 2/1)
monomer_smarts = pdir.monomer_data['monomers']
for mono_name in monomer_smarts:
    mono_SMARTS = monomer_smarts[mono_name]
    monomer = Chem.MolFromSmarts(mono_SMARTS)
    num_atoms = monomer.GetNumAtoms()
    print(mono_name, num_atoms, mono_SMARTS, num_atoms)
    display(monomer)

In [None]:
# report monomer counts and linear/homopolymer classification for every polymer that has monomer data
for pdir in mgr.polymers_list:
    if not pdir.has_monomer_data:
        continue

    mono = pdir.monomer_data['monomers']
    mono_counts = building.count_middle_and_term_mono(mono)
    is_linear = building.is_linear_polymer(mono)
    is_homo = building.is_homopolymer(mono)
    print(pdir.mol_name, mono_counts, '\n\tLinear : ', is_linear, '\n\tHomo : ', is_homo)

In [None]:
from collections import defaultdict

solvent = None # only polymers whose solvent equals this value are tested
DOP_min, DOP_max = 3, 10 # NOTE(review): range() below excludes DOP_max itself - confirm whether that is intended

# testing that chain length errors (if any) are independent of DOP
res = []
logs = defaultdict(list) # maps outcome ('Success' or the exception type name) to the polymers which produced it
for i, polymer in enumerate(mgr.polymers_list):
    if polymer.solvent == solvent:
        print(polymer.mol_name)
        try:
            mono_structs = dict(polymer.monomer_data['monomers']) # copy, so popping below cannot alter the polymer's own monomer data (keeps the cell re-runnable)
            if polymer.mol_name == 'paam_modified':
                mono_structs.pop('paam_SPECIAL_TERM')

            print(f'{len(mono_structs)} monomers in chain')

            dop_errors = []
            for DOP in range(DOP_min, DOP_max):
                n_atoms_pred = building.estimate_chain_len(mono_structs, DOP=DOP)
                chain = building.build_linear_polymer(mono_structs, DOP=DOP, add_Hs=False)
                n_atoms_real = chain.n_particles
                print(n_atoms_real, n_atoms_pred)
                dop_errors.append(n_atoms_real - n_atoms_pred)
            res.append(dop_errors)
            logs['Success'].append(polymer.mol_name)

        except Exception as e: # deliberately broad: failures are tallied per exception type rather than aborting the survey
            print(e)
            logs[type(e).__name__].append(polymer.mol_name)

res = np.array(res)            
if res.size > 0: # imshow cannot draw an empty array (e.g. when no polymer matched or every build failed)
    plt.imshow(res)
else:
    print('No chain length errors were collected, nothing to plot')

## Testing heatmap drawing

In [None]:
cmap = plt.get_cmap('turbo')
# pdir = mgr.polymers['polythiophene_solv_water']
pdir = mgr.polymers['polyethylmethacrylate_solv_water']

# figure width (inches), height-to-width ratio, and whether to annotate the charge comparison
dim, aspect, annotate = 10, 4/1, False

# draw one charge comparison per available molecule-flattening converter
for cvtr_type in ('InChI', 'SMARTS', 'CXSMARTS'):
    fig, ax = pdir.compare_charges(
        'ABE10_exact',
        'Espaloma_AM1BCC',
        cmap,
        annotate=annotate,
        precision=5,
        converter=cvtr_type
    )
    fig.set_size_inches(dim, dim * aspect)

In [None]:
from rdkit.Chem.Draw import SimilarityMaps


cvtr = 'InChI'

def _load_flattened(charge_method : str):
    '''Load the current polymer's molecule charged with "charge_method"; return it as (OpenFF mol, RDKit mol, flattened RDKit mol)'''
    charged_offmol = pdir.charged_offmol_from_sdf(charge_method)
    charged_rdmol = charged_offmol.to_rdkit()
    return charged_offmol, charged_rdmol, rdcompare.flattened_rdmol(charged_rdmol, converter=cvtr)

offmol1, rdmol1, flatmol1 = _load_flattened('ABE10_exact')
offmol2, rdmol2, flatmol2 = _load_flattened('Espaloma_AM1BCC')

# per-atom partial charge differences between the two charging methods, drawn as a contour map over the molecule
diff = rdcompare.difference_rdmol(flatmol1, flatmol2, prop='PartialCharge', remove_map_nums=True)
deltas = [atom.GetDoubleProp('DeltaPartialCharge') for atom in map(diff.GetAtomWithIdx, range(diff.GetNumAtoms()))]
fig = SimilarityMaps.GetSimilarityMapFromWeights(diff, deltas, colorMap='jet', contourLines=10, alpha=0.3)
plt.savefig('test.png')

In [None]:
# Convert the rendered matplotlib figure into a PIL image
# NOTE : the image size must match the canvas buffer exactly (frombytes raises otherwise), so it is read
# from the canvas instead of being hard-coded
# NOTE(review): tostring_rgb is deprecated in recent matplotlib releases - confirm against the installed version
fig.canvas.draw() # make sure the pixel buffer has been rendered before reading it
img = PIL.Image.frombytes('RGB', fig.canvas.get_width_height(), fig.canvas.tostring_rgb())
display(img)

In [None]:
pdir = mgr.polymers['polyvinylchloride_solv_water']

# same charge comparison drawn twice, once per molecule-flattening converter
(fig1, ax1), (fig2, ax2) = (
    pdir.compare_charges('ABE10_exact', 'Espaloma_AM1BCC', cmap=plt.get_cmap('turbo'), converter=cvtr_name)
        for cvtr_name in ('InChI', 'SMARTS')
)

for charge_ax in (ax1, ax2):
    charge_ax.set_title(pdir.mol_name)

## Generating heatmaps for all completed simulations in the Manager

In [None]:
charge_methods = ('ABE10_exact', 'Espaloma_AM1BCC') # the two charging methods whose partial charges are compared
outdir = Path('pcharge_heatmaps')/mgr.collection_dir.name
outdir.mkdir(parents=True, exist_ok=True) # parents=True : the top-level 'pcharge_heatmaps' folder may not exist yet
cmaps = [
    'seismic',
    'turbo',
    # 'rainbow',
    # 'terrain',
    # 'BrBG',
    # 'cool',
    # 'spring',
    # 'plasma'
]

for mol_name in mgr.all_completed_sims:
    polymer = mgr.polymers[mol_name]
    charged_mols = { # RDKit molecules keyed by the charging method that produced their partial charges
        chg_method : polymer.charged_offmol_from_sdf(chg_method).to_rdkit()
            for chg_method in charge_methods
    }

    # one output subfolder (and one image per molecule) for each colormap
    for cmap_name in cmaps:
        cmap_dir = outdir/cmap_name
        cmap_dir.mkdir(exist_ok=True)
        cmap = plt.get_cmap(cmap_name)

        fig, ax = rdkdraw.compare_chgd_rdmols(*charged_mols.values(), *charged_mols.keys(), cmap=cmap, flatten=True)
        fig.savefig(cmap_dir/f'{mol_name}.png', bbox_inches='tight')
        plt.close(fig) # release this figure explicitly so figures don't accumulate over the loop

## Probing updated monomers (from future - difference was numbered ports)

In [None]:
from collections import defaultdict

# NOTE(review): SIMPLE_POLY_PATH is not defined in any visible cell of this notebook - confirm where it comes from
mp_prior = SIMPLE_POLY_PATH / 'simple_polymers_monomers_prior'
mp_updated = SIMPLE_POLY_PATH / 'simple_polymers_monomers_updated'

# monomers[label][species] -> parsed monomer JSON, with label being either 'prior' or 'updated'
monomers = defaultdict(dict)
pairs = (('prior', mp_prior), ('updated', mp_updated))

for (label, src_dir) in pairs:
    for path in src_dir.glob('**/*.json'):
        monomers[label][path.stem] = json.loads(path.read_text())

# print every species present in both versions whose monomer definitions differ
all_species = set(monomers['prior'].keys()) | set(monomers['updated'].keys())
for species in all_species:
    spec_prior = monomers['prior'].get(species)
    if not spec_prior:
        continue

    spec_updated = monomers['updated'].get(species)
    if not spec_updated:
        continue

    if spec_prior['monomers'] != spec_updated['monomers']:
        print(species, '\n\t', spec_prior['monomers'], '\n\t', spec_updated['monomers'])

In [None]:
targ_mol = 'naturalrubber'

# parse both versions of the target species' monomer SMARTS into RDKit molecules
priors = list(map(Chem.MolFromSmarts, monomers['prior'][targ_mol]['monomers'].values()))
currs  = list(map(Chem.MolFromSmarts, monomers['updated'][targ_mol]['monomers'].values()))

# show each prior monomer directly above its updated counterpart
for prior_rdmol, new_rdmol in zip(priors, currs):
    display(prior_rdmol, new_rdmol)
    print('\n')


## Charge/Sim loop V1 proper

In [None]:
# DEFINE TARGET MOLECULES AND FORCEFIELD
# sample_mols = mols_to_use[:3]
sample_mols = ['polyvinylchloride']
# NOTE(review): CORE_PATH is not defined in any visible cell of this notebook - confirm where it comes from
main_ff_xml = CORE_PATH/'force_fields'/'openff_unconstrained-2.0.0.offxml'

# CHARGING PARAMETERS
toolkit = 'OpenEye Toolkit'
partial_charge_method = 'am1bccelf10'

# CHARGING / SIM LOOP BEHAVIOR
overwrite_ff_xml   = True
overwrite_chg_json = True
distrib_mono_charges = True
run_sims = True
strict = True
verbose = False

# SIMULATION PARAMETERS 
temperature = 300 * kelvin
friction_coeff = 1/picosecond

sim_time = 0.001 * nanosecond #5 * nanosecond 
timestep = 1 * femtosecond
num_samples = 100 #2_000

# AUXILIARY PRE-FLIGHT CALCULATIONS
# fail fast on unknown names: a plain .get() would silently map them to None and only crash later, inside the charge/sim loop
missing_mols = [mol_name for mol_name in sample_mols if mol_name not in mgr.polymers]
if missing_mols:
    raise KeyError(f'No polymers named {missing_mols} found in the collection')

sample_dirs = {
    mol_name : mgr.polymers[mol_name]
        for mol_name in sample_mols
}
action_str = f'Charging{" & simulation" if run_sims else ""}'

num_steps   = round(sim_time / timestep)
record_freq = max(1, round(num_steps / num_samples)) # never allow a zero-step recording interval when num_samples exceeds num_steps
num_mols = len(sample_dirs)
print(num_steps, record_freq)

In [None]:
# logger named after the charging subpackage; used for progress messages by the charging helper functions in this cell
chg_logger = logging.getLogger(charging.__name__)

def create_pickled_cmol(polymer : Polymer, toolkit : str, partial_charge_method : str, strict : bool=True, verbose : bool=False) -> None:
    '''Charge a Polymer's largest matched molecule and cache the result as a pickle

    Charging is always performed (with the given toolkit and partial charge method); the charged molecule is
    pickled into the Polymer's pkl directory, registered as the Polymer's pickle file, and the Polymer is saved to disc'''
    pickle_path = polymer.pkl/f'{polymer.mol_name}.pkl'

    chg_logger.info('Loading topology and molecule via graph match...')
    uncharged_mol = polymer.largest_offmol_matched(strict=strict, verbose=verbose, topo_only=True)

    chg_logger.info(f'Charging {polymer.mol_name} via {toolkit}-{partial_charge_method}...')
    charged_mol = charging.charging.generate_molecule_charges(
        uncharged_mol,
        toolkit=toolkit,
        partial_charge_method=partial_charge_method
    )

    # cache the charged molecule so the expensive AM1 step need not be repeated
    with pickle_path.open('wb') as cache_file:
        pickle.dump(charged_mol, cache_file)

    # register the new pickle with the Polymer and persist that bookkeeping
    polymer.info.pickle_file = pickle_path
    polymer.to_file()

def create_chg_avg_mono(polymer : Polymer, distrib_mono_charges : bool=True) -> tuple[list[ChargedResidue], AtomIDMap]:
    '''Average a pickled charged molecule's charges over its residues and write them to a charged monomer file

    Requires that the Polymer's pickle file already exists
    Returns the averaged residues together with the atom id mapping produced by the averaging'''
    chg_logger.info('Unpickling charged Molecule for charge averaging...')
    with polymer.info.pickle_file.open('rb') as cache_file:
        charged_mol = pickle.load(cache_file) # charged molecule previously written to file

    chg_logger.info(f'Averaging charges over {polymer.mol_name} residues...')
    averaging_result = charging.averaging.get_averaged_charges(
        charged_mol,
        monomer_data=polymer.monomer_data,
        distrib_mono_charges=distrib_mono_charges
    )
    avgs, atom_id_mapping = averaging_result

    # collect the averaged charges under each residue's name
    charges_by_residue = {}
    for avgd_res in avgs:
        charges_by_residue[avgd_res.residue_name] = avgd_res.charges

    chg_logger.info('Writing new charged JSON monomer file...')
    polymer.create_charged_monomer_file(charges_by_residue)

    return avgs, atom_id_mapping

def create_off_xml(polymer : Polymer, xml_src : Path) -> tuple[ForceField, list[LibraryChargeHandler]]:
    '''Write an OFFXML force field for a Polymer carrying Library Charges derived from its charged monomer data

    When the Polymer has an associated solvent, the written force field is merged with the solvent's force field file
    The output path is registered as the Polymer's force field file and the Polymer is saved to disc'''
    ff_path = polymer.FF/f'{polymer.mol_name}.offxml' # destination of the library-charged force field

    chg_logger.info('Writing new force field OFFXML file')
    forcefield, lib_chgs = charging.averaging.write_lib_chgs_from_mono_data(polymer.monomer_data_charged, xml_src, output_path=ff_path)

    solvent = polymer.info.solvent
    if solvent is not None:
        chg_logger.info('Associated solvent found, merging Library-Charged force field with solvent force field...')
        # hybrid force field built from the polymer-specific xml plus the solvent's xml, written back over the same file
        forcefield = ForceField(ff_path, solvent.forcefield_file, allow_cosmetic_attributes=True)
        forcefield.to_file(ff_path)

    # register the force field with the Polymer and persist that bookkeeping
    polymer.info.ff_file = ff_path
    polymer.to_file()

    return forcefield, lib_chgs

In [None]:
# BEGIN CHARGING / SIM LOOP - Perform charge averaging on all target molecules which don't already have averaged LCs; Load forcefield for those which already do 
# NOTE : every log handler is detached in a "finally" block, so a skipped molecule or an unexpected error
# can no longer leave a stale handler attached (which would bleed output into later molecules' / sessions' logs)
main_logger = logging.getLogger(__name__)
loggers = [main_logger, chg_logger]
main_log_handler = config_mlf_handler(mgr.log_dir/f'Polymer_battery_{general.timestamp_now()}.log', loggers, writemode='a')

try:
    main_logger.info(f'Beginning {action_str} loop...\n')
    for i, (mol_name, polymer) in enumerate(sample_dirs.items()):
        # 0) LOAD MOLECULE AND TOPOLOGY, ATTEMPT TO APPLY LIBRARY CHARGES
        start_time = datetime.now()
        main_logger.info(f'Current molecule: "{mol_name}" ({i + 1}/{num_mols})') # +1 converts to more human-readable 1-index for step count
        polymer_log_handler = config_mlf_handler(polymer.logs/f'{general.timestamp_now()}.log', loggers, writemode='w') # NOTE : order matters, initial main logger call above should not record to local polymer log
        try:
            if not polymer.has_monomer_data:
                raise FileNotFoundError(f'No monomer JSONs found for {mol_name}') # the files are missing (previously mis-reported as FileExistsError)
            
            # 1) ENSURING AN AM1-BCC-ELF10-CHARGED MOLECULE EXISTS (IN PICKLE FORM). WILL RECHARGE IF NONE EXISTS
            if (polymer.info.pickle_file is None):
                main_logger.warning('(1-precheck) Generating new pickled charged OpenFF Molecule...')
                try:
                    create_pickled_cmol(polymer, toolkit, partial_charge_method, strict, verbose)
                except ConformerGenerationError:
                    main_logger.error('Could not successfully generate conformers\n')
                    continue # skip to the next molecule (the polymer log handler is still detached by the finally clause)
            main_logger.info('(1) Found pickled charged molecule...')
            
            # 2) CREATE JSON WITH AVERAGED CHARGES IF ONE DOES NOT ALREADY EXIST
            if (polymer.info.monomer_file_chgd is None) or overwrite_chg_json: # can only reach this branch if a json is present but isn't identified as charged within the Polymer
                main_logger.warning('(2-precheck) Generating new charged monomer JSON...')
                create_chg_avg_mono(polymer, distrib_mono_charges=distrib_mono_charges)
            main_logger.info('(2) Found charged monomer JSON...')

            # 3) CREATE FORCE FIELD XML WITH MONOMER-BASED LIBRARY CHARGE ENTRIES
            if (polymer.info.ff_file is None) or overwrite_ff_xml: # can only reach if a charged monomer json already exists
                main_logger.warning('(3-precheck) Generating new Force Field XML with Library Charges...')
                create_off_xml(polymer, xml_src=main_ff_xml)
            main_logger.info('(3) Found Force Field file with Library Charges...')

            # 4) RUN OpenMM SIMULATION FOR TARGET MOLECULE
            if run_sims:
                main_logger.info('(4) Preparing simulation...')
                output_folder = polymer.make_res_dir()
                sim_log_handler = config_mlf_handler(output_folder/f'{polymer.mol_name} simulation.log', loggers)
                try:
                    main_logger.info('Loading Topology...')
                    openff_topology = polymer.openff_topology_matched(strict=strict, verbose=verbose, topo_only=True)
                    box_vectors = polymer.box_vectors
                    if box_vectors is not None: # unset (NoneType) box vectors are left unset on the topology, giving a non-periodic simulation
                        openff_topology.box_vectors = box_vectors.in_units_of(nanometer) # set box vector to allow for periodic simulation

                    main_logger.info('Loading charged Molecule...')
                    with polymer.info.pickle_file.open('rb') as pickle_file: 
                        cmol = pickle.load(pickle_file) # load AM1-charged molecule from file (must exist by this point in loop)

                    main_logger.info('Loading Force Field...')
                    forcefield = ForceField(polymer.info.ff_file, allow_cosmetic_attributes=True)

                    main_logger.info('Creating Simulation from Interchange...')
                    interchange = Interchange.from_smirnoff(force_field=forcefield, topology=openff_topology, charge_from_molecules=[cmol]) # generate Interchange with new library charges prior to writing to file
                    integrator  = LangevinMiddleIntegrator(temperature, friction_coeff, timestep)
                    sim = polysim.create_simulation(interchange, integrator)
                    
                    main_logger.info(f'Running {sim_time} OpenMM sim at {temperature} for {num_steps} steps...')
                    polysim.run_simulation(sim, output_folder=output_folder, output_name=mol_name, num_steps=num_steps, record_freq=record_freq)

                    polymer.to_file() # ensure directory data reflects changes to files
                    # filetree.startfile(output_folder)
                finally:
                    sim_log_handler.remove_from_loggers(*loggers)  
            
            proc_time = str(datetime.now() - start_time)
            main_logger.info(f'Successfully completed actions on {mol_name} in {proc_time}\n')
            clear_output() # for Jupyter notebooks only, can freely comment this out
        finally:
            polymer_log_handler.remove_from_loggers(*loggers)  

    main_logger.info(f'{action_str} loop completed')
finally:
    main_log_handler.remove_from_loggers(*loggers)

## Running Sims v0

In [None]:
polymers = mgr.polymers

desired_solvents = (WATER_TIP3P,) #,None)
hard_polymers = ['vulcanizedrubber', 'polyphenylenesulfone', 'polyethylene'] # pathological or otherwise difficult-to-run polymers that I've encountered
hard_polymers_solv = [
    f'{unsolv_mol}_solv_{solvent.name}'
        for solvent in desired_solvents
            if solvent is not None # None (i.e. "unsolvated") is a permitted entry but has no name; the bare names already cover that case
                for unsolv_mol in hard_polymers
]
hard_polymers.extend(hard_polymers_solv) # ensure solvated names are also included

mols_to_use = [polymer.mol_name
    for polymer in polymers.values()
        if (polymer.mol_name not in hard_polymers)         # 1) are not manually excluded
            and (0 < polymer.n_atoms <= 300)               # 2) are loadable (i.e. non-zero size) but are small enough for AM1BCC (150 is speed limit, 300 is error limit)
            and (polymer.has_monomer_data)                 # 3) have monomer information files
            and (polymer.info.solvent in desired_solvents) # 4) is solvated in the specified solvents (could be None)
]

print(mols_to_use)

In [None]:
# DEFINE TARGET MOLECULES AND FORCEFIELD
sample_mols = mols_to_use
# sample_mols = ['polyvinylchloride_solv_water']
# NOTE(review): CORE_PATH is not defined in any visible cell of this notebook - confirm where it comes from
main_ff_xml = CORE_PATH/'force_fields'/'openff_unconstrained-2.0.0.offxml'
solv_ff_xml = CORE_PATH/'force_fields'/'tip3p.offxml'

# SET CHARGING LOOP BEHAVIOR
prevent_overwrites = False # to deprecate
distrib_mono_charges = True # forwarded to the charge averaging call
run_sims = True # whether to run an OpenMM simulation after charging
verbose = False # forwarded to the topology loading call

# SIMULATION PARAMETERS 
temperature = 300 * kelvin
friction_coeff = 1/picosecond

sim_time = 5 * nanosecond 
timestep = 1 * femtosecond
num_samples = 2_000 # number of recorded samples; used to derive the recording frequency from the total step count

In [None]:
# PRE-FLIGHT CHECKS
# NOTE : .get() maps any name missing from "polymers" to None rather than raising
sample_dirs = {
    mol_name : polymers.get(mol_name)
        for mol_name in sample_mols
}

# derive the total number of integration steps and the number of steps between recorded samples
num_steps   = round(sim_time / timestep)
record_freq = round(num_steps / num_samples)
num_mols = len(sample_dirs)
print(num_steps, record_freq)

main_log_dir = POLY_PATH/'Logs'
main_log_dir.mkdir(exist_ok=True)

# NOTE(review): setup_logger and LOG_FORMATTER are not defined or imported in any visible cell of this notebook - confirm where they come from
master_logger = setup_logger(f'Polymer_battery_{general.timestamp_now()}', outpath=main_log_dir, formatter=LOG_FORMATTER, writemode='w')
master_handler = master_logger.handlers[0] # first handler of the master logger; shared with the per-molecule loggers in the loop

In [None]:
# BEGIN CHARGING / SIM LOOP - Perform charge averaging on all target molecules which don't already have averaged LCs; Load forcefield for those which already do 
# NOTE(review): setup_logger, LOG_FORMATTER and polychg are not defined or imported in any visible cell of this notebook - confirm where they come from
for i, (mol_name, polymer) in enumerate(sample_dirs.items()):
    log_name = mol_name #f'{mol_name}_chg_sim_log' #{general.timestamp_now()}'
    logger = setup_logger(log_name, outpath=polymer.logs, writemode='a', formatter=LOG_FORMATTER)
    logger.addHandler(master_handler) # ensure output is also logged to the master

    try:
        # DEFINING PATHS, CREATING FOLDERS, AND FETCHING FILES
        pdb_path      = polymer.info.structure_file
        lc_path       = polymer.FF/f'new {mol_name} charges.offxml' # path to output library charges to
        pickle_path   = polymer.pkl/f'{mol_name}.pkl'
        output_folder = polymer.make_res_dir()

        # LOAD MOLECULE AND TOPOLOGY, ATTEMPT TO APPLY LIBRARY CHARGES
        logger.info(f'Current molecule: {mol_name} ({i + 1}/{num_mols})') # +1 converts to more human-readable 1-index for step count
        json_path = polymer.monomer_file_ranked
        if json_path is None:
            raise FileNotFoundError(f'No monomer JSONs found for {mol_name}') # the files are missing (previously mis-reported as FileExistsError)
        
        logger.info(f'Using monomer file "{json_path}"...')
        with json_path.open('r') as json_file:
            mono_data = json.load(json_file)

        logger.info(f'Loading and matching molecule "{mol_name}"...')
        openff_topology, _, _error = Topology.from_pdb_and_monomer_info(str(pdb_path), json_path, strict=True, verbose=verbose)
        box_vectors = polymer.box_vectors
        if box_vectors is not None: # unset (NoneType) box vectors are left unset on the topology, giving a non-periodic simulation
            openff_topology.box_vectors = box_vectors.in_units_of(nanometer) # set box vector to allow for periodic simulation
        mol = next(openff_topology.molecules) # get the first molecule (assumed to be the polymer of interest)

        if prevent_overwrites and lc_path.exists(): # check if library charges have already been generated for this molecule
            logger.info('Obtaining partial charges from Library Charge xml...')
            forcefield = ForceField(lc_path, solv_ff_xml, allow_cosmetic_attributes=True) # use both the polymer-specific xml and the solvent FF xml when creating the Forcefield
            
            logger.info('Unpickling charged Molecule...')
            with pickle_path.open('rb') as pickle_file: # read cmol from file if already extant
                cmol = pickle.load(pickle_file)
        else:
            # PERFORMING INITIAL AM1-BCC CHARGING, OR UNPICKLING MOLECULE IF THIS HAS ALREADY BEEN DONE
            if not pickle_path.exists():
                logger.warning('No extant pickled charged Molecule found, performing charging...')
                try:
                    cmol = polychg.generate_molecule_charges(mol, partial_charge_method='am1bccelf10') # perform AM1BCC
                except ConformerGenerationError:
                    logger.warning('Could not successfully generate conformers')
                    continue # skip to the next molecule (the master handler is still detached by the finally clause)

                with pickle_path.open('wb') as pickle_file: # write charged molecule to pickle to avoid constantly redoing AM1
                    pickle.dump(cmol, pickle_file)
                polymer.info.pickle_file = pickle_path
            
            logger.info('Unpickling charged Molecule...')
            with pickle_path.open('rb') as pickle_file: # read cmol from file if already extant
                cmol = pickle.load(pickle_file)

            # CHARGE AVERAGING
            logger.info(f'Averaging charges over {mol_name} residues...')
            avgs, atom_id_mapping = polychg.get_averaged_charges(cmol, monomer_data=mono_data, distrib_mono_charges=distrib_mono_charges) # average charges over unique residues

            logger.warning('Library Charge file not found OR overwrite allowed, writing new Library Charge xml...')
            forcefield, lib_chgs = polychg.write_new_library_charges(avgs, main_ff_xml, output_path=lc_path)
            polymer.info.ff_file = lc_path
            
            # CREATE JSON WITH AVERAGED CHARGES IF ONE DOES NOT ALREADY EXIST
            if polymer.info.monomer_file_chgd is None:
                logger.info('Writing new monomer JSON with charge data...')

                mono_chgs = {avgd_res.residue_name : avgd_res.charges for avgd_res in avgs}
                if polymer.info.solvent is not None:
                    mono_chgs = {**mono_chgs, **polymer.info.solvent.monomer_json_data['charges']} # ensure solvent "monomer" charges are also recorded
                mono_data['charges'] = mono_chgs # always record the averaged charges (previously only written when a solvent was present)

                chgd_json_path = json_path.with_name(f'{json_path.stem}_charged.json')
                with chgd_json_path.open('w') as new_json: # opening in write mode creates the file if needed
                    json.dump(mono_data, new_json, indent=4)
                polymer.info.monomer_file_chgd = chgd_json_path

        # RUN OpenMM SIMULATION FOR TARGET MOLECULE
        if run_sims:
            logger.info(f'Running {sim_time} OpenMM sim at {temperature} for {num_steps} steps...')

            forcefield = ForceField(lc_path, solv_ff_xml, allow_cosmetic_attributes=True)
            interchange = Interchange.from_smirnoff(force_field=forcefield, topology=openff_topology, charge_from_molecules=[cmol]) # generate Interchange with new library charges prior to writing to file
            integrator  = LangevinMiddleIntegrator(temperature, friction_coeff, timestep)
            
            sim = polysim.create_simulation(interchange, integrator)
            polysim.run_simulation(sim, output_folder=output_folder, output_name=mol_name, num_steps=num_steps, record_freq=record_freq)
        
        polymer.to_file() # ensure directory data reflects changes to files
        # filetree.startfile(output_folder)
        clear_output() # for Jupyter notebooks only, can freely comment this out
        logger.info(f'Successfully completed actions on {mol_name}\n')
    finally:
        logger.removeHandler(master_handler) # free up master log handler - prevents bleed-over between multiple sim sessions (also on skipped or failed molecules)

master_logger.info(f'Charging{" & simulation" if run_sims else ""} loop completed')

In [None]:
def failed_sims() -> set:
    '''Names of the sampled molecules which do not appear among the manager's completed simulations'''
    completed = set(mgr.all_completed_sims(polymers).keys())
    return set(sample_mols) - completed

failed_sims()

## Generating monomer files for polyamides

In [None]:
POLYAM_PATH   = Path('compatible_pdbs/polyamides')
p = POLYAM_PATH/'smiles.json'

mono_smiles = json.loads(p.read_text())
mono_smiles['TMC'] = mono_smiles['TMC'].replace('Cl', 'O[H]') # replace chlorides with oxygens present in full polymer

# NOTE(review): other cells in this notebook call "set_rdkdraw_size" instead - confirm which name the rdkdraw module actually provides
rdkdraw.set_rddraw_size(400, 3/2)

# build an atom-mapped RDKit molecule (and its SMARTS) for every monomer SMILES
mono_mols, mono_smarts = {}, {}
for name, SMILES in mono_smiles.items():
    rdmol = Chem.MolFromSmiles(SMILES, sanitize=False)
    for map_num, atom in enumerate(rdmol.GetAtoms(), start=1): # map numbers are 1-indexed atom indices
        atom.SetAtomMapNum(map_num)

    mono_mols[name], mono_smarts[name] = rdmol, Chem.MolToSmarts(rdmol)

    display(rdmol)


In [None]:
# Atom-mapped SMARTS for each polyamide monomer variant; "*" atoms are wildcards
# NOTE(review): the "*" atoms presumably mark attachment points to neighboring residues (one per valence in the name) - confirm
monomers = {
    'MPD_monovalent' : '[H:1]-[#6:2]1:[#6:3](-[H:4]):[#6:5](-[#7:6](-*)-[H:8]):[#6:9](-[H:10]):[#6:11](-[#7:12](-[H:13])-[H:14]):[#6:15]:1-[H:16]',
    'MPD_bivalent'   : '[H:1]-[#6:2]1:[#6:3](-[H:4]):[#6:5](-[#7:6](-*)-[H:8]):[#6:9](-[H:10]):[#6:11](-[#7:12](-[H:13])-*):[#6:15]:1-[H:16]',
    'TMC_monovalent' : '[H:1]-[#6:2]1:[#6:3](-[#6:4](=[#8:5])-*):[#6:8](-[H:9]):[#6:10](-[#6:11](=[#8:12])-[#8:13]-[H:14]):[#6:15](-[H:16]):[#6:17]:1-[#6:18](=[#8:19])-[#8:20]-[H:21]', 
    'TMC_bivalent'   : '[H:1]-[#6:2]1:[#6:3](-[#6:4](=[#8:5])-*):[#6:8](-[H:9]):[#6:10](-[#6:11](=[#8:12])-*):[#6:15](-[H:16]):[#6:17]:1-[#6:18](=[#8:19])-[#8:20]-[H:21]', 
    'TMC_trivalent'  : '[H:1]-[#6:2]1:[#6:3](-[#6:4](=[#8:5])-*):[#6:8](-[H:9]):[#6:10](-[#6:11](=[#8:12])-*):[#6:15](-[H:16]):[#6:17]:1-[#6:18](=[#8:19])-*', 
}

# monomer specification in the layout written to the JSON files: SMARTS per monomer plus an (empty) cap list per monomer
json_spec = {
    'monomers' : monomers,
    'caps' : {name : [] for name in monomers}
}

# serialize the monomer spec once, then write it to the shared file and to one same-named file per PDB structure
spec_text = json.dumps(json_spec, indent=4)

pam_mono_path = POLYAM_PATH/'polyamides.json'
pam_mono_path.write_text(spec_text)

for pam_path in POLYAM_PATH.glob('*.pdb'):
    ind_mono_path = pam_path.with_suffix('.json') # sits next to the PDB, sharing its stem
    ind_mono_path.write_text(spec_text)

## Testing loading of polyamides using monomer spec

In [None]:
# sorted so that positional indexing into this list is reproducible (glob yields files in arbitrary order)
pam_pdbs = sorted(POLYAM_PATH.glob('*.pdb'))

In [None]:
pdb_file = pam_pdbs[1]

# load the structure against the shared polyamide monomer spec (non-strict matching) and take its first molecule
load_result = Topology.from_pdb_and_monomer_info(str(pdb_file), pam_mono_path, strict=False, verbose=False)
openff_topology, _, _error = load_result
mol = next(openff_topology.molecules)

# report the metadata of every atom which was not matched
for atom in mol.atoms:
    if atom.metadata['already_matched']:
        continue
    print(atom.metadata)

# NOTE(review): other cells in this notebook call "set_rdkdraw_size" instead - confirm which name the rdkdraw module actually provides
rdkdraw.set_rddraw_size(500, 3/2)
display(mono_mols[pdb_file.stem])

## Some other section