In [2]:
# Custom Imports
from polymer_utils import general, filetree, extratypes
from polymer_utils import charging as polychg
from polymer_utils import simulation as polysim
from polymer_utils.representation import PolymerDir, PolymerDirManager
from polymer_utils.solvents import WATER_TIP3P

# General Imports
import numpy as np
from datetime import datetime

# Typing and Subclassing
from typing import Any, Callable, ClassVar, Optional, Union
from dataclasses import dataclass, field
from abc import ABC, abstractmethod

# File I/O
from pathlib import Path
import csv, json, pickle
from shutil import copyfile, rmtree

# Logging and Shell
from IPython.display import clear_output
import subprocess
import logging
logging.basicConfig(level=logging.INFO)
                            
# Cheminformatics
from rdkit import Chem
from rdkit.Chem import rdmolfiles

# Molecular Dynamics
from openff.units import unit
from openff.interchange import Interchange

from openff.toolkit.topology import Topology
from openff.toolkit.topology.molecule import Molecule, Atom
from openff.toolkit.typing.engines.smirnoff import ForceField

from openff.toolkit.utils.exceptions import ConformerGenerationError
from openff.toolkit.utils.toolkits import RDKitToolkitWrapper, OpenEyeToolkitWrapper, AmberToolsToolkitWrapper

from openmm import LangevinMiddleIntegrator, Context
from openmm.vec3 import Vec3
from openmm.app import Simulation, PDBReporter, StateDataReporter

from openmm.unit import picosecond, femtosecond, nanosecond # time
from openmm.unit import nanometer, angstrom # length
from openmm.unit import Unit, kelvin # misc

# Static paths (relative Path objects)
CORE_PATH       = Path('Core')
POLY_PATH       = Path('Polymers')
TEST_PATH       = Path('Polymers_test')
COMPAT_PDB_PATH = Path('compatible_pdbs')

# Locations derived from the root folders above
POLY_PDB_PATH = COMPAT_PDB_PATH.joinpath('simple_polymers')
SOLVENTS_PATH = CORE_PATH.joinpath('solvents')
POLYMER_SOLV_TEMPLATE = CORE_PATH.joinpath('inp_templates', 'solv_polymer_template_box.inp')

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


## Loading and configuring available polymers

In [2]:
# Shared record layout for the log files written by this notebook
LOG_FORMATTER = logging.Formatter(
    fmt='%(asctime)s.%(msecs)03d [%(levelname)-7s:%(processName)s:line %(lineno)-3d] - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
      
def setup_logger(log_name : str, outpath : Path, writemode='w', formatter : Optional[logging.Formatter]=None):
    '''Create (or fetch) the Logger called <log_name> and make sure it writes to <outpath>/<log_name>.log

    Args:
        log_name  : name of the Logger; also used as the stem of the log file
        outpath   : folder in which the log file lives (created here if it does not exist yet)
        writemode : file mode handed to logging.FileHandler ('w' truncates, 'a' appends)
        formatter : optional Formatter applied to the file handler; the logging default is used when None

    Returns:
        The Logger registered under <log_name>, carrying exactly one FileHandler for the log file.
        Calling this again with the same name and folder does not attach a second handler.
    '''
    import os # local import: only needed to normalise the path the same way logging.FileHandler does

    outpath.mkdir(parents=True, exist_ok=True) # touch() below raises FileNotFoundError if the folder is missing
    log_path = outpath/f'{log_name}.log'
    log_path.touch()

    logger = logging.getLogger(log_name) # getLogger returns the same object when given the same name
    target_file = os.path.abspath(log_path) # FileHandler.baseFilename is stored in this absolute form

    # Look for a handler on *this* file rather than for "any handler at all": a logger which only carries
    # an unrelated handler (e.g. a shared master handler) would otherwise never write to its own file
    already_attached = any(
        isinstance(handler, logging.FileHandler) and handler.baseFilename == target_file
            for handler in logger.handlers
    )
    if not already_attached: # prevent duplicate logging output when recreating logger
        file_handler = logging.FileHandler(log_path, mode=writemode)
        if formatter is not None:
            file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    return logger

In [3]:
# Switches controlling what this cell does to the polymer collection
reset      = False # True -> purge all polymer directories and repopulate them from POLY_PDB_PATH
resolvate  = False # True -> solvate every directory which has no solvent recorded
recover    = True  # True -> re-link files already sitting in pkl/ and FF/ to the directory info
clear_sims = False # True -> purge simulations through the manager

# Point the TIP3P water definition at its structure and force field files
solvent = WATER_TIP3P
solvent.structure_file  = SOLVENTS_PATH/solvent.name/f'{solvent.name}.pdb' # reuse the shared solvents root
solvent.forcefield_file = CORE_PATH/'force_fields'/'tip3p.offxml'
mgr = PolymerDirManager(collection_dir=POLY_PATH)

if reset:
    mgr.purge_dirs(really=True)
    mgr.populate_mol_dirs(source_dir=POLY_PDB_PATH)

if resolvate:
    for mol_name, mol_dir in mgr.mol_dirs.items():
        if mol_dir.info.solvent is None: # only try to solvate systems which don't already have a solvent
            print(mol_name)
            solv_dir = mol_dir.solvate(template_path=POLYMER_SOLV_TEMPLATE, solvent=solvent, exclusion=1*nanometer)

    mgr.update_mol_dirs() # ensure solvated dirs are added to collection

if recover:
    # After re-solvating, ensure leftover charge files from previous solvation sims are reassigned
    recovery_attrs = { # sub-folder attribute on the directory object -> attribute on its info record
        'pkl' : 'pickle_file',
        'FF'  : 'ff_file'
    }

    for mol_dir in mgr.mol_dirs_list:
        for subdir_name, attr_name in recovery_attrs.items():
            subdir = getattr(mol_dir, subdir_name)
            # Take whichever entry iterdir() yields first. Asking next() for a default keeps the
            # "empty folder" case from being confused with a StopIteration raised by the calls below
            existing_file = next(subdir.iterdir(), None)
            if existing_file is None:
                continue # folder is empty, nothing to recover for this attribute

            setattr(mol_dir.info, attr_name, existing_file)
            mol_dir.to_file() # ensure info is updated on disc copy

            print(mol_dir.info)

if clear_sims:
    mgr.purge_sims(really=True)

print(list(mgr.mol_dirs.keys()), '\n', mgr.all_completed_sims)

PolymerInfo(mol_name='polymethylketone_solv_water', exclusion=Quantity(value=1, unit=nanometer), solvent=Solvent(name='water', formula='H2O', smarts='[#1:1]-[#8:3]-[#1:2]', density=Quantity(value=0.997, unit=gram/(centimeter**3)), MW=Quantity(value=18.015, unit=gram/mole), charges={'1': 0.417, '2': 0.417, '3': -0.834}, structure_file=PosixPath('Core/solvents/water/water.pdb'), forcefield_file=PosixPath('Core/force_fields/tip3p.offxml')), structure_file=PosixPath('Polymers/polymethylketone/polymethylketone_solv_water/structures/polymethylketone_solv_water.pdb'), monomer_file=PosixPath('Polymers/polymethylketone/polymethylketone_solv_water/monomers/polymethylketone_solv_water.json'), monomer_file_chgd=PosixPath('Polymers/polymethylketone/polymethylketone_solv_water/monomers/polymethylketone_solv_water_charged.json'), pickle_file=PosixPath('Polymers/polymethylketone/polymethylketone_solv_water/pkl/polymethylketone_solv_water.pkl'), ff_file=PosixPath('Polymers/polymethylketone/polymethylke

## Running Sims

In [4]:
mol_dirs = mgr.mol_dirs

desired_solvents = (WATER_TIP3P,) # (None,)

# Pathological or otherwise difficult-to-run polymers encountered so far
hard_polymers = ['vulcanizedrubber', 'polyphenylenesulfone', 'polyethylene']
hard_polymers_solv = [
    f'{unsolv_mol}_solv_{solvent.name}'
        for solvent in desired_solvents
            if solvent is not None
                for unsolv_mol in hard_polymers
]
hard_polymers.extend(hard_polymers_solv) # ensure solvated names are also included

def _is_usable(mol_dir) -> bool:
    '''Decide whether a polymer directory qualifies for the charging / simulation loop'''
    if mol_dir.mol_name in hard_polymers:   # 1) must not be manually excluded
        return False
    if not (0 < mol_dir.n_atoms <= 300):    # 2) must be loadable (i.e. non-zero size) but small enough for AM1BCC (150 is speed limit, 300 is error limit)
        return False
    if not mol_dir.has_monomer_data:        # 3) must have monomer information files
        return False
    return mol_dir.info.solvent in desired_solvents # 4) must be solvated in one of the specified solvents (could be None)

mols_to_use = [
    mol_dir.mol_name
        for mol_dir in mol_dirs.values()
            if _is_usable(mol_dir)
]
print(mols_to_use)

['polymethylketone_solv_water', 'polyvinylchloride_solv_water', 'atactic_styrene_solv_water', 'syntactic_styrene_solv_water', 'naturalrubber_solv_water', 'polyphenyleneI_solv_water', 'polythiophene_solv_water', 'polyethylmethacrylate_solv_water', 'peg_modified_solv_water', 'PEO_PLGA_solv_water']


In [8]:
# DEFINE TARGET MOLECULES AND FORCEFIELD
sample_mols = mols_to_use # names of the polymer directories to process; defaults to everything selected above
# sample_mols = ['polyvinylchloride_solv_water']
main_ff_xml = CORE_PATH/'force_fields'/'openff_unconstrained-2.0.0.offxml' # base force field handed to polychg.write_new_lib_chgs_from_json

# CHARGING PARAMETERS (both are forwarded to polychg.generate_molecule_charges)
toolkit_method = 'openeye'
partial_charge_method = 'am1bccelf10'

# SET CHARGING LOOP BEHAVIOR
overwrite_ff_xml   = True # regenerate the library-charge force field XML even if one is already recorded
overwrite_chg_json = True # regenerate the charged monomer JSON even if one is already recorded
distrib_mono_charges = True # forwarded to polychg.get_averaged_charges
run_sims = True # when False, the loop stops after writing the force field file (no OpenMM run)
verbose = False # forwarded to Topology.from_pdb_and_monomer_info

# SIMULATION PARAMETERS 
temperature = 300 * kelvin # first argument of LangevinMiddleIntegrator
friction_coeff = 1/picosecond # second argument of LangevinMiddleIntegrator

sim_time = 5 * nanosecond # total simulated time; divided by timestep to get the number of steps
timestep = 1 * femtosecond # integrator step size
num_samples = 2_000 # number of steps is divided by this to get the recording frequency

In [9]:
# PRE-FLIGHT CHECKS
# Fail right here on a mistyped or missing molecule name; dict.get() would silently store None instead,
# which only surfaces later as an AttributeError in the middle of the loop
missing_mols = [mol_name for mol_name in sample_mols if mol_name not in mol_dirs]
if missing_mols:
    raise KeyError(f'No polymer directories found for: {missing_mols}')

sample_dirs = {
    mol_name : mol_dirs[mol_name]
        for mol_name in sample_mols
}

num_steps   = round(sim_time / timestep)
record_freq = max(1, round(num_steps / num_samples)) # clamp: the ratio rounds to 0 when num_steps is small relative to num_samples
num_mols = len(sample_dirs)
print(num_steps, record_freq)

# One log file for the whole battery; its file handler is shared with each per-polymer logger in the loop
master_logger = setup_logger(f'Polymer_battery_{general.timestamp_now()}', outpath=mgr.log_dir, formatter=LOG_FORMATTER, writemode='w')
master_handler = master_logger.handlers[0]

# BEGIN CHARGING / SIM LOOP - Perform charge averaging on all target molecules which don't already have averaged LCs; Load forcefield for those which already do 
# Per molecule: (1) ensure a pickled charged Molecule exists, (2) write a monomer JSON with averaged charges,
# (3) write a force field XML with library charges, (4) optionally run an OpenMM simulation
for i, (mol_name, mol_dir) in enumerate(sample_dirs.items()):
    log_name = mol_name
    polymer_logger = setup_logger(log_name, outpath=mol_dir.logs, writemode='a', formatter=LOG_FORMATTER)
    polymer_logger.addHandler(master_handler) # ensure output is also logged to the master

    try: # the finally clause below detaches the master handler on every exit path (success, skip or error)
        start_time = datetime.now()
        # 0) LOAD MOLECULE AND TOPOLOGY, ATTEMPT TO APPLY LIBRARY CHARGES
        master_logger.info(f'Current molecule: "{mol_name}" ({i + 1}/{num_mols})') # +1 converts to more human-readable 1-index for step count
        if not mol_dir.has_monomer_data:
            raise FileNotFoundError(f'No monomer JSONs found for {mol_name}') # the files are missing, not already present

        # 1) ENSURING AN AM1-BCC-ELF10-CHARGED MOLECULE EXISTS (IN PICKLE FORM). WILL RECHARGE IF NONE EXISTS
        if (mol_dir.info.pickle_file is None):
            pickle_path = mol_dir.pkl/f'{mol_name}.pkl'
            polymer_logger.warning('(1-precheck) Generating new pickled charged OpenFF Molecule...')

            polymer_logger.info('Loading topology and molecule via graph match...')
            openff_topology, _, _error = Topology.from_pdb_and_monomer_info(str(mol_dir.info.structure_file), mol_dir.monomer_file_ranked, strict=True, verbose=verbose)
            mol = next(openff_topology.molecules) # get the first molecule (assumed to be the polymer of interest)

            try:
                # use the per-polymer logger (not the root logger) so the message reaches the polymer and master log files
                polymer_logger.info(f'Charging {mol_name} via {toolkit_method}-{partial_charge_method}...')
                cmol = polychg.generate_molecule_charges(mol, toolkit_method=toolkit_method, partial_charge_method=partial_charge_method)
            except ConformerGenerationError:
                polymer_logger.error('Could not successfully generate conformers\n')
                continue # skip this molecule; the master handler is still detached by the finally clause

            with pickle_path.open('wb') as pickle_file: # write charged molecule to pickle to avoid constantly redoing AM1
                pickle.dump(cmol, pickle_file)

            mol_dir.info.pickle_file = pickle_path # ensure change is reflected in directory info
        polymer_logger.info('(1) Found pickled charged molecule...')

        # 2) CREATE JSON WITH AVERAGED CHARGES IF ONE DOES NOT ALREADY EXIST
        if (mol_dir.info.monomer_file_chgd is None) or overwrite_chg_json: # reached when no charged JSON is recorded, or when overwriting is requested
            polymer_logger.warning('(2-precheck) Generating new charged monomer JSON...')

            polymer_logger.info('Unpickling charged Molecule for charge averaging...')
            # NOTE: pickle.load executes arbitrary code; only point pickle_file at files this workflow wrote itself
            with mol_dir.info.pickle_file.open('rb') as pickle_file:
                cmol = pickle.load(pickle_file) # load AM1-charged molecule from file (must exist by this point in loop)

            json_path = mol_dir.info.monomer_file # NOTE : important that this file NOT be charged
            with json_path.open('r') as json_file:
                mono_data = json.load(json_file)

            polymer_logger.info(f'Averaging charges over {mol_name} residues...')
            avgs, atom_id_mapping = polychg.get_averaged_charges(cmol, monomer_data=mono_data, distrib_mono_charges=distrib_mono_charges) # average charges over unique residues
            mono_chgs = {avgd_res.residue_name : avgd_res.charges for avgd_res in avgs}
            if mol_dir.info.solvent is not None:
                mono_chgs = {**mono_chgs, **mol_dir.info.solvent.monomer_json_data['charges']} # ensure solvent "monomer" charge entries are also recorded
            mono_data['charges'] = mono_chgs

            polymer_logger.info('Writing new charged JSON monomer file...')
            chgd_json_path = json_path.with_name(f'{json_path.stem}_charged.json')
            with chgd_json_path.open('w') as new_json:
                json.dump(mono_data, new_json, indent=4)

            mol_dir.info.monomer_file_chgd = chgd_json_path # ensure change is reflected in directory info
        polymer_logger.info('(2) Found charged monomer JSON...')

        # 3) CREATE FORCE FIELD XML WITH MONOMER-BASED LIBRARY CHARGE ENTRIES
        if (mol_dir.info.ff_file is None) or overwrite_ff_xml: # a charged monomer JSON is guaranteed to be recorded by step 2
            lc_path = mol_dir.FF/f'new {mol_name} charges.offxml' # path to output library charges to
            polymer_logger.warning('(3-precheck) Generating new Force Field XML with Library Charges...')
            with mol_dir.info.monomer_file_chgd.open('r') as json_file:
                mono_data_chgd = json.load(json_file)
            polymer_logger.info('Writing new force field OFFXML file')
            forcefield, lib_chgs = polychg.write_new_lib_chgs_from_json(mono_data_chgd, main_ff_xml, output_path=lc_path)

            if mol_dir.info.solvent is not None:
                polymer_logger.info('Associated solvent found, merging Library-Charged force field with solvent force field...')
                forcefield = ForceField(lc_path, mol_dir.info.solvent.forcefield_file, allow_cosmetic_attributes=True) # use both the polymer-specific xml and the solvent FF xml to make hybrid forcefield
                forcefield.to_file(lc_path)

            mol_dir.info.ff_file = lc_path # ensure change is reflected in directory info
        polymer_logger.info('(3) Found Force Field file with Library Charges...')

        mol_dir.to_file() # record all changes to disc

        # 4) RUN OpenMM SIMULATION FOR TARGET MOLECULE
        if run_sims:
            polymer_logger.info('(4) Preparing simulation...')

            polymer_logger.info('Loading Topology...')
            openff_topology, _, _error = Topology.from_pdb_and_monomer_info(str(mol_dir.info.structure_file), mol_dir.monomer_file_ranked, strict=True, verbose=verbose)
            # NOTE(review): guard added because unset box vectors were described as NoneType, on which
            # .in_units_of() cannot be called - confirm that mol_dir.box_vectors really returns None when unset
            box_vectors = mol_dir.box_vectors
            if box_vectors is not None:
                openff_topology.box_vectors = box_vectors.in_units_of(nanometer) # set box vector to allow for periodic simulation

            polymer_logger.info('Loading charged Molecule...')
            with mol_dir.info.pickle_file.open('rb') as pickle_file:
                cmol = pickle.load(pickle_file) # load AM1-charged molecule from file (must exist by this point in loop)

            polymer_logger.info('Loading Force Field...')
            forcefield = ForceField(mol_dir.info.ff_file, allow_cosmetic_attributes=True)

            polymer_logger.info('Creating Simulation from Interchange...')
            interchange = Interchange.from_smirnoff(force_field=forcefield, topology=openff_topology, charge_from_molecules=[cmol]) # generate Interchange with new library charges prior to writing to file
            integrator  = LangevinMiddleIntegrator(temperature, friction_coeff, timestep)
            sim = polysim.create_simulation(interchange, integrator)

            polymer_logger.info(f'Running {sim_time} OpenMM sim at {temperature} for {num_steps} steps...')
            output_folder = mol_dir.make_res_dir()
            polysim.run_simulation(sim, output_folder=output_folder, output_name=mol_name, num_steps=num_steps, record_freq=record_freq)
            # filetree.startfile(output_folder)

        mol_dir.to_file() # ensure directory data reflects changes to files
        proc_time = str(datetime.now() - start_time)
        polymer_logger.info(f'Successfully completed actions on {mol_name} in {proc_time}\n')
        clear_output() # for Jupyter notebooks only, can freely comment this out
    finally:
        polymer_logger.removeHandler(master_handler) # free up master log handler - prevents bleed-over between multiple sim sessions

master_logger.info(f'Charging{" & simulation" if run_sims else ""} loop completed')

INFO:Polymer_battery_03-17-2023_at_10-14-58_AM:Charging & simulation loop completed


In [1]:
# Debugging aid: show the handlers on the current polymer logger next to the registry of all known loggers.
# Needs `logging` and `polymer_logger` from the cells above, so it fails with a NameError on a fresh kernel.
polymer_logger.handlers, logging.root.manager.loggerDict # use this to link SMIRNOFF and others to logging

NameError: name 'logging' is not defined