# Demo 2: parameterizing and packing polymer systems
Here we show how to incorporate force field parameters, solvent molecules, and multiple polymers into a Topology before exporting it to an MD engine

In [1]:
import logging
logging.basicConfig(level=logging.INFO)

from pathlib import Path
from polymerist.genutils.fileutils.pathutils import is_empty, assemble_path


# Read-only example inputs that ship alongside this notebook
EXAMPLE_DIR = Path('polymer_loading_examples')
# Check each precondition separately and say what went wrong: a bare AssertionError gives
# no hint that the notebook is most likely being run from the wrong working directory
assert EXAMPLE_DIR.exists(), f'Example directory "{EXAMPLE_DIR.resolve()}" not found; run this notebook from the directory which contains it'
assert not is_empty(EXAMPLE_DIR), f'Example directory "{EXAMPLE_DIR.resolve()}" is empty; the example input files are missing'

OUTPUT_DIR = Path('scratch') # dummy directory for writing without tampering with example inputs
OUTPUT_DIR.mkdir(exist_ok=True) # exist_ok makes this cell safe to re-run

In [2]:
from openff.toolkit import Molecule, Topology # this import might take a sec the first time you run this cell

INFO:rdkit:Enabling RDKit 2023.09.6 jupyter extensions


## Working on individual polymers
For convenience, we've provided a pre-made example SDF for a poly(N-isopropylacrylamide) (PNIPAAm) polymer  
NOTE: if you haven't already, please run through the [prior demo](2.1-topology_and_fragment.ipynb) to see what this file means and how to create your own

In [3]:
from polymerist.mdtools.openfftools.topology import topology_from_sdf, get_largest_offmol

# locate the pre-made PNIPAAm example file and make sure it is actually present before trying to load it
pnipaam_sdf = assemble_path(EXAMPLE_DIR, 'PNIPAAm', 'sdf')
assert pnipaam_sdf.exists()

pnipaam_top = topology_from_sdf(pnipaam_sdf)
# pnipaam_top.visualize() # uncomment to view the whole Topology instead of the single Molecule below
pnipaam = get_largest_offmol(pnipaam_top) # a useful trick for working with individual Molecules in a single-mol or solvated topology
pnipaam.visualize(backend='nglview') # kept as the last expression so the viewer widget is displayed as the cell output

INFO:numexpr.utils:Note: NumExpr detected 20 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.
INFO:numexpr.utils:NumExpr defaulting to 16 threads.




NGLWidget()

In [4]:
# residue partition metadata is preserved by the SDF
# (only the first 10 atoms are printed to keep the output short)
for atom in pnipaam.atoms[:10]: 
    print(atom.metadata)

{'residue_name': 'pnipaam_TERM1', 'residue_number': 80, 'insertion_code': ' ', 'chain_id': ' ', 'match_info': '{"27": ["pnipaam_MIDDLE", 1], "28": ["pnipaam_MIDDLE", 1], "80": ["pnipaam_TERM1", 1]}', 'substructure_query_id': 1}
{'residue_name': 'pnipaam_TERM1', 'residue_number': 80, 'insertion_code': ' ', 'chain_id': ' ', 'match_info': '{"28": ["pnipaam_MIDDLE", 2], "80": ["pnipaam_TERM1", 2]}', 'substructure_query_id': 2}
{'residue_name': 'pnipaam_TERM1', 'residue_number': 80, 'insertion_code': ' ', 'chain_id': ' ', 'match_info': '{"27": ["pnipaam_MIDDLE", 19], "28": ["pnipaam_MIDDLE", 19], "80": ["pnipaam_TERM1", 19]}', 'substructure_query_id': 19}
{'residue_name': 'pnipaam_TERM1', 'residue_number': 80, 'insertion_code': ' ', 'chain_id': ' ', 'match_info': '{"27": ["pnipaam_MIDDLE", 20], "28": ["pnipaam_MIDDLE", 20], "80": ["pnipaam_TERM1", 20]}', 'substructure_query_id': 20}
{'residue_name': 'pnipaam_TERM1', 'residue_number': 80, 'insertion_code': ' ', 'chain_id': ' ', 'match_info':

### Custom metadata
In principle, there's no limit to the kinds of data you can associate with a polymer, via the Molecule.properties field.  
For example, here we insert patent info about PNIPAAm, which you can verify is written after the chemical table in the resulting SDF

In [5]:
from polymerist.mdtools.openfftools.topology import topology_to_sdf


# OpenFF Molecules have a dedicated "name" attribute which serves as a convenient handle for the molecule
pnipaam.name = 'PNIPAAm'

# arbitrary key-value annotations go into the free-form "properties" mapping; insertion order matches the listing below
pnipaam.properties.update({
    'IUPAC name'     : 'poly(N-isopropylacrylamide)',
    'Common name'    : 'PNIPAAm',
    'First patented' : '1956-12-04',
    'Patent holder'  : 'Edward H. Sprecht',
    'Patent No.'     : 'US-2773063-A',
})

# individual Molecule objects must be bundled into a Topology before exporting to SDF
sdf_path_annotated = assemble_path(OUTPUT_DIR, 'PNIPAAm_annotated', 'sdf')
topology_to_sdf(sdf_path_annotated, pnipaam.to_topology())

### Assigning [atomic partial charges](https://en.wikipedia.org/wiki/Partial_charge) to polymers
This is a niche-but-vital step, as it governs the electrostatic interactions within and between polymer chains, which will eventually be realized in a molecular dynamics engine  
This is a particularly difficult task for macromolecules, though speedy methods such as pretrained graph neural networks (GNNs) or [custom library charges](2.4-RCT_demo.ipynb) are available 

`polymerist` provides user-friendly hooks for various rapid partial charge wrappers (assuming you've installed one of the [parameterization toolkits](https://github.com/timbernat/polymerist?tab=readme-ov-file#2-parameterization-toolkits)).  
These hooks cache the resulting partial charge values and charging method directly to the Molecule's metadata, enabling tracking of provenance during simulation preparation


In [6]:
from polymerist.mdtools.openfftools.partialcharge.molchargers import MolCharger
from polymerist.mdtools.openfftools.partialcharge.rescharge import LibraryCharger

MolCharger.subclass_registry # show which partial charge wrappers are available (displayed as a mapping from charge method name to charger class)

{'AM1-BCC-ELF10': polymerist.mdtools.openfftools.partialcharge.molchargers.ABE10Charger,
 'Espaloma-AM1-BCC': polymerist.mdtools.openfftools.partialcharge.molchargers.EspalomaCharger,
 'NAGL': polymerist.mdtools.openfftools.partialcharge.molchargers.NAGLCharger,
 'RCT': polymerist.mdtools.openfftools.partialcharge.rescharge.interface.LibraryCharger}

In [7]:
from polymerist.mdtools.openfftools.partialcharge.molchargers import (
    NAGLCharger,  # requires having OpenFF NAGL installed
    ABE10Charger, # requires having OpenEye toolkits installed and licensed
)

# choose your partial charge method here
# charger = ABE10Charger() # NOTE: this method is unworkably slow for anything bigger than ~150 atoms; it is just placed here to indicate you COULD use AM1-BCC if desired
charger = NAGLCharger()

# assign partial charges
logging.info(f'Partial charges initial: {pnipaam.partial_charges}') # to show you nothing is up my sleeve, we verify that the charges are unset prior to the invocation

# the charged molecule is bound to a new name, so the uncharged "pnipaam" handle stays available for comparison
pnipaam_charged = charger.charge_molecule(pnipaam)
logging.info(f'Recorded charge method: {pnipaam_charged.properties["charge_method"]}')      # charge method is recorded for provenance
logging.info(f'Net partial charge after charger: {pnipaam_charged.partial_charges.sum()}')  # log just the sum to avoid cluttering the output with a huge array

pnipaam_charged_sdf = assemble_path(OUTPUT_DIR, 'PNIPAAm_charged', 'sdf')
topology_to_sdf(pnipaam_charged_sdf, pnipaam_charged.to_topology()) # export the charged molecule to SDF

INFO:root:Partial charges initial: None
INFO:polymerist.mdtools.openfftools.partialcharge.molchargers:Assigning partial charges via the "NAGL" method
INFO:openff.nagl.nn._models:Could not find property in lookup table: 'Could not find property value for molecule with InChI InChI=1/C162H299N27O27/c1-83(2)163-136(190)57-56-111(138(192)165-85(5)6)59-113(140(194)167-87(9)10)61-115(142(196)169-89(13)14)63-117(144(198)171-91(17)18)65-119(146(200)173-93(21)22)67-121(148(202)175-95(25)26)69-123(150(204)177-97(29)30)71-125(152(206)179-99(33)34)73-127(154(208)181-101(37)38)75-129(156(210)183-103(41)42)77-131(158(212)185-105(45)46)79-133(160(214)187-107(49)50)81-135(162(216)189-109(53)54)82-134(161(215)188-108(51)52)80-132(159(213)186-106(47)48)78-130(157(211)184-104(43)44)76-128(155(209)182-102(39)40)74-126(153(207)180-100(35)36)72-124(151(205)178-98(31)32)70-122(149(203)176-96(27)28)68-120(147(201)174-94(23)24)66-118(145(199)172-92(19)20)64-116(143(197)170-90(15)16)62-114(141(195)168-88(11)12)6

## Including other molecules in your polymer system

### Coexistent polymers
We demonstrate setting up a mix of polymers using the PNIPAAm from above and a poly(bisphenol A carbonate) (PBPA) that [I prepared earlier](https://tvtropes.org/pmwiki/pmwiki.php/Main/OneIPreparedEarlier)

In [11]:
# locate the pre-prepared PBPA example file and make sure it is actually present before trying to load it
pbpa_prepared_sdf = assemble_path(EXAMPLE_DIR, 'PBPA', postfix='prepared', extension='sdf')
assert pbpa_prepared_sdf.exists()

pbpa_prepared_top = topology_from_sdf(pbpa_prepared_sdf)
pbpa_prepared = get_largest_offmol(pbpa_prepared_top) # a useful trick for working with individual Molecules in a single-mol or solvated topology

assert pbpa_prepared.partial_charges is not None # "prepared" means partial charges were already assigned and saved with the molecule
print(pbpa_prepared.properties) # custom metadata (including the recorded charge method) is also recovered from the SDF
pbpa_prepared.visualize(backend='nglview') # kept as the last expression so the viewer widget is displayed as the cell output

{'IUPAC name': 'poly(bisphenol A carbonate)', 'Common name': 'Makrolon', 'First patented': '1953-10-16', 'Patent holder': 'Hermann Schnell, Bayer AG', 'Patent No.': 'US-3028365', 'charge_method': 'NAGL'}


NGLWidget()

`polymerist` ships with a lattice-based packer which makes it straightforward to "tile" multiple copies of a polymer (or many polymers) into a simulation box  
The packer is flexible and allows you to position polymers in space via a set of "lattice points", which define where to center the coordinates of each copy of a conformer


We demonstrate a simple PNIPAAm-PBPA "3D checkerboard" packing here, but the possibilities are only limited by your imagination (and ability to generate lattice points)

In [15]:
from itertools import product as cartesian
import numpy as np

from polymerist.mdtools.openfftools.physprops import effective_radius
from polymerist.mdtools.openfftools.topology import topology_from_molecule_onto_lattice


# lattice size and the two polymers which will alternate over its sites
S : int = 3 # number of polymers to place along each axis (i.e., will have SxSxS alternating box of polymers)
polymer_1 : Molecule = pnipaam_charged # placed on the "even" lattice sites
polymer_2 : Molecule = pbpa_prepared   # placed on the "odd" lattice sites

# generate coordinates for the lattice; polymers will be placed concentric to these lattice sites
lattice_str = f'{S}x{S}x{S}'
integer_lattice = np.array(list(cartesian(range(S), repeat=3))) # the "3" is because we are in 3 dimensions
# parity of the coordinate sum picks out every other site, analogous to the squares of one color on a 3D checkerboard
is_odd_idx = (integer_lattice.sum(axis=1) % 2).astype(bool)

# scale the integer lattice by the larger of the two effective radii so that neighboring polymers avoid collisions
# NOTE(review): sites end up one effective radius apart (not one diameter); confirm this spacing is sufficient for your polymers
r_eff = max(effective_radius(polymer_1), effective_radius(polymer_2))
lattice_points = (r_eff * integer_lattice).m_as('angstrom') # strip units while ensuring magnitudes are as Angstroms

# tile each polymer onto its own sub-lattice, then merge the two into a single Topology
pnipaam_top_packed = topology_from_molecule_onto_lattice(
    polymer_1,
    lattice_points[~is_odd_idx],
)
bisphenol_top_packed = topology_from_molecule_onto_lattice(
    polymer_2,
    lattice_points[is_odd_idx],
)
mixed_polymer_top = pnipaam_top_packed + bisphenol_top_packed

# export the mixed polymer topology to SDF
melt_name = f'{polymer_1.name}_{polymer_2.name}_{lattice_str}'
melt_sdf_path = assemble_path(OUTPUT_DIR, f'{melt_name}_melt', 'sdf')
topology_to_sdf(melt_sdf_path, mixed_polymer_top)

mixed_polymer_top.visualize()

NGLWidget()

### Packing solvents
`polymerist` also provides some utilities for simplifying packing of simulation boxes with a small molecule solvent  
We provide you with pre-parameterized TIP3P water to use for packing to avoid expensive reparameterization downstream; other pre-parameterized solvents [are planned](https://github.com/timbernat/polymerist/issues/4) for future releases

In [16]:
from openmm.unit import gram, centimeter, nanometer, angstrom

from polymerist.mdtools.openfftools import boxvectors
from polymerist.mdtools.openfftools.solvation.solvents import water_TIP3P
from polymerist.mdtools.openfftools.solvation.packing import pack_topology_with_solvent


# set box properties here
box_padding = 0.5 * nanometer     # how far beyond the tight bounding box of the polymer(s) to extend each box face
rho = 0.2 * gram / centimeter**3  # low density here is to account for wide spacing of lattice melt, and so this demo doesn't take forever to finish :P
# rho = 0.997 * gram / centimeter**3 # alternative density value; swap in for the line above if desired
solvent : Molecule = water_TIP3P

# calculate periodic box vectors: tight bounding box of the polymers -> box vectors -> padded uniformly on every face
box_dims = boxvectors.get_topology_bbox(mixed_polymer_top)
box_vectors = boxvectors.pad_box_vectors_uniform(
    boxvectors.box_vectors_flexible(box_dims),
    box_padding,
)

# fill the box around the polymers with solvent at the target density
solvated_melt_topology = pack_topology_with_solvent(
    mixed_polymer_top,
    solvent=solvent,
    box_vecs=box_vectors,
    density=rho,
    exclusion=box_padding,
)

# export the solvated system to SDF, tagging the file name with the solvent used
solv_path = assemble_path(
    OUTPUT_DIR,
    melt_name,
    postfix=f'solv_{solvent.name}',
    extension='sdf',
)
topology_to_sdf(solv_path, solvated_melt_topology)

solvated_melt_topology.visualize()

INFO:polymerist.mdtools.openfftools.solvation.packing:Solvating 4973.719480415983 nm**3 Topology with 33253 water_TIP3P molecules to density of 0.2 g/(cm**3)
INFO:polymerist.mdtools.openfftools.solvation.packing:Packmol packing converged
INFO:polymerist.mdtools.openfftools.solvation.packing:Set solvated Topology box vectors to [[16.34311795020417 0.0 0.0] [0.0 17.050810161959074 0.0] [0.0 0.0 17.848484772094558]] nanometer


NGLWidget()

## Setting periodic box and exporting with Interchange