# Practical demo of MD polymer structure building from scratch

In [None]:
# Suppressing annoying warnings (!must be done first!)
# The filters are registered ahead of every other import in this cell, so that UserWarnings and
# DeprecationWarnings raised from this point onwards are ignored
import warnings

# NOTE : a bare "warnings.catch_warnings(record=True)" call used to sit here; the context manager was
# never entered (no "with" statement), so it had no effect and has been removed
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

import logging
from polymerist.genutils.logutils.IOHandlers import LOG_FORMATTER

# Configure the root logger to emit INFO-level (and higher) records, borrowing the
# message and date formats from the LOG_FORMATTER object imported from polymerist
logging.basicConfig(
    force=True, # remove any handlers already attached to the root logger before configuring
    level=logging.INFO,
    datefmt=LOG_FORMATTER.datefmt,
    format=LOG_FORMATTER._fmt, # NOTE : reads an underscore-prefixed (private) attribute of LOG_FORMATTER
)
LOGGER = logging.getLogger(__name__) # logger named after this module

from rdkit import Chem
from rdkit.Chem.AllChem import EmbedMolecule

from polymerist.rdutils import rdkdraw # configure molecule drawing
# NOTE(review): the arguments look like an image size (400) and an aspect ratio (3/2) - confirm against rdkdraw
rdkdraw.set_rdkdraw_size(400, 3/2)
# presumably turns off highlighting of matched substructures in drawn molecules - verify in rdkdraw
rdkdraw.disable_substruct_highlights()

# 1) Building monomer templates and PDB files from basic SMILES

## Define SMILES for single monomer unit

In [None]:
## PVC (active preset; the commented-out presets below can be swapped in to build a different polymer)
molname = 'polyvinylchloride' # also used to name the output directory and files
smiles = 'C=C-Cl' # vinyl chloride
bond_atom_map_nums = (1, 2) # for bond cleavage assignment
# map number -> isotope assigned to the atom which gets converted into a linker
# (only read when ports are assigned by map numbers rather than by bond cleavage)
isotope_map = {3: 0, 4: 0}

## PE
# molname = 'polyethylene'
# smiles = 'C=C'
# bond_atom_map_nums = (1, 2) # for bond cleavage assignment
# isotope_map = {
#     3 : 0,
#     4 : 0
# }

## PAAm
# molname = 'polyacrylamide'
# smiles = 'C=CC(=O)N'
# bond_atom_map_nums = (1, 2)
# isotope_map = {
#     7 : 0,
#     8 : 0
# }

## PS
# molname = 'polystyrene'
# smiles = 'c1ccccc1C=C'
# smiles = 'c1ccc[13cH]c1C=C'
# bond_atom_map_nums = (7, 8) # for bond cleavage assignment
# isotope_map = {
#     14 : 0,
#     16 : 0
# }

## Chitosan
# molname = 'polyglucosamine'
# smiles = 'N[C@H]1C(O)OC(CO)[C@@H](O)[C@@H]1O'

# Parse the selected SMILES with RDKit sanitization switched off, then render the molecule as a visual check
orig_smiles_mol = Chem.MolFromSmiles(smiles, sanitize=False)
display(orig_smiles_mol)

## Expand SMILES to include full chemical info
Namely, this includes explicit Hs, Kekulized aromatic rings, and atom map numbers.

In [None]:
from polymerist.monomers.substruct import specification
from pathlib import Path

# Output files for the chosen polymer are collected in a directory named after it
outdir = Path(f'demos/{molname}')
outdir.mkdir(parents=True, exist_ok=True)

# Expand the bare SMILES, re-parse the result (unsanitized), and show both the drawing and the string
new_smiles = specification.expanded_SMILES(smiles)
FULL_SMILES_MOL = Chem.MolFromSmiles(new_smiles, sanitize=False)
display(FULL_SMILES_MOL)
print(new_smiles)

## Assigning linkers for inter-monomer bonds

In [None]:
from enum import Enum, auto

class PortMethod(Enum):
    '''Ways in which ports (linker sites) can be introduced into a complete RDMol'''
    # turn the atoms carrying chosen map numbers into linkers
    MAP_NUMBERS = auto()
    # lower the order of the bond between two map-numbered atoms
    BOND_CLEAVAGE = auto()
    # not included here : provide an MDL rxn template
    REACTION = auto()

In [None]:
from polymerist.rdutils.labeling import molwise
from polymerist.rdutils.amalgamation import portlib, bonding


# Select how linker ("port") atoms get introduced into the expanded monomer, then apply that method
assign_port_by = PortMethod.BOND_CLEAVAGE
if portlib.get_num_ports(FULL_SMILES_MOL) == 0: # check for existence of ports to ensure idempotency
    if assign_port_by is PortMethod.MAP_NUMBERS:
        # convert each atom carrying one of the chosen map numbers into a linker (atomic number 0) bearing the paired isotope
        map_nums = isotope_map.keys()
        # NOTE(review): the zip pairing assumes atom_ids_by_map_nums() returns atom ids in the same order as the map numbers passed in - confirm
        for (atom_id, map_num) in zip(molwise.atom_ids_by_map_nums(FULL_SMILES_MOL, *map_nums), map_nums):
            linker_atom = FULL_SMILES_MOL.GetAtomWithIdx(atom_id)
            linker_atom.SetIsotope(isotope_map[map_num])
            linker_atom.SetAtomicNum(0)

    elif assign_port_by is PortMethod.BOND_CLEAVAGE:
        # work on an editable copy, lower the order of the bond between the two mapped atoms,
        # then renumber the atom maps and re-sanitize the result
        rwmol = Chem.RWMol(FULL_SMILES_MOL)
        bond_atom_ids = molwise.atom_ids_by_map_nums(rwmol, *bond_atom_map_nums)
        FULL_SMILES_MOL = bonding.decrease_bond_order(rwmol, *bond_atom_ids)
        molwise.assign_ordered_atom_map_nums(FULL_SMILES_MOL, in_place=True)
        Chem.SanitizeMol(FULL_SMILES_MOL, sanitizeOps=specification.SANITIZE_AS_KEKULE)

    elif assign_port_by is PortMethod.REACTION:
        # NotImplemented is a comparison sentinel rather than an exception; raising it would itself fail with a TypeError
        raise NotImplementedError('Port assignment via a reaction template is not included in this demo')
    else:
        raise TypeError(f'Must provide a valid port assignment method (cannot be of type {type(assign_port_by)})')

display(FULL_SMILES_MOL)

## Enumerating "cap" groups from linkers and generating spec-compliant SMARTS

In [None]:
from copy import deepcopy
from polymerist.genutils.iteration import subsets


# Build one SMARTS entry per subset of linkers which gets capped with hydrogen
# NOTE(review): exclude_full=True presumably leaves out the subset containing every linker, and the
# un-suffixed key (i == 0) presumably corresponds to the subset with no linkers capped - confirm against subsets()
smarts = {}
sat_ids = subsets(portlib.get_linker_ids(FULL_SMILES_MOL), exclude_full=True)
for i, linkers_to_saturate in enumerate(sat_ids):
    new_mono = deepcopy(FULL_SMILES_MOL) # leave the template molecule itself untouched
    for linker_id in linkers_to_saturate:
        linker_atom = new_mono.GetAtomWithIdx(linker_id)
        linker_atom.SetAtomicNum(1) # linker -> hydrogen
    new_mono = molwise.assign_ordered_atom_map_nums(new_mono, in_place=False)
    Chem.SanitizeMol(new_mono, sanitizeOps=specification.SANITIZE_AS_KEKULE)

    if i:
        key = f'{molname}_TERM{i}'
    else:
        key = molname

    # for some reason, MolToSmarts doesn't reflect hydrogen addition, so the SMARTS is derived from SMILES instead
    capped_smiles = Chem.MolToSmiles(new_mono)
    smarts[key] = [specification.compliant_mol_SMARTS(capped_smiles).replace('#0', '*')]

## Specify orientation of terminal monomers and create monomer representation object

In [None]:
from polymerist.monomers import MonomerGroup
from polymerist.genutils.fileutils.pathutils import assemble_path


# Collect the SMARTS templates into a MonomerGroup, labelling the two capped variants as the chain's head and tail
grp = MonomerGroup(
    monomers=smarts,
    term_orient={
        f'{molname}_TERM{term_idx}' : orientation
            for term_idx, orientation in enumerate(('head', 'tail'), start=1)
    },
)

# Show every residue template by name as a visual check
for resname, rdmol in grp.iter_rdmols():
    print(resname)
    display(rdmol)

mono_path = assemble_path(outdir, molname, extension='json')
grp.to_file(mono_path) # cache monomer SMARTS for future use

## Grow chain and generate PDB (only linear polymers currently supported!)

In [None]:
from polymerist.polymers import building

DOP = 10 # degree of polymerization handed to the chain builder (NOTE(review): confirm whether end groups are counted)

# Assemble a linear chain from the monomer group, write it out as a PDB file, then show it in 3D
chain = building.build_linear_polymer(grp, DOP=DOP)
pdb_path = assemble_path(outdir, molname, extension='pdb')
building.mbmol_to_openmm_pdb(pdb_path, chain)
chain.visualize(backend='nglview')

# 2) Load and parameterize with OpenFF

## Load chemical info into PDB, partitioning into distinct residues

In [None]:
from openff.toolkit import Topology, Molecule
from polymerist.openfftools import topology
from polymerist.residues.partition import partition


# using pre-made PDB stand-in for now, as mbuild-generated structures are not connecting middle monomers properly
pdb_path = Path(f'pdb_test_cleaned/pdbs/simple_polymers/{molname}.pdb')

# Read the PDB into an OpenFF Topology, supplying the monomer SMARTS as custom substructures
offtop = Topology.from_pdb(pdb_path, _custom_substructures=grp.monomers)
was_partitioned = partition(offtop)
assert was_partitioned # do not continue if partitioning reported failure
offmol = topology.get_largest_offmol(offtop)

display(offmol)

top_path = assemble_path(outdir, molname, extension='sdf')
topology.topology_to_sdf(top_path, offtop) # preserve fully-specified system in SDF format

## Assign atomic partial charges

In [None]:
from polymerist.openfftools import pcharge

# Look up the charging class by its registered name, instantiate it, and charge the polymer molecule
charger = pcharge.MolCharger.subclass_registry['Espaloma-AM1-BCC']()
cmol = charger.charge_molecule(offmol)

# Save the charged molecule to its own SDF file (via a topology built from that molecule)
cmol_path = assemble_path(outdir, molname, postfix='charged', extension='sdf')
ctop = cmol.to_topology()
topology.topology_to_sdf(cmol_path, ctop)

## Defining periodic box and packing with solvent

In [None]:
import numpy as np
from openmm.unit import gram, centimeter, nanometer
from polymerist.openfftools.solvation import boxvectors, solvents, packing
from polymerist.openfftools.solvation.packing import pack_topology_with_solvent


rho = 0.997 * gram / centimeter**3 # density passed to the packer (presumably that of liquid water near room temperature)
box_dims = np.array([4.0, 4.0, 4.0]) * nanometer # three box dimensions, presumably the x, y, z edge lengths
solvent = solvents.water_TIP3P

# Convert the box dimensions into box vectors, pack solvent around the charged polymer, and save the result as SDF
box_vecs = boxvectors.box_vectors_flexible(box_dims)
solv_top = pack_topology_with_solvent(ctop, solvent=solvent, box_vecs=box_vecs, density=rho)
solv_path = assemble_path(outdir, molname, postfix=f'solv_{solvent.name}', extension='sdf')
topology.topology_to_sdf(solv_path, solv_top)

## Generate OpenMM files and run simulations

In [None]:
from polymerist.openmmtools.parameters import SimulationParameters
from polymerist.openfftools.omminter import openff_topology_to_openmm
from polymerist.openmmtools.execution import run_simulation_schedule

ff_name = 'openff-2.0.0' # force field used to parameterize the solvated system
sim_params_path = Path('sample_sim_params.json')

# Load the simulation settings from file and echo each parameter group for inspection
sim_params = SimulationParameters.from_file(sim_params_path)
print(sim_params.thermo_params)
print(sim_params.integ_params)
print(sim_params.reporter_params)
schedule = {'demo' : sim_params} # schedule with a single entry named 'demo'

# Convert the solvated OpenFF topology into an OpenMM topology, system, and positions
ommtop, ommsys, ommpos = openff_topology_to_openmm(solv_top, forcefield=ff_name, box_vecs=box_vecs)

# The output directory for the run lives in a subdirectory of the per-polymer output directory
ommdir = outdir / 'openmm'
ommdir.mkdir(exist_ok=True)
history = run_simulation_schedule(ommdir, schedule, ommtop, ommsys, ommpos, return_history=True)