# Testing of features in polysaccharide2

In [None]:
# Suppressing annoying warnings (!must be done first!)
# NOTE: these filters are deliberately registered ahead of every other import in this cell
import warnings

warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning) # doesn't actually seem to do anything about mbuild warnings

# Logging
from rich.progress import Progress, track
from polysaccharide2.genutils.logutils.IOHandlers import LOG_FORMATTER

import logging

# configure the root logger, reusing the message and date formats of the project-wide formatter
logging.basicConfig(
    level=logging.INFO,
    format =LOG_FORMATTER._fmt,
    datefmt=LOG_FORMATTER.datefmt,
    # force=True
)
# Fetch the logger from the logging registry rather than instantiating Logger directly:
# a directly-constructed Logger has no parent, so it would ignore the root configuration set up above
LOGGER = logging.getLogger(__name__)

# Defining final simulation workflow

## Global parameters

In [None]:
from pathlib import Path # needed here and by every later cell which builds a path

# name of the molecule to run through the workflow; also the stem of its input PDB and monomer files
mol_name = 'peg_modified'

# locations of the input structures and monomer definitions
pdb_sub = 'simple_polymers'
pdb_src_dir  = Path(f'pdb_test_cleaned/pdbs/{pdb_sub}')
mono_src_dir = Path(f'pdb_test_cleaned/monos/{pdb_sub}')

# per-molecule output directory (created relative to the current working directory)
working_dir = Path(f'workflow_test_{mol_name}')
working_dir.mkdir(exist_ok=True)

## 0) Filetree setup

In [None]:
from shutil import copyfile
from polysaccharide2.genutils.fileutils.pathutils import assemble_path
from polysaccharide2.monomers import MonomerGroup

# orientation ('head' or 'tail') assigned to each terminal monomer, keyed by molecule name
term_orients = {
    'peg_modified' : {
        'peg_TERM2' : 'head',
        'peg_TERM3' : 'tail',
    },
    'paam_modified' : {
        'paam_TERM2' : 'head',
        'paam_TERM3' : 'tail',
    },
    'pnipam_modified' : {
        'pnipam_TERM2' : 'head',
        'pnipam_TERM3' : 'tail',
    },
}

# directory which receives the copies of the starting files
starting_dir = working_dir / 'chemistry_seeds'
starting_dir.mkdir(exist_ok=True)

# COPY PDB FILE
pdb_src_path = assemble_path(pdb_src_dir, mol_name, extension='pdb')
if not pdb_src_path.exists(): # fail with the offending path rather than a bare AssertionError
    raise FileNotFoundError(f'No PDB file found at {pdb_src_path}')
pdb_path = assemble_path(starting_dir, mol_name, extension='pdb')
copyfile(pdb_src_path, pdb_path)

# COPY MONOMER FILE WITH ASSIGNED TERMINAL GROUPS
mono_src_path = assemble_path(mono_src_dir, mol_name, extension='json')
if not mono_src_path.exists():
    raise FileNotFoundError(f'No monomer file found at {mono_src_path}')
mono_path = assemble_path(starting_dir, mol_name, extension='json')

# the monomer file is not copied verbatim: the terminal orientations are set before it is re-saved
monomers = MonomerGroup.from_file(mono_src_path)
monomers.term_orient = term_orients[mol_name]
monomers.to_file(mono_path)

## 1) Assigning chemistry

In [None]:
from openff.toolkit import Topology

from polysaccharide2.openfftools import TKREGS, topology
from polysaccharide2.residues.partition import partition
from polysaccharide2.monomers.repr import MonomerGroup


# read the PDB, passing the monomers loaded from the monomer file as custom substructures
monogrp = MonomerGroup.from_file(mono_path)
offtop = Topology.from_pdb(
    pdb_path,
    _custom_substructures=monogrp.monomers,
    toolkit_registry=TKREGS['The RDKit'],
)

# report the outcome of partitioning
was_partitioned = partition(offtop)
print(was_partitioned)

# label the largest molecule of the Topology and initialize its bookkeeping properties
offmol = topology.get_largest_offmol(offtop)
offmol.name = mol_name
offmol.properties['solvent'] = None
offmol.properties['charge_method'] = None

# write the Topology out as an SDF file
sdf_dir = Path('sdf_test')
sdf_dir.mkdir(exist_ok=True)
mol_path = sdf_dir.joinpath(f'{mol_name}.sdf')
topology.topology_to_sdf(
    mol_path,
    offtop=offtop,
    toolkit_registry=TKREGS['The RDKit'],
)

## 2) Assigning charges

### 2a) RCT + generating library charges

In [None]:
# declared inputs of this step (annotations only, no values are bound by these two lines)
mono_path : Path
N : int

from polysaccharide2.openfftools.pcharge import MolCharger
from polysaccharide2.residues.rescharge.interface import LibraryCharger
from workflow import rct_protocol

# building parameters
# base_charge_method = 'AM1-BCC-ELF10'
base_charge_method = 'Espaloma-AM1-BCC'

outdir = Path('RCT_test')
outdir.mkdir(exist_ok=True)

N = 150
chgr = MolCharger.subclass_registry[base_charge_method]() # look up the charger class by method name and instantiate it
delete_pdb : bool=True
save_sdf : bool=True


# defining paths
mol_category = 'simple_polymers'
pdb_dir  = Path(f'pdb_test_cleaned/pdbs/{mol_category}')
mono_dir = Path(f'pdb_test_cleaned/monos/{mol_category}')
# the orientation table is named "term_orients" in the filetree setup cell ("term_groups" is not defined anywhere)
term_group_orient = term_orients[mol_name]

# estimation and building loop
mono_path = mono_dir / f'{mol_name}.json'
assert mono_path.exists()

monogrp = MonomerGroup.from_file(mono_path)
lib_chgs = rct_protocol(outdir, mol_name, monogrp, term_group_orient, N, charger=chgr, delete_pdb=delete_pdb, save_sdf=save_sdf)
lib_chgs.to_file(outdir / f'{mol_name}_residue_charges.json')

# PHASE 2: applying library charges to full-size mol
pdb_path = pdb_dir / f'{mol_name}.pdb' # path to the true, full-size PDB
offtop = Topology.from_pdb(pdb_path, _custom_substructures=monogrp.monomers, toolkit_registry=TKREGS['The RDKit'])
was_partitioned = partition(offtop)
assert was_partitioned # the library charges are applied only if partitioning reported success
fullmol = topology.get_largest_offmol(offtop)

lib_chgr = LibraryCharger(lib_chgs)
rctmol = lib_chgr.charge_molecule(fullmol, in_place=False)
topology.topology_to_sdf(outdir / f'{mol_name}_{lib_chgr.CHARGING_METHOD}.sdf', rctmol.to_topology())

### 2b) Generate charged mols with chosen methods

In [None]:
from typing import Optional # module-level annotations are evaluated, so Optional must be in scope

# declared inputs of this step (annotations only, no values are bound by these two lines)
charge_methods : list[str]
library_charges_path : Optional[Path]

from polysaccharide2.openfftools import TKREGS
from polysaccharide2.openfftools import topology
from polysaccharide2.openfftools.pcharge import MolCharger
from polysaccharide2.residues.rescharge.rctypes import ChargesByResidue

charge_methods = [
    'Espaloma-AM1-BCC', # same registry key as the base charge method used in step 2a
    'RCT'
]
# residue charges file written at the end of step 2a
lib_chg_path = Path('RCT_test') / f'{mol_name}_residue_charges.json'

for charge_method in charge_methods:
    # reload the uncharged molecule from file for each method
    offtop = topology.topology_from_sdf(mol_path)
    offmol = topology.get_largest_offmol(offtop)
    mol_name = offmol.name

    ChargerType = MolCharger.subclass_registry[charge_method]
    if charge_method == 'RCT':
        # the RCT charger is built from library charges, which must already exist on disc
        if lib_chg_path is None or not lib_chg_path.is_file():
            raise FileNotFoundError(f'No library charges file found at {lib_chg_path}')
        charger = ChargerType(ChargesByResidue.from_file(lib_chg_path))
    else:
        charger = ChargerType() # instantiate: charge_molecule() is called on an instance, as in step 2a

    cmol = charger.charge_molecule(offmol, in_place=False)
    topology.topology_to_sdf(sdf_dir / f'{mol_name}_{charge_method}.sdf', cmol.to_topology())

## 3) Conformer anneal

In [None]:
# Placeholder for the conformer anneal step: this only declares an annotation, no value is assigned
# and no annealing is run here (presumably the path to an anneal parameter file - TODO confirm)
anneal_params_path : Path

## 4) Simulation schedule

In [None]:
# declared inputs of this step (annotations only, no values are bound by these two lines)
schedule : dict[str, Path] # -> SimulationParameters
param_mol_path : Path # .sdf

import numpy as np # required for the box dimensions below

from openmm.unit import nanosecond, picosecond, femtosecond
from openmm.unit import kelvin, atmosphere, nanometer, centimeter
from openmm.unit import gram, mole, liter

from polysaccharide2.genutils.unitutils import openmm_to_openff, openff_to_openmm
from polysaccharide2.genutils.fileutils.pathutils import assemble_path
from polysaccharide2.genutils.logutils.IOHandlers import MSFHandlerFlex

from polysaccharide2.openmmtools import parameters, execution
from polysaccharide2.openmmtools.parameters import SimulationParameters

from polysaccharide2.openfftools import topology
from polysaccharide2.openfftools.omminter import openff_topology_to_openmm
from polysaccharide2.openfftools.solvation import packing, boxvectors, solvents
from polysaccharide2.openfftools.solvation.packing import pack_topology_with_solvent

from workflow import vacuum_anneal


# input parameters
sdf_dir = Path('sdf_test')
sdf_path = sdf_dir / 'polyvinylchloride_AM1-BCC-ELF10.sdf'
postfix = 'conf_1'

box_dims = 4.0 * np.ones(3) * nanometer # three equal box dimensions of 4.0 nm
density = 0.997 * gram/centimeter**3
exclusion = 1.0 * nanometer
ff_name = 'openff-2.0.0'

# define paths
save_dir = Path('openmm_test')
save_dir.mkdir(exist_ok=True)
solvent  = solvents.water_TIP3P

# parameter sets for the anneal and for each named stage of the schedule, loaded from JSON files
sim_param_path = Path('sim_param_sets')
anneal_params = SimulationParameters.from_file(sim_param_path / 'anneal_params.json')
prod_schedule = {
    'equilibration' : SimulationParameters.from_file(sim_param_path / 'equilibration_params.json'),
    'production' : SimulationParameters.from_file(sim_param_path / 'production_lite_params.json'),
}

# initialize simulation
with MSFHandlerFlex(save_dir, proc_name='sim_schedule', loggers='all') as log_handler:
    offtop = topology.topology_from_sdf(sdf_path)
    offmol = topology.get_largest_offmol(offtop)
    mol_name = offmol.name

    # anneal, then pack with solvent and save the solvated Topology
    conf_top = vacuum_anneal(save_dir, offtop, anneal_params, forcefield=ff_name, box_vecs=box_dims)
    solv_top = pack_topology_with_solvent(conf_top, solvent, box_vecs=box_dims, density=density, exclusion=exclusion)
    solv_top_path = assemble_path(save_dir, f'{mol_name}_solv_{solvent.name}', extension='sdf', postfix=postfix)
    topology.topology_to_sdf(solv_top_path, solv_top)

    # execute remaining simulations
    ommtop, ommsys, ommpos = openff_topology_to_openmm(solv_top, forcefield=ff_name, box_vecs=box_dims)
    history = execution.run_simulation_schedule(save_dir, prod_schedule, ommtop, ommsys, ommpos, return_history=True)

## 5) Analysis

In [None]:
sim_paths_path : Path # -> SimulationPaths

import mdtraj
import matplotlib.pyplot as plt

from polysaccharide2.openmmtools.serialization import SimulationPaths
from polysaccharide2.analysis import mdtrajutils


# load the trajectory and topology referenced by the saved production paths file
sim_paths = SimulationPaths.from_file(Path('openmm_test/production/production_paths.json'))
traj = mdtraj.load(
    sim_paths.trajectory_path,
    top=sim_paths.topology_path,
)

# atom pairs to compute RDFs over; the nitrogen pairs are added only if the trajectory contains nitrogen
pair_dict = {}
pair_dict['chain O - water O'] = traj.top.select_pairs('not water and element O', 'water and element O')
if 'N' in mdtrajutils.unique_elem_types(traj):
    pair_dict['chain N - water O'] = traj.top.select_pairs('not water and element N', 'water and element O')

# compute the RDFs between 0.2 and 1.2 nm and plot them
rdf_dframe = mdtrajutils.acquire_rdfs(
    traj,
    pair_dict=pair_dict,
    min_rad=0.2,
    max_rad=1.2,
    rad_unit=nanometer,
)
radii, rdf = mdtrajutils.rdfs_to_plot_data(rdf_dframe)

plt.plot(radii, rdf)