# Testing of features in polysaccharide2

In [1]:
# Suppressing annoying warnings (!must be done first!)
# NOTE : these filters are registered ahead of the third-party imports further down in this cell,
# presumably so that they are already active when those packages emit warnings at import time
import warnings

warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning) # doesn't actually seem to do anything about mbuild warnings

# Logging
from polysaccharide2.genutils.logutils.IOHandlers import LOG_FORMATTER

import logging
# NOTE : loggers must be obtained through getLogger() rather than by instantiating Logger directly;
# a directly-instantiated Logger has no parent, so its records never reach the root handler installed
# by basicConfig() below and every LOGGER.info(...) call in this notebook would be silently discarded
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.INFO,
    format =LOG_FORMATTER._fmt,      # reuse the message layout of the package-wide formatter
    datefmt=LOG_FORMATTER.datefmt,   # ...and its timestamp layout
    # force=True
)

# General
import re, json
from pathlib import Path
from shutil import copyfile

import numpy as np

# Logging
from tqdm import tqdm as tqdm_text
from tqdm.notebook import tqdm as tqdm_notebook
from rich.progress import Progress, track

import logging

# Chemistry
from openmm.unit import nanometer, angstrom
from openff.toolkit import Topology, Molecule, ForceField
from openff.units import unit as offunit

from openff.interchange import Interchange
from openff.interchange.components import _packmol as packmol

from rdkit import Chem
import openeye

# Custom
import polysaccharide2 as ps2
from polysaccharide2.genutils.decorators.functional import allow_string_paths, allow_pathlib_paths, optional_in_place

  from .xtc import XTCTrajectoryFile
  from pkg_resources import parse_version
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
  entry_points = metadata.entry_points()["mbuild.plugins"]
  self.comm = Comm(**args)
  self.comm = Comm(**args)




# Testing topology load and solvation

## Defining water

In [2]:
from rdkit import Chem

from polysaccharide2.topology import offref
from polysaccharide2.topology.topIO import save_molecule
from polysaccharide2.rdutils.labeling.molwise import assign_ordered_atom_map_nums

# rdwat = Chem.MolFromSmiles('O')
# assign_ordered_atom_map_nums(rdwat, in_place=True)
# offwat = Molecule.from_rdkit(rdwat)
def generate_water_TIP3P() -> Molecule:
    '''Helper method for creating a new TIP3P water representation from scratch

    Builds a Molecule from the SMILES string "O", names it "water_TIP3P", and assigns
    each atom the fixed TIP3P partial charge for its element (in units of elementary charge)'''
    TIP3P_ATOM_CHARGES = { # NOTE : units deliberately omitted here (become applied to entire charge array)
        'H' :  0.417,
        'O' : -0.834, # exactly -2x the hydrogen charge, so that the molecule is net-neutral (was mistyped as -0.843)
    }

    water = Molecule.from_smiles('O')
    water.name = 'water_TIP3P'
    # look up the charge of every atom by element symbol, then attach units to the whole array at once
    water.partial_charges = [TIP3P_ATOM_CHARGES[atom.symbol] for atom in water.atoms]*offunit.elementary_charge

    return water

In [3]:
# make sure the output directory exists before anything gets written into it
water_dir = Path('water_files')
water_dir.mkdir(exist_ok=True)

water = generate_water_TIP3P()

# write one SDF per toolkit registry (OpenEye first, then RDKit)
for _sdf_name, _tk_key in (
    (f'{water.name}_oe.sdf', 'OpenEye Toolkit'),
    (f'{water.name}_rd.sdf', 'The RDKit'),
):
    save_molecule(water_dir / _sdf_name, water, offref.TKREGS[_tk_key])

# the OpenEye-written file is duplicated under a toolkit-agnostic name; copyfile returns the destination path
_oe_sdf_path = water_dir / f'{water.name}_oe.sdf'
WATER_PATH = copyfile(_oe_sdf_path, water_dir / f'{water.name}.sdf')

## Testing load using from_pdb and monomers

In [4]:
from polysaccharide2.topology import offref, topIO
from polysaccharide2.topology.topinfo import get_largest_offmol
from polysaccharide2.residues.partition import partition
from polysaccharide2.monomers.repr import MonomerGroup

# pdb_dir  = Path('polymer_examples/compatible_pdbs/simple_polymers')
# mono_dir = Path('polymer_examples/monomer_generation/json_files/')

# input locations : PDB structures and monomer-definition JSON files live in parallel subdirectories
pdb_sub = 'simple_polymers'
pdb_dir  = Path(f'pdb_test_cleaned/pdbs/{pdb_sub}')
mono_dir = Path(f'pdb_test_cleaned/monos/{pdb_sub}')

# molecule to test with (alternatives left commented for quick switching)
# NOTE : the commented "pdb_sub" alternative below sits AFTER pdb_dir/mono_dir are built, so uncommenting it
# in place would not change the directories; it would need to be moved above them to take effect
mol_name = 'polyvinylchloride'
# mol_name = 'PEO_PLGA'
# mol_name = 'paam_modified'
# pdb_sub = 'proteins'
# mol_name = '6cww'

# fail fast if either input file is missing
pdb = pdb_dir / f'{mol_name}.pdb'
mono = mono_dir / f'{mol_name}.json'
assert(pdb.exists())
assert(mono.exists())

# load the monomer definitions and use them as custom substructures when reading the PDB with the RDKit registry
monogrp = MonomerGroup.from_file(mono)
rdmol = Chem.MolFromPDBFile(str(pdb)) # NOTE(review): rdmol is not referenced again in the visible cells
offtop = Topology.from_pdb(pdb, _custom_substructures=monogrp.monomers, toolkit_registry=offref.TKREGS['The RDKit'])
was_partitioned = partition(offtop) # presumably reports whether residue partitioning succeeded - TODO confirm
print(was_partitioned)

# the largest molecule of the topology is the one carried forward by the later cells
offmol = get_largest_offmol(offtop)
offmol.name = mol_name

# save partitioned Topology
sdf_dir = Path('sdf_test')
sdf_dir.mkdir(exist_ok=True)
topIO.topology_to_sdf(sdf_dir / f'{mol_name}.sdf', offtop=offtop)

True


## Partial charge assignment

In [5]:
from polysaccharide2.topology.offref import TKREGS
from polysaccharide2.topology import topIO
from polysaccharide2.residues.charging import application, calculation


base_charge_method = 'AM1-BCC-ELF10'

# run every registered charger with its default settings, keyed by charge method name
charged_mols = {}
for charge_method, ChargerType in application.MolCharger.subclass_registry.items():
    chgr = ChargerType()
    cmol = chgr.charge_molecule(offmol, in_place=False)
    charged_mols[charge_method] = cmol

# average the reference-method charges per residue and persist them as library charges
res_chg = calculation.get_averaged_charges(charged_mols[base_charge_method], monogrp)
res_chg_path = sdf_dir / f'{mol_name}_library_charges.json'
res_chg.to_file(res_chg_path)

# charge another copy of the molecule residue-by-residue from the averaged charges
offmol_avg = application.apply_residue_charges(offmol, res_chg, in_place=False)
offmol_avg.properties['charge_method'] = 'RCT-averaged'
charged_mols['RCT-averaged'] = offmol_avg

# write every charged variant to its own SDF file
for charge_method, cmol in charged_mols.items():
    cmol_path = sdf_dir / f'{mol_name}_{charge_method}.sdf'
    topIO.topology_to_sdf(cmol_path, cmol.to_topology())

2023-10-02 20:20:20.478 [INFO    :     application:line 50  ] - Assigning partial charges via the "AM1-BCC-ELF10" method
2023-10-02 20:21:09.778 [INFO    :     application:line 53  ] - Successfully assigned "AM1-BCC-ELF10" charges
2023-10-02 20:21:09.782 [INFO    :     application:line 50  ] - Assigning partial charges via the "Espaloma-AM1-BCC" method
  self._check_n_conformers(
2023-10-02 20:21:09.886 [INFO    :     application:line 53  ] - Successfully assigned "Espaloma-AM1-BCC" charges


## Solvation of Topologies

In [6]:
from openmm.unit import gram, centimeter, nanometer, mole
from polysaccharide2.topology.solvation.packing import pack_topology_with_solvent

# PARAMETERS
targ_box_vecs = 4.1 * np.ones(3) * nanometer
density = 0.997 * (gram / centimeter**3)
exclusion = 1.3 * nanometer
solvent = Molecule.from_file(WATER_PATH)

# the packing settings are identical for every charged variant, so collect them once
packing_kwargs = dict(box_vecs=targ_box_vecs, density=density, exclusion=exclusion)

# solvate each charged variant and write the result to its own SDF file
for charge_method, cmol in charged_mols.items():
    solv_top = pack_topology_with_solvent(cmol.to_topology(), solvent, **packing_kwargs)
    solv_path = sdf_dir / f'{mol_name}_{charge_method}_solv_{solvent.name}.sdf'
    topIO.topology_to_sdf(solv_path, solv_top)


In [9]:
solv_top.box_vectors

0,1
Magnitude,[[4.1 0.0 0.0]  [0.0 4.1 0.0]  [0.0 0.0 4.1]]
Units,nanometer


# OpenMM I/O and simulation interfaces

## Initialize parameter sets

In [None]:
import numpy as np
from pathlib import Path

from openmm.unit import nanosecond, picosecond, femtosecond
from openmm.unit import kelvin, atmosphere, nanometer

from polysaccharide2.openmmtools import parameters, serialization, preparation
from polysaccharide2.openmmtools import thermo, reporters

# label and output directory shared by the OpenMM cells further down (which read "name" and "p")
name = 'pvc'
p = Path('openmm_test')
p.mkdir(exist_ok=True)

# integrator settings : 1 fs time step over 10 ps in total, with 100 samples requested
integ_params = parameters.IntegratorParameters(
    time_step=1*femtosecond,
    total_time=10*picosecond,
    num_samples=100
)
# thermodynamic settings : only the ensemble is specified here, everything else is left at its defaults
thermo_params = thermo.ThermoParameters(
    ensemble='nvt'
)
# reporter settings : all defaults
rep_params = reporters.ReporterParameters()

# bundle the three parameter sets and write them to a JSON file inside the output directory
sim_params = parameters.SimulationParameters(integ_params, thermo_params, rep_params)
sim_params_path = serialization.assemble_sim_file_path(p, name, extension='json', affix='sim_params')
sim_params.to_file(sim_params_path)

# record the location of the parameters file in a SimulationPaths object, which is itself saved alongside it
sim_paths = serialization.SimulationPaths(sim_params_path)
sim_paths_path = serialization.assemble_sim_file_path(p, name, extension='json', affix='sim_paths')
sim_paths.to_file(sim_paths_path)

## Cast OpenFF Topology to OpenMM via Interchange

In [7]:
offtop = topIO.topology_from_sdf(sdf_dir / 'polyvinylchloride_AM1-BCC-ELF10_solv_water_TIP3P.sdf')

In [8]:
offtop.box_vectors

In [None]:
from openff.toolkit import ForceField
from openff.interchange import Interchange

from polysaccharide2.topology import offref, topIO, topinfo
from polysaccharide2.topology.solvation import boxvectors
from polysaccharide2.genutils.unitutils import openff_to_openmm


sdf_dir = Path('sdf_test')
# load the AM1-BCC-ELF10-charged molecule written by the partial charge assignment cell
# (files there are named "<mol_name>_<charge_method>.sdf"; no cell in this notebook writes a "*_ABE10.sdf" file)
offtop = topIO.topology_from_sdf(sdf_dir / 'polyvinylchloride_AM1-BCC-ELF10.sdf')
offmol = topinfo.get_largest_offmol(offtop)

# assign a cubic 2 nm periodic box to the topology
box_dims = 2.0 * np.ones(3) * nanometer
box_vecs = boxvectors.xyz_to_box_vectors(box_dims)
offtop.box_vectors = box_vecs

# parameterize with the chosen force field, taking partial charges from the already-charged molecule
ff_name = 'openff-2.0.0.offxml'
ff = ForceField(offref.FFDIR / ff_name)
ic = Interchange.from_smirnoff(ff, offtop, charge_from_molecules=[offmol])

# export the OpenMM topology, system and positions needed to set up a simulation
ommtop = ic.to_openmm_topology()
ommsys = ic.to_openmm(combine_nonbonded_forces=False, add_constrained_forces=False)
ommpos = openff_to_openmm(ic.positions)

## Initialize OpenMM sim + files

In [None]:
# build the simulation (and its output files) inside directory "p" under label "name", then give it coordinates
ommsim = preparation.initialize_simulation_and_files(p, name, sim_paths, ommtop, ommsys)
ommsim.context.setPositions(ommpos) # by default, positions are unassigned

# minimize first, then record the topology and system via the preparation helper
LOGGER.info('Performing energy minimization')
ommsim.minimizeEnergy()
LOGGER.info('Energy successfully minimized')
preparation.record_simulation_top_and_sys(p, name, ommsim, sim_paths)

# integrate for the number of steps held by the integrator parameters
LOGGER.info(f'Integrating {sim_params.integ_params.total_time} OpenMM sim for {sim_params.integ_params.num_steps} steps')
ommsim.step(sim_params.integ_params.num_steps)
LOGGER.info('Simulation integration completed successfully')

# Experimenting with SMARTS functional groups

In [None]:
from polysaccharide2.monomers.substruct.functgroups import FN_GROUP_TABLE, FN_GROUP_ENTRIES
from polysaccharide2.monomers.substruct.functgroups.records import FnGroupSMARTSEntry

In [None]:
FN_GROUP_TABLE.loc[FN_GROUP_TABLE['group_type'].str.contains('carbonyl')]

In [None]:
smarts = FN_GROUP_ENTRIES[44].SMARTS
Chem.MolFromSmarts(smarts)

# Testing monomer loading

In [None]:
from pathlib import Path 
from polysaccharide2.monomers.repr import MonomerGroup

# NOTE(review): this rebinds "p", which the OpenMM cells earlier in the notebook use as their output directory;
# re-running those cells after this one would silently pick up this JSON path instead
p = Path('polymer_examples/monomer_generation/json_files/bisphenolA.json')
q = Path('polymer_examples/monomer_generation/json_files/naturalrubber.json')

# load one MonomerGroup per monomer-definition file
mg1 = MonomerGroup.from_file(p)
mg2 = MonomerGroup.from_file(q)

In [None]:
Chem.MolFromSmiles(mg2.monomers['naturalrubber'][0])

# Testing building

In [None]:
from polysaccharide2.polymers import estimation, building

estimation.estimate_chain_len_linear(mg1, 10)

# Testing simulation I/O

In [None]:
from pathlib import Path 
from openmm.unit import nanosecond

# NOTE(review): the "Initialize parameter sets" cell builds SimulationParameters from
# polysaccharide2.openmmtools.parameters out of three parameter objects, whereas this call goes through
# a different module path (openmmtools.records) with a different positional signature - confirm that
# this API still exists before relying on this cell
sp = ps2.openmmtools.records.SimulationParameters(100*nanosecond, 5, 'NVT')
sp.to_file(Path('test.json'))

# Playing with ratios

In [None]:
from dataclasses import dataclass
from typing import Any, Callable, ClassVar, TypeVar
from math import gcd
from numbers import Number


N = TypeVar('N')
def sgnmag(num : N) -> tuple[bool, N]:
    '''Split a numeric-like value into a (sign, magnitude) pair
    The sign flag is True only for values strictly below zero'''
    is_negative = num < 0
    magnitude = abs(num)

    return is_negative, magnitude


@dataclass(repr=False)
class Ratio:
    '''For representing fractional ratios between two objects'''
    num   : Any
    denom : Any

    # REPRESENTATION
    def __repr__(self) -> str:
        return f'{self.num}/{self.denom}'

    def to_latex(self) -> str:
        '''Return latex-compatible string which represents the fraction'''
        return rf'\frac{{{self.num}}}{{{self.denom}}}'

    # RELATIONS
    @property
    def reciprocal(self) -> 'Ratio':
        '''Return the reciprocal of a ratio (numerator and denominator swapped)'''
        return self.__class__(self.denom, self.num)


@dataclass(repr=False)
class Rational(Ratio):
    '''For representing ratios of integers'''
    num   : int
    denom : int

    # REDUCTION
    autoreduce : ClassVar[bool]=False # class-wide switch : when True, every newly-created instance is reduced immediately

    def __post_init__(self) -> None:
        '''Reduce right after construction if the class-level autoreduce switch is enabled'''
        if self.__class__.autoreduce:
            self.reduce()

    def reduce(self) -> None:
        '''Reduce numerator and denominator (in-place) by greatest common factor'''
        _gcd = gcd(self.num, self.denom)
        if _gcd > 1: # a gcd of 1 leaves nothing to divide out, and a gcd of 0 only occurs for the degenerate 0/0
            # floor division is exact here and (unlike "/") never passes through float, so large integers stay intact
            self.num   //= _gcd
            self.denom //= _gcd
    simplify = reduce # alias for convenience

    @property
    def reduced(self) -> 'Rational':
        '''Return reduced Rational equivalent to the current rational (does not modify in-place)'''
        new_rat = self.__class__(self.num, self.denom)
        new_rat.reduce()

        return new_rat
    simplified = reduced # alias for convenience
    simplifed  = reduced # misspelled alias retained for backward compatibility

    def as_proper(self) -> tuple[int, 'Rational']:
        '''Returns the integer and proper fractional component of a ratio'''
        integ, remain = divmod(self.num, self.denom)
        return integ, self.__class__(remain, self.denom)

    # ARITHMETIC
    def __add__(self, other : 'Rational') -> 'Rational':
        '''Sum of two Rationals'''
        return self.__class__(
            num=(self.num * other.denom) + (self.denom * other.num),
            denom=(self.denom * other.denom)
        )

    def __sub__(self, other : 'Rational') -> 'Rational':
        '''Difference of two Rationals'''
        return self.__class__(
            num=(self.num * other.denom) - (self.denom * other.num),
            denom=(self.denom * other.denom)
        )

    def __mul__(self, other : 'Rational') -> 'Rational':
        '''Product of two Rationals'''
        return self.__class__(
            num=self.num * other.num,
            denom=self.denom * other.denom
        )

    def __truediv__(self, other : 'Rational') -> 'Rational':
        '''Quotient of two Rationals'''
        return self.__class__(
            num=self.num * other.denom,
            denom=self.denom * other.num
        )
    __div__ = __truediv__ # Python 2 spelling, kept so that explicit calls by the old name keep working

    def __pow__(self, power : float) -> 'Rational':
        '''Exponentiates a ratio
        Negative integer powers invert the ratio first, so that both parts remain integers'''
        if isinstance(power, int) and power < 0:
            return self.__class__(
                num=self.denom**(-power),
                denom=self.num**(-power)
            )

        return self.__class__(
            num=self.num**power,
            denom=self.denom**power
        )

In [None]:
# NOTE(review): "p" and "q" are rebound here once more (earlier cells use them as filesystem paths),
# so re-running those earlier cells after this one would operate on Rational objects instead
p = Rational(3, 6)
q = Rational(4, 12)

# show the raw ratio, its reciprocal, its reduced form, and an (unreduced, unless autoreduce is enabled) sum
print(p, p.reciprocal, p.reduced, p+q)

In [None]:
Rational.autoreduce = False

In [None]:
import numpy as np
from numbers import Number

for val in (4, 4.0, 4+0j, np.pi, '4', [4], False, 'sgdfg'):
    print(val, type(val), isinstance(val, Number))

In [None]:
from fractions import Fraction