In [2]:
# Custom Imports
import polysaccharide as ps
from polysaccharide import analysis, extratypes, filetree, general, logutils, molutils
from polysaccharide import polymer

from polysaccharide.charging.residues import ChargedResidue
from polysaccharide.charging.application import CHARGER_REGISTRY, ChargingParameters

from polysaccharide import LOGGERS_MASTER
from polysaccharide.logutils import ProcessLogHandler

from polysaccharide.molutils.rdmol import rdkdraw

from polysaccharide.polymer.representation import Polymer
from polysaccharide.polymer.management import PolymerManager
from polysaccharide.polymer.filtering import has_sims, is_solvated, is_unsolvated, is_charged
from polysaccharide.polymer import building, monomer

from polysaccharide.solvation.solvents import WATER_TIP3P
from polysaccharide.analysis import trajectory, statistics
from polysaccharide.simulation.records import SimulationPaths, SimulationParameters
from polysaccharide.graphics import plotutils

# Generic Imports
import re
from functools import partial
from collections import defaultdict
from itertools import combinations

# Numeric imports
from math import ceil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Typing and Subclassing
from typing import Any, Callable, ClassVar, Iterable, Optional, Union
from dataclasses import dataclass, field
from abc import ABC, abstractmethod, abstractproperty
from openmm.unit import Unit, Quantity

# File I/O
import argparse
from pathlib import Path
import csv, json, pickle
from shutil import copyfile, rmtree
import importlib.resources as impres

# Logging and Shell
import logging
# Configure the root logger at INFO level, reusing the message and date formats
# carried by the package's LOG_FORMATTER.
# NOTE(review): `_fmt` is an underscore-prefixed attribute of the formatter — confirm
# there is no public accessor that should be used instead.
logging.basicConfig(
    level=logging.INFO,
    format=logutils.LOG_FORMATTER._fmt,
    datefmt=logutils.LOG_FORMATTER.datefmt,
    force=True # drop any handlers already attached to the root logger before configuring
)
                            
# Cheminformatics
from rdkit import Chem
from rdkit.Chem import rdmolfiles

# Molecular Dynamics
from openff.interchange import Interchange
from openff.toolkit import ForceField
from openff.toolkit.topology import Topology
from openff.toolkit.topology.molecule import Molecule, Atom
from openff.toolkit.typing.engines.smirnoff.parameters import LibraryChargeHandler

from openff.units import unit
from openmm.unit import picosecond, femtosecond, nanosecond # time
from openmm.unit import nanometer, angstrom # length
from openmm.unit import kelvin, atmosphere # misc

# polymer resource management
import importlib_resources as impres
from polysaccharide import resources
from polysaccharide.resources import AVAIL_RESOURCES

# Resource locations exposed by polysaccharide.resources.
# NOTE(review): `impres` is bound twice in this cell (first to importlib.resources, later to
# the importlib_resources package); the later binding is the one used by the calls below.
RESOURCE_PATH = resources.RESOURCE_PATH
SIM_PARAM_PATH = impres.files(resources.sim_templates) # presumably simulation parameter templates — confirm
CHG_PARAM_PATH = impres.files(resources.chg_templates) # presumably charging parameter templates — confirm
INP_PARAM_PATH = impres.files(resources.inp_templates) # presumably input parameter templates — confirm

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


## Analyzing replicates for Polymers paper

In [3]:
# Defining Paths and PolymerManagers
data_dir = Path('data_for_paper')

# Location of the reference data; it is not created by this cell
colina_dir = data_dir / 'colina_data'

# Output directories. parents=True allows them to be created even when data_dir itself
# does not exist yet (without it, mkdir raises FileNotFoundError in that situation)
openff_dir = data_dir / 'openff_data'
openff_dir.mkdir(parents=True, exist_ok=True)

combined_dir = data_dir / 'combined_data'
combined_dir.mkdir(parents=True, exist_ok=True)

COLL_PATH = Path('Collections')

conf_mgr = PolymerManager(COLL_PATH / 'water_soluble_polymers_confs')
equil_mgr = PolymerManager(COLL_PATH / 'water_soluble_polymers_equil')

# The manager whose completed simulations are analyzed by the cells that follow
# targ_mgr = conf_mgr
targ_mgr = equil_mgr

In [6]:
# Collect, for every completed simulation, the time-average of each observable.
# Layout of the result: base molecule name -> charge method -> property name -> list of averages
# (one list entry per simulation directory)
full_data = defaultdict(partial(defaultdict, partial(defaultdict, list)))
for mol_name, sim_dirs_list in targ_mgr.all_completed_sims.items():
    polymer = targ_mgr.polymers[mol_name]
    for sim_dir in sim_dirs_list:
        sim_paths, sim_params = polymer.load_sim_paths_and_params(sim_dir)
        time_data = pd.read_csv(sim_paths.time_data)
        x_data, y_data = trajectory.props_to_plot_data(time_data)

        for prop_name, time_series in y_data.items():
            series_average = time_series.mean()
            full_data[polymer.base_mol_name][sim_params.charge_method][prop_name].append(series_average)

In [7]:
# Reduce the per-simulation averages of every property to a single number, once per
# reducer below, and save one table per (molecule, reducer) pair
dframe_fns = {
    'obs' : np.mean,
    'std' : np.std # numpy's default here is the population standard deviation (ddof=0)
}

for mol_name, mol_dict in full_data.items():
    for outname, dframe_fn in dframe_fns.items():
        # one single-column frame per charge method, indexed by property name
        per_method_frames = []
        for chg_method, prop_dict in mol_dict.items():
            reduced_props = {}
            for prop_name, prop_data in prop_dict.items():
                reduced_props[prop_name] = dframe_fn(prop_data)

            per_method_frames.append(
                pd.DataFrame.from_dict(reduced_props, orient='index', columns=[f'Sage 2.0.0 - {chg_method}'])
            )

        # place the charge-method columns side by side and write the table out
        dframe = pd.concat(per_method_frames, axis=1)
        dframe.to_csv(openff_dir / f'{mol_name}_{outname}.csv')

In [8]:
# Merging new dataframes with data from Colina paper
# Each reference CSV is paired with the OpenFF table of the same file name, and the two are
# joined column-wise on their shared index (the first column of each file)
for ref_data_dir in colina_dir.iterdir():
    # despite its name, ref_data_dir is one entry of the reference directory; anything which
    # is not a CSV file (subdirectories, hidden files, ...) cannot be read as a table
    if not ref_data_dir.is_file() or ref_data_dir.suffix.lower() != '.csv':
        continue
    filename = ref_data_dir.name

    # a reference file with no OpenFF counterpart is reported and skipped, so that
    # it does not abort the merge of all remaining files
    new_data_path = openff_dir / filename
    if not new_data_path.is_file():
        logging.warning(f'No OpenFF data file named "{filename}" in {openff_dir}; skipping merge for this reference file')
        continue

    new_data = pd.read_csv(new_data_path, index_col=0)
    ref_data = pd.read_csv(ref_data_dir, index_col=0)
    data = pd.concat([new_data, ref_data], axis=1)

    data.to_csv(combined_dir / filename)

In [1]:
# Load the '_exp' and '_std' CSV tables of a single molecule from data_dir.
# Requires the path-definition cell to have been run first (data_dir must exist in the kernel)
mol_name = 'peg_modified'

exps_path = data_dir.joinpath(f'{mol_name}_exp.csv')
stds_path = data_dir.joinpath(f'{mol_name}_std.csv')

# read in the same order as the paths were defined: exps first, then stds
exps, stds = map(pd.read_csv, (exps_path, stds_path))

exps

NameError: name 'data_dir' is not defined

In [None]:
# Show the OpenFF-side path for whatever `filename` currently holds in the kernel
openff_dir.joinpath(filename)

## Plotting replicate data

In [None]:
# NOTE(review): `data_agg` is not assigned in any visible cell — confirm where it comes from
chg_dict = data_agg['ABE10_averaged']
# chg_dict = data_agg['Espaloma_AM1BCC']

# One panel per molecule: each bar is the mean of one property's values, with the
# standard deviation of those values drawn as the error bar
fig, ax = plotutils.presize_subplots(nrows=1, ncols=len(chg_dict))
for axis, (mol_name, prop_dict) in zip(ax.flatten(), chg_dict.items()):
    means = [np.mean(prop_data) for prop_data in prop_dict.values()]
    stds = [np.std(prop_data) for prop_data in prop_dict.values()]
    x_pos = np.arange(len(prop_dict))

    axis.set_title(f'{mol_name} Shape Properties')
    axis.bar(x_pos, means, yerr=stds)

    # label each bar with the name of the property it represents
    axis.set_xticks(x_pos)
    axis.set_xticklabels(prop_dict.keys(), rotation=-45)

In [None]:
# Tabulate the (property name, values) pairs of one molecule under one charge method
chg_method = 'Espaloma_AM1BCC'
paam_props = data_agg[chg_method]['paam_modified']
pd.DataFrame(paam_props.items(), columns=['', chg_method])

In [None]:
# Display the entire aggregate structure.
# NOTE(review): `data_agg` is not assigned in any visible cell — confirm where it comes from
data_agg

## Prior plotting

In [None]:
# NOTE(review): `mgr` is not assigned in any visible cell — presumably a PolymerManager left in the kernel; confirm
pdir = mgr.polymers['polyvinylchloride']
# Called without a simulation directory, unlike the other calls in this notebook —
# presumably the method has a default for it; confirm
spath, sparam = pdir.load_sim_paths_and_params()
df = pd.read_csv(spath.state_data)

In [None]:
# Report how many completed simulations each polymer passing the is_solvated filter has
mgr_equil = PolymerManager(COLL_PATH / 'water_soluble_polymers_equil')
solvated_polymers = mgr_equil.filtered_by(is_solvated)
for mol_name, polymer in solvated_polymers.items():
    num_completed = len(polymer.completed_sims)
    print(mol_name, num_completed)

In [None]:
# Plot the contents of the simulation's spatial-data CSV via the RDF helper.
# NOTE(review): `sim_paths` is whatever an earlier-run cell left in the kernel — confirm it
# refers to the intended simulation before trusting the plot
rdf_dframe = pd.read_csv(sim_paths.spatial_data)
radii, rdfs = analysis.trajectory.rdfs_to_plot_data(rdf_dframe)
fig, ax = plotutils.plot_df_props(radii, rdfs)

In [None]:
# Plot the contents of the simulation's time-data CSV via the property helper.
# NOTE(review): `sim_paths` is whatever an earlier-run cell left in the kernel — confirm it
# refers to the intended simulation before trusting the plot
prop_dframe = pd.read_csv(sim_paths.time_data)
times, props = analysis.trajectory.props_to_plot_data(prop_dframe)
fig, ax = plotutils.plot_df_props(times, props)

In [None]:
# Pick one polymer from the conformer collection and load the paths and parameters
# of the first entry in its list of completed simulations
mgr_confs = PolymerManager(COLL_PATH / 'water_soluble_polymers_confs')
mol_name = 'paam_modified_conf_1_solv_water'

pdir = mgr_confs.polymers[mol_name]
sim_dir = mgr_confs.all_completed_sims[mol_name][0] # raises if this polymer has no completed simulations
sim_paths, sim_params = pdir.load_sim_paths_and_params(sim_dir)

# Notebook for testing generic ideas and developing functions

In [None]:
# Generic Imports
import re
from functools import partial
from collections import defaultdict
from itertools import combinations

# Numeric imports
from math import ceil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Typing and Subclassing
from typing import Any, Callable, ClassVar, Iterable, Optional, Union
from dataclasses import dataclass, field
from abc import ABC, abstractmethod, abstractproperty

# File I/O
import argparse
from pathlib import Path
import csv, json, pickle
from shutil import copyfile, rmtree
import importlib.resources as impres

## Removing fields from XML (useful for annoying barostat in OpenMM states)

In [None]:
import xml.etree.ElementTree as ET

# Go through every recorded simulation of `pdir` and empty the first <Parameters> element
# of any checkpoint which is stored as XML.
# NOTE(review): the edited tree exists only in memory — nothing here writes it back to the
# checkpoint file. Add `tree.write(chk)` if the removal is meant to persist on disk.
for sim_dir, sim_paths_file in pdir.simulation_paths.items():
    sim_paths = SimulationPaths.from_file(sim_paths_file)
    chk = sim_paths.checkpoint
    if chk.suffix != '.xml':
        continue # checkpoints with any other extension are left alone

    tree = ET.parse(chk)
    root = tree.getroot()

    # First <Parameters> element in document order, or None if the file has none.
    # Supplying a default keeps one file without that element from raising StopIteration
    # and cutting the loop short for all remaining simulations
    par = next(root.iter('Parameters'), None)
    if par is not None:
        par.clear() # removes the element's children, attributes and text

## Testing dynamic checkpoint file updating

In [None]:
import pickle

class Test:
    '''Toy object which re-pickles itself to its checkpoint file every time one of its attributes is assigned'''
    def __init__(self, val : int, checkpoint : Path) -> None:
        # Both assignments pass through __setattr__; the checkpoint path is not in place
        # during the first one, so the first write to disk happens on the second
        self.val = val
        self.checkpoint_path = checkpoint

    def to_file(self):
        '''Pickle this instance to its checkpoint path; does nothing while no path has been assigned'''
        if not hasattr(self, 'checkpoint_path'):
            return

        with self.checkpoint_path.open('wb') as ckpt_file:
            pickle.dump(self, ckpt_file)

    def __setattr__(self, __name: str, __value: Any) -> None:
        '''Assign the attribute as usual, then refresh the checkpoint file and echo the assignment'''
        super().__setattr__(__name, __value)
        self.to_file()
        print(__name, __value)

In [None]:
# Create an empty checkpoint file in the working directory and attach a Test object to it
p = Path('test.pkl')
p.touch()

t = Test(5, p) # prints each attribute as it is assigned; the file is written once the path is set
t.other = 'word' # a later assignment rewrites the checkpoint with the new attribute included

In [None]:
# Read back whatever object is pickled in the checkpoint file at `p`
with p.open('rb') as file:
    v = pickle.load(file)

v.__dict__ # NOTE(review): not the last expression of the cell, so its value is not displayed
v.foo = 'bar' # assuming v is a Test instance, this passes through __setattr__ and rewrites the checkpoint

## Experimenting with grid size optimization with respect to aspect ratio and number of cells

In [None]:
from math import ceil, sqrt, floor

def size_penalty(N_targ : int, N_real : int) -> float:
    '''Squared relative deviation of the realized cell count N_real from the target cell count N_targ'''
    relative_deviation = N_real / N_targ - 1
    return relative_deviation**2

def aspect_penalty(a_targ : float, a_real : float) -> float:
    '''One minus the smaller of the two ratios between the target and realized aspect ratios
    The result does not depend on which of the two aspects is the larger one'''
    # alternative formulation kept for reference:
    # return (a_real / a_targ - 1)**2
    targ_over_real = a_targ / a_real
    real_over_targ = a_real / a_targ

    smaller_ratio = real_over_targ if real_over_targ < targ_over_real else targ_over_real
    return 1 - smaller_ratio

def dims(N : int, a : float=1/1, w1=1, w2=1) -> tuple[int, int]:
    '''Choose the (rows, columns) of a 2D grid which holds at least N cells and whose aspect ratio (rows / columns) is close to a

    Every row count from 1 to N is paired with the fewest columns that can accommodate N cells, ceil(N / rows)
    The pair returned is the one minimizing w1*size_penalty + w2*aspect_penalty, where the size penalty compares
    rows*columns against N and the aspect penalty compares rows/columns against a
    When several pairs share the lowest penalty, the one with the fewest rows is returned

    Raises ValueError if N is less than 1, since there are no candidate grids in that case'''
    if N < 1:
        # min() over the empty set of candidates would raise ValueError anyway, just with an unhelpful message
        raise ValueError(f'Cannot size a grid for N={N} cells; N must be at least 1')

    def weighted_penalty(grid_dims : tuple[int, int]) -> float:
        '''Combined size and aspect penalty of a single candidate grid'''
        nrows, ncols = grid_dims
        return w1*size_penalty(N, nrows*ncols) + w2*aspect_penalty(a, nrows/ncols)

    candidate_grids = (
        (nrows, ceil(N / nrows))
            for nrows in range(1, N + 1)
    )
    return min(candidate_grids, key=weighted_penalty)

# Visual check: size a grid for every cell count from 1 to 19 at a target rows/columns
# ratio of 2 and draw an empty figure with that layout, titled with the cell count
a = 2/1
for N in range(1, 20):
    nrows, ncols = dims(N, a)
    fig, ax = plotutils.presize_subplots(nrows=nrows, ncols=ncols, scale=1)
    fig.suptitle(f'N = {N}')