# Core Imports

In [1]:
# Generic Imports
import re
from functools import partial, cached_property
from collections import defaultdict
from itertools import combinations, chain
from ast import literal_eval

# Numeric imports
import pandas as pd
import numpy as np

# File I/O
from pathlib import Path
import csv, json, openpyxl, pickle

# Logging
from tqdm import tqdm as tqdm_text
from tqdm.notebook import tqdm as tqdm_notebook

# Typing and Subclassing
from typing import Any, Callable, ClassVar, Generator, Iterable, Optional, Union
from dataclasses import dataclass, field
from abc import ABC, abstractmethod, abstractproperty

# Cheminformatics
from rdkit import Chem
from rdkit.Chem import rdChemReactions

from openff.toolkit import ForceField
from openff.toolkit.topology import Topology, Molecule

from openforcefields.openforcefields import get_forcefield_dirs_paths
OPENFF_DIR = Path(get_forcefield_dirs_paths()[0]) # first entry returned by get_forcefield_dirs_paths(), wrapped as a Path

# Creating OpenMM and LAMMPS systems

## Harvest and tabulate paths + info for all Interchange and Topology files

In [10]:
import pandas as pd

# Root directory of the dataset being processed; alternatives which were used previously:
# MOL_MASTER_DIR = Path('polymer_structures')
# MOL_MASTER_DIR = Path('polymer_validation')
# MOL_MASTER_DIR = Path('polymer_revision')
# MOL_MASTER_DIR = Path('polymer_update')
MOL_MASTER_DIR = Path('polymer_benchmark')
build_records_path = MOL_MASTER_DIR.joinpath('build_records.csv') # CSV of build records stored at the top level of the dataset

# the frame can alternatively be read back from that CSV, rather than rebuilt from the directory tree
# mol_file_frame = pd.read_csv(build_records_path, index_col=[0, 1])

In [11]:
# Names given to the successive path components (relative to MOL_MASTER_DIR) of each lattice directory, outermost first
TAGS = (
    'mechanism',
    'polymer_name',
    'oligomer_size',
    'lattice_size'
)

records = []
for mol_dir in MOL_MASTER_DIR.glob('**/[0-9]x[0-9]x[0-9]'): # lattice directories are named like "1x1x1" (one digit per dimension)
    mol_info = dict(zip(TAGS, mol_dir.relative_to(MOL_MASTER_DIR).parts)) # label each path component with its tag
    mol_info['directory'] = mol_dir

    record_path = mol_dir / f'{mol_info["lattice_size"]}_{mol_info["oligomer_size"]}_{mol_info["polymer_name"]}_RECORD.json'
    if record_path.exists(): # NOTE : must be called - the bare bound method is always truthy, making the guard a no-op
        with record_path.open('r') as record_file:
            mol_info.update(json.load(record_file)) # values in the record take precedence over those inferred from the path

    records.append(mol_info) # directories without a record are kept, with only the path-derived fields filled in

mol_file_frame = pd.DataFrame.from_records(records).set_index(['mechanism', 'polymer_name'])

# post-processing and typing
mol_file_frame['records_path'] = None
for str_path_col in ('topology_path', 'interchange_path', 'directory'):
    mol_file_frame[str_path_col] = mol_file_frame[str_path_col].map(
        lambda path_val : Path(path_val) if pd.notna(path_val) else None # de-stringify file Paths, leaving missing entries (no record found) empty
    )

# grouping is done once, after all columns are in place
groups = mol_file_frame.groupby(['lattice_size'])
# groups = mol_file_frame.groupby(['lattice_size', 'mechanism'])

### Filtering by elements and compound name

In [5]:
benz_names = ('benzene', 'phenyl', 'benz', 'phen') # name fragments taken to indicate a benzene-like compound
desired_elems = ('F', 'N', 'O') # element symbols which must ALL be present

def _has_all_desired_elems(topology_elems) -> bool:
    '''Check that every symbol in desired_elems occurs in the collection of elements provided'''
    return all(elem in topology_elems for elem in desired_elems)

def _has_benzene_like_name(index_entry) -> bool:
    '''Check whether the polymer name (second level of a frame index entry) contains any of the fragments in benz_names'''
    return any(bname in index_entry[1] for bname in benz_names)

# boolean masks, both aligned to the index of mol_file_frame
has_elems = mol_file_frame['unique_elems_in_topology'].map(_has_all_desired_elems)
has_benz = mol_file_frame.index.map(_has_benzene_like_name).to_series(index=mol_file_frame.index)

candidates = mol_file_frame[has_benz & has_elems]
candidates.loc[candidates['lattice_size'] == '1x1x1'] # only display the single-unit-cell entries

Unnamed: 0_level_0,Unnamed: 1_level_0,oligomer_size,lattice_size,directory,exper_density,oligomer_type,n_atoms_in_topology,unique_elems_in_topology,topology_path,topology_time,interchange_path,interchange_time
mechanism,polymer_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
polyamide,"poly(4-[4-[6'-[4-(4-aminophenyl)-2-(trifluoromethyl)phenoxy]spiro[fluorene-9,9'-xanthene]-3'-yl]oxy-3-(trifluoromethyl)phenyl]aniline-co-4-[2-(4-carboxyphenyl)-1,1,1,3,3,3-hexafluoropropan-2-yl]benzoic acid)",trimer,1x1x1,polymer_benchmark/polyamide/poly(4-[4-[6'-[4-(...,1.22,trimer,378,"[C, H, F, N, O]",polymer_benchmark/polyamide/poly(4-[4-[6'-[4-(...,0.497389,polymer_benchmark/polyamide/poly(4-[4-[6'-[4-(...,1.608921


### Selecting smallest, most average, and largest oligomers as benchmarking points

In [6]:
def output_olig_samples(
        lattice_size : str='1x1x1',
        size_tol : int=1,
        out_path : Union[str, Path]='oligomers_for_benchmark.csv',
    ) -> None:
    '''Write the smallest, the most average-sized, and the largest oligomer of a lattice group to a CSV file

    Oligomers are taken from the module-level "groups" (grouped by lattice size) and ranked by "n_atoms_in_topology"
    The midpoint oligomer may not share its mechanism (first index level) with either the smallest or the largest oligomer;
    among the remaining oligomers, the smallest one within "size_tol" atoms of the (rounded) mean size is chosen
    If none is within tolerance, the eligible oligomer whose size is closest to the mean is chosen instead

    Raises ValueError if every oligomer shares its mechanism with one of the two endpoints'''
    olig_by_size = groups.get_group(lattice_size).sort_values('n_atoms_in_topology')
    sizes = olig_by_size['n_atoms_in_topology'].to_numpy()
    mechanisms = olig_by_size.index.get_level_values(0)

    edges = [0, -1] # positions of the smallest and largest oligomers in the sorted frame
    avg_size = round(sizes.mean())
    size_dev = np.abs(sizes - avg_size) # distance of each oligomer's size from the average

    eligible = ~mechanisms.isin(mechanisms[edges]) # midpoint should not belong to the same chemical classes as the endpoints
    in_tol = eligible & (size_dev <= size_tol)     # ...and should ideally be within a tolerance of the true average size
    if in_tol.any():
        midpt_idx = int(in_tol.argmax()) # first (i.e. smallest) oligomer within tolerance
    elif eligible.any():
        # argmax() of an all-False mask is 0, which would silently duplicate the smallest oligomer; use the nearest eligible one instead
        midpt_idx = int(np.where(eligible, size_dev, np.inf).argmin())
    else:
        raise ValueError(f'No oligomer in lattice group "{lattice_size}" has a mechanism distinct from those of the smallest and largest oligomers')

    subsampled_oligs = olig_by_size.iloc[[edges[0], midpt_idx, edges[1]]]
    subsampled_oligs.to_csv(out_path)

output_olig_samples()

# Defining utility functions

In [12]:
from typing import Optional

from openmm import System, Context, NonbondedForce
from openmm.unit import kilojoule_per_mole, Quantity
from polymerist.genutils.containers import RecursiveDict


## OpenMM
# Names of the attributes of NonbondedForce which are looked up to obtain the integer code of each cutoff method
NONBOND_CUTOFF_METHOD_NAMES = (
    'NoCutoff',
    'CutoffNonPeriodic',
    'CutoffPeriodic',
    'Ewald',
    'PME',
    'LJPME',
)
# Lookup from integer code to method name, with entries inserted in ascending order of the code
NONBOND_CUTOFF_METHODS = dict(
    sorted(
        (getattr(NonbondedForce, method_name), method_name)
            for method_name in NONBOND_CUTOFF_METHOD_NAMES
    )
)

def describe_ommsys_forces(ommsys : System) -> tuple[dict[str, dict[str, str]], dict[str, int]]:
    '''Tabulate the values reported by the argument-free getters of every Force in an OpenMM System

    Returns a pair consisting of:
    * a nested dict keyed by Force name, holding the class name of the Force (under "type")
      and the value returned by each "get*" attribute (keyed by the attribute name with "get" stripped)
    * a dict mapping the name of each Force to its force group'''
    group_by_force_name : dict[str, int] = {}
    force_descriptions = RecursiveDict()

    for force in ommsys.getForces():
        force_name = force.getName()
        group_by_force_name[force_name] = force.getForceGroup()
        force_descriptions[force_name]['type'] = type(force).__name__

        getter_names = [attr for attr in dir(force) if attr.startswith('get')]
        for getter_name in getter_names:
            try:
                getter_val = getattr(force, getter_name)()
                if getter_name == 'getNonbondedMethod':
                    getter_val = NONBOND_CUTOFF_METHODS[getter_val] # swap integer code for readable name of nonbonded cutoff method
                force_descriptions[force_name][getter_name.removeprefix('get')] = getter_val
            except TypeError: # raised when a getter can't be called without arguments; such getters are skipped
                continue

    return force_descriptions, group_by_force_name

def eval_openmm_energies(context : Context, force_name_remap : Optional[dict[str, str]]=None) -> dict[str, Quantity]:
    '''Perform an energy evaluation on an OpenMM Context

    Returns a dict holding the total "Potential" and "Kinetic" energies of the Context,
    followed by one potential energy per Force in the Context's System, evaluated over that Force's own force group
    Each Force's entry is keyed by its name, or by the replacement registered for that name in "force_name_remap"

    A Force's energy is only isolated if no other Force shares its force group;
    Forces which share a group each report the combined energy of that group'''
    if force_name_remap is None:
        force_name_remap = {}

    # get global energies
    overall_state = context.getState(getEnergy=True)
    openmm_energies = {
        'Potential' : overall_state.getPotentialEnergy(),
        'Kinetic'   : overall_state.getKineticEnergy(),
    }

    # get individual energies from each force type
    for force in context.getSystem().getForces():
        # query the group the Force is actually assigned to; its position in the System need not coincide with its group
        group_state = context.getState(getEnergy=True, groups={force.getForceGroup()}) # TODO : add option to keep whatever groups were there prior
        force_name = force.getName()
        force_label = force_name_remap.get(force_name, force_name) # use remapped name if one is registered, otherwise the Force's set name
        openmm_energies[force_label] = group_state.getPotentialEnergy()

    return openmm_energies

# Create MD files from Interchange, evaluate starting energies

In [13]:
from openmm import Integrator, Context

from openff.interchange import Interchange
from openff.interchange.constants import _PME
from openff.interchange.components.mdconfig import MDConfig
from openff.interchange.interop.openmm._positions import to_openmm_positions

from polymerist.openmmtools import serialization, preparation


# Flags passed as "state_params" when serializing the State of a Context
STATE_PARAMS : dict[str, bool] = dict(
    getPositions=True,
    getVelocities=True,
    getForces=True,
    getEnergy=True,
    getParameters=True,
    getParameterDerivatives=False,
    getIntegratorParameters=True,
)

def interchange_to_lammps(interchange : Interchange, lmp_data_path : Path, lmp_input_path : Path) -> None:
    '''Write a LAMMPS data file and a LAMMPS input file for an OpenFF Interchange
    The input file is rewritten after generation so that it refers to the data file written here'''
    interchange.to_lammps(lmp_data_path) # MD data file
    MDConfig.from_interchange(interchange).write_lammps_input(lmp_input_path) # input directive file

    # the generated input file refers to a generic "out.lmp"; point it at the data file from above instead
    quoted_data_path = f'"{lmp_data_path}"' # surrounding double quotes allow LAMMPS to read special symbols in filename (if present)
    generic_directives = lmp_input_path.read_text()
    lmp_input_path.write_text(generic_directives.replace('out.lmp', quoted_data_path))

def interchange_to_openmm(interchange : Interchange, integrator : Integrator, omm_sys_path : Path, omm_state_path : Path) -> Context:
    '''Build an OpenMM Context from an OpenFF Interchange and serialize its System and State to file
    Returns the Context, with positions set from the Interchange'''
    # create the System (nonbonded forces kept separate) and label its forces
    omm_system = interchange.to_openmm(combine_nonbonded_forces=False)
    preparation.label_forces(omm_system)

    # bind System to the integrator and set starting positions
    context = Context(omm_system, integrator)
    start_positions = to_openmm_positions(interchange, include_virtual_sites=True)
    context.setPositions(start_positions)

    ## writing OpenMM files
    serialization.serialize_system(omm_sys_path, omm_system)
    serialization.serialize_state_from_context(omm_state_path, context, state_params=STATE_PARAMS)

    return context

In [16]:
# show the (mechanism, polymer_name) index entries which belong to the 1x1x1 lattice group
groups.groups['1x1x1']

MultiIndex([(              'polyester', ...),
            ('polyurethane_isocyanate', ...),
            (              'polyamide', ...)],
           names=['mechanism', 'polymer_name'])

In [17]:
import pickle
import cProfile

from shutil import copyfile
from gc import collect
from rich.progress import Progress

from polymerist.genutils.fileutils.pathutils import assemble_path
from polymerist.genutils.fileutils.jsonio.update import append_to_json

from polymerist.duration import Timer
from polymerist.lammpstools import lammpseval
from polymerist.openfftools import topology

from polymerist.openmmtools.thermo import EnsembleFactory
from polymerist.openmmtools.parameters import SimulationParameters


# parameters
sim_params = SimulationParameters.from_file('sim_params.json')
ensfac = EnsembleFactory.from_thermo_params(sim_params.thermo_params)

# toggles for which MD engines to generate files (and evaluate energies) for
build_lammps : bool = True
build_openmm : bool = True

# easier-to-understand names for OpenMM energies, as (name set on Force, replacement label) pairs
force_name_remap = dict([
    ('vdW force'               , 'vdW'),
    ('Electrostatics force'    , 'Electrostatic'),
    ('vdW 1-4 force'           , 'vdW 1-4'),
    ('Electrostatics 1-4 force', 'Electrostatic 1-4'),
    ('PeriodicTorsionForce'    , 'Dihedral'),
    ('HarmonicAngleForce'      , 'Angle'),
    ('HarmonicBondForce'       , 'Bond'),
])

# execute MD loop
# NOTE : "energies" is re-created for each lattice size, so once the loop finishes it only refers to the LAST lattice size processed;
#        "energies_by_lattice" retains the energies of every lattice size, so that none are silently discarded
energies_by_lattice = {}
for lattice_size in groups.groups.keys():
    print(lattice_size)
    targ_df = groups.get_group(lattice_size).copy() # explicit copy, so the 'records_path' assignment below writes to an independent frame
    energies = RecursiveDict()
    energies_by_lattice[lattice_size] = energies # same object which gets filled in below
    with Progress() as progress:
        mol_task_id = progress.add_task('Generating MD files', total=len(targ_df))

        for (mechanism, mol_name), row in targ_df.iterrows():
            progress.update(mol_task_id, description=f'{mechanism} : {mol_name}')

            # locate the build record (if one exists); timings are only appended to records which are already present
            mol_record_path = row.directory / f'{row.lattice_size}_{row.oligomer_type}_{mol_name}_RECORD.json'
            has_record = mol_record_path.exists()
            targ_df.loc[(mechanism, mol_name), 'records_path'] = mol_record_path if has_record else None

            # load recorded interchange file (the topology is only needed as a file, which is copied alongside the OpenMM files below)
            # NOTE(security) : unpickling can execute arbitrary code - only load interchange files produced by this project
            with row.interchange_path.open('rb') as inc_file:
                interchange = pickle.load(inc_file)

            # LAMMPS
            if build_lammps:
                lmp_dir : Path = row.directory / 'LAMMPS'
                lmp_dir.mkdir(exist_ok=True)

                lmp_data_path  = assemble_path(lmp_dir, mol_name, extension='lammps')
                lmp_input_path = assemble_path(lmp_dir, mol_name, extension='in')
                lmp_prof_path  = assemble_path(lmp_dir, mol_name, extension='txt', postfix='profile')

                ## writing LAMMPS files
                with Timer() as lammps_timer:
                    lmp_profile = cProfile.Profile()
                    lmp_profile.runcall(
                        interchange_to_lammps,
                        interchange=interchange,
                        lmp_data_path=lmp_data_path,
                        lmp_input_path=lmp_input_path
                    )

                lmp_profile.dump_stats(lmp_prof_path)
                if has_record:
                    append_to_json(mol_record_path, lammps_time=lammps_timer.time_taken)

                ## evaluating LAMMPS energies
                # box_params = lammpseval.get_lammps_unit_cell(lmp_input_path)
                energies['LAMMPS'][(mechanism, mol_name)] = lammpseval.get_lammps_energies(lmp_input_path, preferred_unit=kilojoule_per_mole)
                # clear_output()

            # OpenMM
            if build_openmm:
                omm_dir : Path = row.directory / 'OpenMM'
                omm_dir.mkdir(exist_ok=True)

                omm_sys_path   = assemble_path(omm_dir, mol_name, postfix='system'  , extension='xml')
                omm_state_path = assemble_path(omm_dir, mol_name, postfix='state'   , extension='xml')
                omm_top_path   = assemble_path(omm_dir, mol_name, postfix='topology', extension='sdf')
                omm_prof_path  = assemble_path(omm_dir, mol_name, postfix='profile' , extension='txt')

                integrator = ensfac.integrator(time_step=sim_params.integ_params.time_step)

                with Timer() as openmm_timer:
                    omm_profile = cProfile.Profile()
                    context = omm_profile.runcall(
                        interchange_to_openmm,
                        interchange=interchange,
                        integrator=integrator,
                        omm_sys_path=omm_sys_path,
                        omm_state_path=omm_state_path
                    )
                    copyfile(row.topology_path, omm_top_path) # TODO : PDB?

                omm_profile.dump_stats(omm_prof_path)
                if has_record:
                    append_to_json(mol_record_path, openmm_time=openmm_timer.time_taken)

                ## evaluating OpenMM energies
                openmm_energies = eval_openmm_energies(context, force_name_remap=force_name_remap)
                assert openmm_energies['Kinetic'] == 0.0*kilojoule_per_mole, 'Nonzero kinetic energy' # check total KE to verify no integration is being done
                energies['OpenMM'][(mechanism, mol_name)] = openmm_energies

            progress.advance(mol_task_id)
            collect() # manual garbage collector call to try to alleviate memory issues

Output()

1x1x1
LAMMPS (2 Aug 2023)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
  using 1 OpenMP thread(s) per MPI task
Reading data file ...
  triclinic box = (-4.8421008 -3.6445331 -5.9293035) to (13.157899 14.355467 12.070696) with tilt (0 0 0)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  60 atoms
  scanning bonds ...
  3 = max bonds/atom
  scanning angles ...
  6 = max angles/atom
  scanning dihedrals ...
  12 = max dihedrals/atom
  scanning impropers ...
  2 = max impropers/atom
  reading bonds ...
  62 bonds
  reading angles ...
  100 angles
  reading dihedrals ...
  162 dihedrals
  reading impropers ...
  54 impropers
Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0        0        0.5     
  special bond factors coul:  0        0        0.8333333333
     4 = max # of 1-2 neighbors
     5 = max # of 1-3 neighbors
    12 = max # of 1-4 neighbors
    12 = max # of special neighbors
  special bonds CPU = 0.000 seconds
  read_da

LAMMPS (2 Aug 2023)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
  using 1 OpenMP thread(s) per MPI task
Reading data file ...
  triclinic box = (-7.5202174 -7.5568462 -23.909572) to (10.479783 10.443154 23.407399) with tilt (0 0 0)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  165 atoms
  scanning bonds ...
  3 = max bonds/atom
  scanning angles ...
  6 = max angles/atom
  scanning dihedrals ...
  14 = max dihedrals/atom
  scanning impropers ...
  1 = max impropers/atom
  reading bonds ...
  164 bonds
  reading angles ...
  299 angles
  reading dihedrals ...
  546 dihedrals
  reading impropers ...
  30 impropers
Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0        0        0.5     
  special bond factors coul:  0        0        0.8333333333
     4 = max # of 1-2 neighbors
     6 = max # of 1-3 neighbors
    12 = max # of 1-4 neighbors
    16 = max # of special neighbors
  special bonds CPU = 0.000 seconds
  read_data C

LAMMPS (2 Aug 2023)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
  using 1 OpenMP thread(s) per MPI task
Reading data file ...
  triclinic box = (-10.330508 -17.801727 -20.18182) to (13.204261 16.321638 21.809496) with tilt (0 0 0)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  378 atoms
  scanning bonds ...
  3 = max bonds/atom
  scanning angles ...
  6 = max angles/atom
  scanning dihedrals ...
  20 = max dihedrals/atom
  scanning impropers ...
  4 = max impropers/atom
  reading bonds ...
  413 bonds
  reading angles ...
  694 angles
  reading dihedrals ...
  1081 dihedrals
  reading impropers ...
  576 impropers
Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0        0        0.5     
  special bond factors coul:  0        0        0.8333333333
     4 = max # of 1-2 neighbors
    10 = max # of 1-3 neighbors
    24 = max # of 1-4 neighbors
    25 = max # of special neighbors
  special bonds CPU = 0.000 seconds
  read_data 

Output()

2x2x2


LAMMPS (2 Aug 2023)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
  using 1 OpenMP thread(s) per MPI task
Reading data file ...
  triclinic box = (-5.7117094 -4.2134092 -6.2041285) to (22.289059 22.163859 24.285344) with tilt (0 0 0)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  480 atoms
  scanning bonds ...
  3 = max bonds/atom
  scanning angles ...
  6 = max angles/atom
  scanning dihedrals ...
  12 = max dihedrals/atom
  scanning impropers ...
  2 = max impropers/atom
  reading bonds ...
  496 bonds
  reading angles ...
  800 angles
  reading dihedrals ...
  1296 dihedrals
  reading impropers ...
  432 impropers
Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0        0        0.5     
  special bond factors coul:  0        0        0.8333333333
     4 = max # of 1-2 neighbors
     5 = max # of 1-3 neighbors
    12 = max # of 1-4 neighbors
    12 = max # of special neighbors
  special bonds CPU = 0.001 seconds
  read_data

LAMMPS (2 Aug 2023)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
  using 1 OpenMP thread(s) per MPI task
Reading data file ...
  triclinic box = (-22.710369 -20.282014 -20.122139) to (65.643249 72.683886 68.945813) with tilt (0 0 0)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  1320 atoms
  scanning bonds ...
  3 = max bonds/atom
  scanning angles ...
  6 = max angles/atom
  scanning dihedrals ...
  14 = max dihedrals/atom
  scanning impropers ...
  1 = max impropers/atom
  reading bonds ...
  1312 bonds
  reading angles ...
  2392 angles
  reading dihedrals ...
  4368 dihedrals
  reading impropers ...
  240 impropers
Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0        0        0.5     
  special bond factors coul:  0        0        0.8333333333
     4 = max # of 1-2 neighbors
     6 = max # of 1-3 neighbors
    12 = max # of 1-4 neighbors
    16 = max # of special neighbors
  special bonds CPU = 0.001 seconds
  read_d

LAMMPS (2 Aug 2023)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
  using 1 OpenMP thread(s) per MPI task
Reading data file ...
  triclinic box = (-19.432746 -18.48509 -24.105975) to (68.918257 73.100237 74.210976) with tilt (0 0 0)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  3024 atoms
  scanning bonds ...
  3 = max bonds/atom
  scanning angles ...
  6 = max angles/atom
  scanning dihedrals ...
  20 = max dihedrals/atom
  scanning impropers ...
  4 = max impropers/atom
  reading bonds ...
  3304 bonds
  reading angles ...
  5552 angles
  reading dihedrals ...
  8648 dihedrals
  reading impropers ...
  4608 impropers
Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0        0        0.5     
  special bond factors coul:  0        0        0.8333333333
     4 = max # of 1-2 neighbors
    10 = max # of 1-3 neighbors
    24 = max # of 1-4 neighbors
    25 = max # of special neighbors
  special bonds CPU = 0.002 seconds
  read_d

Output()

3x3x3


LAMMPS (2 Aug 2023)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
  using 1 OpenMP thread(s) per MPI task
Reading data file ...
  triclinic box = (-20.483994 -20.580361 -23.949844) to (108.85168 120.43888 121.42736) with tilt (0 0 0)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  4455 atoms
  scanning bonds ...
  3 = max bonds/atom
  scanning angles ...
  6 = max angles/atom
  scanning dihedrals ...
  14 = max dihedrals/atom
  scanning impropers ...
  1 = max impropers/atom
  reading bonds ...
  4428 bonds
  reading angles ...
  8073 angles
  reading dihedrals ...
  14742 dihedrals
  reading impropers ...
  810 impropers
Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0        0        0.5     
  special bond factors coul:  0        0        0.8333333333
     4 = max # of 1-2 neighbors
     6 = max # of 1-3 neighbors
    12 = max # of 1-4 neighbors
    16 = max # of special neighbors
  special bonds CPU = 0.003 seconds
  read_

Per MPI rank memory allocation (min/avg/max) = 22.39 | 22.39 | 22.39 Mbytes
    E_bond        E_angle        E_dihed        E_impro         E_pair         E_vdwl         E_coul         E_long         E_tail         PotEng    
 578.93646      6926.8833      1720.4964      1.6468976     -2322.3795      600.57721      9851.2577     -12774.214     -4.1239517      6905.5836    
Loop time of 3.496e-06 on 1 procs for 0 steps with 4455 atoms

200.2% CPU use with 1 MPI tasks x 1 OpenMP threads

MPI task timing breakdown:
Section |  min time  |  avg time  |  max time  |%varavg| %total
---------------------------------------------------------------
Pair    | 0          | 0          | 0          |   0.0 |  0.00
Bond    | 0          | 0          | 0          |   0.0 |  0.00
Kspace  | 0          | 0          | 0          |   0.0 |  0.00
Neigh   | 0          | 0          | 0          |   0.0 |  0.00
Comm    | 0          | 0          | 0          |   0.0 |  0.00
Output  | 0          | 0          | 0  

LAMMPS (2 Aug 2023)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
  using 1 OpenMP thread(s) per MPI task
Reading data file ...
  triclinic box = (-20.386447 -21.482228 -22.719957) to (121.7316 127.50933 126.97626) with tilt (0 0 0)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  10206 atoms
  scanning bonds ...
  3 = max bonds/atom
  scanning angles ...
  6 = max angles/atom
  scanning dihedrals ...
  20 = max dihedrals/atom
  scanning impropers ...
  4 = max impropers/atom
  reading bonds ...
  11151 bonds
  reading angles ...
  18738 angles
  reading dihedrals ...


Output()

5x5x5


LAMMPS (2 Aug 2023)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
  using 1 OpenMP thread(s) per MPI task
Reading data file ...
  triclinic box = (-8.1282943 -7.9798634 -7.8512158) to (72.386565 72.815801 73.4853) with tilt (0 0 0)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  7500 atoms
  scanning bonds ...
  3 = max bonds/atom
  scanning angles ...
  6 = max angles/atom
  scanning dihedrals ...
  12 = max dihedrals/atom
  scanning impropers ...
  2 = max impropers/atom
  reading bonds ...
  7750 bonds
  reading angles ...
  12500 angles
  reading dihedrals ...
  20250 dihedrals
  reading impropers ...
  6750 impropers
Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0        0        0.5     
  special bond factors coul:  0        0        0.8333333333
     4 = max # of 1-2 neighbors
     5 = max # of 1-3 neighbors
    12 = max # of 1-4 neighbors
    12 = max # of special neighbors
  special bonds CPU = 0.004 seconds
  read_

Per MPI rank memory allocation (min/avg/max) = 33.38 | 33.38 | 33.38 Mbytes
    E_bond        E_angle        E_dihed        E_impro         E_pair         E_vdwl         E_coul         E_long         E_tail         PotEng    
 2249.9993      21266.624      3975.0375      133.79984      237794.44      212071.25      73216.236     -47493.046     -97.208073      265419.9     
Loop time of 2.424e-06 on 1 procs for 0 steps with 7500 atoms

206.3% CPU use with 1 MPI tasks x 1 OpenMP threads

MPI task timing breakdown:
Section |  min time  |  avg time  |  max time  |%varavg| %total
---------------------------------------------------------------
Pair    | 0          | 0          | 0          |   0.0 |  0.00
Bond    | 0          | 0          | 0          |   0.0 |  0.00
Kspace  | 0          | 0          | 0          |   0.0 |  0.00
Neigh   | 0          | 0          | 0          |   0.0 |  0.00
Comm    | 0          | 0          | 0          |   0.0 |  0.00
Output  | 0          | 0          | 0  

LAMMPS (2 Aug 2023)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
  using 1 OpenMP thread(s) per MPI task
Reading data file ...
  triclinic box = (-24.397878 -23.807479 -23.967962) to (217.38726 218.75172 219.58238) with tilt (0 0 0)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  20625 atoms
  scanning bonds ...
  3 = max bonds/atom
  scanning angles ...
  6 = max angles/atom
  scanning dihedrals ...


  14 = max dihedrals/atom
  scanning impropers ...
  1 = max impropers/atom
  reading bonds ...
  20500 bonds
  reading angles ...
  37375 angles
  reading dihedrals ...
  68250 dihedrals
  reading impropers ...
  3750 impropers
Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0        0        0.5     
  special bond factors coul:  0        0        0.8333333333
     4 = max # of 1-2 neighbors
     6 = max # of 1-3 neighbors
    12 = max # of 1-4 neighbors


    16 = max # of special neighbors
  special bonds CPU = 0.012 seconds
  read_data CPU = 0.495 seconds
PPPM initialization ...
  using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
  G vector (1/distance) = 0.19736749
  grid = 54 54 60
  stencil order = 5
  estimated absolute RMS force accuracy = 0.038326762
  estimated relative force accuracy = 0.0001154199
  using double precision FFTW3
  3d grid and FFT values/proc = 226265 174960
Generated 55 of 55 mixed pair_coeff terms from arithmetic mixing rule
Neighbor list info ...
  update: every = 1 steps, delay = 0 steps, check = yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 11
  ghost atom cutoff = 11
  binsize = 5.5, bins = 44 45 45
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair lj/cut/coul/long, perpetual
      attributes: half, newton on
      pair build: half/bin/newton/tri
      stencil: half/bin/3d/tri
      bin: standard
Setting up Verlet run ...
  Unit style    : 

In [None]:
energy_dir = Path('energy_tables')
energy_dir.mkdir(exist_ok=True)
edfs = {}

# NOTE : "energies" and "lattice_size" are whatever the MD loop last bound them to,
#        so only the tables for the final lattice size processed get written here
table_postfix = f'{MOL_MASTER_DIR.stem}_{lattice_size}'
for label, energies_dict in energies.items(): # one table per MD engine label
    energy_path = assemble_path(energy_dir, label, postfix=table_postfix, extension='csv')
    edf = pd.DataFrame.from_dict(energies_dict, orient='index') # one row per key of the energies dict
    edf.to_csv(energy_path)
    edfs[label] = edf

# Comparing energies

## Loading energy tables and comparing contributions

In [None]:
from functools import reduce
from operator import add

pd.options.display.float_format = '{:.4f}'.format # display floats with 4 fixed decimal places (disables scientific notation)

@dataclass
class TableFormats:
    '''Describes how the columns of a platform's energy table are to be merged and pruned'''
    sum_terms : dict[str, list[str]] # maps the name of each combined column to the existing columns which are summed to produce it
    del_terms : list[str] # names of columns which are dropped from the table outright

# Reformatting rules for the energy table of each MD engine, keyed by engine label
formats = dict(
    OpenMM=TableFormats(
        sum_terms=dict(
            vdW=['vdW', 'vdW 1-4'],
            Coulomb=['Electrostatic', 'Electrostatic 1-4'],
        ),
        del_terms=['Kinetic'],
    ),
    LAMMPS=TableFormats(
        sum_terms=dict(
            vdW=['vdW', 'Dispersion'],
            Dihedral=['Proper Torsion', 'Improper Torsion'],
            Coulomb=['Coulomb Short', 'Coulomb Long'],
        ),
        del_terms=['Nonbonded'],
    ),
)

# apply reformatting to respective tables
edfs_fmt = {}
for platform, energy_df in edfs.items():
    fmt = formats[platform]
    new_energy_df = energy_df.copy(deep=True) # work on a copy, so the original table stays unmodified

    # combine selected terms
    for combined_contrib, contribs in fmt.sum_terms.items():
        contrib_columns = [new_energy_df[contrib] for contrib in contribs]
        new_term = reduce(add, contrib_columns) # sum of all contributing columns
        new_energy_df = new_energy_df.drop(columns=contribs) # clear individual contributions...
        new_energy_df[combined_contrib] = new_term # ...and only then insert the total, so a total sharing a name with a contribution isn't deleted

    # delete redundant terms
    for del_contrib in fmt.del_terms:
        new_energy_df = new_energy_df.drop(columns=[del_contrib])
    edfs_fmt[platform] = new_energy_df

In [None]:
import matplotlib.pyplot as plt
from polymerist.graphics.plotutils import presize_subplots


col_order = ['Bond', 'Angle', 'Dihedral', 'vdW', 'Coulomb', 'Potential'] # contributions to plot, in panel order
max_err_perc : Optional[float] = None # when set, only rows whose errors are ALL below this percentage are kept
# max_err_perc : float = 100.0

# absolute percent error of the OpenMM energies, relative to the LAMMPS ones
energy_perc_rel_err = ((edfs_fmt['OpenMM'] - edfs_fmt['LAMMPS']) / edfs_fmt['LAMMPS']).abs() * 100
if max_err_perc is not None:
    err_in_tol = (energy_perc_rel_err < max_err_perc).all(axis=1) # errors are already absolute at this point
    energy_perc_rel_err = energy_perc_rel_err[err_in_tol]

fig, ax = presize_subplots(nrows=2, ncols=3)
for col, axis in zip(col_order, ax.flatten()):
    axis.hist(energy_perc_rel_err[col], bins=50)
    axis.set_xlabel(f'{col} energy (rel. % error)') # the binned quantity lies along the x-axis of a histogram...
    axis.set_ylabel('Count')                        # ...while the y-axis shows the number of entries per bin
    axis.tick_params(axis='x', rotation=-20)

plt.show()
display(energy_perc_rel_err[col_order])


In [None]:
# both outputs are tagged with the dataset name and the lattice size currently bound to "lattice_size"
err_postfix = f'{MOL_MASTER_DIR.stem}_{lattice_size}'

# table of relative errors
diff_path = assemble_path(energy_dir, 'Energy_rel_err_table', postfix=err_postfix, extension='csv')
energy_perc_rel_err.to_csv(diff_path)

# histograms of relative errors
energy_fig_path = assemble_path(energy_dir, 'Energy_rel_err_graphs', postfix=err_postfix, extension='png')
fig.savefig(energy_fig_path)

# Getting just the systems which have density data available

In [None]:
def get_polymer_name(row : pd.Series) -> str:
    '''Assemble the lowercased copolymer name "poly(<monomer 0>-co-<monomer 1>)" from the IUPAC monomer names in a row'''
    first_monomer  = row["IUPAC_name_monomer_0"]
    second_monomer = row["IUPAC_name_monomer_1"]

    return 'poly({}-co-{})'.format(first_monomer, second_monomer).lower()

p = Path('monomer_data_processed/monomer_data_MASTER.csv')
polyid_df = pd.read_csv(p, index_col=0)

polyid_df['polymer_name'] = polyid_df.apply(get_polymer_name, axis=1) # generate column of polymer names from monomer names
polyid_df = polyid_df.set_index(['rxn_name', 'polymer_name']) # reindex by mechanism and molecule name
polyid_df = polyid_df[polyid_df['Density'].notna()] # keep only the entries which have a density value

# entries which have both a density and a relative energy error
common_index = polyid_df.index.intersection(energy_perc_rel_err.index)
polyid_df.loc[common_index]['Density']

In [None]:
# relative errors of the systems with density data, with the density attached as an extra column
has_density = energy_perc_rel_err.loc[common_index].assign(
    Density=polyid_df.loc[common_index]['Density']
)
has_density

# Benchmarking

In [None]:
from polymerist.graphics import plotutils

# (x, y) pairs of mol_file_frame columns, one scatter plot per pair
plot_pairs = (
    ('n_atoms_in_topology', 'topology_time'), # TOSELF : can't 
    ('n_atoms_in_topology', 'interchange_time'),
)

fig, ax = plotutils.presize_subplots(nrows=1, ncols=len(plot_pairs))
for axis, (x_var, y_var) in zip(ax, plot_pairs):
    axis.scatter(mol_file_frame[x_var], mol_file_frame[y_var])
    axis.set(xlabel=x_var, ylabel=y_var) # label each axis with the name of the column plotted along it