In [1]:
import cProfile
import json

from pathlib import Path
import pandas as pd

# Loading polymer and profiling data

In [2]:
import pstats
# from openff.interchange.interop.internal import lammps


# Root of the benchmark output tree; every '*.txt' profile dump found beneath it is loaded
MOL_MASTER_DIR = Path('polymer_benchmark_fastlmp')
# Labels assigned, in order, to the leading components of each profile's path relative to MOL_MASTER_DIR
TAGS = ('mechanism', 'polymer_name', 'oligomer_size', 'lattice_size', 'MD_platform')
# Name of the per-function profile attribute that is read as the timing value
time_attr = 'cumtime'

# Build one record (plain dict) per profile dump found anywhere under MOL_MASTER_DIR
records = []
for path in MOL_MASTER_DIR.glob('**/*.txt'):
    mol_dir = path.parents[1] # two levels above the profile file; the JSON record is looked up here

    # Label the leading components of the relative path with the names in TAGS
    # (zip stops at the shorter of the two, so surplus path components are ignored)
    mol_info = dict(zip(TAGS, path.relative_to(MOL_MASTER_DIR).parts))
    mol_info['profile_path'] = path

    # Load the profile dump and expose its statistics per function name
    stats = pstats.Stats(str(path))
    stats = stats.sort_stats(pstats.SortKey.CUMULATIVE)
    stat_prof = stats.get_stats_profile()

    # Keep the `time_attr` value of every profiled function whose name contains '_write' or '_process'
    # (the author left an additional filter on fn_profile.file_name == lammps.__file__ disabled)
    lmp_writer_times = {
        fn_name : getattr(fn_profile, time_attr)
            for fn_name, fn_profile in stat_prof.func_profiles.items()
                if ('_write' in fn_name) or ('_process' in fn_name)
    }
    mol_info.update(lmp_writer_times)

    # Merge in the molecule's JSON record when one is present.
    # exists() must be *called*: the bare bound method is always truthy, which made the guard
    # a no-op and let a missing record file raise FileNotFoundError in open()
    record_path = mol_dir / f'{mol_info["lattice_size"]}_{mol_info["oligomer_size"]}_{mol_info["polymer_name"]}_RECORD.json'
    if record_path.exists():
        with record_path.open('r') as record_file:
            mol_info.update(json.load(record_file))

    records.append(mol_info)

# Gather every record into a single table keyed by (mechanism, polymer_name)
pframe = pd.DataFrame.from_records(records).set_index(['mechanism', 'polymer_name'])
# for str_path_col in ('profile_path', 'topology_path', 'interchange_path', 'directory'):
#     pframe[str_path_col] = pframe[str_path_col].map(Path) # de-stringify file Paths

print(pframe.shape[0]) # number of profiles that were loaded
pframe

## Plot runtimes for various Interchange MD write components

In [None]:
import matplotlib.pyplot as plt
from collections import defaultdict

dim = 6 # side length passed to figsize for each (square) figure
n = pframe['n_atoms_in_topology'] # x-axis values shared by every scatter plot below
benchdir = Path('benchmark_data')
benchdir.mkdir(exist_ok=True)

# Placeholders; nothing in this cell reads them
lmp_writer_names = []
lmp_writer_names_no_propers = []
lmp_writer_names_coeffs = []

lmp_str = 'LAMMPS_quickerlammps'
omm_str = 'OpenMM'

# Each entry: (substring identifying a platform's timing columns, platform label, ((group header, membership test), ...))
column_groupings = (
    ('_write', lmp_str, (
        ('all writers'    , lambda name : True),
        ('without propers', lambda name : 'propers' not in name),
        ('coeffs only'    , lambda name : 'coeffs' in name),
    )),
    ('_process', omm_str, (
        ('all writers'               , lambda name : True),
        ('without propers+nonbondeds', lambda name : ('proper' not in name) and ('nonbonded' not in name)),
        ('no forces'                 , lambda name : 'forces' not in name),
    )),
)

# platform label -> group header -> list of column names belonging to that group
fn_dict = defaultdict(lambda : defaultdict(list))
for colname in pframe.columns:
    for marker, platform_label, group_rules in column_groupings:
        if marker not in colname:
            continue
        for group_header, belongs in group_rules:
            if belongs(colname):
                fn_dict[platform_label][group_header].append(colname)

# One figure per (platform, group): cumulative time of each function in the group against system size
for platform, fn_name_map in fn_dict.items():
    for header, fn_names in fn_name_map.items():
        fig, ax = plt.subplots(1, figsize=(dim, dim))
        for colname in fn_names:
            # leading space: presumably so labels starting with '_' are not dropped from the legend -- TODO confirm
            ax.scatter(n, pframe[colname], label=f' {colname}', marker='o')

        ax.set_xlabel('n_atoms_in_topology')
        ax.set_ylabel('Cumulative time (sec)')
        ax.set_title(f'Profile times ({platform}, {header})')

        leg = ax.legend(loc='best')
        header_slug = header.replace(' ', '_')
        fig.savefig(benchdir / f'Cumultimes_{platform}_{header_slug}.png')

# Homing in on `_write_propers` hangups

In [None]:
l = 3 # lattice edge length; rows are selected for the l x l x l lattice
# plat = 'OpenMM'
plat = 'LAMMPS'

# Restrict the table to a single lattice size on a single MD platform
lattice_label = 'x'.join(3 * [str(l)])
on_lattice = pframe['lattice_size'] == lattice_label
on_platform = pframe['MD_platform'] == plat
spec = pframe[on_lattice & on_platform]

In [None]:
# Reload the full profile of the second row of the selection
row = spec.iloc[1]
stats = pstats.Stats(str(row.profile_path)).sort_stats(pstats.SortKey.CUMULATIVE)
stat_prof = stats.get_stats_profile()

# One table row per profiled function, one column per field of its profile entry;
# no name filter is applied here (the '_write' / '_process' filters are left disabled)
profile_fields = {
    fn_name : vars(fn_profile)
        for fn_name, fn_profile in stat_prof.func_profiles.items()
}
omm_frame = pd.DataFrame.from_dict(profile_fields, orient='index')

# File name carries the first index level of the selected row and its atom count
mechanism = spec.index[1][0]
omm_frame.to_csv(f'LAMMPS_profile_{mechanism}_{row.n_atoms_in_topology}_atoms.csv')

In [None]:
# Display the table of per-function profile fields for the selected profile
omm_frame

In [None]:
# Print the complete profile report for `stats` (previously sorted by cumulative time)
stats.print_stats()

In [None]:
# Inspect the raw mapping of function name -> profile entry
stat_prof.func_profiles

In [None]:
import pickle
from openff.interchange.interop.internal import lammps

# Re-profile only the proper-torsion writer on the last row of the selection
row = spec.iloc[-1]
# NOTE(review): unpickling can execute arbitrary code -- only load Interchange pickles from a trusted source
interchange = pickle.loads(Path(row.interchange_path).read_bytes())

outpath = Path('dummy.lmp') # scratch output; only the timing of the call is of interest here
profiler = cProfile.Profile()
with outpath.open('w') as outfile:
    ret = profiler.runcall(lammps._write_propers, outfile, interchange)

profiler.print_stats(pstats.SortKey.CUMULATIVE)

# Plotting Interchange output times for various MD platforms

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

dim = 8
fig, ax = plt.subplots(figsize=(dim, dim))
inset_ax = inset_axes(
    ax,
    width='30%',
    height='30%',
    loc='upper left',
    bbox_to_anchor=(0.1, 0.15, 0.8, 0.8),
    bbox_transform=ax.transAxes,
)

# (timing column, marker colour, legend label) for each Interchange export target
export_series = (
    ('lammps_time', 'r', 'to_lammps'),
    ('openmm_time', 'b', 'to_openmm'),
)

# Main axes: every system in the table
for time_col, color, label in export_series:
    ax.scatter(pframe['n_atoms_in_topology'], pframe[time_col], color=color, marker='o', label=label)
ax.set_xlabel('# atoms in topology')
ax.set_ylabel('Interchange output time (sec)')

# Inset: only the rows on the 1x1x1 lattice
small = pframe[pframe['lattice_size'] == '1x1x1']
inset_ax.set_title('single oligomers')
for time_col, color, label in export_series:
    inset_ax.scatter(small['n_atoms_in_topology'], small[time_col], color=color, marker='.', label=label)
inset_ax.set_xlabel('# atoms')
inset_ax.set_ylabel('time (sec)')
ax.legend(loc='upper center')

fig.savefig('interchange_MD_benchmark.png')

### Selecting smallest, most average, and largest oligomers as benchmarking points (ONLY DO THIS WITH POLYMER_UPDATE!!)

In [None]:
def output_olig_samples(lattice_groups=None, size_tol : int = 1, out_path='oligomers_for_benchmark.csv') -> None:
    """Write the smallest, a near-average-sized, and the largest single oligomer to a CSV file.

    The '1x1x1' group is sorted by 'n_atoms_in_topology'. The first and last rows are the
    endpoints; the midpoint is the first row (in size order) whose atom count lies within
    `size_tol` of the rounded mean size and whose first index level differs from that of
    both endpoints. The chosen positions are printed and the three rows are written out.

    Parameters
    ----------
    lattice_groups : optional
        Object providing get_group('1x1x1') that returns a DataFrame with an
        'n_atoms_in_topology' column. When omitted, the notebook-level `groups` variable
        is used (presumably pframe.groupby('lattice_size') -- TODO confirm; it is not
        defined in the cells visible here).
    size_tol : int
        Largest allowed absolute deviation of the midpoint from the rounded mean size.
    out_path : str or path-like
        Destination of the CSV file.

    Raises
    ------
    ValueError
        If no row satisfies the midpoint criteria. Previously argmax() of an all-False
        mask returned 0, silently duplicating the smallest oligomer as the "average" one.
    """
    if lattice_groups is None:
        lattice_groups = groups # notebook-level state, see docstring

    olig_by_size = lattice_groups.get_group('1x1x1').sort_values('n_atoms_in_topology')
    size_column = olig_by_size['n_atoms_in_topology']
    sizes = size_column.to_numpy()

    edges = [0, -1] # positions of the smallest and the largest oligomer
    avg_size = round(sizes.mean())

    chem_classes = olig_by_size.index.get_level_values(0)
    endpoint_classes = olig_by_size.iloc[edges].index.get_level_values(0)

    # midpoint oligomer should be within a tolerance of the true average size...
    # (Series.abs() is used instead of np.abs: numpy is never imported in this notebook)
    near_average = (size_column - avg_size).abs() <= size_tol
    # ...and not belong to the same chemical classes as the endpoints
    in_endpoint_class = chem_classes.isin(endpoint_classes)
    avg_idx_candidates = (near_average & ~in_endpoint_class).to_numpy()

    if not avg_idx_candidates.any():
        raise ValueError(
            f'No oligomer within {size_tol} atom(s) of the average size ({avg_size}) '
            'lies outside the chemical classes of the smallest and largest oligomers'
        )

    midpt_idx = int(avg_idx_candidates.argmax()) # position of the first qualifying row
    edges.insert(1, midpt_idx)
    print(edges)

    subsampled_oligs = olig_by_size.iloc[edges]
    subsampled_oligs.to_csv(out_path)

# Run the selection: prints the chosen row positions and writes 'oligomers_for_benchmark.csv'.
# Relies on a notebook-level `groups` object that is not created in the cells shown here.
output_olig_samples()