In [1]:
import copy
import pickle
import re
from typing import List
import tqdm

import numpy as np

In [2]:
import openff.interchange
import openff.toolkit

from openff.interchange.components.interchange import Interchange
from openff.toolkit.topology import Topology
from openff.toolkit.typing.engines.smirnoff import ForceField

import openmm
import pandas as pd



In [3]:
print(openff.interchange.__version__)

v0.1.4


In [4]:
print(openff.toolkit.__version__)

0.10.1+102.g57a442b5


In [13]:
ichange_name = f"interchange-{openff.interchange.__version__}"
toolkit_name = f"toolkit-{openff.toolkit.__version__}"
version_name = f"{toolkit_name}_{ichange_name}"
version_name

'toolkit-0.10.1+102.g57a442b5_interchange-v0.1.4'

In [5]:
from interchange_regression_utilities.models import (
    ComparisonSettings,
    ExpectedValueChange,
    model_from_file,
    TopologyDefinition,
)

In [6]:
models = model_from_file(List[TopologyDefinition], "input-topologies.json")

In [7]:
topologies = []
failed_smiles = []
smiles_pattern = "SMILES '([A-Za-z0-9=\(\)\[\]]+)'"
for model in tqdm.tqdm(models):
    try:
        topologies.append(model.to_topology())
    except ValueError as e:
        if "No registered toolkits can provide the capability" in str(e):
            smiles = re.search(smiles_pattern, str(e)).group(1)
            failed_smiles.append(smiles)

Problematic atoms are:
Atom atomic num: 7, name: , idx: 4, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 3, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 5, aromatic: True, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 15, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 4, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 3, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 5, aromatic: True, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 15, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 7, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 6, aromatic: True, chiral: False
bon

In [8]:
len(failed_smiles)

0

In [14]:
topology_file = f"openff_topologies_{version_name}.pkl"
with open(topology_file, "wb") as f:
    pickle.dump(topologies, f)
# with open(topology_file, "rb") as f:
#     topologies = pickle.load(f)

In [15]:
force_field = ForceField("openff-2.0.0.offxml")

In [19]:
# interchanges = [
#     Interchange.from_smirnoff(force_field, topology)
#     for topology in tqdm.tqdm(topologies)
# ]

# interchange_systems = [
#     ichange.to_openmm(combine_nonbonded_forces=True)
#     for ichange in interchanges
# ]

interchange_systems = [
    force_field.create_openmm_system(topology, use_interchange=True)
    for topology in tqdm.tqdm(topologies)
]



  0%|                                                    | 0/51 [00:00<?, ?it/s][A[A

ImportError: cannot import name 'Interchange' from 'openff.interchange' (/Users/lily/pydev/interchange/openff/interchange/__init__.py)

In [17]:
toolkit_systems = [
    force_field.create_openmm_system(topology, use_interchange=False)
    for topology in tqdm.tqdm(topologies)
]





  2%|▊                                           | 1/51 [00:05<04:28,  5.38s/it][A[A

  0%|                                                    | 0/51 [03:22<?, ?it/s][A[A


  6%|██▌                                         | 3/51 [01:17<23:37, 29.54s/it][A[A

  8%|███▍                                        | 4/51 [01:38<20:42, 26.43s/it][A[A

 10%|████▎                                       | 5/51 [01:49<15:54, 20.74s/it][A[A

 12%|█████▏                                      | 6/51 [01:51<10:50, 14.46s/it][A[A

 14%|██████                                      | 7/51 [02:16<13:04, 17.84s/it][A[A

 16%|██████▉                                     | 8/51 [02:19<09:24, 13.14s/it][A[A

 18%|███████▊                                    | 9/51 [02:44<11:45, 16.79s/it][A[A

 20%|████████▍                                  | 10/51 [03:04<12:11, 17.85s/it][A[A

 22%|█████████▎                                 | 11/51 [03:24<12:24, 18.60s/it][A[A

 24%|██████████            



100%|███████████████████████████████████████████| 51/51 [24:09<00:00, 28.42s/it][A[A


In [20]:
def get_charges(
    system,
    model_index: int = 0,
    version: str = "interchange",
) -> pd.DataFrame:
    force = [
        force for force in system.getForces()
        if "NonbondedForce" in str(type(force))
    ][0]
    
    name = models[model_index].name
    data = {"particle_index": [], "charge": []}
    for i in range(force.getNumParticles()):
        charge, *_ = force.getParticleParameters(i)
        data["particle_index"].append(i)
        data["charge"].append(charge._value)
    
    df = pd.DataFrame(data)
    df["name"] = name
    df["version"] = version
    return df

In [22]:
all_charge_dfs = []

for i, (toolkit) in tqdm.tqdm(
    list(enumerate(toolkit_systems))
):
    model = models[i]
    
    name = model.name
#     ixml = openmm.XmlSerializer.serialize(ichange)
    txml = openmm.XmlSerializer.serialize(toolkit)
    
#     iname = f"from-interchange_{version_name}"
    tname = f"from-toolkit_{version_name}"
    
#     ifile = f"xmls/{name}-{iname}.xml"
    tfile = f"xmls/{name}-{tname}.xml"
    
    
#     with open(ifile, "w") as f:
#         f.write(ixml)
    with open(tfile, "w") as f:
        f.write(txml)
        
#     idf = get_charges(ichange, model_index=i, version=iname)
    tdf = get_charges(toolkit, model_index=i, version=tname)
    
    model_df = copy.deepcopy(tdf)
    model_df["version"] = f"original_topology_{version_name}"
    model_df["charge"] = [
        charge
        for molecule in topologies[i].topology_molecules
        for charge in np.array(molecule.partial_charges)
    ]
    
    all_charge_dfs.extend([tdf, model_df])

charge_df = pd.concat(all_charge_dfs)




  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(mol

  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecu

  for charge in np.array(molecule.partial_charges)



  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
  for charge in np.array(molecule.partial_charges)
100%|██████████████████████████████████████████| 51/51 [00:00<00:00, 221.64it/s]


In [23]:
charge_df.version.unique()

array(['from-toolkit_toolkit-0.10.1+102.g57a442b5_interchange-v0.1.4',
       'original_topology_toolkit-0.10.1+102.g57a442b5_interchange-v0.1.4'],
      dtype=object)

In [24]:
charge_df.to_csv(f"charges_{version_name}.csv")