Skip to content

Commit

Permalink
Add functionality and tests for pymatgen.io.openff.
Browse files Browse the repository at this point in the history
  • Loading branch information
orionarcher committed Mar 31, 2024
1 parent 079c354 commit f52a8df
Show file tree
Hide file tree
Showing 2 changed files with 498 additions and 0 deletions.
318 changes: 318 additions & 0 deletions pymatgen/io/openff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,318 @@
"""Utility functions for classical md subpackage."""

from __future__ import annotations

from pathlib import Path

import numpy as np
import openff.toolkit as tk
from openff.units import Quantity, unit

import pymatgen
from pymatgen.analysis.graphs import MoleculeGraph
from pymatgen.analysis.local_env import OpenBabelNN, metal_edge_extender
from pymatgen.core import Element, Molecule


def molgraph_to_openff_mol(molgraph: MoleculeGraph) -> tk.Molecule:
"""Convert a Pymatgen MoleculeGraph to an OpenFF Molecule.
Maps partial charges, formal charges, and aromaticity from site properties to atoms.
Maps bond order and bond aromaticity from edge weights and edge properties to bonds.
Parameters
----------
molgraph : MoleculeGraph
The Pymatgen MoleculeGraph to be converted.
Returns
-------
tk.Molecule
The converted OpenFF Molecule.
"""
# create empty openff_mol and prepare a periodic table
p_table = {str(el): el.Z for el in Element}
openff_mol = tk.Molecule()

# set atom properties
partial_charges = []
# TODO: should assert that there is only one molecule
for i_node in range(len(molgraph.graph.nodes)):
node = molgraph.graph.nodes[i_node]
atomic_number = node.get("atomic_number") or p_table[molgraph.molecule[i_node].species_string]

# put formal charge on first atom if there is none present
formal_charge = node.get("formal_charge")
if formal_charge is None:
formal_charge = (i_node == 0) * molgraph.molecule.charge * unit.elementary_charge

# assume not aromatic if no info present
is_aromatic = node.get("is_aromatic") or False

openff_mol.add_atom(atomic_number, formal_charge, is_aromatic=is_aromatic)

# add to partial charge array
partial_charge = node.get("partial_charge")
if isinstance(partial_charge, Quantity):
partial_charge = partial_charge.magnitude
partial_charges.append(partial_charge)

charge_array = np.array(partial_charges)
if np.not_equal(charge_array, None).all():
openff_mol.partial_charges = charge_array * unit.elementary_charge

# set edge properties, default to single bond and assume not aromatic
for i_node, j, bond_data in molgraph.graph.edges(data=True):
bond_order = bond_data.get("bond_order") or 1
is_aromatic = bond_data.get("is_aromatic") or False
openff_mol.add_bond(i_node, j, bond_order, is_aromatic=is_aromatic)

openff_mol.add_conformer(molgraph.molecule.cart_coords * unit.angstrom)
return openff_mol


def molgraph_from_openff_mol(molecule: tk.Molecule):
"""
This is designed to closely mirror the graph structure generated by tk.Molecule.to_networkx
"""
molgraph = MoleculeGraph.with_empty_graph(
Molecule([], []),
name="none",
)
p_table = {el.Z: str(el) for el in Element}
total_charge = 0
cum_atoms = 0

coords = molecule.conformers[0].magnitude if molecule.conformers is not None else np.zeros((molecule.n_atoms, 3))
for j, atom in enumerate(molecule.atoms):
molgraph.insert_node(
cum_atoms + j,
p_table[atom.atomic_number],
coords[j, :],
)
molgraph.graph.nodes[cum_atoms + j]["atomic_number"] = atom.atomic_number
molgraph.graph.nodes[cum_atoms + j]["is_aromatic"] = atom.is_aromatic
molgraph.graph.nodes[cum_atoms + j]["stereochemistry"] = atom.stereochemistry
# set partial charge as a pure float
partial_charge = None if atom.partial_charge is None else atom.partial_charge.magnitude
molgraph.graph.nodes[cum_atoms + j]["partial_charge"] = partial_charge
# set formal charge as a pure float
formal_charge = atom.formal_charge.magnitude # type: ignore
molgraph.graph.nodes[cum_atoms + j]["formal_charge"] = formal_charge
total_charge += formal_charge
for bond in molecule.bonds:
molgraph.graph.add_edge(
cum_atoms + bond.atom1_index,
cum_atoms + bond.atom2_index,
bond_order=bond.bond_order,
is_aromatic=bond.is_aromatic,
stereochemistry=bond.stereochemistry,
)
# formal_charge += molecule.total_charge
cum_atoms += molecule.n_atoms
molgraph.molecule.set_charge_and_spin(charge=total_charge)
return molgraph


def get_atom_map(inferred_mol: tk.Molecule, openff_mol: tk.Molecule) -> tuple[bool, dict[int, int]]:
"""Compute an atom mapping between two OpenFF Molecules.
Attempts to find an isomorphism between the molecules, considering various matching
criteria such as formal charges, stereochemistry, and bond orders. Returns the atom
mapping if an isomorphism is found, otherwise returns an empty mapping.
Parameters
----------
inferred_mol : tk.Molecule
The first OpenFF Molecule.
openff_mol : tk.Molecule
The second OpenFF Molecule.
Returns
-------
Tuple[bool, Dict[int, int]]
A tuple containing a boolean indicating if an isomorphism was found and a
dictionary representing the atom mapping.
"""
# do not apply formal charge restrictions
kwargs = dict(
return_atom_map=True,
formal_charge_matching=False,
)
isomorphic, atom_map = tk.topology.Molecule.are_isomorphic(openff_mol, inferred_mol, **kwargs)
if isomorphic:
return True, atom_map
# relax stereochemistry restrictions
kwargs["atom_stereochemistry_matching"] = False
kwargs["bond_stereochemistry_matching"] = False
isomorphic, atom_map = tk.topology.Molecule.are_isomorphic(openff_mol, inferred_mol, **kwargs)
if isomorphic:
return True, atom_map
# relax bond order restrictions
kwargs["bond_order_matching"] = False
isomorphic, atom_map = tk.topology.Molecule.are_isomorphic(openff_mol, inferred_mol, **kwargs)
if isomorphic:
return True, atom_map
return False, {}


def infer_openff_mol(
mol_geometry: pymatgen.core.Molecule,
) -> tk.Molecule:
"""Infer an OpenFF Molecule from a Pymatgen Molecule.
Constructs a MoleculeGraph from the Pymatgen Molecule using the OpenBabelNN local
environment strategy and extends metal edges. Converts the resulting MoleculeGraph
to an OpenFF Molecule using molgraph_to_openff_mol.
Parameters
----------
mol_geometry : pymatgen.core.Molecule
The Pymatgen Molecule to infer from.
Returns
-------
tk.Molecule
The inferred OpenFF Molecule.
"""
molgraph = MoleculeGraph.with_local_env_strategy(mol_geometry, OpenBabelNN())
molgraph = metal_edge_extender(molgraph)
return molgraph_to_openff_mol(molgraph)


def add_conformer(
openff_mol: tk.Molecule, geometry: pymatgen.core.Molecule | None
) -> tuple[tk.Molecule, dict[int, int]]:
"""Add conformers to an OpenFF Molecule based on the provided geometry.
If a geometry is provided, infers an OpenFF Molecule from it,
finds an atom mapping between the inferred molecule and the
input molecule, and adds the conformer coordinates to the input
molecule. If no geometry is provided, generates a single conformer.
Parameters
----------
openff_mol : tk.Molecule
The OpenFF Molecule to add conformers to.
geometry : Union[pymatgen.core.Molecule, None]
The geometry to use for adding conformers.
Returns
-------
Tuple[tk.Molecule, Dict[int, int]]
A tuple containing the updated OpenFF Molecule with added conformers
and a dictionary representing the atom mapping.
"""
# TODO: test this
if geometry:
# for geometry in geometries:
inferred_mol = infer_openff_mol(geometry)
is_isomorphic, atom_map = get_atom_map(inferred_mol, openff_mol)
if not is_isomorphic:
raise ValueError(
f"An isomorphism cannot be found between smile {openff_mol.to_smiles()}"
f"and the provided molecule {geometry}."
)
new_mol = pymatgen.core.Molecule.from_sites([geometry.sites[i] for i in atom_map.values()])
openff_mol.add_conformer(new_mol.cart_coords * unit.angstrom)
else:
atom_map = {i: i for i in range(openff_mol.n_atoms)}
openff_mol.generate_conformers(n_conformers=1)
return openff_mol, atom_map


def assign_partial_charges(
openff_mol: tk.Molecule,
atom_map: dict[int, int],
charge_method: str,
partial_charges: None | list[float],
) -> tk.Molecule:
"""Assign partial charges to an OpenFF Molecule.
If partial charges are provided, assigns them to the molecule
based on the atom mapping. If the molecule has only one atom,
assigns the total charge as the partial charge. Otherwise,
assigns partial charges using the specified charge method.
Parameters
----------
openff_mol : tk.Molecule
The OpenFF Molecule to assign partial charges to.
atom_map : Dict[int, int]
A dictionary representing the atom mapping.
charge_method : str
The charge method to use if partial charges are
not provided.
partial_charges : Union[None, List[float]]
A list of partial charges to assign or None to use the charge method.
Returns
-------
tk.Molecule
The OpenFF Molecule with assigned partial charges.
"""
# TODO: test this
# assign partial charges
if partial_charges is not None:
partial_charges = np.array(partial_charges)
chargs = partial_charges[list(atom_map.values())] # type: ignore[index, call-overload]
openff_mol.partial_charges = chargs * unit.elementary_charge
elif openff_mol.n_atoms == 1:
openff_mol.partial_charges = np.array([openff_mol.total_charge.magnitude]) * unit.elementary_charge
else:
openff_mol.assign_partial_charges(charge_method)
return openff_mol


def create_openff_mol(
smile: str,
geometry: pymatgen.core.Molecule | str | Path | None = None,
charge_scaling: float = 1,
partial_charges: list[float] | None = None,
backup_charge_method: str = "am1bcc",
) -> tk.Molecule:
"""Create an OpenFF Molecule from a SMILES string and optional geometry.
Constructs an OpenFF Molecule from the provided SMILES
string, adds conformers based on the provided geometry (if
any), assigns partial charges using the specified method
or provided partial charges, and applies charge scaling.
Parameters
----------
smile : str
The SMILES string of the molecule.
geometry : Union[pymatgen.core.Molecule, str, Path, None], optional
The geometry to use for adding conformers. Can be a Pymatgen Molecule,
file path, or None.
charge_scaling : float, optional
The scaling factor for partial charges. Default is 1.
partial_charges : Union[List[float], None], optional
A list of partial charges to assign, or None to use the charge method.
backup_charge_method : str, optional
The backup charge method to use if partial charges are not provided. Default
is "am1bcc".
Returns
-------
tk.Molecule
The created OpenFF Molecule.
"""
if isinstance(geometry, (str, Path)):
geometry = pymatgen.core.Molecule.from_file(str(geometry))

if partial_charges is not None:
if geometry is None:
raise ValueError("geometries must be set if partial_charges is set")
if len(partial_charges) != len(geometry):
raise ValueError("partial charges must have same length & order as geometry")

openff_mol = tk.Molecule.from_smiles(smile, allow_undefined_stereo=True)

# add conformer
openff_mol, atom_map = add_conformer(openff_mol, geometry)
# assign partial charges
openff_mol = assign_partial_charges(openff_mol, atom_map, backup_charge_method, partial_charges)
openff_mol.partial_charges *= charge_scaling

return openff_mol

0 comments on commit f52a8df

Please sign in to comment.