In [30]:
from pathlib import Path
from tqdm import tqdm
from natsort import natsorted
import numpy as np
import io
import os

from openff.toolkit.topology import Molecule
from openff.toolkit.typing.engines.smirnoff import ForceField
from openff.interchange import Interchange
from openff.interchange.drivers import get_openmm_energies
from openff.units.openmm import to_openmm

from openmm.unit import *

import multiprocessing
from multiprocessing import Pool, cpu_count

from ase.io import read as aseread
from ase.io import write as asewrite
from openbabel import pybel 

The Sage force field can be used for small molecules through the OpenFF toolkit.  
See the Sage generator for more details.


In [31]:
from openff.toolkit import ForceField
from openff.interchange import Interchange
from openff.interchange.drivers.openmm import get_openmm_energies

def get_sage_E_F(molecule, forcefield_xml, charge_scheme):
    """
    Compute the single-point potential energy and atomic forces of a molecule
    with a SMIRNOFF force field, using OpenMM via the OpenFF Interchange workflow.

    The first conformer of ``molecule`` supplies the coordinates. Note that
    ``molecule`` is modified in place: its partial charges are (re)assigned
    with ``charge_scheme``.

    Args:
        molecule (Molecule): The OpenFF Molecule object. It must carry at
            least one conformer.
        forcefield_xml (str or ForceField): Name or path of the SMIRNOFF force
            field file (e.g. 'openff-2.0.0.offxml'), or an already-loaded
            ForceField instance. Passing an instance avoids re-parsing the
            file when many molecules are evaluated with the same force field.
        charge_scheme (str): The charge scheme to use (e.g., 'gasteiger').

    Returns:
        tuple: A tuple containing:
            float: The potential energy of the molecule in kcal/mol.
            np.ndarray: The atomic forces in kcal/mol/angstrom.

    Raises:
        ValueError: If ``molecule`` has no conformers (no coordinates to evaluate).
    """
    # Imported here so the unit names used below do not depend on the
    # notebook-wide ``from openmm.unit import *``.
    from openmm import Platform, VerletIntegrator, unit
    from openmm.app import Simulation

    # Fail early, before the molecule is mutated, with a clear message instead
    # of the TypeError that indexing ``conformers`` (None) would raise.
    if molecule.n_conformers == 0:
        raise ValueError(
            "get_sage_E_F requires a molecule with at least one conformer; "
            "the first conformer provides the coordinates."
        )
    coordinates = molecule.conformers[0]

    # Assign charges and create force field and topology.
    molecule.assign_partial_charges(charge_scheme)
    if isinstance(forcefield_xml, ForceField):
        force_field = forcefield_xml
    else:
        force_field = ForceField(forcefield_xml)
    topology = molecule.to_topology()

    # Build an Interchange object from the SMIRNOFF force field, taking the
    # partial charges from the molecule rather than from the force field.
    interchange = Interchange.from_smirnoff(
        force_field=force_field,
        topology=topology,
        charge_from_molecules=[molecule]
    )
    interchange.positions = coordinates

    # Compute the energy using OpenFF's OpenMM energy driver.
    energies = get_openmm_energies(interchange)
    energy = energies.total_energy.m_as("kilocalorie/mole")

    # Convert the Interchange object to an OpenMM system and topology.
    openmm_system = interchange.to_openmm(combine_nonbonded_forces=True)
    omm_topology = interchange.topology.to_openmm()

    # ``to_openmm`` only converts the quantity type (OpenFF -> OpenMM).
    omm_positions = to_openmm(coordinates)

    # Set up a minimal OpenMM simulation to obtain forces. The integrator is
    # required to build a context but is never stepped.
    platform = Platform.getPlatformByName("Reference")
    integrator = VerletIntegrator(1.0 * unit.femtosecond)
    simulation = Simulation(omm_topology, openmm_system, integrator, platform)
    simulation.context.setPositions(omm_positions)

    # Retrieve the forces and convert them to kcal/mol/angstrom. The energy is
    # taken from the driver above, so only forces are requested here.
    state = simulation.context.getState(getForces=True)
    forces = state.getForces(asNumpy=True).value_in_unit(
        unit.kilocalorie_per_mole / unit.angstrom
    )

    return energy, forces


In [32]:
def worker(sdf_file):
    """
    Evaluate one SDF input with the Sage force field.

    Args:
        sdf_file: SDF source handed to ``Molecule.from_file`` (a path or a
            file-like object).

    Returns:
        tuple: ``(sdf_file, (energy, forces))`` where the inner tuple is the
        return value of ``get_sage_E_F``.
    """
    mol = Molecule.from_file(
        sdf_file,
        file_format='sdf',
        allow_undefined_stereo=True,
    )
    # get_sage_E_F returns both the energy and the forces as one tuple.
    energy_and_forces = get_sage_E_F(mol, 'openff-2.0.0.offxml', 'mmff94')
    return sdf_file, energy_and_forces

def parallel_process(sdf_files, N):
    """
    Apply ``worker`` to every entry of ``sdf_files`` using a process pool.

    Args:
        sdf_files: Sequence of SDF inputs; its length sizes the progress bar.
        N: Number of worker processes (set according to the cores available).

    Returns:
        list: The ``worker`` results, in the same order as ``sdf_files``.
    """
    n_jobs = len(sdf_files)
    with Pool(processes=N) as pool:
        # imap yields results lazily and in input order, so the progress bar
        # advances as each molecule finishes.
        lazy_results = pool.imap(worker, sdf_files)
        progress = tqdm(lazy_results, total=n_jobs)
        results = list(progress)
    return results

In [33]:
def create_sdf_batch(atoms_list, output_dir=None, bond_order_pair=None, new_bond_order=2):
    """
    Convert a list of ASE Atoms objects into a list of in-memory SDF file-like objects.
    Optionally save each SDF string to a file in the specified output directory.
    Additionally, if a bond_order_pair is provided, modify the bond order between these
    atoms using OpenBabel before generating the SDF.

    With simple command-line conversion or on-the-fly generation without fixing the N=N bond,
    the Sage potential from OpenFF produces:
    >> The OpenFF Toolkit does not currently support parsing molecules with S- and P-block radicals.
    >> Found 1 radical electrons on molecule [H]c1c([H])c([H])c([N][N]c2c([H])c([H])c([H])c([H])c2[H])c([H])c1[H].

    On-the-fly conversion saves processing time when transferring data. Oftentimes, it is
    unnecessary to keep SDF files on disk.

    Args:
        atoms_list (list): List of ASE Atoms objects.
        output_dir (str, optional): Directory in which to save SDF files.
        bond_order_pair (tuple, optional): A tuple (atom1_index, atom2_index) specifying the
            pair of atoms whose bond order will be modified. Use 1-indexed positions (as required
            by OpenBabel's OBMol.GetAtom method). If None, no bond order modifications are made.
        new_bond_order (int, optional): The new bond order to set for the specified pair (default is 2).

    Returns:
        list: A list of io.BytesIO objects containing SDF data for each molecule.

    Notes:
        The per-molecule messages are printed only once each (first success,
        first missing bond). If the requested bond is absent from any molecule,
        including when an index is out of range for that molecule, a summary
        line reporting how many molecules were left unmodified is printed at
        the end, so partial failures are not silently ignored.
    """
    sdf_list = []
    success_reported = False   # the "Set bond order" message is printed once
    missing_reported = False   # the "No bond found" message is printed once
    missing_count = 0          # molecules whose requested bond was absent

    # Create the output directory if requested.
    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)

    for i, atoms in enumerate(atoms_list):
        # Write the ASE Atoms object to an in-memory XYZ string.
        xyz_buffer = io.StringIO()
        asewrite(xyz_buffer, atoms, format="xyz")
        xyz_str = xyz_buffer.getvalue()

        # Convert the XYZ string to a molecule using Pybel.
        mol = pybel.readstring("xyz", xyz_str)

        # If a bond_order_pair is provided, fix the bond order.
        if bond_order_pair is not None:
            # Get the underlying OBMol structure.
            obmol = mol.OBMol
            obmol.ConnectTheDots()
            obmol.PerceiveBondOrders()

            # Retrieve the atoms using the provided (1-indexed) indices.
            atom1 = obmol.GetAtom(bond_order_pair[0])
            atom2 = obmol.GetAtom(bond_order_pair[1])

            # An out-of-range index yields no atom; never pass that on to
            # GetBond, treat it as "bond not present" instead.
            bond = None
            if atom1 is not None and atom2 is not None:
                bond = obmol.GetBond(atom1, atom2)

            if bond is None:
                missing_count += 1
                if not missing_reported:
                    print(f"No bond found between atoms {bond_order_pair[0]} and {bond_order_pair[1]} in molecule {i}.")
                    missing_reported = True
            else:
                bond.SetBondOrder(new_bond_order)
                if not success_reported:
                    print(f"Molecule {i}: Set bond order between atoms {bond_order_pair[0]} and {bond_order_pair[1]} to {new_bond_order}.\n")
                    print('This message will be printed once, but all files will be modified.')
                    success_reported = True

        # Write the molecule to an SDF string and wrap it as a bytes buffer.
        sdf_str = mol.write("sdf")
        sdf_list.append(io.BytesIO(sdf_str.encode("utf-8")))

        # Optionally, save the SDF file to disk.
        if output_dir is not None:
            filename = os.path.join(output_dir, f"molecule_{i}.sdf")
            with open(filename, "w", encoding="utf-8") as f:
                f.write(sdf_str)

    # Report partial failures that the once-only messages above would hide.
    if missing_count:
        print(
            f"Bond between atoms {bond_order_pair[0]} and {bond_order_pair[1]} was not found in "
            f"{missing_count} of {len(sdf_list)} molecules; their bond orders were left unmodified."
        )

    return sdf_list

In [34]:
def process_batch(atoms_list, N=32, bond_order_pair=None, new_bond_order=2):
    """
    Run the Sage energy/force evaluation over a batch of structures.

    The structures are converted to in-memory SDF buffers, evaluated in
    parallel, and the energies are shifted so that their mean is zero.

    Parameters:
    - atoms_list: List of atoms objects to process (forwarded to create_sdf_batch).
    - N: Number of parallel processes.
    - bond_order_pair: Optional atom-index pair forwarded to create_sdf_batch.
    - new_bond_order: Bond order forwarded to create_sdf_batch.

    Returns:
    - (E_shifted, forces): numpy array of mean-shifted energies and numpy
      array of the corresponding forces, in input order.
    """
    # Convert every structure to an SDF buffer, then evaluate them in parallel.
    sdf_buffers = create_sdf_batch(
        atoms_list,
        bond_order_pair=bond_order_pair,
        new_bond_order=new_bond_order,
    )
    results = parallel_process(sdf_buffers, N=N)

    # Each result pairs the input buffer with an (energy, forces) tuple;
    # only the second element is needed from here on.
    energy_force_pairs = [pair for _, pair in results]

    energies = np.array([pair[0] for pair in energy_force_pairs])
    forces = np.array([pair[1] for pair in energy_force_pairs])

    # Energies are reported relative to the batch mean.
    return energies - np.mean(energies), forces

### Get single-point energies and forces for alanine dipeptide


In [35]:
# Collect the '.log' files in the alanine dipeptide DFT-logs directory in
# natural sort order (non-file entries are skipped) and read each with ASE.
# NOTE(review): the log format is not stated here; presumably DFT program
# output that ASE can auto-detect — confirm.
ad_path = Path('data/AD/thermal_MD_10k/DFT-logs')
ad_atoms_list = [aseread(x) for x in natsorted(ad_path.iterdir()) if x.is_file() and x.suffix == '.log']

KeyboardInterrupt: 

In [None]:
# Evaluate all alanine dipeptide structures with 48 worker processes.
# ad_results is the (mean-shifted energies, forces) tuple from process_batch.
ad_results = process_batch(ad_atoms_list, N=48)

100%|██████████| 10000/10000 [02:36<00:00, 64.05it/s]


In [None]:
# Persist the alanine dipeptide results: index 0 holds the mean-shifted
# energies, index 1 the forces (see process_batch).
np.save('data/AD/thermal_MD_10k/ad_E_sage_kcal_mol.npy', ad_results[0])
np.save('data/AD/thermal_MD_10k/ad_F_sage_kcal_mol_A.npy', ad_results[1])

### Get single-point energies and forces for azobenzene


In [None]:
# Collect the azobenzene '.log' files in natural sort order and read each
# with ASE, then evaluate them with 48 worker processes.
az_path = Path('data/AZ/thermal_MD_10k/DFT-logs')
az_atoms_list = [aseread(x) for x in natsorted(az_path.iterdir()) if x.is_file() and x.suffix == '.log']
# bond_order_pair is 1-indexed; the bond between atoms 12 and 13 is forced to
# order 2 before the SDF is written (see create_sdf_batch for the radical
# parsing error this avoids).
az_results = process_batch(az_atoms_list, N=48, bond_order_pair=(12, 13), new_bond_order=2)

Molecule 0: Set bond order between atoms 12 and 13 to 2.

This message will be printed once, but all files will be modified.


100%|██████████| 10000/10000 [02:37<00:00, 63.45it/s]


In [None]:
# Persist the azobenzene results: index 0 holds the mean-shifted energies,
# index 1 the forces (see process_batch).
np.save('data/AZ/thermal_MD_10k/az_E_sage_kcal_mol.npy', az_results[0])
np.save('data/AZ/thermal_MD_10k/az_F_sage_kcal_mol_A.npy', az_results[1])

# Azobenzene ISOMERIZATION trajectories 

In [None]:
# Read every frame (index=":") of the inversion-path XYZ file and evaluate the
# frames with 48 worker processes.
cs_path = Path('data/AZ/cs-inversion/AZ_cs-DFT_inversion_path.xyz')
cs_atoms_list = aseread(cs_path, index=":")
# Same 1-indexed bond fix (atoms 12 and 13 set to order 2) as for the other
# azobenzene inputs that use this pair.
cs_iso_results = process_batch(cs_atoms_list, N=48, bond_order_pair=(12, 13), new_bond_order=2)


Molecule 0: Set bond order between atoms 12 and 13 to 2.

This message will be printed once, but all files will be modified.


100%|██████████| 179/179 [00:03<00:00, 45.36it/s]


In [None]:
# Only the mean-shifted energies (index 0) are saved for the inversion path;
# the forces in cs_iso_results[1] are not written out.
np.save('data/AZ/cs-inversion/cs_inv_E_sage_kcal_mol.npy', cs_iso_results[0])

In [36]:
# Read every frame (index=":") of the rotation-path XYZ file and evaluate the
# frames with 48 worker processes.
# NOTE: the name os_path shadows the standard-library `os` submodule name only
# informally; `os.path` itself is untouched because this is a distinct name.
os_path = Path('data/AZ/os-rotation/AZ_os-DFT_rotation_path.xyz')
os_atoms_list = aseread(os_path, index=":")
# NOTE(review): this call uses the 1-indexed pair (1, 13), whereas the other
# azobenzene inputs use (12, 13). Presumably the atom ordering in this file
# differs — confirm against the XYZ file.
os_iso_results = process_batch(os_atoms_list, N=48, bond_order_pair=(1, 13), new_bond_order=2)

Molecule 0: Set bond order between atoms 1 and 13 to 2.

This message will be printed once, but all files will be modified.


100%|██████████| 270/270 [00:05<00:00, 53.82it/s]


In [37]:
# Only the mean-shifted energies (index 0) are saved for the rotation path;
# the forces in os_iso_results[1] are not written out.
np.save('data/AZ/os-rotation/os_rot_E_sage_kcal_mol.npy', os_iso_results[0])