In [1]:
import os, subprocess, shutil
import numpy as np
import yaml

import parmed as pmd

from pmx import forcefield as pmxff
from PLBenchmarks import targets, ligands

# Choose underlying toolkits for the OpenFF toolkit
Choose the toolkit(s) you want to use with the OpenFF toolkit (either OpenEye, or AmberTools and RDKit).

In [2]:
from openforcefield.utils import toolkits

# Exactly one of the two `toolkit_precedence` assignments below should be active.

### OpenEye version (currently active): keep the following line if you have and want to use
### the OpenEye toolkit, followed by the RDKit and AmberTools toolkits
toolkit_precedence = [toolkits.OpenEyeToolkitWrapper, toolkits.RDKitToolkitWrapper, toolkits.AmberToolsToolkitWrapper]

### Non-OpenEye version: comment out the line above and uncomment the following
### if you want to use only RDKit and AmberTools
# toolkit_precedence = [toolkits.RDKitToolkitWrapper, toolkits.AmberToolsToolkitWrapper]

# Replace the module-level registry with one built from the chosen precedence list.
# NOTE(review): code that imported GLOBAL_TOOLKIT_REGISTRY by name before this cell ran
# presumably still holds the old registry object -- confirm if toolkit selection seems ignored.
toolkits.GLOBAL_TOOLKIT_REGISTRY = toolkits.ToolkitRegistry(toolkit_precedence=toolkit_precedence)

In [3]:
from openforcefield.topology import Molecule, Topology
from openforcefield.typing.engines.smirnoff import ForceField

In [4]:
# choose the open force field version:
# examples: 'smirnoff99Frosst-1.1.0.offxml' 'openff-1.0.0.offxml'
# NOTE: `forcefield` is also used verbatim as the output directory name under
# 04_topo/ when the topologies are written.
forcefield = 'openff-1.0.0.offxml'
# Module-level force field object; ligandToPMD() reads (and modifies) it.
openff_forcefield = ForceField(forcefield)

In [5]:
# function to convert openFF molecule (ligand) to a parmed structure
def ligandToPMD(ligand):
    """Parametrize an OpenFF molecule and convert it to a ParmEd structure.

    Uses the module-level ``openff_forcefield``. The 'Constraints' parameter
    handler is removed from that shared object (once) so that no constraints
    are baked into the generated OpenMM system.

    Parameters
    ----------
    ligand
        OpenFF ``Molecule`` with at least one conformer. Its partial charges
        and atom names are overwritten in place.

    Returns
    -------
    tuple
        ``(pmd_structure, ligand_topology, ligand_system, ligand_positions)``.

    Raises
    ------
    Exception
        If the AM1-BCC charge calculation or the OpenMM system creation fails;
        the original error is chained as ``__cause__``.
    """
    ligand_positions = ligand.conformers[0]

    # Calculate AM1-BCC charges. No charge correction happens here; the total
    # charge is not adjusted by this function.
    try:
        ligand.compute_partial_charges_am1bcc()
    except Exception as e:
        raise Exception('Error in charge calculation for ligand {}: {}'.format(ligand.name, e)) from e
    # Give all atoms unique names so we can export to GROMACS
    for idx, atom in enumerate(ligand.atoms):
        atom.name = f'{atom.element.symbol}{idx}'

    # Do not assign H-bond constraints now, instead have ParmEd add them later.
    # The force field object is shared between calls, so the handler only exists
    # on the first call: pop() with a default keeps later calls (and force fields
    # without a Constraints section) from raising KeyError.
    openff_forcefield._parameter_handlers.pop('Constraints', None)

    ligand_topology = ligand.to_topology()
    try:
        ligand_system = openff_forcefield.create_openmm_system(ligand_topology, charge_from_molecules=[ligand])
    except Exception as e:
        raise Exception('Error in creating openmm system: {}'.format(e)) from e
    # Create OpenMM Topology from OpenFF Topology
    omm_top = ligand_topology.to_openmm()

    # Convert OpenMM System to a ParmEd structure.
    pmd_structure = pmd.openmm.load_topology(omm_top, ligand_system, ligand_positions)

    return pmd_structure, ligand_topology, ligand_system, ligand_positions

In [6]:
# functions for GROMACS force field manipulation/conversion
def set_charge_to_zero(itp):
    """Adjust atomic charges in place so that they sum to the nearest integer.

    Despite the name, the target is ``round(total charge)``, which is zero only
    for molecules whose charges already sum to roughly zero. The difference is
    spread evenly over all atoms, every charge is rounded to 6 decimals, and
    whatever rounding remainder is left is added to the first atom.

    Parameters
    ----------
    itp
        Object with an ``atoms`` sequence whose items carry a numeric ``q``
        attribute. An empty ``atoms`` sequence is left untouched.
    """
    q = 0.0
    n = 0
    for a in itp.atoms:
        q += a.q
        n += 1
    if n == 0:
        # Nothing to correct; also avoids dividing by zero below.
        return

    # uniform per-atom correction, rounded to 6 digit precision
    deltaq = np.around((round(q) - q)/float(n), decimals=6)

    newq = 0.0
    for a in itp.atoms:
        a.q = np.around(a.q + deltaq, decimals=6)
        newq += a.q
    # add remainder to first atom
    itp.atoms[0].q += round(newq) - newq

def change_atomtypes(itp, suffix):
    """Append ``suffix`` to every atom type name used by ``itp``.

    Both the ``atomtype`` attribute of each atom in ``itp.atoms`` and the keys
    of the ``itp.atomtypes`` mapping are renamed; the mapping values are kept
    as they are. ``itp.atomtypes`` is replaced by a new plain dict.
    """
    tag = str(suffix)

    for atom in itp.atoms:
        atom.atomtype = str(atom.atomtype) + tag

    old_types = itp.atomtypes
    itp.atomtypes = {str(key) + tag: old_types[key] for key in old_types.keys()}

def write_ff(atypes, fname, ff='amber99sb'):
    """Write an ``[ atomtypes ]`` section to ``fname``.

    Parameters
    ----------
    atypes
        Mapping from atom type name to an indexable record; elements 1-5 of
        each record are written after the name (1, 2, 4 and 5 as floats,
        3 as a string).
    fname
        Path of the file to create or overwrite.
    ff
        Accepted for compatibility with existing callers; not used here.
    """
    # The context manager guarantees the file is flushed and closed, also when
    # formatting one of the records fails.
    with open(fname, 'w') as fp:
        print('[ atomtypes ]', file=fp)
        for atkey in atypes.keys():
            at = atypes[atkey]
            print('%8s %12.6f %12.6f %3s %12.6f %12.6f' % (atkey, at[1], at[2], at[3], at[4], at[5]), file=fp)

def write_posre(itp, fname, fc=1000):
    """Write a ``[ position_restraints ]`` section covering every atom of ``itp``.

    One line is written per entry in ``itp.atoms``, numbered from 1, with
    function type 1 and the same force constant ``fc`` in all three columns.

    Parameters
    ----------
    itp
        Object exposing an iterable ``atoms`` attribute.
    fname
        Path of the file to create or overwrite.
    fc
        Force constant written three times per atom (formatted as an integer).
    """
    # Context manager so the handle is closed even if a write raises.
    with open(fname, 'w') as fp:
        print('[ position_restraints ]', file=fp)
        for atom_number, _ in enumerate(itp.atoms, start=1):
            print("%d   1    %d   %d    %d" % (atom_number, fc, fc, fc), file=fp)


# Convert SDF files to gromacs topologies

If you don't have SDF files, you first need to create them by converting your ligand structures (e.g. with the OpenEye toolkit OEChem).

ATTENTION: Using PDB files might be prone to errors because PDB files do not contain bond information.

In [9]:
for target in targets.target_list:
    # This cell is restricted to the cdk2 target.
    if target['name'] != 'cdk2':
        continue
    print('=== ' + target['name'] + ' ===')
    ligSet = ligands.getLigandSetDF(target['name'], cols=['name', 'smiles', 'docked'])
    for index, lig in ligSet.iterrows():
        print(lig)
        ligName = lig['name'][0]
        # ... and, within that target, to the single ligand lig_31.
        if ligName != 'lig_31':
            continue

        # Input structure and output directory for this ligand.
        targetDir = f'PLBenchmarks/data/{target["dir"]}'
        ligFile = f'{targetDir}/{lig.docked[0]}'
        topPath = f'{targetDir}/04_topo/{forcefield}/{ligName}/'
        print(topPath)
        os.makedirs(topPath, exist_ok=True)

        print(ligFile)
        pdbFile = f'{targetDir}/03_docked/{ligName}/{ligName}.pdb'
        if os.path.isfile(ligFile):
            ligand = Molecule.from_file(ligFile, allow_undefined_stereo=True)
        elif os.path.isfile(pdbFile):
            # Try to read in PDB file instead of a SDF, only works with OpenEye
            ligand = Molecule.from_file(pdbFile, allow_undefined_stereo=True)
            # save as sdf file
            # ATTENTION: automatic conversion to SDF
            ligand.name = ligName
            ligand.to_file(ligFile, 'sdf')
        else:
            print('    File not found. Molecules {} cannot be read in'.format(ligName))
            continue

        print('   ', ligName, ligand.to_smiles())
        try:
            pmd_structure, ligand_topology, ligand_system, ligand_positions = ligandToPMD(ligand)
        except Exception as e:
            # Report the failure and carry on with the next ligand.
            print('    ' + str(e))
            continue

        # Common path prefix of all files written for this ligand.
        outPrefix = f'{topPath}/{ligName}'

        # Export GROMACS files.
        pmd_structure.save(f'{outPrefix}.top', overwrite=True)
        pmd_structure.save(f'{outPrefix}.gro', overwrite=True, precision=8)

        # Export AMBER files.
        pmd_structure.save(f'{outPrefix}.prmtop', overwrite=True)
        pmd_structure.save(f'{outPrefix}.inpcrd', overwrite=True)

        # Create GROMACS ITP file: re-read the topology just written, rename the
        # molecule, suffix the atom types with the ligand name and adjust charges.
        itp = pmxff.read_gaff_top(f'{outPrefix}.top')
        itp.set_name('MOL')
        change_atomtypes(itp, ligName)
        set_charge_to_zero(itp)

        itpout = ligName + '.itp'
        posre = 'posre_' + ligName + '.itp'
        itp.write(f'{topPath}/{itpout}')
        write_ff(itp.atomtypes, f'{topPath}/ff{itpout}')
        write_posre(itp, f'{topPath}/{posre}')

=== cdk2 ===
[{'name': 'jnk1', 'dir': '01_jnk1'}, {'name': 'pde2', 'dir': '02_pde2'}, {'name': 'thrombin', 'dir': '03_thrombin'}, {'name': 'p38', 'date': datetime.date(2019, 12, 9), 'dir': '2019-12-09_p38'}, {'name': 'ptp1b', 'date': datetime.date(2019, 12, 12), 'dir': '2019-12-12_ptp1b'}, {'name': 'galectin', 'date': datetime.date(2019, 12, 13), 'dir': '2019-12-13_galectin'}, {'name': 'cdk2', 'date': datetime.date(2019, 12, 13), 'dir': '2019-12-13_cdk2'}, {'name': 'cmet', 'date': datetime.date(2019, 12, 13), 'dir': '2019-12-13_cmet'}, {'name': 'mcl1', 'date': datetime.date(2019, 12, 13), 'dir': '2019-12-13_mcl1'}]
name                                                   lig_17
smiles      [H][C]1=[N][C]2=[C]([O][C]([H])([H])[C]3([H])[...
docked                            03_docked/lig_17/lig_17.sdf
Name: 0, dtype: object
name                                                 lig_1h1q
smiles      [H][C]1=[N][C]2=[C]([O][C]([H])([H])[C]3([H])[...
docked                        03_docked/lig_

# Generate GAFF2 topologies

In [None]:
# NOTE: this rebinds the notebook-level `forcefield` variable; it is used below
# as the output directory name under 04_topo/.
forcefield = 'gaff2'
for target in targets.target_list:
    # This cell is restricted to the cdk2 target.
    if target['name'] != 'cdk2':
        continue
    print('=== ' + target['name'] + ' ===')
    ligSet = ligands.getLigandSetDF(target['name'], cols=['name', 'smiles', 'docked'])
    for index, lig in ligSet.iterrows():
        ligName = lig['name'][0]
        print('   ', ligName)
        ligFile = f'PLBenchmarks/data/{target["dir"]}/{lig.docked[0]}'
        ligPath = f'PLBenchmarks/data/{target["dir"]}/03_docked/{ligName}/'
        topPath = f'PLBenchmarks/data/{target["dir"]}/04_topo/{forcefield}/{ligName}/'
        os.makedirs(topPath, exist_ok=True)

        # acpype is fed a PDB file; create it from the docked SDF if it is missing.
        pdbFile = f'{ligPath}/{ligName}.pdb'
        if not os.path.isfile(pdbFile):
            if os.path.isfile(ligFile):
                # Undefined stereocenters are tolerated: the molecule is only
                # converted to another file format here.
                ligand = Molecule.from_file(ligFile, allow_undefined_stereo=True)
                # Write to the PDB file itself (not to the directory), i.e. to the
                # exact path that is checked above and passed to acpype below.
                ligand.to_file(pdbFile, 'pdb')
            else:
                print('    File not found. Molecules {} cannot be read in'.format(ligName))
                continue

        # quick hack to get formal charge of molecule: sum the charges of the
        # OpenFF topology, which therefore must already exist for this ligand.
        itp = pmxff.read_gaff_top(f'PLBenchmarks/data/{target["dir"]}/04_topo/openff-1.0.0.offxml/{ligName}/{ligName}.top')
        intq = round(sum([a.q for a in itp.atoms]))
        print(f'Formal Charge: {int(intq)} e')

        # Pass the command as an argument list so that paths containing
        # whitespace are not split into several arguments.
        subprocessOutput = subprocess.run(
            ['python', './scriptpath/acpype.py',
             '-i', pdbFile,
             '-o', 'gmx',
             '-a', 'gaff2',
             '-n', str(int(intq))],
            capture_output=True)
        print(subprocessOutput.stdout.decode('utf8') + subprocessOutput.stderr.decode('utf8'))

        # acpype writes its results to '<ligName>.acpype' in the current working
        # directory; copy the GROMACS files out, then move the whole directory.
        shutil.copy(f'{ligName}.acpype/{ligName}_GMX.itp', topPath)
        shutil.copy(f'{ligName}.acpype/{ligName}_GMX.gro', topPath)
        shutil.copy(f'{ligName}.acpype/{ligName}_GMX.top', topPath)

        # Remove a stale copy from a previous run so that the move cannot fail.
        if os.path.isdir(f'{topPath}/{ligName}.acpype'):
            shutil.rmtree(f'{topPath}/{ligName}.acpype')
        shutil.move(f'{ligName}.acpype', f'{topPath}')

        itp = pmxff.read_gaff_top(f'{topPath}/{ligName}_GMX.itp')
        itp.set_name('MOL')

        itpout = ligName + '.itp'
        posre = 'posre_' + ligName + '.itp'

        itp.write(f'{topPath}/{itpout}')
        write_ff(itp.atomtypes, f'{topPath}/ff{itpout}')
        write_posre(itp, f'{topPath}/{posre}')

=== cdk2 ===
[{'name': 'jnk1', 'dir': '01_jnk1'}, {'name': 'pde2', 'dir': '02_pde2'}, {'name': 'thrombin', 'dir': '03_thrombin'}, {'name': 'p38', 'date': datetime.date(2019, 12, 9), 'dir': '2019-12-09_p38'}, {'name': 'ptp1b', 'date': datetime.date(2019, 12, 12), 'dir': '2019-12-12_ptp1b'}, {'name': 'galectin', 'date': datetime.date(2019, 12, 13), 'dir': '2019-12-13_galectin'}, {'name': 'cdk2', 'date': datetime.date(2019, 12, 13), 'dir': '2019-12-13_cdk2'}, {'name': 'cmet', 'date': datetime.date(2019, 12, 13), 'dir': '2019-12-13_cmet'}, {'name': 'mcl1', 'date': datetime.date(2019, 12, 13), 'dir': '2019-12-13_mcl1'}]
    lig_17
Formal Charge: 0 e
| ACPYPE: AnteChamber PYthon Parser interfacE v. 0 0 Rev: 0 (c) 2020 AWSdS |
==> ... charge set to 0
==> ... converting pdb input file to mol2 input file
==> * Babel OK *
==> Executing Antechamber...
==> * Antechamber OK *
==> * Parmchk OK *
==> Executing Tleap...
++++++++++start_quote+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
C