# Convert CD, CB7, and GDCC host-guest pairs from https://github.com/MobleyLab/benchmarksets/ to SMIRNOFF99Frosst

In [111]:
%load_ext autoreload
%autoreload 2

import os as os
import glob as glob
import numpy as np
import pandas as pd

import parmed as pmd

from openforcefield.typing.engines.smirnoff import ForceField, unit
from openforcefield.utils import mergeStructure

from smirnovert.utils import (create_pdb_with_conect, prune_conect, split_topology, create_host_guest_topology,
                    create_host_mol2, convert_mol2_to_sybyl_antechamber,
                    load_mol2, check_unique_atom_names,
                    check_bond_lengths,
                    extract_water_and_ions, create_water_and_ions_parameters)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [202]:
import logging
from importlib import reload

# `jupyter notebook` itself uses the logging module to print messages to
# standard output, so the module is reloaded first to start from a fresh
# root logger before it is configured here.
reload(logging)

# Root logger: timestamped messages, WARNING and above only.
logging.basicConfig(
    level=logging.WARNING,
    format='%(asctime)s %(message)s',
    datefmt='%Y-%m-%d %I:%M:%S %p',
)
logger = logging.getLogger()

## Clone the repository locally (this is easier than looping through all files remotely)

In [203]:
# Path fragments of the locally cloned benchmark repository. They are joined
# by plain string concatenation, so the slashes are part of the values.
prefix = "./benchmarksets/input_files/"  # directory holding one folder per benchmark set
suffix = "/prmtop-rst7/"                 # sub-folder of each set searched for *.prmtop files

In [204]:
# Names of the benchmark sets to process: set1 and set2 for each of the
# three host families, grouped by family.
benchmarksets = [
    f'{host_family}-set{set_number}'
    for host_family in ('cb7', 'cd', 'gdcc')
    for set_number in (1, 2)
]

In [206]:
# Collect the path of every *.prmtop file of each benchmark set.
# Paths are sorted within a set; sets keep the order of `benchmarksets`.
systems = []

for system in benchmarksets:
    systems.extend(sorted(glob.glob(prefix + system + suffix + '*.prmtop')))

In [207]:
# Show the collected *.prmtop paths as a sanity check.
systems

['./benchmarksets/input_files/cb7-set1/prmtop-rst7/cb7-1.prmtop',
 './benchmarksets/input_files/cb7-set1/prmtop-rst7/cb7-17.prmtop',
 './benchmarksets/input_files/cb7-set1/prmtop-rst7/cb7-18.prmtop',
 './benchmarksets/input_files/cb7-set1/prmtop-rst7/cb7-18b.prmtop',
 './benchmarksets/input_files/cb7-set1/prmtop-rst7/cb7-22.prmtop',
 './benchmarksets/input_files/cb7-set1/prmtop-rst7/cb7-23.prmtop',
 './benchmarksets/input_files/cb7-set1/prmtop-rst7/cb7-24.prmtop',
 './benchmarksets/input_files/cb7-set1/prmtop-rst7/cb7-3.prmtop',
 './benchmarksets/input_files/cb7-set1/prmtop-rst7/cb7-5.prmtop',
 './benchmarksets/input_files/cb7-set2/prmtop-rst7/cb7-2.prmtop',
 './benchmarksets/input_files/cb7-set2/prmtop-rst7/cb7-20.prmtop',
 './benchmarksets/input_files/cb7-set2/prmtop-rst7/cb7-4.prmtop',
 './benchmarksets/input_files/cb7-set2/prmtop-rst7/cb7-5.prmtop',
 './benchmarksets/input_files/cb7-set2/prmtop-rst7/cb7-7.prmtop',
 './benchmarksets/input_files/cb7-set2/prmtop-rst7/cb7-8.prmtop',
 '

In [1]:
def _save_or_report(structure, path, message):
    """Save ``structure`` to ``path``; on ``OSError`` print ``message`` instead of raising."""
    try:
        structure.save(path)
    except OSError:
        print(message)


def convert(destination, prefix, reference_prmtop, reference_inpcrd, host_resname, guest_resname):
    """Build a SMIRNOFF99Frosst-parameterized copy of one solvated host-guest system.

    The host and guest are extracted from the reference AMBER files and
    parameterized with ``forcefield/smirnoff99Frosst.ffxml``; water and ions
    are extracted and parameterized separately by the ``smirnovert`` helpers;
    the two structures are then merged and given the reference box.

    All paths are built by plain string concatenation with ``destination``.

    Parameters
    ----------
    destination : str
        Working directory, including a trailing path separator. Every input
        is read from it and every output is written to it.
    prefix : str
        Base name of the intermediate PDB files (``<prefix>.pdb`` and
        ``<prefix>.pruned.pdb``).
    reference_prmtop : str
        File name (inside ``destination``) of the reference AMBER topology.
    reference_inpcrd : str
        File name (inside ``destination``) of the reference AMBER coordinates.
    host_resname : str
        Residue name of the host; also used as the base name of its mol2 files.
    guest_resname : str
        Residue name of the guest; also used as the base name of its mol2 files.

    Returns
    -------
    The structure returned by ``mergeStructure`` for the host-guest complex
    and the water/ions, with its ``box`` set to that of the reference.

    Notes
    -----
    Besides the intermediate PDB/mol2/water files, this writes ``hg.prmtop``,
    ``hg.inpcrd``, ``smirnoff.prmtop`` and ``smirnoff.inpcrd`` to
    ``destination``. An ``OSError`` raised while saving any of those four
    files is reported with a printed hint and otherwise ignored; any other
    exception propagates.
    """
    reference = pmd.load_file(destination + reference_prmtop, xyz=destination + reference_inpcrd)

    pruned_pdb = destination + prefix + '.pruned.pdb'

    create_pdb_with_conect(solvated_pdb=destination + reference_inpcrd,
                           amber_prmtop=destination + reference_prmtop,
                           output_pdb=destination + prefix + '.pdb')

    # Unlike most helpers here, `prune_conect` takes bare file names plus `path`.
    prune_conect(input_pdb=prefix + '.pdb',
                 output_pdb=prefix + '.pruned.pdb',
                 path=destination)

    components = split_topology(file_name=pruned_pdb)
    hg_topology = create_host_guest_topology(components,
                                             host_resname=host_resname,
                                             guest_resname=guest_resname)

    create_host_mol2(
        solvated_pdb=pruned_pdb,
        amber_prmtop=destination + reference_prmtop,
        mask=host_resname,
        output_mol2=destination + host_resname + '.mol2')

    # NOTE(review): the guest is extracted from the un-pruned PDB while the
    # host uses the pruned one -- confirm this difference is intentional.
    create_host_mol2(
        solvated_pdb=destination + prefix + '.pdb',
        amber_prmtop=destination + reference_prmtop,
        mask=guest_resname,
        output_mol2=destination + guest_resname + '.mol2')

    # Produce the `-sybyl.mol2` variant of each molecule; these are the files loaded below.
    for resname in (host_resname, guest_resname):
        convert_mol2_to_sybyl_antechamber(
            input_mol2=destination + resname + '.mol2',
            output_mol2=destination + resname + '-sybyl.mol2',
            ac_doctor=False)

    # Water and ions are handled separately from the host-guest complex.
    extract_water_and_ions(
        amber_prmtop=reference_prmtop,
        amber_inpcrd=reference_inpcrd,
        host_residue=':' + host_resname,
        guest_residue=':' + guest_resname,
        dummy_atoms=True,
        output_pdb='water_ions.pdb',
        path=destination)

    create_water_and_ions_parameters(
        input_pdb='water_ions.pdb',
        output_prmtop='water_ions.prmtop',
        output_inpcrd='water_ions.inpcrd',
        dummy_atoms=False,
        path=destination)

    host = load_mol2(
        filename=destination + host_resname + '-sybyl.mol2',
        name=host_resname,
        add_tripos=True)

    guest = load_mol2(
        filename=destination + guest_resname + '-sybyl.mol2',
        name=guest_resname,
        add_tripos=False)

    check_unique_atom_names(host)
    check_unique_atom_names(guest)
    molecules = [host, guest]

    # Apply the SMIRNOFF force field to the host-guest topology only.
    ff = ForceField('forcefield/smirnoff99Frosst.ffxml')
    system = ff.createSystem(
        hg_topology.topology,
        molecules,
        nonbondedCutoff=1.1 * unit.nanometer,
        ewaldErrorTolerance=1e-4)

    hg_structure = pmd.openmm.topsystem.load_topology(
        hg_topology.topology, system, hg_topology.positions)

    check_bond_lengths(hg_structure, threshold=4)

    _save_or_report(
        hg_structure, destination + 'hg.prmtop',
        'Check if the host-guest parameter file already exists...')
    _save_or_report(
        hg_structure, destination + 'hg.inpcrd',
        'Check if the host-guest coordinate file already exists...')

    water_and_ions = pmd.amber.AmberParm(
        destination + 'water_ions.prmtop',
        xyz=destination + 'water_ions.inpcrd')

    merged = mergeStructure(hg_structure, water_and_ions)
    merged.box = reference.box

    # Catch only OSError here, as for the host-guest files above, so that
    # unrelated failures (and KeyboardInterrupt) are no longer swallowed.
    _save_or_report(
        merged, destination + 'smirnoff.prmtop',
        'Check if solvated parameter file already exists...')
    _save_or_report(
        merged, destination + 'smirnoff.inpcrd',
        'Check if solvated coordinate file already exists...')

    return merged

In [250]:
# Convert every collected system. Each one gets its own working directory,
# <prefix><host_set>/<suffix>smirnoff/<base_name>/, holding symlinks to the
# reference files and all files written by `convert`.
for file in systems:
    # Drop the extension explicitly instead of slicing off a fixed number of characters.
    path_parts = os.path.splitext(file)[0].split('/')
    base_name = path_parts[-1]   # e.g. 'cb7-1'
    host_set = path_parts[-3]    # e.g. 'cb7-set1'
    host = host_set.split('-')[0]
    guest = base_name.split('-')[1]

    destination = prefix + host_set + '/' + suffix + 'smirnoff/' + base_name + '/'
    reference_prmtop = base_name + '.prmtop'
    reference_inpcrd = base_name + '.rst7'

    # 'temoa' must be tested before 'oa', because 'oa' is a substring of 'temoa'.
    if 'cb7' in host:
        host_resname = 'CB7'
    elif 'cd' in host:
        host_resname = 'MGO'
    elif 'gdcc' in host and 'temoa' in base_name:
        host_resname = 'OCB'
    elif 'gdcc' in host and 'oa' in base_name:
        host_resname = 'OCT'
    else:
        # Without this branch an unrecognized system would silently reuse the
        # residue name of the previous iteration (or raise NameError on the first).
        raise ValueError(f'Cannot determine the host residue name for {file}')
    guest_resname = 'MOL'

    if not os.path.exists(destination):
        print(f'Creating {destination}...')
        os.makedirs(destination)
    # The working directory sits two levels below the reference files, hence '../../'.
    if not os.path.exists(destination + reference_prmtop):
        print(f'Linking {destination + reference_prmtop}...')
        os.symlink('../../' + reference_prmtop, destination + reference_prmtop)
    if not os.path.exists(destination + reference_inpcrd):
        os.symlink('../../' + reference_inpcrd, destination + reference_inpcrd)

    # `smirnoff.prmtop` marks a system as already converted.
    if os.path.isfile(destination + 'smirnoff.prmtop'):
        print(f'Converted file for {base_name} exists, skipping...')
    else:
        convert(destination, base_name, reference_prmtop, reference_inpcrd, host_resname, guest_resname)

Converted file for cb7-1 exists, skipping...
Converted file for cb7-17 exists, skipping...
Converted file for cb7-18 exists, skipping...
Converted file for cb7-18b exists, skipping...
Converted file for cb7-22 exists, skipping...
Converted file for cb7-23 exists, skipping...
Converted file for cb7-24 exists, skipping...
Converted file for cb7-3 exists, skipping...
Converted file for cb7-5 exists, skipping...
Converted file for cb7-2 exists, skipping...
Converted file for cb7-20 exists, skipping...
Converted file for cb7-4 exists, skipping...
Converted file for cb7-5 exists, skipping...
Converted file for cb7-7 exists, skipping...
Converted file for cb7-8 exists, skipping...
Converted file for cb7-9 exists, skipping...
Converted file for acd-1-p exists, skipping...
Converted file for acd-1-s exists, skipping...
Converted file for acd-2-p exists, skipping...
Converted file for acd-2-s exists, skipping...
Converted file for acd-3-p exists, skipping...
Converted file for acd-3-s exists, sk