In [16]:
import glob
import os
import re
import subprocess

import numpy as np

import pygromos
from pygromos.files.blocks import imd_blocks
from pygromos.files.gromos_system import Gromos_System
from pygromos.hpc_queuing.submission_systems.lsf import LSF
from pygromos.utils import bash

# Step 1 : Read in general system information 
(from RE-EDS input files and topologies)

### Define important paths

In [17]:
# All paths used in this notebook are built relative to root_dir.
# The topologies and cnfs of the RE-EDS run we compare against are in the two sub-directories below.

root_dir = '/cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent'
input_topols_dir = f'{root_dir}/reeds_input'
input_ssm_cnfs = f'{root_dir}/ssm_confs'

# The number of ligands is taken to be the number of .cnf files found in the SSM directory.
num_ligs = len(glob.glob(f'{input_ssm_cnfs}/*.cnf'))

print(f'we will be working with {num_ligs} ligands')

we will be working with 6 ligands


In [18]:
# Template IMD file for the TI production runs:
from pygromos.files.simulation_parameters.imd import Imd
from pygromos.files.blocks import imd_blocks

template_ti_imd_path = f'{input_topols_dir}/ti_prod.imd'
template_ti_imd = Imd(template_ti_imd_path)

# NOTE: this template still has to be modified extensively (done further below)
# so that the number of atoms in the energy groups, force blocks, etc. is correct.



In [19]:
# Import the initial SSM coordinates
from pygromos.files.coord import Cnf


def _natural_sort_key(path):
    """
    Sort key that orders file names by the integer value of any embedded numbers.

    A plain alphabetical sort would place 'REEDS_SSM_state_10.cnf' before
    'REEDS_SSM_state_2.cnf'. Later cells index ssm_confs by ligand number
    (ssm_confs[a-1]), so the list must follow the numeric state order.
    """
    # re.split with a capturing group alternates text / digit tokens, so tokens at the
    # same position always have the same type and the resulting lists stay comparable.
    return [int(token) if token.isdigit() else token
            for token in re.split(r'(\d+)', os.path.basename(path))]


# Numeric-aware ordering: state_1, state_2, ..., state_10, ...
list_coords = sorted(glob.glob(input_ssm_cnfs + '/*.cnf'), key=_natural_sort_key)

ssm_confs = []

for ssm_conf in list_coords:
    print ('reading in: ' + ssm_conf)
    tmp_cnf = Cnf(ssm_conf)
    ssm_confs.append(tmp_cnf)

reading in: /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/ssm_confs/REEDS_SSM_state_1.cnf
reading in: /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/ssm_confs/REEDS_SSM_state_2.cnf
reading in: /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/ssm_confs/REEDS_SSM_state_3.cnf
reading in: /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/ssm_confs/REEDS_SSM_state_4.cnf
reading in: /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/ssm_confs/REEDS_SSM_state_5.cnf
reading in: /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/ssm_confs/REEDS_SSM_state_6.cnf


In [20]:
# Every SSM conformation contains all N ligands of the RE-EDS simulation.
# We therefore record how many atoms each ligand has, so that the coordinates
# can be reduced accordingly later on. The first file is used as the reference.

import copy

template_cnf = copy.deepcopy(ssm_confs[0])

# Ligand i is looked up under residue name 'LI<i>' and residue number i
# (presumably residues maps name -> residue number -> atom count; verify against pygromos).
num_atoms_ligs = np.zeros(num_ligs)
for i in range(1, num_ligs + 1):
    ligand_residues = template_cnf.residues[f'LI{i}']
    num_atoms_ligs[i - 1] = ligand_residues[i]

# One entry per solvent residue
num_solv_mols = len(template_cnf.residues['SOLV'])

# Whatever is neither ligand nor solvent (counted as 3 atoms per solvent molecule) is attributed to the protein
num_protein_atoms = len(template_cnf.POSITION) - num_atoms_ligs.sum() - 3 * num_solv_mols

### Print out what was read in to check number of atoms are ok

In [21]:
# Summary of what was read in; the protein line only appears when protein atoms were found.
print(f'{num_atoms_ligs!s} number of atoms per ligand')
if num_protein_atoms:
    print(f'in complex with a {num_protein_atoms!s} atom protein')
print(f'solvated in {num_solv_mols!s} water molecules')

[39. 45. 45. 46. 41. 42.] number of atoms per ligand
solvated in 1513 water molecules


## Step 2 : Produce basic input directories for all alchemical transformations of interest

Here we create a subdirectory called 'X_Y' with X and Y being the indices of the ligands for which we perform the transformation. 

We will create an appropriate reduced topology and input file for this transformation

Later we can use the template imd, ptp, and top generated to run the simulations.

In [22]:
def reduce_reeds_topology_for_TI(in_top_path, pair, with_protein, out_path, prefix):
    """
    Reduce a RE-EDS topology to the molecules needed for one TI leg.

    Runs the external ``red_top`` program with ``@topo <in_top_path>`` and
    ``@atoms <A>:a <B>:a [<protein>:a]`` and stores its standard output as the
    reduced topology file.

    Note: this function assumes the topology is ordered such that the ligands
    come before the protein.

    Parameters
    ----------
    in_top_path : str
        Path of the topology to reduce.
    pair : tuple
        The two ligand numbers (A, B) to keep.
    with_protein : int
        Number of the protein to keep in addition to the ligands.
        Any falsy value (e.g. 0) keeps the two ligands only.
    out_path : str
        Existing directory in which the reduced topology is written.
    prefix : str
        File name prefix of the reduced topology.

    Returns
    -------
    str
        Path of the reduced topology: <out_path>/<prefix>_TI_<A>_<B>.top

    Raises
    ------
    subprocess.CalledProcessError
        If ``red_top`` exits with a non-zero status.
    FileNotFoundError
        If ``red_top`` is not on the PATH or ``out_path`` does not exist.
    """
    atom_specs = [str(pair[0]) + ":a", str(pair[1]) + ":a"]
    if with_protein:
        atom_specs.append(str(with_protein) + ":a")

    # os.path.join avoids a doubled '/' when out_path already ends with a separator
    out_topology = os.path.join(out_path, prefix + "_TI_" + str(pair[0]) + "_" + str(pair[1]) + ".top")

    # Argument list instead of a shell string: paths containing spaces or shell
    # metacharacters are passed to red_top unchanged.
    args = ["red_top", "@topo", in_top_path, "@atoms"] + atom_specs

    print ('executing: ' + ' '.join(args) + ' > ' + out_topology)

    # check=True: a failing red_top raises here instead of silently leaving an
    # empty or truncated topology for the following steps to parse.
    with open(out_topology, 'w') as out_file:
        subprocess.run(args, stdout=out_file, check=True)

    return out_topology

In [24]:
def prepare_ptp(input_pairwise_top):
    """
    Generate a perturbation topology (.ptp) for a pairwise topology and write it
    next to the input file.

    Every solute atom belonging to residue 1 or 2 gets a single perturbation state,
    keyed by its residue number, which carries the atom's own IAC, mass and charge.
    This assumes the two end states A and B are residues 1 and 2 of the topology;
    reading stops at the first atom of residue 3 or higher.

    The dummy atom type code is taken from the first entry of the ATOMTYPENAME block
    (original author's note: this is the last atom type code, following the GROMOS convention).

    Parameters
    ----------
    input_pairwise_top : str
        Path of the reduced (two-ligand) topology file.

    Returns
    -------
    str
        Path of the written perturbation topology: the input path with its
        extension replaced by '.ptp'.
    """
    from pygromos.files.topology.ptp import Pertubation_topology
    from pygromos.files.blocks.topology_blocks import pertubation_lam_state, atom_lam_pertubation_state, PERTATOMPARAM, TITLE

    input_dir = os.path.dirname(input_pairwise_top)

    grom_system = Gromos_System(in_top_path=input_pairwise_top, system_name="TEST_solv", work_folder=input_dir)

    pert_atoms = []
    for atom_line in grom_system.top.SOLUTEATOM:

        # Exit as soon as we reach residue 3 !
        # This assumes the two states A and B are positioned 1 and 2 in the file
        if atom_line.MRES >= 3:
            break

        phys_state = pertubation_lam_state(IAC=atom_line.IAC, MASS=atom_line.MASS, CHARGE=atom_line.CG)
        states = {atom_line.MRES: phys_state}

        pert_atom = atom_lam_pertubation_state(atom_line.ATNM, RES=atom_line.MRES, NAME=atom_line.PANM, STATES=states)
        pert_atoms.append(pert_atom)

    dummy_IAC = grom_system.top.ATOMTYPENAME.content[0][0]

    pert_atom_block = PERTATOMPARAM(pert_atoms, dummy_IAC = dummy_IAC)

    # Generate ptp file
    grom_system.ptp = Pertubation_topology(in_value = None)
    grom_system.ptp.PERTATOMPARAM = pert_atom_block

    grom_system.ptp.TITLE = TITLE("Automatic generated pertubation file. ")

    # Swap only the file extension. A plain str.replace('top', 'ptp') would also rewrite
    # any other 'top' in the path (e.g. a directory called 'topologies').
    output_perturbed_top = os.path.splitext(input_pairwise_top)[0] + '.ptp'
    grom_system.ptp.write(out_path = output_perturbed_top)

    return output_perturbed_top
    
    
    

### execute the function above for all pairs we wish to work with

In [25]:
# TI legs to compute: ligand 1 paired with each of the ligands 2 to 6
pairs_to_calculate = [(1, other_lig) for other_lig in range(2, 7)]

In [26]:
print(root_dir)

for a, b in pairs_to_calculate:

    # One directory per leg, with an 'input' sub-folder for the generated files
    sub_dir = f'{root_dir}/leg_{a}_{b}'
    for folder in (sub_dir, sub_dir + '/input'):
        bash.make_folder(folder)

    # Reduce the topologies
    #
    # NOTE: if you want to keep the protein,
    # make sure to switch to the commented-out value below!!!
    with_protein = 0
    #with_protein = num_ligs+1

    out_topology = reduce_reeds_topology_for_TI(
        in_top_path=f'{input_topols_dir}/NIK_ligands.top',
        pair=(a, b),
        with_protein=with_protein,
        out_path=sub_dir + '/input/',
        prefix='NIK_openff',
    )

    # Perturbation topology matching the reduced topology
    out_ptp = prepare_ptp(out_topology)
  
    


/cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent
executing: red_top @topo /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/reeds_input/NIK_ligands.top @atoms 1:a 2:a  > /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/leg_1_2/input//NIK_openff_TI_1_2.top


	CHECK dependencies

Check /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/leg_1_2/input/
Check /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/leg_1_2/input//NIK_openff_TI_1_2.top
/cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/leg_1_2/input/
/cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/leg_1_2/input//NIK_openff_TI_1_2.top
All dependencies are correct! 


Generate Empty:  imd
Parsing File:  top
Generate Empty:  cnf
executing: red_top @topo /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/reeds_input/NIK_ligands.top @atoms 1:a 3:a  > /cluster/home/cchampion/work/REEDS/NIK/TI_co

### Adjust IMD FILE

Here we modify the imd file so that the FORCE block, the MULTIBATH block, etc. match the reduced system (atoms were removed compared to the RE-EDS system).

We also adjust the simulation time here !

In [27]:
import copy
# Independent working copy of the template IMD (deepcopy, so the template itself is not affected by edits to it).
# Note that the loop in the next cell rebinds tmp_imd for every pair.
tmp_imd = copy.deepcopy(template_ti_imd)

In [28]:
# note: there might be some stuff to add here

for a, b in pairs_to_calculate:
    # Deep copy on purpose: a shallow copy.copy() shares the block objects (FORCE, SYSTEM,
    # INITIALISE, ...) with the template, so the assignments below would also modify
    # template_ti_imd and make this cell depend on how often it has been run.
    tmp_imd = copy.deepcopy(template_ti_imd)

    num_atoms_ligA = int(num_atoms_ligs[a-1])
    num_atoms_ligB = int(num_atoms_ligs[b-1])

    tmp_imd.FORCE.BONDS = 0

    tmp_imd.SYSTEM.NSM = num_solv_mols

    # Modifications common to both solvent/protein - could be done elsewhere later:

    tmp_imd.INITIALISE.NTIVEL = 1
    tmp_imd.INITIALISE.NTISHK = 3
    tmp_imd.INITIALISE.NTISHI = 1

    # Printout frequency

    tmp_imd.WRITETRAJ.NTWE = 500
    tmp_imd.WRITETRAJ.NTWG = 500

    # Simulation time / number of steps
    #
    # 500000 = 1 ns chunks
    # 250000 = 500 ps chunks (good for protein complex in 24 hour queue)
    #
    tmp_imd.STEP.NSTLIM = 500000

    # Index of the last atom of each part of the reduced system, in file order:
    # ligand A, ligand B, (protein), solvent (3 atoms per solvent molecule).
    last_ligA = num_atoms_ligA
    last_ligB = num_atoms_ligA + num_atoms_ligB

    if num_protein_atoms: # When there is a protein, include it in the atom numbering
        last_protein = int(num_atoms_ligA+num_atoms_ligB+num_protein_atoms)
        last_solvent = int(num_protein_atoms+num_atoms_ligA+num_atoms_ligB+num_solv_mols*3)
        # energy groups: ligand A / ligand B / protein / solvent
        energy_group_ends = [last_ligA, last_ligB, last_protein, last_solvent]
        # temperature baths: both ligands / protein / solvent
        bath_ends = [last_ligB, last_protein, last_solvent]
    else: # When there is no protein just use the two ligands of interest + solvent
        last_solvent = int(num_atoms_ligA+num_atoms_ligB+num_solv_mols*3)
        # energy groups: ligand A / ligand B / solvent
        energy_group_ends = [last_ligA, last_ligB, last_solvent]
        # temperature baths: both ligands / solvent
        bath_ends = [last_ligB, last_solvent]

    tmp_imd.FORCE.NEGR = len(energy_group_ends)
    tmp_imd.FORCE.NRE = energy_group_ends

    # Set proper multibath: one bath (and one DOF set) per entry of bath_ends
    num_baths = len(bath_ends)
    tmp_imd.MULTIBATH = imd_blocks.MULTIBATH(ALGORITHM=0, NBATHS=num_baths,
                                             TEMP0=[298.15] * num_baths,
                                             TAU=[0.1] * num_baths,
                                             DOFSET=num_baths,
                                             LAST=bath_ends,
                                             COMBATH=range(1, num_baths+1),
                                             IRBATH=range(1, num_baths+1))

    # once all proper corrections have been made, save the file:

    out_imd = root_dir + '/leg_'+ str(a) + '_' + str(b) + '/input/NIK_TI_' + str(a) + '_' + str(b) +'.imd'
    tmp_imd.write(out_imd)
    
    

#### Step 2 bis - Update the distance restraint files.

For now we will do this step manually


## Step 3: Reduce the cnf files

In [29]:
for a, b in pairs_to_calculate:
    # directory of this leg, in which the reduced coordinates are saved
    sub_dir = f'{root_dir}/leg_{a}_{b}'

    # We will use a starting conformation from both ends (SSM of ligand a and of ligand b).
    tmp_cnf_a = copy.deepcopy(ssm_confs[a-1])
    tmp_cnf_b = copy.deepcopy(ssm_confs[b-1])

    print(f'working on leg: {a} {b}')

    # Remove every ligand that is not part of this leg from both conformations
    for i in range(1, num_ligs+1):
        if i in (a, b):
            continue

        resname = f'LI{i}'

        print('deleting ' + resname)
        for leg_cnf in (tmp_cnf_a, tmp_cnf_b):
            leg_cnf.delete_residue(resName=resname)

    # Write the new cnfs to file; only the first path is reported
    out_prefix = f'{sub_dir}/input/NIK_TI_leg_{a}_{b}_ssm_'
    print('file written in: ' + out_prefix + str(a) + '.cnf')
    tmp_cnf_a.write(out_prefix + str(a) + '.cnf')
    tmp_cnf_b.write(out_prefix + str(b) + '.cnf')

working on leg: 1 2
deleting LI3
deleting LI4
deleting LI5
deleting LI6
file written in: /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/leg_1_2/input/NIK_TI_leg_1_2_ssm_1.cnf
working on leg: 1 3
deleting LI2
deleting LI4
deleting LI5
deleting LI6
file written in: /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/leg_1_3/input/NIK_TI_leg_1_3_ssm_1.cnf
working on leg: 1 4
deleting LI2
deleting LI3
deleting LI5
deleting LI6
file written in: /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/leg_1_4/input/NIK_TI_leg_1_4_ssm_1.cnf
working on leg: 1 5
deleting LI2
deleting LI3
deleting LI4
deleting LI6
file written in: /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/leg_1_5/input/NIK_TI_leg_1_5_ssm_1.cnf
working on leg: 1 6
deleting LI2
deleting LI3
deleting LI4
deleting LI5
file written in: /cluster/home/cchampion/work/REEDS/NIK/TI_comparison/openff/solvent/leg_1_6/input/NIK_TI_leg_1_6_ssm_1.cnf


# Step 4: Make the distance restraint files

For now, we create these files manually and copy them into the input directories by hand.