In [6]:
%matplotlib inline
import matplotlib.pyplot as plt
import os
import math
import shutil
import numpy as np
from collections import Counter
import seaborn as sns
from prepare_bracket import *
from util import cd

In [7]:
def verify_bracket(aBracket):
    """Validate a bracket and hand back a shallow copy of it.

    Parameters
    ----------
    aBracket : list
        Members of the bracket. Uniqueness is checked through a ``set``,
        so the members must be hashable.

    Returns
    -------
    list
        A new list with the same elements in the same order.

    Raises
    ------
    RuntimeError
        If ``aBracket`` is not a list, or if any element occurs more than once.
    """
    if not isinstance(aBracket, list):
        raise RuntimeError("Please provide a list.")

    # A set can never be longer than its source, so "shorter" means "has repeats".
    unique_members = set(aBracket)
    if len(unique_members) < len(aBracket):
        raise RuntimeError("There are duplicated elements in the bracket list")

    return list(aBracket)

# Test

In [8]:
# Smoke test: split one bracket name on "_" and run the pieces through
# verify_bracket; the returned list is displayed as the cell output.
brackets = ["L01_L02_L03_L14_L15_L16"]
bracket = brackets[0].split(sep="_")
verify_bracket(bracket)

['L01', 'L02', 'L03', 'L14', 'L15', 'L16']

# Prepare a 6-competitor bracket

In [9]:
# Bracket names to prepare; each name is later split on "_" into its member ids.
brackets = ["L01_L02_L03_L14_L15_L16"]

In [10]:
# ---- Settings ----
srcPathBase = "/Tyk2/Data"  # holds the lib/, frcmod/ and mol2/ folders and rec_amber.pdb
destPathBase = "/Tyk2/6-competitor"  # output root; one folder per bracket is created below
PROTEIN_UNBOUND_DISTANCE = 50  # A; used as both radius and surfPointSep below -- adjust for your system
WATER_BOX_BUFFER = 15  # A; solvent_distance handed to systemBuilder
NUM_OF_COMPETITORS = 6

# ---- Bracket selection ----
bracket = brackets[0].split("_")
print(f"Preparing binders: {bracket}")

# Validated list of the ids that make up the bracket.
mol_info = verify_bracket(bracket)

numOfWat = {}            # bound id -> water count of the complex built for it
numOfWatAfterEquil = {}  # stays empty in this cell; the equalization cell fills it
folderName = "_".join(sorted(mol_info))

# ---- Build one complex per choice of bound molecule ----
for bound_mol_id in mol_info:
    target_path = f"{destPathBase}/{folderName}/1.Prepare/Bound_{bound_mol_id}"
    os.makedirs(target_path, exist_ok=True)

    with cd(target_path):  # change working directory
        # Stage the lib/frcmod/mol2 files of every molecule in the bracket
        # (the bound one included) into the current directory.
        for other_mol_id in mol_info:
            for fltype in ("lib", "frcmod", "mol2"):
                ligFile = FileBase(
                    fileSrc=f"{srcPathBase}/{fltype}/{other_mol_id}.{fltype}",
                    destDir=os.getcwd(),
                )
                ligFile.copyFile()

        # Stage the receptor file.
        protFile = FileBase(
            fileSrc=f"{srcPathBase}/rec_amber.pdb",
            destDir=os.getcwd(),
        )
        protFile.copyFile()

        # The receptor is the first entry of the molecule list. No disulfide
        # bonds are declared (protein.add_disulfide(res_i, res_j) would go here).
        protein = MoleculeFromPdb("rec_amber.pdb", mol_id="rec")
        mols = [protein]

        # Anchor points for the unbound molecules are computed only while
        # processing the first bound id and reused for all later complexes.
        if bound_mol_id == mol_info[0]:
            sampleSurf = SampleSurfacePoint(
                radius=PROTEIN_UNBOUND_DISTANCE,
                NPoints=NUM_OF_COMPETITORS - 1,
                surfPointSep=PROTEIN_UNBOUND_DISTANCE,
                protPDB="rec_amber.pdb",
            )
            unbound_anchor_dict = sampleSurf.getPointDict(mol_info)

        # Fresh iterator per complex: anchors are handed out in dict order to
        # each molecule that is not the bound one.
        anchor_xyz_gen = iter(unbound_anchor_dict.items())

        for mol_id in mol_info:
            mol = MoleculeFromMol2(f"{mol_id}.mol2", mol_id=mol_id, unit_name=mol_id)
            mol.add_lib_file(f"{mol_id}.lib")
            mol.add_frcmod_file(f"{mol_id}.frcmod")

            if mol_id != bound_mol_id:
                _, anchor_xyz = next(anchor_xyz_gen)
                mol.prepare_translation_vector(anchor_xyz=anchor_xyz)

            mols.append(mol)

        # Build the complex (alternative kept for reference:
        # forcefield=["ff14sb","gaff2_mod"]).
        b = systemBuilder(forcefield=["ff14sb", "gaff2"], solvent_distance=WATER_BOX_BUFFER)
        b.build_system_from_molecules(mols, output_prefix=f"B{bound_mol_id}")

        # Record the water count of this complex. numNonWatIon keeps the value
        # from the last iteration and is read again by the equalization cell.
        numWat, numNonWatIon = count_wat(f"B{bound_mol_id}.prmtop")
        numOfWat[bound_mol_id] = numWat

# Order the complexes by ascending water count; popitem() then removes and
# returns the last entry, whose value is the largest count.
numOfWatSorted = dict(sorted(numOfWat.items(), key=lambda pair: pair[1]))
largestBox = numOfWatSorted.popitem()[1]
print("Preparing Done!")

Preparing binders: ['L01', 'L02', 'L03', 'L14', 'L15', 'L16']
Preparing Done!


In [11]:
# Equalize the number of waters in every complex to that of the largest one.
#
# Reads from the preparation cell: mol_info, destPathBase, folderName, numOfWat,
# largestBox, WATER_BOX_BUFFER, numOfWatAfterEquil and numNonWatIon (the value
# left over from the last complex that was built).
SOLVENT_DELTA_CUTOFF = 50
for mol_id in mol_info:
    target_path = f"{destPathBase}/{folderName}/1.Prepare/Bound_{mol_id}"

    if numOfWat[mol_id] == largestBox:
        # Already at the target size: nothing to run, but record the count so
        # the final check also covers this complex (otherwise the largest box
        # is never compared, and an all-equal bracket would leave the dict
        # empty and trip the assertion).
        numOfWatAfterEquil[mol_id] = numOfWat[mol_id]
        continue

    with cd(target_path):  # change working directory
        equilizer = solventEquilizer(targetNumOfSolvent=largestBox,
                                     buffer=WATER_BOX_BUFFER,
                                     top=f"B{mol_id}.prmtop",
                                     crd=f"B{mol_id}.inpcrd",
                                     leapin="leap.parm.in",
                                     tol=SOLVENT_DELTA_CUTOFF,
                                     soluteres=numNonWatIon,
                                     molname="solute"
                                    )
        try:
            equilizer.run()
        except Exception as err:
            # Best effort: report the real buffer value and the underlying
            # error, then let the assertion below decide whether the bracket
            # is usable. KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f"buffer {WATER_BOX_BUFFER} Å is not proper. ({type(err).__name__}: {err})")

        # Re-count the waters from the topology in this directory.
        numWat, _ = count_wat(f"B{mol_id}.prmtop")
        numOfWatAfterEquil[mol_id] = numWat

# Every complex, including the one that defined the target, must end up with
# the same number of waters.
assert len(set(numOfWatAfterEquil.values())) == 1, (
    f"Unequal solvent counts after equalization: {numOfWatAfterEquil}"
)
print("All templates have equal number of solvent.")

All templates have equal number of solvent.
