## Import modules

In [1]:
import os
import glob
import shutil
from pdbtools import * 
import MDAnalysis as mda

## Make the directory tree for each system from the PDB files in the `pdb` directory

In [None]:
#Common prefix shared by every system name
name = 'wt_mpro_'

#System names are the prefix followed by a per-system suffix
ens_systems = [name + suffix for suffix in ('ens_dimer_asymm', 'ens_monomer')]
apo_systems = [name + suffix for suffix in ('apo_dimer', 'apo_monomer')]

In [2]:
import os
import shutil

def make_filetree(pbd_path,ligand,n_replicates,system_name='wt_mpro_ens_monomer_holo'):
    """Create the staged directory tree for one system and copy its PDB into it.

    The tree is created relative to the current working directory:

        <system_name>/0-input
        <system_name>/1-parmprep
        <system_name>/2-sysprep
        <system_name>/3-simprep/{0-min,1-heat,2-equil,3-prod}

    Parameters
    ----------
    pbd_path
        Path of the PDB file to copy into ``<system_name>/0-input``. The
        (misspelled) parameter name is kept so existing callers keep working.
    ligand
        Accepted but not used yet.
    n_replicates
        Accepted but not used yet.
    system_name
        Name of the top-level directory. Defaults to the value that used to be
        hard-coded in this function.

    Raises
    ------
    FileNotFoundError
        If ``pbd_path`` is not an existing file. Checked before any directory
        is created so a bad path does not leave a half-built tree behind.
    """
    if not os.path.isfile(pbd_path):
        raise FileNotFoundError('PDB file not found: ' + str(pbd_path))

    #Make a directory for each stage (exist_ok lets the cell be re-run safely)
    stage_dirs = ['0-input','1-parmprep','2-sysprep','3-simprep']
    for stage_dir in stage_dirs:
        os.makedirs(os.path.join(system_name, stage_dir), exist_ok=True)

    #Make a directory for each MD step inside 3-simprep
    sim_dirs = ['0-min','1-heat','2-equil','3-prod']
    for sim_dir in sim_dirs:
        os.makedirs(os.path.join(system_name, '3-simprep', sim_dir), exist_ok=True)

    #Copy the PDB into <system_name>/0-input
    shutil.copy(pbd_path, os.path.join(system_name, '0-input'))

# NOTE(review): make_filetree takes required arguments (pbd_path, ligand, n_replicates);
# this bare call raises TypeError as written -- pass them explicitly before running.
make_filetree()

## Obtain structural information from PDB files (to ensure uniformity across all systems)

In [35]:
#PDB files summarised with define_pdb below
wt_group = ['pdb/8HBK.pdb','pdb/8HUR.pdb','pdb/8DZ0.pdb','pdb/7VU6.pdb']
#glob.glob returns matches in arbitrary order; sort so the list is the same on every run
path_to_pdb_files = sorted(glob.glob('pdb/*'))
test_pdb_group = ['pdb/8H31_copy.pdb','pdb/8HBK_copy.pdb']

#provide name, number of models, chains, and residues
def define_pdb(pdb_path):
    """Print a structural summary of one PDB file.

    Reports, for segids A and B: protein residue counts, water (HOH) count,
    the non-protein/non-water residues, whether both chains have the same
    number of protein residues, and a consistency check that protein + water
    + hetatm residues add up to every residue in the two segids. Also lists
    which of the segids A-G contain protein residues.

    Parameters
    ----------
    pdb_path
        Path to the PDB file. The PDB id shown in the report is the file name
        without its directory and extension.

    Notes
    -----
    ``Symmetric`` only compares the number of protein residues in segids A
    and B; it does not compare sequences or coordinates.
    """
    #Take the id from the file name instead of slicing fixed character positions,
    #so paths outside 'pdb/' or with other extensions are handled too
    pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]
    #A PDB file carries both topology and coordinates, so it is passed once
    pdb = mda.Universe(pdb_path)

    #Create chain dictionary (include pdb-tools logic for renaming chains)
    chain_id = ['A','B','C','D','E','F','G']
    pdb_chain_list = {}
    for chain in chain_id:
        pdb_chain = pdb.select_atoms('protein and segid ' + chain)
        if pdb_chain.n_residues != 0:
            pdb_chain_list['chain_'+ chain] = pdb_chain.residues

    def _segment_groups(segid):
        #Return the (all atoms, protein, water, hetatm) selections for one segid
        base = 'segid ' + segid
        return (pdb.select_atoms(base),
                pdb.select_atoms(base + ' and protein'),
                pdb.select_atoms(base + ' and resname HOH'),
                pdb.select_atoms(base + ' and not protein and not resname HOH'))

    #Write logic for determining symmetric and asymmetric chains
    pdb_all_a, pdb_chain_a, pdb_water_a, pdb_hetatms_a = _segment_groups('A')
    pdb_all_b, pdb_chain_b, pdb_water_b, pdb_hetatms_b = _segment_groups('B')

    #Protein + water + hetatm residues should account for every residue in each segid
    pdb_sum_a = pdb_chain_a.n_residues + pdb_water_a.n_residues + pdb_hetatms_a.n_residues
    pdb_sum_b = pdb_chain_b.n_residues + pdb_water_b.n_residues + pdb_hetatms_b.n_residues
    pdb_sum_segid = pdb_sum_a + pdb_sum_b

    pdb_sum_all = pdb_all_a.n_residues + pdb_all_b.n_residues
    pdb_waters_all = pdb_water_a.n_residues + pdb_water_b.n_residues

    symmetric = pdb_chain_a.n_residues == pdb_chain_b.n_residues

    print('PDB id: ', pdb_id,
         '\nTotal residues: ',pdb_sum_all,
          '\nChainA: ', pdb_chain_a.n_residues,
          '\nChainB: ', pdb_chain_b.n_residues,
          '\nWaters: ', pdb_waters_all,
          '\nHetatms: ', pdb_hetatms_a.residues, pdb_hetatms_b.residues,
          '\nSymmetric ',symmetric,
         '\nSum segid: ', pdb_sum_segid,
         '\nMatch: ', pdb_sum_all == pdb_sum_segid,
         '\nChains: ', pdb_chain_list,
         '\n')

In [36]:
# Print the structural summary of every PDB file listed in wt_group.
for pdb in wt_group:
    define_pdb(pdb)

PDB id:  8HBK 
Total residues:  828 
ChainA:  300 
ChainB:  300 
Waters:  226 
Hetatms:  <ResidueGroup [<Residue 7YY, 401>]> <ResidueGroup [<Residue 7YY, 401>]> 
Symmetric  True 
Sum segid:  828 
Match:  True 
Chains:  {'chain_A': <ResidueGroup with 300 residues>, 'chain_B': <ResidueGroup with 300 residues>} 

PDB id:  8HUR 
Total residues:  738 
ChainA:  298 
ChainB:  298 
Waters:  140 
Hetatms:  <ResidueGroup [<Residue 7YY, 401>]> <ResidueGroup [<Residue 7YY, 401>]> 
Symmetric  True 
Sum segid:  738 
Match:  True 
Chains:  {'chain_A': <ResidueGroup with 298 residues>, 'chain_B': <ResidueGroup with 298 residues>} 

PDB id:  8DZ0 
Total residues:  657 
ChainA:  302 
ChainB:  306 
Waters:  44 
Hetatms:  <ResidueGroup [<Residue DMS, 401>, <Residue DMS, 402>, <Residue 7YY, 403>]> <ResidueGroup [<Residue DMS, 401>, <Residue 7YY, 402>]> 
Symmetric  False 
Sum segid:  657 
Match:  True 
Chains:  {'chain_A': <ResidueGroup with 302 residues>, 'chain_B': <ResidueGroup with 306 residues>} 

PDB 

## Make sure PDB chain residues, ligands, and waters have been properly named and organized. Remove unnecessary HETATMs (that information is lost after simulating anyway)

In [38]:
#Two candidate numbering schemes for the ligands:
#
#  (1) per chain
#      Ligand A: last residue of Chain_A + 1
#      Ligand B: last residue of Chain_B + 1
#
#  (2) after both chains (the scheme worked through below)
#      Ligand A: Chain_A + Chain_B + 1
#      Ligand B: Chain_A + Chain_B + 2
#      Waters:   start at Ligand_B + 1

#Example
Chain_A = 297
Chain_B = 298
Chain_AB = Chain_A + Chain_B

#Ligands follow directly after the last protein residue, waters after the ligands
Ligand_A = Chain_AB + 1
Ligand_B = Ligand_A + 1
Waters = Ligand_B + 1

print(Chain_A, Chain_B, Ligand_A, Ligand_B, Waters)


297 298 596 597 598
