In this notebook you define a contact map for the critical regions involved in unbinding. These are contacts between the protein and the DNA as well as contacts in the hinge region. It can be used on different structures, crystal and NMR structures, that can serve as a reference. In the case of the NMR structure, there are several models. The approach here is to find the pair that is closest in most models. To obtain pairs similar to those in the specific complexes, the same amino acid residues (atom ID) and the same bases (residue ID) of the DNA are used.

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import set_matplotlib_formats
import numpy as np
import mdtraj as md
import nglview as ngl
import sys
#sys.path.append("/home/x_mallu/mln_lf/")



In [2]:
# Base directory of the project and the sub-folder that holds the reference
# structures; the generated PyMOL/PLUMED files are written to folder_out as well.
folder = "/home/x_mallu/mln_lf/"
folder_out = folder + "1OSL_CV_analsyis/"
# NOTE(review): presumably the complete NMR structure file (all models) — confirm.
NMR_pdb = md.load_pdb(folder + "1OSL_CV_analsyis/1osl_C52V_GMX_new_numbering.pdb")
# Structure used for every selection and distance calculation below
# ("_f1" presumably denotes the first frame/model of the NMR file — confirm).
topo = md.load_pdb(folder+ "1OSL_CV_analsyis/1osl_C52V_GMX_new_numbering_f1.pdb")

Get the DNA backbone and the DNA bases.

In [3]:
# Atom indices of the DNA backbone: sugar carbons/oxygens (primed atom names),
# the phosphorus atom and the phosphate oxygens. `=~` is the pattern-match
# operator of the mdtraj selection language.
DNA_bb = topo.topology.select(\
    """(name =~ "C1\'") or (name =~ "C2\'") or (name =~ "C3\'") or (name =~ "C4\'") or (name =~ "C5\'") or 
    (name =~ "O1\'") or (name =~ "O2\'") or (name =~ "O3\'") or (name =~ "O4\'") or (name =~ "O5\'")  
    or (name P) or (name =~ 'O*P')""" )
print("Number if DNA backbone atoms", len(DNA_bb ))

Number if DNA backbone atoms 390


In [22]:
# Atom indices of the heavy base atoms (ring and exocyclic N/O/C atoms),
# restricted to the DNA residues that interact specifically with the protein
# in the crystal structure.
# NOTE(review): `resid` values are presumably mdtraj's 0-based residue indices — confirm.
DNA_bases = topo.topology.select(\
    """((name N1) or (name N3) or (name C2) or (name C4) or (name C5) or (name C6) or (name C8) or
    (name N6) or (name N4) or (name N7) or (name N9) or (name O6)  
    or (name O4) or (name O2)) and ((resid 2) or (resid 3) or (resid 5) or (resid 10)
    or (resid 12) or (resid 19) or (resid 23) or (resid 28)or (resid 30))""" )
print("Number of heavy DNA base atoms in the residues that are specifically interacting in the crystal structure", len(DNA_bases ))

Number of heavy DNA base atoms in the residues that are specifically interacting in the crystal structure 80


In [5]:
# Residue IDs in LacI sequence numbering (chain A).
# DNA-contacting residues: Leu6, Tyr7, Tyr17, Gln18, Ser21, Arg22, His29.
spec_residues_ids = [6, 7, 17, 18, 21, 22, 29]
# Hinge region: residues 51 to 57 inclusive.
hinge_residues_ids = list(range(51, 58))

In [6]:
def get_spec_residues_python_heavy(res_list, start=-35, chainB=None):
    """Build a selection string for the heavy atoms of the given residues.

    Parameters
    ----------
    res_list: list
        residue IDs according to the sequence numbering; must not be empty.
    start: int
        value subtracted from every residue ID to obtain the python (mdtraj)
        residue number, i.e. ``resid = ID - start``.
    chainB: int, optional
        offset added to every residue ID to address the same residues in the
        second chain. When given (and non-zero), the residues of both chains
        are included: first all of chain A, then all of chain B.

    Returns
    -------
    specific_residues: str
        string that identifies the residues in res_list in python numbering
        (mdtraj), only heavy atoms (``mass >= 2``).

    Raises
    ------
    ValueError
        if res_list is empty, because no valid selection can be built.
    """
    if len(res_list) == 0:
        raise ValueError("res_list must contain at least one residue ID")

    sequence_ids = list(res_list)
    if chainB:
        # Append the chain-B copies after the chain-A residues.
        sequence_ids = sequence_ids + [res_id + chainB for res_id in res_list]

    # Joining the terms handles any list length, including a single residue,
    # which previously ended up twice in the selection string.
    resid_terms = " or ".join(
        "resid {}".format(res_id - start) for res_id in sequence_ids
    )
    return "({}) and mass >= 2".format(resid_terms)

In [7]:
# Sanity check of the numbering shift: python residue index 41 is the residue
# displayed as LEU42, i.e. Leu6 of chain A in LacI sequence numbering.
topo.top.select("(resid 41) and mass >= 2") # this corresponds to residue 42 /A:Leu6

array([1229, 1231, 1233, 1236, 1238, 1242, 1246, 1247])

In [8]:
# Heavy-atom selection strings for the DNA-contacting and the hinge residues.
# chainB=62 shifts the residue IDs to include the same residues of the second chain.
selection_specific, selection_hinges = [
    get_spec_residues_python_heavy(residue_ids, chainB=62)
    for residue_ids in (spec_residues_ids, hinge_residues_ids)
]

In [9]:
# Show the generated selection string to verify the residue numbering.
selection_specific

'(resid 41 or resid 42 or resid 52 or resid 53 or resid 56 or resid 57 or resid 64 or resid 103 or resid 104 or resid 114 or resid 115 or resid 118 or resid 119 or resid 126) and mass >= 2'

In [10]:
# Atom indices of the heavy atoms of the DNA-contacting residues (both chains).
specific = topo.top.select(selection_specific)
# Display the residue of every selected atom (one entry per atom) to verify the selection.
[atom.residue for atom in topo.top.atoms if atom.index in specific]

[LEU42,
 LEU42,
 LEU42,
 LEU42,
 LEU42,
 LEU42,
 LEU42,
 LEU42,
 TYR43,
 TYR43,
 TYR43,
 TYR43,
 TYR43,
 TYR43,
 TYR43,
 TYR43,
 TYR43,
 TYR43,
 TYR43,
 TYR43,
 TYR53,
 TYR53,
 TYR53,
 TYR53,
 TYR53,
 TYR53,
 TYR53,
 TYR53,
 TYR53,
 TYR53,
 TYR53,
 TYR53,
 GLN54,
 GLN54,
 GLN54,
 GLN54,
 GLN54,
 GLN54,
 GLN54,
 GLN54,
 GLN54,
 SER57,
 SER57,
 SER57,
 SER57,
 SER57,
 SER57,
 ARG58,
 ARG58,
 ARG58,
 ARG58,
 ARG58,
 ARG58,
 ARG58,
 ARG58,
 ARG58,
 ARG58,
 ARG58,
 HIS65,
 HIS65,
 HIS65,
 HIS65,
 HIS65,
 HIS65,
 HIS65,
 HIS65,
 HIS65,
 HIS65,
 LEU104,
 LEU104,
 LEU104,
 LEU104,
 LEU104,
 LEU104,
 LEU104,
 LEU104,
 TYR105,
 TYR105,
 TYR105,
 TYR105,
 TYR105,
 TYR105,
 TYR105,
 TYR105,
 TYR105,
 TYR105,
 TYR105,
 TYR105,
 TYR115,
 TYR115,
 TYR115,
 TYR115,
 TYR115,
 TYR115,
 TYR115,
 TYR115,
 TYR115,
 TYR115,
 TYR115,
 TYR115,
 GLN116,
 GLN116,
 GLN116,
 GLN116,
 GLN116,
 GLN116,
 GLN116,
 GLN116,
 GLN116,
 SER119,
 SER119,
 SER119,
 SER119,
 SER119,
 SER119,
 ARG120,
 ARG120,
 ARG120,
 ARG12

In [11]:
# Atom indices of the heavy atoms of the hinge residues (both chains).
hinges = topo.top.select(selection_hinges) # hinge atoms
# Display the residue of every selected atom (one entry per atom) to verify the selection.
[atom.residue for atom in topo.top.atoms if atom.index in hinges]

[ARG87,
 ARG87,
 ARG87,
 ARG87,
 ARG87,
 ARG87,
 ARG87,
 ARG87,
 ARG87,
 ARG87,
 ARG87,
 VAL88,
 VAL88,
 VAL88,
 VAL88,
 VAL88,
 VAL88,
 VAL88,
 ALA89,
 ALA89,
 ALA89,
 ALA89,
 ALA89,
 GLN90,
 GLN90,
 GLN90,
 GLN90,
 GLN90,
 GLN90,
 GLN90,
 GLN90,
 GLN90,
 GLN91,
 GLN91,
 GLN91,
 GLN91,
 GLN91,
 GLN91,
 GLN91,
 GLN91,
 GLN91,
 LEU92,
 LEU92,
 LEU92,
 LEU92,
 LEU92,
 LEU92,
 LEU92,
 LEU92,
 ALA93,
 ALA93,
 ALA93,
 ALA93,
 ALA93,
 ARG149,
 ARG149,
 ARG149,
 ARG149,
 ARG149,
 ARG149,
 ARG149,
 ARG149,
 ARG149,
 ARG149,
 ARG149,
 VAL150,
 VAL150,
 VAL150,
 VAL150,
 VAL150,
 VAL150,
 VAL150,
 ALA151,
 ALA151,
 ALA151,
 ALA151,
 ALA151,
 GLN152,
 GLN152,
 GLN152,
 GLN152,
 GLN152,
 GLN152,
 GLN152,
 GLN152,
 GLN152,
 GLN153,
 GLN153,
 GLN153,
 GLN153,
 GLN153,
 GLN153,
 GLN153,
 GLN153,
 GLN153,
 LEU154,
 LEU154,
 LEU154,
 LEU154,
 LEU154,
 LEU154,
 LEU154,
 LEU154,
 ALA155,
 ALA155,
 ALA155,
 ALA155,
 ALA155]

Get the pair list between specifically interacting residues and the DNA.

In [12]:
def paired_list(list1,list2):
    """Build every combination of one element of list1 with one element of list2.

    Parameters
    ----------
    list1: list
        first group of entries (e.g. atom indices extracted with mdtraj).
    list2: list
        second group of entries (e.g. atom indices extracted with mdtraj).

    Returns
    -------
    list
        two-element lists ``[x, y]``; the element of list1 varies slowest,
        i.e. all partners of the first entry of list1 come first.
    """
    return [[first, second] for first in list1 for second in list2]

In [13]:
# All atom pairs between the DNA-contacting protein atoms and the DNA backbone,
# and the distance of every pair in the reference structure.
# NOTE(review): distances are presumably in nm (mdtraj's length unit) — confirm.
spec_DNA_bb = paired_list( specific, DNA_bb ) #this will give you atom pairs
distances_spec_DNA_bb = md.compute_distances(topo, spec_DNA_bb ) #this will give you the distances of the atom pairs


# Same for the pairs between the DNA-contacting protein atoms and the DNA bases.
spec_DNA_bases = paired_list( specific, DNA_bases )
distances_spec_DNA_bases = md.compute_distances(topo, spec_DNA_bases ) 

Get the distance between atom pairs in the hinge region.

In [14]:
# The hinge selection lists the atoms of the first chain followed by those of
# the second chain, so the two halves are paired against each other.
hinge_hinge = paired_list(hinges[:len(hinges) // 2], hinges[len(hinges) // 2:])  # atom pairs
distances_hinge_hinge = md.compute_distances(topo, hinge_hinge)  # distance of every atom pair
len(hinge_hinge)

2916

In [15]:
def _closest_rows_per_group(frame, key):
    """Keep, for every value of column ``key``, the row(s) of ``frame`` with the smallest ``dist``."""
    group_minimum = frame.groupby(key)["dist"].transform("min")
    closest = frame[frame["dist"] == group_minimum]
    # Stable sort: groups in ascending key order, original row order within a group.
    return closest.sort_values(key, kind="mergesort")


def shortest_distances(pairs, distances, topology):
    """Compute the closest atom pair for every residue of interaction partner A.

    Parameters
    ----------
    pairs: list
        atom pairs ``[atom_A, atom_B]`` (atom indices).
    distances: array-like
        distances between the atom pairs; only the first entry
        (``distances[0]``, i.e. the first frame) is used and it must hold one
        value per pair.
    topology: object
        object whose ``.top`` attribute is the topology the atom indices refer
        to (e.g. an mdtraj trajectory); used to look up the residue of each
        atom of partner A.

    Returns
    -------
    df_shortest_distances: pandas DataFrame
        columns ``pairA``, ``pairB``, ``dist`` and ``residID_A``; contains for
        every residue of partner A the atom pair with the shortest distance.
        Exactly tied minima are all kept. The row labels are the positions of
        the pairs in ``pairs``.
    """
    ref_df = pd.DataFrame(
        {
            "pairA": [pair[0] for pair in pairs],
            "pairB": [pair[1] for pair in pairs],
            "dist": distances[0],
        }
    )

    # Step 1: the closest partner for every atom of interaction partner A.
    closest_per_atom = _closest_rows_per_group(ref_df, "pairA")

    # Look up the residue row by row, so the new column always matches the
    # number of rows, even when tied minima give several rows for one atom.
    closest_per_atom = closest_per_atom.assign(
        residID_A=[
            topology.top.atom(int(atom_index)).residue.index
            for atom_index in closest_per_atom["pairA"]
        ]
    )

    # Step 2: of all atoms in a residue keep the one with the shortest distance.
    df_shortest_distances = _closest_rows_per_group(closest_per_atom, "residID_A")
    return(df_shortest_distances)

In [23]:
# Closest atom pair per protein residue for protein-backbone, protein-base and
# hinge-hinge contacts, all evaluated on the reference structure `topo`.
df_shortest_distances_protein_DNA_bb = shortest_distances( spec_DNA_bb, distances_spec_DNA_bb, topo)
df_shortest_distances_protein_DNA_bases = shortest_distances( spec_DNA_bases, distances_spec_DNA_bases, topo)
df_shortest_distances_hinges = shortest_distances(hinge_hinge, distances_hinge_hinge, topo)

In [24]:
# Closest protein-atom / DNA-backbone-atom pair for each DNA-contacting residue.
df_shortest_distances_protein_DNA_bb

Unnamed: 0,pairA,pairB,dist,residID_A
303,1229,891,0.360943,41
6544,1263,892,0.36985,42
10834,1400,892,0.276155,52
13689,1415,121,0.547956,53
17475,1462,924,0.339714,56
21081,1482,63,0.392321,57
23020,1576,30,0.294116,64
26628,2195,319,0.36495,103
33259,2231,320,0.343369,104
37160,2366,321,0.285294,114


In [25]:
# Closest protein-atom / DNA-base-atom pair for each DNA-contacting residue.
df_shortest_distances_protein_DNA_bases

Unnamed: 0,pairA,pairB,dist,residID_A
1562,1238,937,0.686242,41
5118,1261,905,0.472291,42
9005,1400,903,0.368648,52
12039,1419,175,0.285982,53
14522,1462,937,0.409355,56
17852,1485,114,0.266616,57
19449,1578,41,0.36344,64
22775,2199,331,0.833589,103
26988,2227,333,0.420337,104
31208,2368,365,0.436012,114


In [19]:
# Closest atom pair between the two hinge regions for each hinge residue of the first chain.
df_shortest_distances_hinges

Unnamed: 0,pairA,pairB,dist,residID_A
554,1943,2916,0.428405,86
823,1954,2914,0.318055,87
1088,1964,2905,0.355268,88
1595,1982,2948,0.470887,89
1736,1987,2905,0.932509,90
2600,2022,2905,0.922863,91
2708,2025,2905,1.078564,92


In [27]:
def write_pymol_script(native_contacts,name="pymol.py"):
    """Write a PyMOL script that creates one named selection per contact.

    Every contact ``(i, j)`` becomes a line ``sele pk<m>, id <i+1> or id <j+1>``
    where ``m`` counts the contacts starting at 1. The atom indices are shifted
    by +1 when written (presumably converting 0-based mdtraj indices to 1-based
    atom ids — confirm against the structure loaded in PyMOL).

    Parameters
    ----------
    native_contacts: list
        contacts (pairs of atom indices) that you want to include in the CV.
    name: string
        name of the script; it is written into the module-level ``folder_out``.

    Writes
    ------
    file: file
        ``folder_out``/``name`` (default ``pymol.py``); an existing file is
        overwritten.
    """
    # The context manager closes the file even if formatting a contact fails.
    with open(folder_out + "/{}".format(name), "w") as script:
        for m, (i, j) in enumerate(native_contacts, start=1):
            script.write("sele pk{m}, id {i} or id {j}\n"\
                         .format(i=i+1,j=j+1,m=m))

In [33]:
# Combined contact list: protein-DNA-base contacts followed by the hinge-hinge contacts.
pairs_binding = pd.concat([df_shortest_distances_protein_DNA_bases,df_shortest_distances_hinges])

In [34]:
# Write the PyMOL selections for all contacts of the combined list.
write_pymol_script(pairs_binding[["pairA", "pairB"]].values.tolist())


In [35]:
def write_plumed_input_rational_d0(native_contacts, reference_distances,name="NMR"):
    """Write a PLUMED CONTACTMAP block using rational switching functions with d0 != 0.

    Every contact ``(i, j)`` becomes one ``ATOMS<m>=<i+1>,<j+1>`` entry with a
    ``RATIONAL`` switch whose ``R_0`` is fixed to 0.3 and whose ``D_0`` is the
    reference distance of that contact (written with four decimals). The atom
    indices are shifted by +1 when written (presumably converting 0-based
    mdtraj indices to PLUMED's 1-based atom numbering — confirm).

    Parameters
    ----------
    native_contacts: list
        contacts (pairs of atom indices) that you want to include in the CV.
    reference_distances: list
        distances between the contact partners in the reference/starting
        structure, in the same order as native_contacts.
    name: string
        suffix of the output file name.

    Writes
    ------
    file: txt file
        plumed contact map ``cmap_rat_<name>.txt`` in the module-level
        ``folder_out``; an existing file is overwritten.

    Raises
    ------
    IndexError
        if fewer reference distances than contacts are given. This is checked
        before the file is opened so that no truncated file is left behind.
    """
    if len(reference_distances) < len(native_contacts):
        raise IndexError(
            "reference_distances has {} entries but {} contacts were given".format(
                len(reference_distances), len(native_contacts)))

    # The context manager closes the file even if formatting a contact fails.
    with open(folder_out + "/cmap_rat_{}.txt".format(name), "w") as cmap_file:
        cmap_file.write("CONTACTMAP ...\n")
        for m, (i, j) in enumerate(native_contacts, start=1):
            cmap_file.write("ATOMS{m}={i},{j} SWITCH{m}={{RATIONAL R_0=0.3 D_0={dist:.4f} }}\n"\
                            .format(i=i+1,j=j+1,m=m, dist = reference_distances[m-1]))
        cmap_file.write("LABEL=cmap\n")
        cmap_file.write("SUM\n")
        cmap_file.write("... CONTACTMAP\n")

In [36]:
# Contact map of the specific protein-DNA-base contacts only.
write_plumed_input_rational_d0(
    df_shortest_distances_protein_DNA_bases[["pairA", "pairB"]].values.tolist(),
    df_shortest_distances_protein_DNA_bases["dist"].tolist(),
    name="specific_protein_DNA_NOD",
)

In [37]:
# Contact map of the protein-DNA-base contacts plus the hinge-hinge contacts.
write_plumed_input_rational_d0(
    pairs_binding[["pairA", "pairB"]].values.tolist(),
    pairs_binding["dist"].tolist(),
    name="specific_protein_DNA_plus_protein_protein_NOD",
)