In [None]:
# Load the python package
import os
# import dynetan
from dynetan.toolkit import *
from dynetan.viz import *
from dynetan.proctraj import *
from dynetan.gencor import *
from dynetan.contact import *
from dynetan.datastorage import *

#from numpy.linalg import norm
from itertools import islice
from scipy import stats

import pandas as pd
import numpy as np
import scipy as sp

In [None]:
# Create the dynetan data container used throughout the notebook; its contents are
# loaded from disk by the `dnad.loadFromFile(...)` call in the loading cell.
dnad = DNAdata()

# Load network data obtained by Dynetan module 

In [None]:
# Directory holding the network data (placeholder value: edit before running).
dataDir = "Path_to_data_dir" ## from first notebook output

# Path where results will be written (you may want plots and data files in a new location)
workDir = "Path_to_working_dir(data will be saved)"

# Common file-name prefix of the data files (placeholder value: edit before running).
fileNameRoot = "from_first_notebook_output" 
fullPathRoot = os.path.join(dataDir, fileNameRoot)

# Define the segID of the Ligand being studied.
# NOTE(review): ligandSegID is not read anywhere in the visible part of this notebook.
ligandSegID = "SYST"
# Populate the data container from the files that share the fullPathRoot prefix.
dnad.loadFromFile(fullPathRoot)
# Reduced trajectory and matching structure file, both derived from the same prefix.
dcdVizFile = fullPathRoot + "_reducedTraj.dcd"
pdbVizFile = fullPathRoot + "_reducedTraj.pdb"

# `mda` is not imported explicitly in this notebook; presumably it is provided by one
# of the `from dynetan... import *` lines -- TODO confirm.
workUviz = mda.Universe(pdbVizFile, dcdVizFile)
# We add this to the object for ease of access.
dnad.nodesAtmSel = workUviz.atoms[ dnad.nodesIxArray ]
print(dnad.nodesAtmSel)

A function to calculate pathlengths in terms of edge betweenness:

In [None]:
def getBCsum(List, Indx):
    """Sum the normalized edge betweenness along a path.

    Parameters
    ----------
    List : sequence of node indices; consecutive entries are the edges of the path.
    Indx : index into ``dnad.btws`` selecting the betweenness mapping to read.

    Returns
    -------
    The sum of ``dnad.btws[Indx][edge] / maximumBetweeness`` over all edges of the
    path, or 0 when the path has fewer than two nodes.

    Notes
    -----
    Reads the notebook-level names ``dnad`` and ``maximumBetweeness`` at call time.
    Edge keys are looked up with the smaller node index first.
    """
    total = 0
    for nodeA, nodeB in zip(List, List[1:]):
        # Betweenness keys are ordered pairs (smaller index, larger index).
        edge = (nodeB, nodeA) if nodeA > nodeB else (nodeA, nodeB)
        total += (dnad.btws[Indx][edge]) / maximumBetweeness
    return total

## Write the optimal and sub-optimal paths between source and sink nodes to a file:

In [None]:
source = list(range(1187,1191)) ## resid list to define source nodes
sink = [1047, 1052, 471, 476] ## resid list to define sink nodes

# Name of the path table written below. It must be the same name that the analysis
# cells further down load with np.loadtxt.
pathListFileName = "file_containing_all_paths.dat"

# Initialize variable with high value.
minimumBetweeness = 100
# Initialize variable with low value.
maximumBetweeness = -1

# NOTE(review): the normalization constants are taken from the first window only
# (dnad.btws[0]) and then applied to every window -- confirm this is intended.
for pair, btw in dnad.btws[0].items():
    if btw < minimumBetweeness:
        minimumBetweeness = btw
    if btw > maximumBetweeness:
        maximumBetweeness = btw

# Normalize the value.
minimumBetweeness /= maximumBetweeness
# Determine how many extra sub-optimal paths will be written.
numSuboptimalPaths = 5

# Largest correlation of each window, computed once; used to normalize single matrix
# entries without copying the whole matrix for every source/sink pair.
corrMaxPerWindow = [dnad.corrMatAll[winIndx, :, :].max() for winIndx in range(dnad.numWinds)]

# Each output row describes one edge of one path:
#   node1+1  node2+1  normalized correlation  normalized betweenness  path-usage fraction  path rank
# Rank 0 is the optimal path, ranks 1..numSuboptimalPaths are the sub-optimal ones.
with open(os.path.join(workDir, pathListFileName), "w") as pathListFileD:

    for srcNode in source:
        print(srcNode)
        for trgNode in sink:
            for winIndx in range(dnad.numWinds):

                # Reconstructs the optimal path from Floyd-Warshall algorithm
                allPaths = [nx.reconstruct_path(srcNode, trgNode, dnad.preds[winIndx])]

                # Sub-optimal paths are searched in the graph of the *current* window.
                # The first path yielded is skipped (islice starts at 1) because the
                # optimal path was already added above.
                allPaths.extend(islice(nx.shortest_simple_paths(dnad.nxGraphs[winIndx],
                                                                srcNode, trgNode, weight="dist"),
                                       1, numSuboptimalPaths + 1))

                # Count how many of the (sub-)optimal paths go through each directed edge.
                pathCounter = defaultdict(int)
                for pathIter in allPaths:
                    for edge in zip(pathIter, pathIter[1:]):
                        pathCounter[edge] += 1

                if not pathCounter:
                    # No path contains an edge, so there is nothing to write.
                    continue

                # Used to normalize the counts to the range (0, 1].
                maxCount = max(pathCounter.values())

                for pathIndx, pathIter in enumerate(allPaths):
                    # Iterate over edges in the path
                    for node1, node2 in zip(pathIter, pathIter[1:]):

                        # All lookups use the original node indices. Betweenness keys
                        # are stored with the smaller index first.
                        btwKey = (node2, node1) if node1 > node2 else (node1, node2)
                        try:
                            btw = dnad.btws[winIndx][btwKey]
                        except KeyError:
                            # If one could not be calculated (very few paths going
                            # through this edge) set an arbitrarily low value.
                            btw = minimumBetweeness

                        normCor = dnad.corrMatAll[winIndx, node1, node2] / corrMaxPerWindow[winIndx]

                        # Only the ids written to the file are shifted by one; the
                        # lookups above must not use the shifted values.
                        string = "{} {} {} {} {} {}".format(node1 + 1, node2 + 1,
                                                             normCor,
                                                             btw/maximumBetweeness,
                                                             pathCounter[(node1, node2)]/maxCount,
                                                             pathIndx)

                        pathListFileD.write( string + "\n" )

## Calculate occurrence of nodes across optimal and sub-optimal pathways

In [None]:
# Load the path table from workDir into a NumPy array.
# NOTE(review): this file name must match the name used by the cell that writes the
# path table -- confirm both cells use the same name before running.
s1 = np.loadtxt(os.path.join(workDir,'file_containing_all_paths.dat'))
# Array shape, then the number of rows on its own.
print(s1.shape)
print(s1.shape[0])

Occurrence of residues in top-ranked paths:

In [None]:
def unique(list1):
    """Return the distinct elements of ``list1`` as a list.

    Duplicates are removed by passing the values through a ``set``, so the
    elements must be hashable and the order of the result is not guaranteed
    to follow the order of the input.
    """
    return list(set(list1))

# Residue ids that get_occurence() leaves out of its output file.
# NOTE(review): the sink ids listed here are one higher than those in the `sink` list
# of the path-search cell, while the source ids are not shifted -- confirm which
# numbering the path file uses and that every source/sink id is covered.
source_sink = [1048, 1053, 477, 472, 1187, 1188, 1189, 1190] # Provide the whole list of resids previously defined as sources and sinks
# Only rows whose path-rank column (column 5) is in this list are counted by get_occurence().
path_ranks = [0, 1, 2, 3] # provide ranks of paths across which you want to calculate the occurence of nodes

def get_occurence(data,output_filename, cutoff=0): ##data = the loaded path data as np matrix
    """Count how often each residue occurs in the selected paths and write the counts.

    Parameters
    ----------
    data : array-like path table; column 1 holds the residue id that is counted and
        column 5 holds the path rank of the row.
    output_filename : name of the file created inside ``workDir``.
    cutoff : only residues occurring more than ``cutoff`` times are written.

    Only rows whose rank is listed in the notebook-level ``path_ranks`` are counted,
    and residues listed in the notebook-level ``source_sink`` are not written.
    Each output line is ``<residue id> <count>``, in ascending residue order.
    """
    from collections import Counter

    table = np.asarray(data)
    # A single-row file is loaded as a 1-D array; promote it so rows can be indexed.
    rows = np.atleast_2d(table) if table.size else []

    resid1 = [int(row[1]) for row in rows if row[5] in path_ranks]
    # One pass over the list instead of calling list.count() for every residue.
    counts = Counter(resid1)
    uniqueResids = sorted(counts)

    print("shape of residue list:", len(resid1))
    print("No of unique residues in the list:", len(uniqueResids))
    print("Unique residues are:", uniqueResids)

    excluded = set(source_sink)
    with open(os.path.join(workDir,output_filename), 'w') as h:
        for x in uniqueResids:
            ## Excluding the source and sink resids; keep only occurrence > cutoff
            if x not in excluded and counts[x] > cutoff:
                print(x, counts[x], file=h)
                

# Write the residue occurrence table for the path data in `s1` to "nodefrequency.dat" inside workDir.
get_occurence(s1, "nodefrequency.dat")


Sort "nodefrequency.dat" by resid in ascending order and load the data

In [None]:
# Sort the occurrence table by residue id (first column) and keep a sorted copy on
# disk. Sorting is done in Python so that both the input and the output are resolved
# inside workDir, independently of the directory the notebook is started from.
nodeFreqPath = os.path.join(workDir, "nodefrequency.dat")
nodeFreqSortedPath = os.path.join(workDir, "nodefrequency_sorted.dat")

# ndmin=2 keeps the array two-dimensional even when the file holds a single row.
nodeFreq = np.loadtxt(nodeFreqPath, ndmin=2)
s2 = nodeFreq[np.argsort(nodeFreq[:, 0], kind="stable")]
np.savetxt(nodeFreqSortedPath, s2, fmt="%d")

In [None]:
# Split the sorted occurrence table into bar-plot inputs.
# atleast_2d keeps the column indexing valid when the table has a single row.
nodeFreqTable = np.atleast_2d(s2)

# Residue ids are used as categorical x-axis labels, so they are converted through
# int first; otherwise the labels would read "1048.0" instead of "1048".
resid = [str(int(residue)) for residue in nodeFreqTable[:, 0]]
count = list(nodeFreqTable[:, 1])

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'  # high resolution
## Plotting
%matplotlib inline

plt.figure(figsize=(14,7))
plt.bar(resid, count, color ='mediumpurple',width = 0.8, label='Name_your_label')

# plt.axis([1153, 1200, -0.4, 0.4])

plt.xlabel("Residues", fontname = 'Arial', fontsize = 14, fontweight = 'bold')
plt.ylabel("Occurance in Optimal Paths", fontname = 'Arial', fontsize = 14, fontweight = 'bold')
plt.yticks(fontname = 'Arial', fontsize ='large')
plt.xticks(
    rotation=90, 
    horizontalalignment='center',
    fontweight='light',
    fontsize='large'  
)
plt.legend(prop={"size":14}, frameon=False)
plt.savefig(os.path.join(workDir,"nodefrequency_sorted.png"), dpi=600)

## Calculate path-length across all optimal and sub-optimal pathways between pair of source-sink

In [None]:
import numpy as np
# Load the path table from workDir; column 3 and column 5 are the ones read by
# subpathlength() below (summed value and path rank, respectively).
s = np.loadtxt(os.path.join(workDir,"file_containing_all_paths.dat"))

# Path ranks whose lengths are collected by subpathlength().
subpathID = [0, 1, 2, 3, 4, 5] ## Give ID of path ranks ; if want to get lengths of only the optimal path, subpathID = [0]

def subpathlength(data, path_id_list):
    """Measure the length of every path in a path table.

    Parameters
    ----------
    data : array-like path table with one row per path edge. The columns used are
        0 and 1 (the two node ids of the edge), 3 (the value that is summed along
        the path) and 5 (the rank of the path the edge belongs to).
    path_id_list : iterable of path ranks to include in the result.

    Returns
    -------
    list with the number of edges of every selected path, grouped by rank in the
    order given by ``path_id_list`` and, within a rank, in file order.

    A new path starts whenever the rank changes or the edge does not continue from
    the end node of the previous edge, so consecutive paths of the same rank are
    kept apart and the last path of the table is included.
    """
    rows = np.asarray(data, dtype=float)
    # Normalize the input to 2-D: empty -> zero rows, single row -> one row.
    rows = rows.reshape(0, 6) if rows.size == 0 else np.atleast_2d(rows)

    # One [rank, summed column-3 value, edge count] entry per path, in file order.
    paths = []
    prevRank = None
    prevEnd = None
    for node1, node2, _cor, btw, _freq, rank in rows[:, :6]:
        if paths and rank == prevRank and node1 == prevEnd:
            paths[-1][1] += btw
            paths[-1][2] += 1
        else:
            paths.append([rank, btw, 1])
        prevRank, prevEnd = rank, node2

    pathlengths = []
    pathcount = []
    for path_id in path_id_list:
        for rank, btwSum, numEdges in paths:
            if rank == path_id:
                pathlengths.append(btwSum)
                pathcount.append(numEdges)

    print("shape of pathlengths list:", len(pathlengths))
    print("Path lengths:", pathcount)
    if pathcount:
        print("Average path length (betweenness estimate, pathlength estimate):", (sum(pathlengths) / len(pathlengths)), (sum(pathcount) / len(pathcount)))
    else:
        # Nothing to average; avoid dividing by zero.
        print("No paths with the requested ranks were found.")
    return pathcount

pathcount_sys = subpathlength(s, subpathID)


Plotting distribution of pathcount:

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'  # high resolution
## Plotting
%matplotlib inline
fig = plt.figure(figsize=(8,6))

import seaborn as sb
# Kernel-density estimate of the values in pathcount_sys, drawn on the figure created above.
# NOTE(review): `shade=` is deprecated in recent seaborn releases in favour of `fill=` --
# confirm against the installed seaborn version.
ax = sb.kdeplot(pathcount_sys, color="springgreen", shade=True, alpha=0.5, linewidth=1, label = 'Name_your_label')

#Setting the border of the box
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
# plt.xlim(-2, 6)
# plt.ylim(0, 0.35)


# Axis labels, tick fonts and legend are set through the pyplot state machine, so
# they apply to the axes that kdeplot drew on.
plt.xlabel("Opt Path length", fontname = 'Arial', fontsize = 14, fontweight = 'bold')
plt.ylabel("Density", fontname = 'Arial', fontsize = 14, fontweight = 'bold')
plt.xticks(fontname = 'Arial', fontsize = 12)
plt.yticks(fontname = 'Arial', fontsize = 12)
plt.legend(prop={"size":14}, loc = "upper right",frameon=False)
# plt.legend(prop={"size":14}, frameon=False)
plt.tight_layout()
# Save the figure into workDir at 600 dpi.
plt.savefig(os.path.join(workDir,"distribution_of_pathcount.png"), dpi=600)

## Calculate inter-domain betweenness, which is more robust than inter-community betweenness data.

In [None]:
# a function to assign nodes to a specific domain
def search(lst, value):
    """Return the index of the first element of ``lst`` that contains ``value``.

    ``lst`` is a sequence of containers (here: one list of residues per domain).
    ``None`` is returned when no element contains ``value``.
    """
    return next((position for position, members in enumerate(lst) if value in members), None)

# Define the domains as a list with one entry per domain; each entry is the list of
# residue numbers that belong to that domain (range end values are exclusive).
# The third domain is built from two separate residue ranges.
domain = [list(range(167,307)), list(range(494, 710)), list(range(710, 762))+list(range(906, 1096)), list(range(762, 906)), list(range(1451, 1470)), list(range(1362, 1381))] 


def interdomain(domain_list, output_prefix):
    """Accumulate the normalized correlation between every pair of domains.

    For every window, each correlation matrix is divided by its largest entry and
    every non-zero entry of its upper triangle whose two nodes belong to different
    domains is added to the (symmetric) domain-by-domain matrix. The totals over
    all windows are printed, rounded to three decimals, and saved to
    ``<output_prefix>.npy`` and ``<output_prefix>.csv`` inside ``workDir``.

    Parameters
    ----------
    domain_list : list with one entry per domain; each entry lists the node indices
        of that domain. A node listed in several domains is assigned to the first.
    output_prefix : file name, without extension, of the two output files.

    Nodes that belong to no domain are ignored. Reads the notebook-level names
    ``dnad`` and ``workDir``.
    """
    numDomains = len(domain_list)
    # assumes dnad.corrMatAll is (windows, nodes, nodes) -- TODO confirm
    numNodes = max(dnad.corrMatAll.shape[1:])

    # Node index -> domain index lookup; -1 marks nodes that are in no domain.
    # Domains are filled in reverse so that the first listed domain wins on overlap.
    nodeDomain = np.full(numNodes, -1, dtype=int)
    for domIndx in reversed(range(numDomains)):
        members = np.asarray([node for node in domain_list[domIndx] if 0 <= node < numNodes],
                             dtype=int)
        nodeDomain[members] = domIndx

    # create a 2d np.array to fill domain betweenness
    domain_bet = np.zeros(shape=(numDomains, numDomains))

    for winIndx in range(dnad.numWinds):

        windowCorMat = dnad.corrMatAll[winIndx, :, :]
        normCorMat = windowCorMat / windowCorMat.max()

        # Node pairs with a non-zero correlation, each pair taken once.
        nodes1, nodes2 = np.nonzero(np.triu(normCorMat))
        dom1 = nodeDomain[nodes1]
        dom2 = nodeDomain[nodes2]

        # Keep pairs whose nodes both have a domain and whose domains differ.
        keep = (dom1 >= 0) & (dom2 >= 0) & (dom1 != dom2)
        values = normCorMat[nodes1[keep], nodes2[keep]]

        # np.add.at accumulates repeated indices; plain fancy-index += would not.
        np.add.at(domain_bet, (dom1[keep], dom2[keep]), values)
        np.add.at(domain_bet, (dom2[keep], dom1[keep]), values)

    print(domain_bet)
    domain_bet_round = np.round(domain_bet, decimals = 3)
    np.save(os.path.join(workDir, output_prefix+".npy"), domain_bet_round)
    np.savetxt(os.path.join(workDir, output_prefix+".csv"), domain_bet_round, fmt='%.3f', delimiter=",")


    
# Example: interdomain(domain,"interdomain_betweenness")

# This section reads the interdomain betweenness matrices and calculates the difference in interdomain betweenness between 2 comparable systems...

In [None]:
## differential domain betweenness #######
def diff_domain_between(filename1, filename2, outputfilename): #all csv format files e.g.: filename1= interdomain_betweenness_s1.csv
    """Write the element-wise difference of two interdomain matrices.

    Parameters
    ----------
    filename1, filename2 : comma-separated matrix files inside ``workDir``
        (system 1 and system 2).
    outputfilename : name of the comma-separated file created inside ``workDir``;
        it holds ``system 1 - system 2`` with three decimals.

    Raises
    ------
    ValueError if the two matrices do not have the same shape.
    """
    import numpy as np

    # atleast_2d keeps a single-row file two-dimensional.
    s1 = np.atleast_2d(np.genfromtxt(os.path.join(workDir, filename1), delimiter=',')) # SYS-1
    s2 = np.atleast_2d(np.genfromtxt(os.path.join(workDir, filename2), delimiter=',')) # SYS-2

    # A difference is only meaningful when both systems use the same domain layout.
    if s1.shape != s2.shape:
        raise ValueError("Matrices have different shapes: {} vs {}".format(s1.shape, s2.shape))

    t1 = s1 - s2

    print("system-1:", s1)
    print("system-2:", s2)
    print("Difference:", t1)
    np.savetxt(os.path.join(workDir, outputfilename), t1, fmt='%.3f', delimiter=",")

## Read the graph and visualize the network

In [None]:
import numpy as np
import pandas as pd

Load the interdomain betweenness data:

In [None]:
# Load the comma-separated matrix to be drawn as a network (placeholder file name:
# edit before running).
interdomain_bet = np.genfromtxt(os.path.join(workDir, "differential_betweenness_filename.csv"), delimiter=",")

# Show the matrix and its dimensions as a quick sanity check.
print(interdomain_bet)
print(interdomain_bet.shape)

In [None]:
# Keep only the upper triangle (diagonal included) and zero everything below it, so
# that each pair of rows/columns contributes a single entry.
b = np.triu(interdomain_bet)
print(b)

 Specify the domain names for which interdomain betweenness is calculated

In [None]:
# Labels assigned to the graph vertices in the plotting cell, in matrix row order.
# NOTE(review): 8 names are listed here while the `domain` list defined earlier has
# 6 entries -- confirm the matrix being plotted has exactly one row per name.
node_names = ['NTD', 'Helical-1', 'HEPN1-I', 'Helical-2', 'HEPN1-II', 'Linker', 'HEPN2', 'crRNA'] 

Plot:

In [None]:
import matplotlib.pyplot as plt
import igraph as ig 
from igraph import *

# One vertex per matrix row, so that domains without any edge still get a vertex and
# the label list lines up with the vertices.
numVertices = b.shape[0]
if len(node_names) != numVertices:
    raise ValueError("node_names has {} entries but the matrix has {} rows".format(
        len(node_names), numVertices))

# get the row, col indices of the non-zero elements in your adjacency matrix
conn_indices = np.where(b)

# get the weights corresponding to these indices
weights = b[conn_indices]

# a list of (i, j) tuples of plain Python ints, each corresponding to an edge i -- j
edges = [(int(i), int(j)) for i, j in zip(*conn_indices)]

# initialize the graph from the vertex count and the edge list
G = ig.Graph(n=numVertices, edges=edges, directed=False)

# negative entries are drawn in red, all others in cyan
color_list = ['red' if weight < 0 else 'cyan' for weight in weights]

# assign node names and weights to be attributes of the vertices and edges
# respectively
G.vs['label'] = node_names
G.es['color'] = color_list
G.es['weight'] = np.absolute(weights)

# The absolute weights are also assigned to the 'width' attribute of the edges, so
# that igraph.plot sets the line thicknesses according to the edge weights.
G.es['width'] = np.absolute(weights)

# plot the graph on a circular layout
layout = G.layout("circle")
visual_style = {}
visual_style["vertex_size"] = 70
visual_style["vertex_color"] = "wheat"
visual_style["labels"] = True
visual_style["margin"] = 80
visual_style["layout"] = layout

ig.plot(G, os.path.join(workDir, "differential_betweenness_filename.png"), **visual_style)