In [220]:
import networkx as nx
from community import community_louvain as cm
import pandas as pd
import numpy as np
import scipy as sp
from scipy.linalg import expm, sinm, cosm

In [221]:
def subgraph(A):
    "This returns the subgraph centrality matrix"
    from numpy import linalg as LA
    w, v = LA.eig(A)
    w=[l/max(w,key=abs) for l in w]
    expdiag=[np.exp(l) for l in w]
    intermediate=np.matmul(v,np.diag(expdiag))
    subgraphmat=np.matmul(intermediate,np.linalg.inv(v))
    subgraphmat=subgraphmat.real
    return subgraphmat;
   

In [223]:
# read yeast protein interaction file
g = nx.read_weighted_edgelist("4932.protein.links.v11.5.txt",comments="#",nodetype=str)

In [224]:
# thresholding
threshold_score = 750
for edge in g.edges: 
    weight = list(g.get_edge_data(edge[0],edge[1]).values())
    if(weight[0] <= threshold_score):
        g.remove_edge(edge[0],edge[1])

In [225]:
#remove essential nodes
file = open("essential_nodes.txt")
for line in file.readlines():
    node = line.strip()
    if node in g:
        g.remove_node(node)
file.close()

In [226]:
#select largest component
largestcc = max(nx.connected_components(g),key=len)
g = g.subgraph(largestcc)
Adj = nx.to_numpy_array(g)
print('number of nodes of largest connected subgraph of g:',g.number_of_nodes())

number of nodes of largest connected subgraph of g: 4396


In [227]:
# louvain partition
partition = cm.best_partition(g, random_state=284)

In [228]:
modsizes=[]
for i in range(0,max(list(partition.values()))):
    modsizes.append(list(partition.values()).count(i))
print(modsizes)
print('There are',len(modsizes),'modules.')

[8, 101, 192, 233, 421, 200, 309, 317, 248, 32, 327, 10, 295, 146, 145, 548, 207, 41, 377, 85]
There are 20 modules.


In [234]:
len(modsizes)

20

In [238]:
partitems=list(partition.items())
summary = ["Module;Nodes in module"]
for j in range(0,len(modsizes)):
    proteins_in_mod=[]
    for i in range(0,len(partitems)):
        if partitems[i][1]==j:
            proteins_in_mod.append(partitems[i][0])
    string='proteins in mod {} ='.format(j)        
    # generate summary
    summary.append("{};{}".format(j, proteins_in_mod))


In [240]:
with open("communities list.csv", "w") as file:
    file.write("\n".join(summary))

In [230]:
partvals=list(partition.values())
newgnodes=list(g.nodes())
SOD1_index=newgnodes.index('4932.YJR104C')
newgnodes[SOD1_index]
SOD1_module=partvals[SOD1_index]
print('SOD is node',SOD1_index)
print('SOD1 is in module',SOD1_module)
print('Module',SOD1_module,'has',modsizes[SOD1_module],'nodes')

SOD is node 247
SOD1 is in module 7
Module 7 has 317 nodes


In [236]:
SOD1modnodes=[]
for i in range(0,len(partitems)):
    if partitems[i][1]==SOD1_module:
        SOD1modnodes.append([partitems[i][0],i,])

In [232]:
subgraphmat=subgraph(Adj)

In [217]:
# generate subgraph centrality dictionary for proteins in SOD1's module
summary = {}
for entry in SOD1modnodes:
    summary[entry[1]]=subgraphmat[entry[1],SOD1_index]
#Sort in descending subgraph centrality order
summary=["{};{}".format(k,v) for k, v in sorted(summary.items(), key=lambda item: item[1],reverse=True)]

In [219]:
with open("relative subgraph centralities in SOD1 community.csv", "w") as file:
    file.write("\n".join(summary))