In [4]:
import networkx as nx
from community import community_louvain as cm
import numpy as np
import scipy as sp
from numpy import linalg as la

In [5]:
def participation_coefficient(graph, partition):
    """Return a dictionary ``{node: participation coefficient}``.

    For a node with degree ``k`` that has ``k_s`` neighbours in module ``s``,
    the coefficient is ``1 - sum_s(k_s**2) / k**2``. It is 0 when all of the
    node's neighbours lie in a single module and grows as the neighbours are
    spread over more modules.

    Parameters
    ----------
    graph
        Object that iterates over its nodes and provides ``neighbors(v)`` and
        ``degree(v)`` (e.g. a networkx graph).
    partition : dict
        ``{node: module}``. Module labels may be any hashable values; they do
        not have to be consecutive non-negative integers.

    Returns
    -------
    dict
        ``{node: participation coefficient}`` for every node in ``graph``.
        Nodes with degree 0 get ``0.0``.

    Raises
    ------
    KeyError
        If a neighbour of some node is missing from ``partition``.
    """
    pc_dict = {}
    for v in graph:
        degree = graph.degree(v)
        if degree == 0:
            # No neighbours at all: define the coefficient as 0 rather than
            # dividing by zero.
            pc_dict[v] = 0.0
            continue
        # Count the neighbours of v per module, keyed by the module label
        # itself so the labels need not be usable as list indices.
        links_per_module = {}
        for w in graph.neighbors(v):
            module = partition[w]
            links_per_module[module] = links_per_module.get(module, 0) + 1
        pc_dict[v] = 1 - sum(k**2 for k in links_per_module.values()) / degree**2
    return pc_dict

In [6]:
def proximity_to_sod1(g, source='4932.YJR104C'):
    """Return ``{node: shortest path length from source}`` for every node in ``g``.

    Lengths are hop counts (edge weights are not used). All distances come
    from one single-source search instead of one search per node.

    Parameters
    ----------
    g : networkx graph
    source : node, optional
        Node the distances are measured from. Defaults to ``'4932.YJR104C'``,
        the node this notebook refers to as SOD1.

    Returns
    -------
    dict
        ``{node: distance}`` in the iteration order of ``g``; empty when ``g``
        has no nodes.

    Raises
    ------
    networkx.NodeNotFound
        If ``g`` is non-empty and ``source`` is not one of its nodes.
    networkx.NetworkXNoPath
        If some node of ``g`` cannot be reached from ``source``.
    """
    if len(g) == 0:
        # Nothing to measure; do not look the source up in an empty graph.
        return {}
    lengths = nx.single_source_shortest_path_length(g, source)
    d = {}
    for node in g:
        if node not in lengths:
            # The single-source search only reports reachable nodes.
            raise nx.NetworkXNoPath(
                "No path between {} and {}.".format(source, node)
            )
        d[node] = lengths[node]
    return d

In [7]:
def sc_matrix(g):
    """Return the subgraph centrality matrix of ``g``.

    The result is the real part of ``exp(A / rho)``, where ``A`` is the array
    returned by ``nx.to_numpy_array(g)`` and ``rho`` is the largest eigenvalue
    magnitude of ``A``. It is evaluated through the eigendecomposition
    ``A = V diag(w) V^-1`` as ``V diag(exp(w / rho)) V^-1``.

    Parameters
    ----------
    g : networkx graph

    Returns
    -------
    numpy.ndarray
        Square real array with one row and one column per node of ``g``.
    """
    A = nx.to_numpy_array(g) # adjacency matrix
    w, v = la.eig(A)
    # Normalise by the spectral radius, i.e. the eigenvalue *magnitude*.
    # Dividing by the signed eigenvalue of largest magnitude would flip the
    # sign of the whole spectrum whenever that eigenvalue is negative (it can
    # be picked for bipartite graphs, whose spectrum is symmetric about 0).
    spectral_radius = np.abs(w).max() if w.size else 0.0
    if spectral_radius > 0:
        # When every eigenvalue is 0 the division is skipped, so the exponents
        # stay 0 instead of becoming NaN.
        w = w / spectral_radius
    # Scaling column j of V by exp(w[j]) is the same as V @ diag(exp(w)).
    subgraphmat = (v * np.exp(w)) @ la.inv(v)
    return subgraphmat.real

In [8]:
def subgraph_centralities(g, reference_node='4932.YJR104C'):
    """Return both the regular and relative subgraph centralities.

    Parameters
    ----------
    g : networkx graph
    reference_node : node, optional
        Node the relative centralities refer to. Defaults to
        ``'4932.YJR104C'``, the node this notebook refers to as SOD1.

    Returns
    -------
    (dict, dict)
        ``sc`` maps each node to its diagonal entry of ``sc_matrix(g)``;
        ``rsc`` maps each node to the entry in its row and the column of
        ``reference_node``.

    Raises
    ------
    ValueError
        If ``reference_node`` is not a node of ``g``.
    """
    nodes = list(g.nodes())
    # Locate the reference column first so a missing node fails before the
    # matrix is computed.
    s = nodes.index(reference_node)
    A = sc_matrix(g)
    sc = {}
    rsc = {}
    for i, node in enumerate(nodes):
        sc[node] = A[i, i]
        rsc[node] = A[i, s]
    return sc, rsc

In [9]:
# Read the yeast protein interaction file as a weighted edge list: every data
# line supplies two node ids (kept as strings) and a numeric edge weight, and
# lines starting with "#" are skipped as comments.
# NOTE(review): a column-header row that does not start with "#" would be
# parsed as an edge — confirm the file has none.
g = nx.read_weighted_edgelist("4932.protein.links.v11.5.txt",comments="#",nodetype=str)

In [10]:
# thresholding: drop every edge whose weight does not exceed the threshold
# (edges scoring exactly the threshold are removed as well)
threshold_score = 750
# Collect the edges first and delete them afterwards, so the graph is never
# modified while its edge view is being iterated. The score is read from the
# "weight" attribute by name rather than as "whatever attribute comes first".
low_score_edges = [
    (u, v)
    for u, v, score in g.edges(data="weight")
    if score <= threshold_score
]
g.remove_edges_from(low_score_edges)

In [11]:
#remove essential nodes
# The file lists one node id per line. The with-block closes the file even if
# reading fails part-way through.
with open("essential_nodes.txt") as essential_file:
    essential_nodes = [line.strip() for line in essential_file]
# remove_nodes_from silently ignores ids that are not in the graph
g.remove_nodes_from(essential_nodes)

In [12]:
# Keep only the largest connected component: order the components by size,
# biggest first, and restrict g to the nodes of the first one.
components = list(nx.connected_components(g))
components.sort(key=len, reverse=True)
largest_component = components[0]
g = g.subgraph(largest_component)

In [13]:
# Louvain partition: assigns every node of g to a community.
# random_state is fixed so that re-running the cell gives the same partition.
partition = cm.best_partition(g, random_state=284)

In [14]:
# calculate the participation coefficient of every node with respect to the
# Louvain communities; pc is a dictionary {node: participation coefficient}
pc = participation_coefficient(g, partition)

In [16]:
# calculate proximities to SOD1: prox is a dictionary
# {node: shortest path length between the SOD1 node and that node}
prox = proximity_to_sod1(g)

In [17]:
# calculate the subgraph centralities (sc) and the relative subgraph
# centralities (rsc); both are dictionaries keyed by node
sc, rsc = subgraph_centralities(g)

In [18]:
# Build the summary as a list of CSV lines: the header first, then one line
# per node of g with its community and the metrics computed above.
summary = ["Node,Community,Proximity to SOD1,Relative subgraph centrality,Subgraph centrality,Participation coefficient"]
row_template = "{},{},{},{},{},{}"
summary.extend(
    row_template.format(n, partition[n], prox[n], rsc[n], sc[n], pc[n])
    for n in g
)

In [19]:
# Write the summary lines to summary.csv, separated by newlines
# (no newline is written after the last line).
with open("summary.csv", "w") as summary_file:
    csv_text = "\n".join(summary)
    summary_file.write(csv_text)