In [20]:
import networkx as nx
from community import community_louvain as cm
import numpy as np
import scipy as sp
from numpy import linalg as la

In [21]:
def participation_coefficient(graph, partition):
    """Return a dictionary {node: participation coefficient}.

    For a node with degree k that has k_s neighbours in module s, the value is
    ``1 - sum_s (k_s / k) ** 2``: 0 when every neighbour lies in one module,
    growing towards 1 as the neighbours spread evenly over many modules.

    Parameters
    ----------
    graph : graph-like
        Must support iteration over its nodes, ``graph.neighbors(v)`` and
        ``graph.degree(v)`` (e.g. a networkx graph).
    partition : dict
        Mapping {node: module}. Module labels may be any hashable values;
        they do not need to be the consecutive integers 0..N-1.

    Returns
    -------
    dict
        {node: participation coefficient}. A node with degree 0 has no links
        to distribute and is assigned 0.0.

    Raises
    ------
    KeyError
        If a neighbour of some node is missing from ``partition``.
    """
    pc_dict = {}
    for v in graph:
        degree = graph.degree(v)
        if degree == 0:
            # Isolated node: avoid dividing by zero.
            pc_dict[v] = 0.0
            continue
        # Count links per module in a dict so labels are not used as list
        # indices (works for strings, gaps and negative labels alike).
        links_per_module = {}
        for w in graph.neighbors(v):
            module = partition[w]
            links_per_module[module] = links_per_module.get(module, 0) + 1
        squared_links = sum(k ** 2 for k in links_per_module.values())
        pc_dict[v] = 1 - squared_links / degree ** 2
    return pc_dict

In [22]:
def proximity_to_sod1(g, source='4932.YJR104C'):
    """Return {node: shortest-path length from ``source`` to node}.

    Path length is the unweighted hop count. All distances come from a single
    breadth-first search rooted at ``source`` rather than one search per node.

    Parameters
    ----------
    g : networkx graph
    source : node, optional
        Reference node; defaults to '4932.YJR104C' (SOD1).

    Returns
    -------
    dict
        One entry per node of ``g``, in the iteration order of ``g``.
        An empty graph yields an empty dict.

    Raises
    ------
    networkx.NodeNotFound
        If ``source`` is not in a non-empty ``g``.
    networkx.NetworkXNoPath
        If some node of ``g`` cannot be reached from ``source``.
    """
    if len(g) == 0:
        return {}
    # dict(...) also copes with networkx versions that return an iterator of
    # (node, length) pairs here.
    lengths = dict(nx.single_source_shortest_path_length(g, source))
    d = {}
    for node in g:
        if node not in lengths:
            # Same failure mode as a pairwise query for an unreachable target.
            raise nx.NetworkXNoPath(
                "No path between {} and {}.".format(source, node)
            )
        d[node] = lengths[node]
    return d

In [23]:
def sc_matrix(g):
    """Return the subgraph centrality matrix exp(A / rho).

    ``A`` is the adjacency matrix produced by ``nx.to_numpy_array(g)`` (edge
    ``weight`` attributes are used when present) and ``rho`` is its spectral
    radius, i.e. the largest absolute eigenvalue. The exponential is evaluated
    through the eigendecomposition ``A = V diag(w) V^-1``.

    Parameters
    ----------
    g : networkx graph

    Returns
    -------
    numpy.ndarray
        Real n x n matrix whose rows/columns follow the node order used by
        ``nx.to_numpy_array(g)``. For a graph without nodes the empty
        adjacency matrix is returned; when every eigenvalue is zero no
        rescaling is applied (an edgeless graph gives the identity).
    """
    A = nx.to_numpy_array(g)  # adjacency matrix
    if A.size == 0:
        return A

    if np.array_equal(A, A.T):
        # Symmetric (undirected) case: eigh guarantees real eigenvalues and an
        # orthonormal eigenbasis, so the inverse is just the transpose.
        w, v = la.eigh(A)
        v_inv = v.T
    else:
        w, v = la.eig(A)
        v_inv = la.inv(v)

    # Scale by the spectral radius, computed once. Using the magnitude keeps
    # the sign of A intact even when the dominant eigenvalue is negative
    # (e.g. bipartite graphs, whose spectrum is symmetric about zero).
    spectral_radius = np.max(np.abs(w))
    if spectral_radius > 0:
        w = w / spectral_radius

    # v * exp(w) scales column j of v by exp(w[j]), i.e. v @ diag(exp(w)).
    subgraphmat = np.matmul(v * np.exp(w), v_inv)
    return subgraphmat.real

In [24]:
def relative_subgraph_centrality(g, source='4932.YJR104C'):
    """Return {node: entry of the subgraph centrality matrix w.r.t. ``source``}.

    For the node at position ``i`` in the iteration order of ``g`` the value is
    ``sc_matrix(g)[i, s]``, where ``s`` is the position of ``source`` in
    ``g.nodes()``. This relies on ``sc_matrix`` ordering its rows/columns the
    same way ``g`` iterates over its nodes.

    Parameters
    ----------
    g : networkx graph
    source : node, optional
        Reference node; defaults to '4932.YJR104C' (SOD1).

    Returns
    -------
    dict
        One entry per node of ``g``.

    Raises
    ------
    ValueError
        If ``source`` is not a node of ``g``.
    """
    A = sc_matrix(g)
    s = list(g.nodes()).index(source)  # column of the reference node
    return {node: A[i, s] for i, node in enumerate(g)}

In [25]:
# read yeast protein interaction file
# Each line is parsed as "node1 node2 weight"; node ids are kept as strings and
# lines starting with "#" are skipped as comments.
# NOTE(review): presumably the header row of the downloaded file has been
# removed or prefixed with "#", otherwise it would be parsed as an edge — confirm.
g = nx.read_weighted_edgelist("4932.protein.links.v11.5.txt",comments="#",nodetype=str)

In [26]:
# thresholding: drop every edge whose score is <= threshold_score
threshold_score = 750
# Collect the edges first and delete them afterwards; changing the graph while
# iterating over its own edge view is not safe in general. The score is read
# explicitly from the "weight" attribute set by read_weighted_edgelist.
low_score_edges = [
    (u, v)
    for u, v, score in g.edges(data="weight")
    if score <= threshold_score
]
g.remove_edges_from(low_score_edges)

In [27]:
#remove essential nodes
# One node id per line; surrounding whitespace is stripped. The "with" block
# closes the file even if reading fails.
with open("essential_nodes.txt") as essential_file:
    essential_nodes = [line.strip() for line in essential_file]
# remove_nodes_from silently skips ids that are not in the graph.
g.remove_nodes_from(essential_nodes)

In [28]:
# keep only the largest connected component
components = list(nx.connected_components(g))
components.sort(key=len, reverse=True)  # biggest node set first
largest_component = components[0]
g = g.subgraph(largest_component)

In [29]:
# louvain partition
# random_state is fixed so the same partition is produced on every run.
# The result is used below as a {node: community} mapping.
partition = cm.best_partition(g, random_state=284)

In [30]:
# calculate participation coefficients
# {node: coefficient}, computed from each node's neighbours and the Louvain
# partition above.
pc = participation_coefficient(g, partition)

In [31]:
# calculate subgraph centralities
# Uses the networkx built-in; the result is looked up per node when the summary
# is generated.
# NOTE(review): sc_matrix() rescales eigenvalues before exponentiating, so these
# values are presumably not on the same scale as the relative subgraph
# centralities — confirm before comparing the two columns.
sc = nx.subgraph_centrality(g)

In [32]:
# calculate proximities to SOD1
# {node: shortest-path length between '4932.YJR104C' and the node}
prox = proximity_to_sod1(g)

In [33]:
# calculate relative subgraph centralities
# {node: entry of sc_matrix(g) pairing the node with '4932.YJR104C'}
rsc = relative_subgraph_centrality(g)

In [36]:
# build the CSV lines: one header followed by one row per node
header = "Node,Community,Proximity to SOD1,Relative subgraph centrality,Subgraph centrality,Participation coefficient"
row_template = "{},{},{},{},{},{}"
# per-node lookups, listed in the same order as the header columns
metric_columns = (partition, prox, rsc, sc, pc)
summary = [header]
summary.extend(
    row_template.format(node, *(column[node] for column in metric_columns))
    for node in g
)

In [37]:
# dump the summary lines to disk, newline-separated (no trailing newline)
csv_text = "\n".join(summary)
with open("summary.csv", "w") as out_file:
    out_file.write(csv_text)