In [1]:
import networkx as nx
from community import community_louvain as cm

In [2]:
def pc(graph, partition):
    """Return a dictionary with {node: participation_coefficient}.

    For a node ``v`` of degree ``k`` whose neighbors fall into communities
    with per-community counts ``k_s``, the participation coefficient is
    ``1 - sum(k_s**2) / k**2``.

    Parameters
    ----------
    graph : graph-like
        Object that can be iterated over its nodes and provides
        ``graph.neighbors(v)`` and ``graph.degree(v)``. Edge weights are
        not used.
    partition : dict
        Mapping ``{node: module}``. Module labels may be any hashable value;
        they do not have to be consecutive non-negative integers.

    Returns
    -------
    dict
        ``{node: participation_coefficient}`` for every node in ``graph``.
        A node with degree 0 has no links to distribute and is assigned 0.0.

    Raises
    ------
    KeyError
        If a neighbor of some node has no entry in ``partition``.
    """
    pc_dict = {}
    for v in graph:
        degree = graph.degree(v)
        if degree == 0:
            # Isolated node: avoid dividing by zero.
            pc_dict[v] = 0.0
            continue
        # Number of neighbors of v in each module, keyed by module label.
        k_is = {}
        for w in graph.neighbors(v):
            module = partition[w]
            k_is[module] = k_is.get(module, 0) + 1
        pc_dict[v] = 1 - sum(k**2 for k in k_is.values()) / degree**2
    return pc_dict

In [3]:
# read yeast protein interaction file
# Lines starting with "#" are skipped as comments and node labels are kept as
# strings; the result is bound to g, which every later cell works on.
g = nx.read_weighted_edgelist("4932.protein.links.v11.5.txt",comments="#",nodetype=str)

In [4]:
# thresholding: drop every edge whose score is at or below the cutoff
threshold_score = 750
# Collect the edges to drop first and remove them in a single call, so the
# graph is never modified while its edge view is being iterated. The score is
# read by its attribute name ("weight", as set by read_weighted_edgelist)
# rather than by position in the attribute dict.
low_score_edges = [
    (u, v)
    for u, v, score in g.edges(data="weight")
    if score <= threshold_score
]
g.remove_edges_from(low_score_edges)

In [5]:
# remove essential nodes (one node identifier per line of the file)
# The with-block closes the file even if reading fails part-way.
with open("essential_nodes.txt") as essential_file:
    essential_nodes = [line.strip() for line in essential_file]
# remove_nodes_from silently skips identifiers that are not in the graph,
# so no explicit membership check is needed.
g.remove_nodes_from(essential_nodes)

In [6]:
# select largest connected component
# max() picks it in a single pass; sorting every component is unnecessary.
largest_component = max(nx.connected_components(g), key=len)
# .copy() turns the subgraph view into an independent graph, so g no longer
# filters the full graph on every access.
g = g.subgraph(largest_component).copy()

In [7]:
# louvain partition
# The result is passed to pc() below, which expects a {node: module} dictionary.
# A fixed random_state is supplied — presumably so the communities are
# reproducible across runs (confirm against the python-louvain docs).
partition = cm.best_partition(g, random_state=284)

In [8]:
# calculate participation coefficients
# pc_dict maps each node of g to its participation coefficient under `partition`.
pc_dict = pc(g, partition)

In [9]:
# generate summary: a CSV header followed by one row per node of g
summary = ["Node,Community,Participation Coefficient"]
summary.extend(
    f"{n},{partition[n]},{pc_dict[n]}"
    for n in g
)

In [10]:
# write the summary rows to disk, newline-separated, without a trailing newline
with open("summary.csv", "w") as file:
    print(*summary, sep="\n", end="", file=file)

In [11]:
# print the one-line description of the final graph (node and edge counts)
print(g)

Graph with 4396 nodes and 44311 edges
