In [6]:
import sys
from collections import defaultdict

class MCODE():
    def __init__(self, filename, weight_threshold=0.2):
        self.weight_threshold = 1 - weight_threshold
        self.filename = filename
        self.clusters = []
        
    def cluster(self):
        edges = defaultdict(set)

        # Citanje mreze
        with open(self.filename, 'r') as f:
            for line in f:
                pair = line.split()
                a = pair[0]
                b = pair[2]
                edges[a].add(b)
                edges[b].add(a)
        print ('## Input graph loaded; %i nodes' % (len(edges),))

        # Mreza klastera
        clusters = []

        # Faza 1: Odredjivanje tezina cvorova
        print ('## Weighting vertices...')
        weights = dict((v, 1.) for v in edges)
        for i, v in enumerate(edges):
            neighborhood = set((v,)) | edges[v]
            # Ako cvor ima samo jednog suseda, vec znamo sve sto treba da znamo
            if len(neighborhood) <= 2:
                continue

            # Provera da li veci k-core postoji
            k = 1  # Najveci validni k-core
            while neighborhood:
                k_core = neighborhood.copy()
                invalid_nodes = True
                while invalid_nodes and neighborhood:
                    invalid_nodes = set(n for n in neighborhood if len(
                        edges[n] & neighborhood) <= k)
                    neighborhood -= invalid_nodes
                k += 1  # na kraju, k ce biti za jedan veci od onoga sto zelimo

            # tezina cvora = k-core broj * gustina od k-core
            weights[v] = (k - 1) * (sum(len(edges[n] & k_core)
                                        for n in k_core) / (2. * len(k_core)**2))

        # Faza 2: Predikcija Molekularnih Kompleksa
        print('## Molecular complex prediction...')
        unvisited = set(edges)
        num_clusters = 0

        for seed in sorted(weights, key=weights.get, reverse=True):
            if seed not in unvisited:
                continue

            cluster, frontier = set((seed,)), set((seed,))
            w = weights[seed] * self.weight_threshold
            while frontier:
                cluster.update(frontier)
                unvisited -= frontier
                frontier = set(n for n in set.union(
                    *(edges[n] for n in frontier)) & unvisited if weights[n] > w)

            # Odsecanje: samo sacuvati 2-core komplekse
            invalid_nodes = True
            while invalid_nodes and cluster:
                invalid_nodes = set(
                    n for n in cluster if len(edges[n] & cluster) < 2)
                cluster -= invalid_nodes

            if cluster:
                print (' '.join(cluster))
                num_clusters += 1
                print (num_clusters, len(cluster), seed)
                clusters.append(cluster)

        self.clusters = clusters

    def save_clusters(self, file):
        with open(file, 'w') as fh:
            for c in self.clusters:
                fh.write(' '.join(c) + "\n")
                


In [7]:
mcode = MCODE("files/original_network.sif")
mcode.cluster()
mcode.save_clusters("files/modules_from_original_network.txt")

## Input graph loaded; 6008 nodes
## Weighting vertices...
## Molecular complex prediction...
RL7A_HUMAN RL5_HUMAN RS26_HUMAN RS13_HUMAN NR2C2_HUMAN RL12_HUMAN RS23_HUMAN RL23A_HUMAN CUL3_HUMAN RSSA_HUMAN RS14_HUMAN RLA1_HUMAN RS6_HUMAN RS3A_HUMAN RL7_HUMAN RL4_HUMAN PRKN_HUMAN UBL4A_HUMAN FACD2_HUMAN RS9_HUMAN RS16_HUMAN RL21_HUMAN RL27_HUMAN RL15_HUMAN RS5_HUMAN FINC_HUMAN RL14_HUMAN BIRC3_HUMAN RL9_HUMAN RS27_HUMAN HMGB1_HUMAN CSN5_HUMAN FBXW7_HUMAN RS25_HUMAN RL31_HUMAN AAR2_HUMAN NOP56_HUMAN RS21_HUMAN RL36_HUMAN RC3H1_HUMAN RL38_HUMAN FZR1_HUMAN RS28_HUMAN RL10A_HUMAN RS2_HUMAN TIP_HUMAN RL37A_HUMAN WWP2_HUMAN RL6_HUMAN RL11_HUMAN RL30_HUMAN RL32_HUMAN HEXI1_HUMAN BTF3_HUMAN KIF23_HUMAN PP1G_HUMAN MEPCE_HUMAN RS15_HUMAN RL24_HUMAN PIHD1_HUMAN PAN2_HUMAN RL17_HUMAN KIF14_HUMAN RS19_HUMAN RL13A_HUMAN RL10_HUMAN RS3_HUMAN RLA0_HUMAN ECT2_HUMAN CUL1_HUMAN RS18_HUMAN RL18A_HUMAN RL35A_HUMAN CTRO_HUMAN RS8_HUMAN RL27A_HUMAN PRC1_HUMAN RL19_HUMAN UFL1_HUMAN RL23_HUMAN TRI25_HUMAN RL35_H