# In [1]:
import sys
from collections import defaultdict

class MCODE:
    """MCODE (Molecular Complex Detection) clustering of an undirected graph.

    The graph is read from a whitespace-separated edge list file. Calling
    ``cluster()`` fills ``self.clusters`` with a list of sets of node names,
    and ``save_clusters()`` writes them out one cluster per line.
    """

    def __init__(self, filename, weight_threshold=0.2):
        """Store the run configuration; the input file is not read here.

        Args:
            filename: Path of the edge list file. Each non-blank line holds
                two whitespace-separated node names; extra columns are
                ignored.
            weight_threshold: Fraction by which a node's weight may fall
                below the seed's weight while still joining the seed's
                cluster. It is stored as ``1 - weight_threshold`` so it can
                be used directly as a multiplier on the seed weight.
        """
        self.weight_threshold = 1 - weight_threshold
        self.filename = filename
        self.clusters = []

    def cluster(self):
        """Run both MCODE stages and store the result in ``self.clusters``.

        Progress messages and every predicted cluster are printed to
        standard output. ``self.clusters`` is only replaced once the whole
        run has succeeded.

        Raises:
            ValueError: If a non-blank line of the edge list has fewer than
                two fields.
        """
        edges = self._read_edges()
        print('## Input graph loaded; %i nodes' % (len(edges),))

        # Stage 1: Vertex Weighting
        print('## Weighting vertices...')
        weights = self._weight_vertices(edges)

        # Stage 2: Molecular Complex Prediction
        print('## Molecular complex prediction...')
        self.clusters = self._predict_complexes(edges, weights)

    def _read_edges(self):
        """Load the edge list into a symmetric node -> neighbours map."""
        edges = defaultdict(set)
        with open(self.filename, 'r') as f:
            for lineno, line in enumerate(f, 1):
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no edge.
                    continue
                if len(fields) < 2:
                    raise ValueError(
                        '%s:%i: expected two node names, got %r'
                        % (self.filename, lineno, line.rstrip('\n')))
                a, b = fields[:2]
                edges[a].add(b)
                edges[b].add(a)
        return edges

    @staticmethod
    def _weight_vertices(edges):
        """Return a node -> weight map (highest k-core number * its density)."""
        weights = dict((v, 1.) for v in edges)
        for v in edges:
            neighborhood = set((v,)) | edges[v]
            # A node with at most one neighbour keeps the default weight.
            # NOTE(review): that default (1.0) is higher than the weight of
            # a node whose neighbourhood is a bare triangle (2/3) -- confirm
            # this ordering is intended.
            if len(neighborhood) <= 2:
                continue

            # Peel the neighbourhood to ever higher k-cores until nothing
            # is left; k_core always holds the last non-empty core.
            k = 1  # highest valid k-core
            while neighborhood:
                k_core = neighborhood.copy()
                invalid_nodes = True
                while invalid_nodes and neighborhood:
                    invalid_nodes = set(
                        n for n in neighborhood
                        if len(edges[n] & neighborhood) <= k)
                    neighborhood -= invalid_nodes
                k += 1  # on exit, k will be one greater than we want

            # Density here is (sum of in-core degrees) / (2 * size**2).
            degree_sum = sum(len(edges[n] & k_core) for n in k_core)
            weights[v] = (k - 1) * (degree_sum / (2. * len(k_core) ** 2))
        return weights

    def _predict_complexes(self, edges, weights):
        """Grow a cluster from each unvisited seed, heaviest seed first."""
        clusters = []
        unvisited = set(edges)
        num_clusters = 0

        for seed in sorted(weights, key=weights.get, reverse=True):
            if seed not in unvisited:
                continue

            members, frontier = set((seed,)), set((seed,))
            # Neighbours must weigh strictly more than this to be absorbed.
            w = weights[seed] * self.weight_threshold
            while frontier:
                members.update(frontier)
                unvisited -= frontier
                reachable = set.union(*(edges[n] for n in frontier))
                frontier = set(
                    n for n in reachable & unvisited if weights[n] > w)

            # Haircut: only keep 2-core complexes. Nodes trimmed here stay
            # visited and are never used as seeds afterwards.
            invalid_nodes = True
            while invalid_nodes and members:
                invalid_nodes = set(
                    n for n in members if len(edges[n] & members) < 2)
                members -= invalid_nodes

            if members:
                # Sorted so the printed line is reproducible between runs.
                print(' '.join(sorted(members)))
                num_clusters += 1
                print(num_clusters, len(members), seed)
                clusters.append(members)

        return clusters

    def save_clusters(self, filehandle):
        """Write the clusters, one space-separated cluster per line.

        Args:
            filehandle: Either a path to open for writing, or an already
                open object with a ``write`` method. An object passed in is
                written to but not closed; that is left to the caller.
        """
        if hasattr(filehandle, 'write'):
            self._write_clusters(filehandle)
        else:
            with open(filehandle, 'w') as fh:
                self._write_clusters(fh)

    def _write_clusters(self, fh):
        """Emit every cluster to *fh* with its members in sorted order."""
        for c in self.clusters:
            fh.write(' '.join(sorted(c)) + "\n")
                
