In [1]:
import numpy as np
import pandas as pd
import sklearn.preprocessing
import markov_clustering as mc
np.set_printoptions(precision=1,suppress=True)

In [2]:
mail = pd.read_csv('mails.csv')

source_set = set(mail['source'])
target_set = set(mail['target'])
num_people = len(source_set.union(target_set))

G = np.zeros((num_people,num_people))
G_no_weights = np.zeros((num_people,num_people))
G_no_weights = G.copy()
for i, m in mail.iterrows():
    G[m.source,m.target] = m.weight
    G[m.target,m.source] = m.weight
    G_no_weights[m.source,m.target] = 1
    G_no_weights[m.target,m.source] = 1

In [3]:
class MCL:
    def __init__(self, G, e, r, loop_value=1, verbose=False):
        self.T = G
        np.fill_diagonal(self.T, loop_value)
        self.e = e
        self.r = r
        self.verbose = verbose
        
    def normalize(self):
        self.T = sklearn.preprocessing.normalize(self.T, norm='l1', axis=0)
        
    def inflate(self):
        self.T = np.power(self.T, self.r)
    
    def expand(self):
        self.T = np.linalg.matrix_power(self.T, self.e)
    
    def converged(self):
        return np.allclose(self.T, self.G)
        
    def run(self):
        if self.verbose:
            print("="*30)
            print("Inflation Factor:", self.r)
            print("Expansion Factor:", self.e)
            print("Input Graph:")
            print(self.T)
            print("="*30)
        
        self.normalize()
        self.G = self.T.copy()
        while True:
            self.expand()
            self.inflate()
            self.normalize()
            if self.converged():
                break
            self.G = self.T.copy()
    
    def get_clusters(self):
        clusters = set()
        for row in set(self.T.nonzero()[0]):
            clusters.add(tuple(self.T[row].nonzero()[0]))
        return clusters
        
    def get_matrix(self):
        return self.T
    
    def to_json(self):
        export = []
        for row in range(self.T.shape[0]):
            for column in range(self.T.shape[1]):
                if self.T[row, column] > 1e-4:
                    export.append([row, column, self.T[row, column]])
        export = pd.DataFrame(export,columns=['source', 'target', 'weight'])
        return export.to_json(orient='records')
        

In [4]:
M = MCL(G, 2, 3.1) # optimal modularity
M.run()
print(M.get_clusters())
print(M.to_json())

{(4, 9, 10, 11, 12, 13, 14), (3, 6, 7, 8), (0, 1, 2), (5, 15, 17), (15, 16)}
[{"source":2,"target":0,"weight":1.0},{"source":2,"target":1,"weight":1.0},{"source":2,"target":2,"weight":1.0},{"source":3,"target":3,"weight":1.0},{"source":3,"target":6,"weight":1.0},{"source":3,"target":7,"weight":1.0},{"source":3,"target":8,"weight":1.0},{"source":4,"target":4,"weight":1.0},{"source":4,"target":9,"weight":1.0},{"source":4,"target":10,"weight":1.0},{"source":4,"target":11,"weight":1.0},{"source":4,"target":12,"weight":1.0},{"source":4,"target":13,"weight":1.0},{"source":4,"target":14,"weight":1.0},{"source":5,"target":5,"weight":1.0},{"source":5,"target":17,"weight":1.0},{"source":16,"target":15,"weight":1.0},{"source":16,"target":16,"weight":1.0}]


In [5]:
M = MCL(G_no_weights, 2, 5.0) # optimal modularity
M.run()
print(M.get_clusters())
print(M.to_json())

{(3, 6, 7, 8), (0, 1, 2), (4, 12), (5, 15, 16, 17), (9, 10, 11, 12, 13, 14)}
[{"source":2,"target":0,"weight":1.0},{"source":2,"target":1,"weight":1.0},{"source":2,"target":2,"weight":1.0},{"source":4,"target":4,"weight":1.0},{"source":7,"target":3,"weight":1.0},{"source":7,"target":6,"weight":1.0},{"source":7,"target":7,"weight":1.0},{"source":7,"target":8,"weight":1.0},{"source":9,"target":9,"weight":0.5},{"source":9,"target":10,"weight":0.5},{"source":9,"target":11,"weight":0.5},{"source":9,"target":12,"weight":0.5},{"source":9,"target":13,"weight":0.5},{"source":9,"target":14,"weight":0.5},{"source":10,"target":9,"weight":0.5},{"source":10,"target":10,"weight":0.5},{"source":10,"target":11,"weight":0.5},{"source":10,"target":12,"weight":0.5},{"source":10,"target":13,"weight":0.5},{"source":10,"target":14,"weight":0.5},{"source":16,"target":5,"weight":1.0},{"source":16,"target":15,"weight":1.0},{"source":16,"target":16,"weight":1.0},{"source":16,"target":17,"weight":1.0}]
