In [None]:
import random
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from collections import namedtuple
from scipy.sparse import csr_array
import graphblas as gb
from graphblas import Matrix, Vector, Scalar
from graphblas import dtypes
from graphblas import unary, binary, monoid, semiring

# Louvain Community Detection

https://en.wikipedia.org/wiki/Louvain_modularity

The implementation below returns a list of progressively smaller graphs, much like zooming out on a map: houses collapse into neighborhoods, and neighborhoods collapse into cities.

In [None]:
class AdjMatrix:
    """
    Adjacency matrix of one Louvain level, bundled with the scratch GraphBLAS
    objects that the modularity computations reuse.

    Community mappings (``comms``) have one row per community and one column per
    node. They carry ``nn + 1`` rows: the extra row (index ``nn``) is a spare
    community used to isolate a node while its best community is being searched.
    """

    def __init__(self, adj):
        # Pre-define memory needed by GraphBLAS computations
        nn = adj.nrows
        nc = nn + 1  # comms should always be (n+1 x n)
        self.adj = adj
        self.nn = nn
        self.nc = nc
        # Sum of every stored value in adj (the "2m" term of the modularity formula)
        self.total_links_doubled = adj.reduce_scalar(monoid.plus, allow_empty=False).new()
        self.community_tmp = Matrix(adj.dtype, nrows=nc, ncols=nn)
        self.community = Matrix(adj.dtype, nrows=nc, ncols=nc)
        self.diag_mask = Matrix.from_coo(
            range(nc), range(nc), [True] * nc, nrows=nc, ncols=nc, dtype=dtypes.BOOL
        )
        self.diag_matrix = Matrix(adj.dtype, nrows=nc, ncols=nc)
        self.diag_vector = Vector(adj.dtype, size=nc)
        self.modularity = Vector(dtypes.FP64, size=nc)
        self.stored_community = Vector(dtypes.INT64, size=nc)
        # Index of the spare community row, and a vector selecting only that row
        self.beyond_last_index = nn
        self.beyond_last = Vector.from_coo([self.beyond_last_index], [1], size=nc)
        # Column sums of adj, one entry per node that has at least one stored edge
        self.ki_all = Vector(dtypes.FP64, size=nn)
        self.ki_all << adj.reduce_columnwise(monoid.plus)
        self.sigma_total = Vector(dtypes.FP64, size=nc)
        self.ki_in = Vector(dtypes.FP64, size=nc)
        self.max_modularity_delta = Scalar(dtypes.FP64)
        self.max_mask = Vector(dtypes.BOOL, size=nc)
        self._ident_comms = None

    def _get_comms_identity(self):
        # Build the identity matrix, assigning each node to its own community
        # (built lazily and cached; callers that mutate it must `.dup()` it first)
        if self._ident_comms is None:
            nn, nc = self.nn, self.nc
            self._ident_comms = Matrix.from_coo(range(nn), range(nn), [1] * nn, nrows=nc, ncols=nn)
        return self._ident_comms

    def compute_modularity(self, comms=None):
        """
        Given adjacency matrix (nxn) and community matrix (cxn), compute the modularity metric

        If `comms` is None, every node is treated as its own community.
        Returns a Scalar holding the modularity. Overwrites the scratch objects
        `community_tmp`, `community`, `diag_matrix`, `diag_vector` and `modularity`.
        """
        adj = self.adj
        if comms is None:
            comms = self._get_comms_identity()

        community = self.community
        modularity = self.modularity

        # community = comms * adj * comms.T  (community-by-community link weights)
        self.community_tmp << comms.mxm(adj)
        community << self.community_tmp.mxm(comms.T)
        # Keep only the diagonal: link weight internal to each community
        self.diag_matrix(self.diag_mask.S) << community
        self.diag_vector << self.diag_matrix.reduce_columnwise()  # Aij
        modularity << community.reduce_columnwise(binary.plus)  # ki
        modularity << modularity.ewise_mult(modularity, binary.times)  # ki^2
        modularity << modularity.apply(binary.truediv, right=self.total_links_doubled)  # ki^2/2m
        modularity << modularity.apply(binary.times, left=-1)  # -ki^2/2m
        modularity << self.diag_vector.ewise_add(modularity, monoid.plus)  # Aij - ki^2/2m
        modularity << modularity.apply(
            binary.truediv, right=self.total_links_doubled
        )  # (Aij - ki^2/2m) / 2m
        result = modularity.reduce(
            monoid.plus, allow_empty=False
        ).new()  # (1/2m)*sum(Aij - ki^2/2m)

        return result

    def find_best_community(self, node, comms):
        """
        Updates comms (community grouping) for node to reach max modularity
        Returns True if node changed community. False otherwise.

        `comms` is modified in place. When the node does not move, its original
        community assignment is restored before returning.
        """
        adj = self.adj
        nc = self.nc
        community = self.community
        sigma_total = self.sigma_total

        # A node without any stored edge has no entry in ki_all (its value reads as
        # None) and no neighboring community it could join, so leave comms untouched.
        ki = self.ki_all[node].value
        if ki is None:
            return False

        # Save current modularity score in current community
        self.stored_community << comms[:, node]
        current_community_index = self.stored_community.to_coo()[0][0]
        orig_modularity_score = self.compute_modularity(comms)

        # Move node to its own community
        comms[:, node] << self.beyond_last
        baseline_modularity_score = self.compute_modularity(comms)

        # Compute modularity improvements for each neighbor
        total_links_doubled = self.total_links_doubled.value
        self.community_tmp << comms.mxm(adj)
        community << self.community_tmp.mxm(comms.T)
        sigma_total << community.reduce_columnwise(monoid.plus)
        # Link weight from the isolated node to every other community (its own spare row is masked out)
        self.ki_in(~self.beyond_last.S, replace=True) << self.beyond_last.vxm(community)
        # Compute: delta = 2*ki_in/total_links_doubled - 2*sigma_total*ki/total_links_doubled^2
        delta = self.ki_in
        delta << delta.apply(binary.times, right=2 / total_links_doubled)
        sigma_total << sigma_total.apply(binary.times, right=-2 * ki / total_links_doubled**2)
        # ewise_mult keeps only communities the node is actually linked to
        delta << delta.ewise_mult(sigma_total, binary.plus)

        # Choose best neighbor
        max_modularity_delta = self.max_modularity_delta
        max_modularity_delta << delta.reduce(monoid.max, allow_empty=False)

        # If modularity is improved, update comms and return True
        if (
            max_modularity_delta.value
            > orig_modularity_score.value - baseline_modularity_score.value
        ):
            self.max_mask << delta.apply(binary.eq, right=max_modularity_delta)
            delta(self.max_mask.V, replace=True) << delta  # eliminate all but the max value(s)
            indexes, _ = delta.to_coo()
            best_community_index = indexes[0]
            # Guard against reassigning a node to its existing community
            if (
                best_community_index != current_community_index
                and best_community_index != self.beyond_last_index
            ):
                # `best_community_index` is a community (row) index, so place the node
                # directly in that row. Copying the column of the node that happens to
                # share this index would be wrong once that node has left the community.
                comms[:, node] << Vector.from_coo(
                    [best_community_index], [1], size=nc, dtype=comms.dtype
                )
                return True

        # If modularity isn't improved, reset and return False
        comms[:, node] << self.stored_community
        return False

    def optimize_communities(self, max_iter=20):
        """
        Returns a compact community mapping of size cxn for this adjacency matrix,
        where c is the number of communities and n is the number of nodes in the graph (i.e. adj is nxn)
        The community mapping is done by repeatedly iterating over the nodes one by one
        (in a freshly shuffled order on every pass) to find the best community
        (defined as maximizing the modularity).
        Once a full pass over the nodes yields no changes, this will return.
        If max_iter is reached, it will also return.
        """
        # Work on a copy so the cached identity mapping is never mutated
        comms = self._get_comms_identity().dup()
        for _ in range(max_iter):
            nodes = list(range(self.nn))
            random.shuffle(nodes)
            comms_modified = False
            for node in nodes:
                node_moved = self.find_best_community(node, comms)
                if node_moved:
                    comms_modified = True
            if not comms_modified:
                break

        # Compact comms: keep only the community rows that still contain a node
        rows, _, _ = comms.to_coo()
        nonzero_rows = sorted(set(rows))
        compact_comms = Matrix(comms.dtype, nrows=len(nonzero_rows), ncols=self.nn)
        compact_comms << comms[nonzero_rows, :]

        return compact_comms

In [None]:
LouvainResult = namedtuple("LouvainResult", ["adj", "cmap", "modscore"])


def louvain_levels(adj, max_iter=20):
    """
    Run Louvain passes on `adj`, collapsing communities into nodes after each pass.

    Returns a list of LouvainResult, one entry per pass, where
    - adj: adjacency matrix that the pass started from
    - cmap: community map matrix found by the pass (communities x nodes)
    - modscore: modularity of that pass's adjacency matrix with every node in its own community
    `max_iter` is forwarded to `AdjMatrix.optimize_communities` for every pass.
    The adjacency matrix is expected to shrink from one entry to the next while
    the modularity score is expected to grow.
    """
    levels = []
    current = AdjMatrix(adj)
    while True:
        score = current.compute_modularity()
        cmap = current.optimize_communities(max_iter)
        levels.append(LouvainResult(current.adj, cmap, score))

        n_comms = cmap.nrows
        if n_comms >= current.nn:
            # No communities were merged during this pass, so we are done
            return levels

        # Collapse each community into a single node: coarse = cmap * finer * cmap.T
        finer = current.adj
        half = Matrix(finer.dtype, nrows=n_comms, ncols=finer.nrows)
        half << cmap.mxm(finer)
        coarse = Matrix(finer.dtype, nrows=n_comms, ncols=n_comms)
        coarse << half.mxm(cmap.T)
        current = AdjMatrix(coarse)

## Example

- Nodes 0, 1, 3, 4 are fully connected
- Nodes 2, 5, 6 are fully connected
- There is a single connection between nodes 2 and 4 to connect the two groups
- All edges have a weight of 1

In [None]:
# 7x7 adjacency matrix for the graph described above (1 = edge with weight 1)
m = np.array(
    [
        [0, 1, 0, 1, 1, 0, 0],
        [1, 0, 0, 1, 1, 0, 0],
        [0, 0, 0, 0, 1, 1, 1],
        [1, 1, 0, 0, 1, 0, 0],
        [1, 1, 1, 1, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 1],
        [0, 0, 1, 0, 0, 1, 0],
    ]
)
# Zeros are passed as the missing value, so only the edges should end up stored
g = Matrix.from_dense(m, missing_value=0)

In [None]:
# Display the GraphBLAS adjacency matrix
g

In [None]:
# Visualize the graph using graphblas' drawing helper
gb.viz.draw(g)

In [None]:
# Common solution:
# - 2 levels with modscore=[-0.15, 0.355]
# The first score is always -0.15 for this graph: it is computed with every node in
# its own community, before any random node ordering is involved.
# NOTE(review): this cell previously also listed [-0.07, 0.346, 0.392] and
# [-0.07, 0.339, 0.375]; a first score of -0.07 cannot occur for this 7-node graph
# (those values appear to have been copied from the 16-node example).
l = louvain_levels(g)
l

In [None]:
# This is the community mapping of the original nodes found by the first pass
# (one column per node along the top, one row per community down the side)
l[0].cmap

In [None]:
# This is the new adjacency matrix after collapsing the first pass's communities into nodes
# (it is the input of the second pass)
l[1].adj

In [None]:
# Modularity recorded for the second pass: computed on the collapsed graph with each
# collapsed node as its own community
l[1].modscore

## Example

https://neo4j.com/docs/graph-algorithms/current/algorithms/louvain/#algorithms-louvain-examples-stream-intermediate

If the results don't match the website's clustering, try re-running.

In [None]:
# 15x15 adjacency matrix for this example (1 = edge)
m = np.array(
    [
        [0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1],
        [1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0],
        [0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    ]
)
# Zeros are passed as the missing value, so only the edges should end up stored
g = Matrix.from_dense(m, missing_value=0)

In [None]:
# Common solutions:
# - 2 levels with modscore=[-0.072, 0.3768]
# - 3 levels with modscore=[-0.072, 0.376, 0.3816]
# - 3 levels with modscore=[-0.072, 0.3296, 0.3784]
# (the first score is the same on every run: it is computed with each node in its own
# community; later scores depend on the random order in which nodes are visited)
l = louvain_levels(g)
l

In [None]:
# Community mapping of the original nodes found by the first pass (communities x nodes)
l[0].cmap

In [None]:
# Adjacency matrix after collapsing the first pass's communities into nodes
l[1].adj

In [None]:
# Community mapping found by the second pass; its columns are the collapsed nodes of l[1].adj
l[1].cmap

In [None]:
# Adjacency matrix that the final pass started from (the most collapsed graph)
l[-1].adj

In [None]:
# Modularity score recorded for the final pass
l[-1].modscore

## Example

Figure 1 from https://arxiv.org/pdf/0803.0476.pdf

If the results don't match the paper, try running it again. Because the algorithm is non-deterministic, sometimes the communities don't match the results in the paper. But often they will.

In [None]:
# 16x16 adjacency matrix for this example (1 = edge)
m = np.array(
    [
        [0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
        [0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0],
        [0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
    ]
)
# Zeros are passed as the missing value, so only the edges should end up stored
g = Matrix.from_dense(m, missing_value=0)

In [None]:
# Common solutions:
# - 3 levels with modscore=[-0.071, 0.346, 0.392]
# - 3 levels with modscore=[-0.071, 0.339, 0.375]
# (the first score is the same on every run: -224 / 56**2 = -0.0714, computed with
# each node in its own community)
l = louvain_levels(g)
l

In [None]:
# Adjacency matrix after collapsing the first pass's communities into nodes
l[1].adj

In [None]:
# Adjacency matrix that the final pass started from (the most collapsed graph)
l[-1].adj