In [1]:
import os

import networkx as nx

# Path to the input GML graph. Set the LESMIS_GML environment variable to
# point at a different copy; the default is the original hard-coded location,
# so existing setups keep working unchanged.
DATA_FILENAME = os.environ.get('LESMIS_GML', '/home/sami/py-graph/data/lesmis.gml')

print("Loading graph data...")
# label='id' names the nodes after their GML `id` attribute.
U = nx.read_gml(DATA_FILENAME, label='id')
# Directed copy of the loaded graph; this is the graph the later cells use.
G = U.to_directed()

print("Nodes: {}".format(G.number_of_nodes()))
print("Edges: {}".format(G.number_of_edges()))

Loading graph data...
Nodes: 77
Edges: 508


In [2]:
# Load the Cython extension, which provides the %%cython cell magic used below.
%load_ext Cython
# Populate the interactive namespace from numpy and matplotlib, with inline figures.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
%%cython
import numpy as np
cimport cython
from shared import fixed_width_print

# Sentinel stored in the `partition` / `fixed` arrays for a node that has not
# been assigned to a partition / has not been pinned yet.
cdef int UNMAPPED = -1

def linear_deterministic_greedy(int[:,::] edges,
                                float[::] edge_weights,
                                float[::] node_weights,
                                int num_partitions,
                                int[::] partition,
                                int[::] fixed,
                                int _print):
    """
    One streaming pass of linear deterministic greedy partitioning.

    This algorithm favors a cluster if it has many neighbors of a node, but
    penalizes the cluster if it is close to capacity.

    The edge list is scanned once. Each run of consecutive rows that share the
    same left node is treated as that node's adjacency list: while the run is
    scanned, every already-mapped right endpoint adds its edge weight to the
    vote of its partition; when the run ends the left node is assigned to the
    partition maximising votes * (capacity - current size). Nodes that never
    appear as a left endpoint are not assigned by this function.

    edges: An [:,2] array of (left, right) node indices.
    edge_weights: Weight of each edge, indexed like the rows of `edges`.
    node_weights: Weight of each node; its length defines the number of nodes.
    num_partitions: How many partitions we are breaking the graph into.
    partition: The partition from a previous run (used for restreaming), or
        None to start with every node unmapped. Updated in place.
    fixed: Per-node flags, or None for "nothing pinned". A node whose entry is
        not UNMAPPED keeps its current partition and only contributes its
        weight to that partition's size.
    _print: Non-zero to print a trace of every decision.

    Returns: A tuple (partition, fixed) of numpy arrays.
    """
    cdef int num_nodes = len(node_weights)

    if partition is None:
        partition = np.repeat(np.int32(UNMAPPED), num_nodes)
    if fixed is None:
        fixed = np.repeat(np.int32(UNMAPPED), num_nodes)
    # Checked independently of `fixed`: a caller-supplied partition that is too
    # short is a problem whether or not `fixed` was supplied.
    if len(partition) < num_nodes:
        print("THIS IS BAD AND MAY BREAK")
        print("len(partitions) = {}, num_nodes = {}".format(len(partition),num_nodes))

    # Accumulated node weight per partition during this pass.
    cdef float[::] partition_sizes = np.zeros(num_partitions, dtype=np.float32)

    # Accumulated edge weight towards each partition for the current left node.
    cdef float[::] partition_votes = np.zeros(num_partitions, dtype=np.float32)

    # Fine to be a little off, to stay integers
    cdef float partition_capacity = num_nodes / num_partitions

    cdef int len_edges = len(edges)
    cdef int last_left = 0
    cdef int i = 0
    cdef int left = 0
    cdef int right = 0
    cdef int arg = 0
    cdef int max_arg = 0
    cdef float max_val = 0
    cdef float val = 0

    # Nothing to stream: return the inputs untouched instead of failing on
    # edges[0,0] below.
    if len_edges == 0:
        return (np.asarray(partition), np.asarray(fixed))

    last_left = edges[0,0]

    for i in range(len_edges):
        left = edges[i,0]
        right = edges[i,1]

        if last_left != left:
            # The run of edges for last_left has ended; settle that node
            # before starting to collect votes for `left`.
            if fixed[last_left] != UNMAPPED:
                if _print:
                    print("Skipping node {}".format(last_left))
                partition_votes[:] = 0
                # A pinned node stays where it is but still occupies space.
                # NOTE(review): assumes a pinned node already holds a valid
                # partition index (not UNMAPPED) - confirm with callers.
                partition_sizes[partition[last_left]] += node_weights[last_left]
                last_left = left

            else:
                # max_arg: best partition so far for last_left
                # max_val: best score so far, i.e. votes x remaining capacity
                # partition_votes: edge weight from last_left into each partition

                if _print:
                    print("Assigning node {}".format(last_left))

                max_arg = 0
                max_val = (partition_votes[0]) * (
                           partition_capacity - partition_sizes[0])
                if _print:
                    print("\tP{} = {} x ({} - {}) = {}".format(0,
                                                               partition_votes[0],
                                                               partition_capacity,
                                                               partition_sizes[0],
                                                               max_val))

                for arg in range(1, num_partitions):
                    val = (partition_votes[arg]) * (
                           partition_capacity - partition_sizes[arg])

                    if _print:
                        print("\tP{} = {} x ({} - {}) = {}".format(arg,
                                                                   partition_votes[arg],
                                                                   partition_capacity,
                                                                   partition_sizes[arg],
                                                                   val))
                    if val > max_val:
                        max_arg = arg
                        max_val = val

                if max_val == 0:
                    # No neighbors (or multiple maxed out): pick the first
                    # partition with spare capacity, starting the search at
                    # i % num_partitions and wrapping around. If none has
                    # room, the index left in `arg` by the scan above is used.
                    max_arg = arg
                    for arg in range(i % num_partitions, num_partitions):
                        if partition_sizes[arg] < partition_capacity:
                            max_arg = arg
                            max_val = 1
                            break
                    if max_val == 0:
                        for arg in range(0, i % num_partitions):
                            if partition_sizes[arg] < partition_capacity:
                                max_arg = arg
                                break

                # Account for the node being assigned (last_left), not the
                # node whose edges are about to be read (left).
                partition_sizes[max_arg] += node_weights[last_left]
                partition[last_left] = max_arg
                partition_votes[:] = 0
                last_left = left

                if _print:
                    fixed_width_print(np.asarray(partition))
                    fixed_width_print(np.asarray(fixed))

        # Vote for the partition of the right endpoint, if it has one.
        if partition[right] != UNMAPPED:
            partition_votes[partition[right]] += edge_weights[i]

    # Clean up the last assignment: the final left node never triggers the
    # "new node" branch inside the loop.
    if _print:
        print("last one")

    if fixed[left] == UNMAPPED:
        # NOTE(review): unlike the in-loop assignment there is no fallback
        # here - with no positive score the node goes to partition 0 even if
        # that partition is full. Left unchanged to preserve results.
        max_arg = 0
        max_val = 0
        for arg in range(0, num_partitions):
            if partition_sizes[arg] < partition_capacity:
                val = (partition_votes[arg]) * (
                        1 - partition_sizes[arg] / partition_capacity)

                if _print:
                    print("\tP{} = {} x (1 - {} / {}) = {}".format(arg,
                                                                   partition_votes[arg],
                                                                   partition_sizes[arg],
                                                                   partition_capacity,
                                                                   val))

                if val > max_val:
                    max_arg = arg
                    max_val = val

        partition[left] = max_arg

    return (np.asarray(partition), np.asarray(fixed))

In [4]:
# Imported explicitly: these names otherwise reach the notebook namespace only
# as a side effect of the %pylab and %%cython magics in earlier cells.
import numpy as np
from shared import fixed_width_print

NUM_PARTITIONS = 4        # number of partitions the nodes are split across
RESTREAM_ITERATIONS = 15  # restreaming passes run after each arrival

# Materialise the edges as a list first so numpy always receives a sequence of
# (left, right) pairs; reshape keeps the array two-dimensional even when the
# graph has no edges.
edges = np.array(list(G.edges()), dtype=np.int32).reshape(-1, 2)
edge_weights = np.array([attrs['weight'] for u, v, attrs in G.edges(data=True)], dtype=np.float32)
node_weights = np.array([attrs['weight'] for node, attrs in G.nodes(data=True)], dtype=np.float32)

# run first pass - this is our initial seed
assignments, fixed = linear_deterministic_greedy(edges, edge_weights, node_weights,
                                                 NUM_PARTITIONS, None, None, 0)

print("Initial assignment seed:")
fixed_width_print(assignments)
fixed_width_print(fixed)
print("\n")

# Order of people arriving: node ids in ascending order. Replace or shuffle
# this list to experiment with a different arrival order.
arrivals = list(range(0, G.number_of_nodes()))

print("People start arriving:")
for a in arrivals:
    # Pin the arriving node to its current partition...
    fixed[a] = 1

    # ...then restream the assignments of everything that is not pinned yet.
    for i in range(RESTREAM_ITERATIONS):
        assignments, fixed = linear_deterministic_greedy(edges, edge_weights, node_weights,
                                                         NUM_PARTITIONS, assignments, fixed, 0)

print("Final assignments:")
fixed_width_print(assignments)
fixed_width_print(fixed)

print("Partitions:")
for p in range(NUM_PARTITIONS):
    # Number of nodes that ended up in partition p.
    print(np.count_nonzero(assignments == p))

# Store each node's partition as a node attribute before writing the graph.
# NOTE(review): assumes node ids are 0..N-1 and line up with positions in
# `assignments` - confirm for other input graphs.
for i in range(0, len(assignments)):
    G.add_node(i, partition=str(assignments[i]))

nx.write_gml(G, "test.gml")

Initial assignment seed:
[ 2  2  2  2  2  2  2  2  2  2  0  2  2  2  2  2  2  2  2  2  2  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  3  0  0  3  0  3  3  3  3  3  3  3  3  3  3  3  3  0  0  0  0  0  0  0  0  0  0  0  3  3  0  3  3  3  0  0  0]
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]


People start arriving:
Final assignments:
[ 2  2  2  2  2  2  2  2  2  2  3  3  1  3  3  3  1  1  1  1  1  1  1  1  0  0  0  0  3  3  3  3  3  0  3  3  3  3  3  0  0  0  0  0  3  3  1  1  1  0  0  0  0  0  0  1  1  1  1  1  0  1  0  1  3  0  3  2  2  2  2  2  2  2  2  2  0]
[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1]