In [1]:
import networkx as nx

# Absolute path of the input graph, stored in GML format.
DATA_FILENAME = '/home/sami/py-graph/data/lesmis.gml'

print("Loading graph data...")

# Nodes are keyed by their GML 'id' attribute. U is the graph as read from
# the file; G is the directed version used by the rest of the notebook.
U = nx.read_gml(DATA_FILENAME, label='id')
G = U.to_directed()

# Report the size of the directed graph.
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print("Nodes: {}".format(node_count))
print("Edges: {}".format(edge_count))

Loading graph data...
Nodes: 77
Edges: 508


In [2]:
%load_ext Cython
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
%%cython
import numpy as np
from shared import fixed_width_print

cdef int UNMAPPED = -1

cdef int _assign_node(int node,
                      int[::] partition,
                      float[::] partition_sizes,
                      float[::] partition_votes,
                      float[::] node_weights,
                      int num_partitions,
                      float alpha,
                      int debug) except -1:
    """
    Score every partition for `node`, place the node in the best one and
    return that partition's index.

    The score of partition p is `partition_votes[p] - alpha * partition_sizes[p]`.
    If the node currently sits in p, its own weight is discounted from the
    size penalty so that staying put is judged as if the node had first been
    removed. `partition` and `partition_sizes` are updated in place.
    """
    cdef int previous_assignment = partition[node]
    cdef int arg = 0
    cdef int max_arg = 0
    cdef float max_val = 0
    cdef float val = 0

    for arg in range(num_partitions):
        val = partition_votes[arg] - alpha * partition_sizes[arg]

        if debug:
            print("\tP{} = {} - {} x {} = {}".format(arg,
                                                     partition_votes[arg],
                                                     alpha,
                                                     partition_sizes[arg],
                                                     val))

        if previous_assignment == arg:
            # partition_sizes[arg] still includes this node's weight; give
            # that share of the penalty back before comparing.
            val += alpha * node_weights[node]

        # Partition 0 seeds the running maximum, so a negative best score is
        # still compared correctly against the others.
        if arg == 0 or val > max_val:
            max_arg = arg
            max_val = val

    if max_arg != previous_assignment:
        partition[node] = max_arg
        partition_sizes[max_arg] += node_weights[node]
        if previous_assignment != UNMAPPED:
            partition_sizes[previous_assignment] -= node_weights[node]

    return max_arg


def fennel(int[:,::] edges,
           float[::] edge_weights,
           float[::] node_weights,
           int num_partitions,
           int[::] partition,
           int[::] fixed,
           float alpha,
           int debug):
    """
    This algorithm favors a cluster if it has many neighbors of a node, but
    penalizes the cluster if it is close to capacity.

    A node is (re)assigned once all of its outgoing edges have been read, so
    the rows of `edges` must be grouped by their first column. Nodes that
    never appear in the first column of `edges` are left untouched.

    edges: An [:,2] array of (left, right) node index pairs.
    edge_weights: A 1-D array of edge weights. Length should match number of edges.
    node_weights: A 1-D array of node weights. Length should match number of nodes.
    num_partitions: How many partitions we are breaking the graph into.
    partition: A previous partition of the nodes, or None to start from
        scratch. Entries of -1 mark nodes that have not been assigned.
    fixed: An array to denote which nodes in the partition have been locked
        in place, or None if no node is locked. Any value other than -1
        locks the node.
    alpha: Weight of the partition-size penalty in the score
        `votes - alpha * partition size`.
    debug: Prints helpful debug information.

    Returns: A tuple (partition, fixed) of numpy arrays. When `partition` is
    supplied by the caller it is updated in place.
    """

    cdef int num_nodes = len(node_weights)
    cdef int len_edges = len(edges)
    cdef float[::] partition_sizes = None
    cdef float[::] partition_votes = np.zeros(num_partitions, dtype=np.float32)
    cdef int last_left = 0
    cdef int i = 0
    cdef int left = 0
    cdef int right = 0

    # The output partition
    if partition is None:
        partition = np.repeat(np.int32(UNMAPPED), num_nodes)
        partition_sizes = np.zeros(num_partitions, dtype=np.float32)
    else:
        # np.bincount rejects negative values, so unassigned (-1) nodes are
        # masked out before the per-partition weights are summed.
        assigned = np.asarray(partition) != UNMAPPED
        partition_sizes = np.bincount(np.asarray(partition)[assigned],
                                      weights=np.asarray(node_weights)[assigned],
                                      minlength=num_partitions).astype(np.float32)

    if fixed is None:
        fixed = np.repeat(np.int32(UNMAPPED), num_nodes)

    # Without edges there is nothing to vote on.
    if len_edges == 0:
        return (np.asarray(partition), np.asarray(fixed))

    last_left = edges[0, 0]

    for i in range(len_edges):
        left = edges[i, 0]
        right = edges[i, 1]

        if last_left != left:
            # New left node: every edge of last_left has been seen.
            if fixed[last_left] != UNMAPPED:
                if debug:
                    print("Skipping node {}".format(last_left))
            else:
                if debug:
                    print("Assigning node {}".format(last_left))
                    print("\tPn = Votes - Alpha x Size")

                _assign_node(last_left, partition, partition_sizes,
                             partition_votes, node_weights,
                             num_partitions, alpha, debug)

                if debug:
                    print("\tassigned to P{}".format(partition[last_left]))
                    fixed_width_print(np.asarray(partition))
                    fixed_width_print(np.asarray(fixed))

            partition_votes[:] = 0
            last_left = left

        # An already-placed neighbour votes for its own partition.
        if partition[right] != UNMAPPED:
            partition_votes[partition[right]] += edge_weights[i]

    # Clean up the last assignment: the loop only assigns a node when the
    # next left node shows up, so the final one is still pending.
    if fixed[left] == UNMAPPED:
        if debug:
            print("Assigning last node {}".format(left))

        _assign_node(left, partition, partition_sizes,
                     partition_votes, node_weights,
                     num_partitions, alpha, debug)

        if debug:
            print("\tassigned to P{}".format(partition[left]))

    return (np.asarray(partition), np.asarray(fixed))

In [4]:
import shared

# the number of iterations for the prediction model
num_iterations = 1
num_partitions = 4

# list() keeps the array construction independent of whether G.edges()
# returns a list or a view object.
edges = np.array(list(G.edges()), dtype=np.int32)
edge_weights = np.array([x[2]['weight'] for x in G.edges(data=True)], dtype=np.float32)
node_weights = np.array([x[1]['weight'] for x in G.nodes(data=True)], dtype=np.float32)

# Order of people arriving
arrivals = list(range(0, G.number_of_nodes()))
#random.shuffle(arrivals)

# Values for alpha
#alphas = np.linspace(1.251608191745264e-07, 7.588951557309824e-05, len(arrivals) + num_iterations)
#alphas = [0.342722212852] * (len(arrivals) + num_iterations)
#alphas = np.linspace(0.342722212852e-07, 19.25, len(arrivals) + num_iterations)
#alphas = [0.5] * (len(arrivals) + num_iterations)
alphas = np.linspace(0.342722212852e-07, 0.5, len(arrivals) + num_iterations)

# run first pass - this is our prediction model
assignments = None
fixed = None
print("PREDICTION MODEL")
print("----------------\n")
print("WASTE\t\t\tCUT RATIO\t\tMISMATCH")
for i in range(num_iterations):
    alpha = alphas[i]
    assignments, fixed = fennel(edges, edge_weights, node_weights, num_partitions, assignments, fixed, alpha, 0)

    x = shared.score(assignments, edges)
    print("{}\t{}\t{}".format(x[0], x[1], x[2]))

print("\nAssignments:")
# Call through the `shared` module imported in this cell rather than relying
# on a name leaked into the notebook namespace by the %%cython cell.
shared.fixed_width_print(assignments)
shared.fixed_width_print(fixed)

shared.print_partitions(assignments, num_partitions, node_weights)

print("\n")
print("Re-streaming as nodes arrive")
print("----------------------------\n")
print("WASTE\t\t\tCUT RATIO\t\tMISMATCH")
for a in arrivals:
    # Lock the arriving node in its current partition.
    fixed[a] = 1

    # restream non-fixed assignments
    # NOTE(review): alphas holds len(arrivals) + num_iterations values, but it
    # is indexed by node id here, so the schedule restarts at alphas[0] and
    # would not follow arrival order if `arrivals` were shuffled. Possibly
    # alphas[num_iterations + step] was intended - confirm before changing.
    alpha = alphas[a]
    assignments, fixed = fennel(edges, edge_weights, node_weights, num_partitions, assignments, fixed, alpha, 0)

    x = shared.score(assignments, edges)
    print("{}\t{}\t{}".format(x[0], x[1], x[2]))

# remove nodes not fixed
assignments[fixed == -1] = -1
print("\nAssignments:")
shared.fixed_width_print(assignments)
shared.fixed_width_print(fixed)

shared.print_partitions(assignments, num_partitions, node_weights)

# Store each node's partition as a string attribute so it is written to the GML file.
for i in range(0, len(assignments)):
    G.add_nodes_from([i], partition=str(assignments[i]))

nx.write_gml(G, "test.gml")

PREDICTION MODEL
----------------

WASTE			CUT RATIO		MISMATCH
1.5454545454545454	0.14566929133858267	74

Assignments:
[ 0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  0  0  2  2  2  2  2  2  2  2  2  2  0  2  3  3  2  0  0  0  0  0  0  2  0  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2]
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]

Partitions - nodes (weight):
P0: 25.0 (24.899999618530273)
P1: 1.0 (1.0)
P2: 49.0 (49.0)
P3: 2.0 (2.0)


Re-streaming as nodes arrive
----------------------------

WASTE			CUT RATIO		MISMATCH
1.9610389610389611	0.06299212598425197	32
2.012987012987013	0.05905511811023622	30
2.012987012987013	0.05905511811023622	30
1.8051948051948052	0.07480314960629922	38
1.5974025974025974	0.09055118110236221	46
1.545454545454