In [1]:
import numpy as np
import networkx as nx

# NOTE: machine-specific absolute path; point this at your local copy of the data.
DATA_FILENAME = '/home/sami/py-graph/data/oneshot_fennel_weights.gml'

# True  -> read the 'weight' attribute stored on every edge and node of the GML file.
# False -> give every edge and every node a unit weight.
USE_GML_WEIGHTS = False

print("Loading graph data...")
U = nx.read_gml(DATA_FILENAME, label='id')
G = U.to_directed()

print("Nodes: {}".format(G.number_of_nodes()))
print("Edges: {}".format(G.number_of_edges()))

# Materialise the edges as a plain list before handing them to NumPy so the
# conversion does not depend on how the networkx edge container is coerced.
edges = np.array(list(G.edges()), dtype=np.int32)

if USE_GML_WEIGHTS:
    edge_weights = np.array([attrs['weight'] for _, _, attrs in G.edges(data=True)], dtype=np.float32)
    node_weights = np.array([attrs['weight'] for _, attrs in G.nodes(data=True)], dtype=np.float32)
else:
    # One unit weight per edge / per node.
    edge_weights = np.ones(G.number_of_edges(), dtype=np.float32)
    node_weights = np.ones(G.number_of_nodes(), dtype=np.float32)

Loading graph data...
Nodes: 1000
Edges: 5878


In [2]:
# Enable the %%cython cell magic used by the compiled cells below.
%load_ext Cython
# Inline plotting; as the cell output reports, this also populates the
# interactive namespace from numpy and matplotlib.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
%%cython
import numpy as np
from shared import fixed_width_print

cdef int UNMAPPED = -1

def _fennel_best_partition(float[::] partition_votes,
                           float[::] partition_sizes,
                           int num_partitions,
                           int previous_assignment,
                           float node_weight,
                           float alpha,
                           int debug):
    """
    Return the index of the partition with the highest score, where
    score = votes - alpha * size.

    The node being placed is treated as already removed from its current
    partition: the score of `previous_assignment` is raised by
    alpha * node_weight, i.e. as if that partition were smaller by the node's
    weight. Ties keep the lowest partition index.
    """
    cdef int arg = 0
    cdef int best_arg = 0
    cdef float best_val = 0
    cdef float val = 0

    for arg in range(num_partitions):
        val = partition_votes[arg] - alpha * partition_sizes[arg]

        if debug:
            print("\tP{} = {} - {} x {} = {}".format(arg,
                                                     partition_votes[arg],
                                                     alpha,
                                                     partition_sizes[arg],
                                                     val))

        if arg == previous_assignment:
            val += alpha * node_weight

        # Seed with partition 0, then only replace on a strictly better score.
        if arg == 0 or val > best_val:
            best_arg = arg
            best_val = val

    return best_arg


def _fennel_place(int node,
                  float[::] node_weights,
                  float[::] partition_votes,
                  float[::] partition_sizes,
                  int num_partitions,
                  int[::] partition,
                  float alpha,
                  int debug):
    """
    Place `node` into its best partition, keep `partition_sizes` in step with
    the move, and clear `partition_votes` ready for the next node.
    """
    cdef int previous_assignment = partition[node]
    cdef int best_arg = _fennel_best_partition(partition_votes,
                                               partition_sizes,
                                               num_partitions,
                                               previous_assignment,
                                               node_weights[node],
                                               alpha,
                                               debug)

    if best_arg != previous_assignment:
        partition[node] = best_arg
        partition_sizes[best_arg] += node_weights[node]
        if previous_assignment != UNMAPPED:
            partition_sizes[previous_assignment] -= node_weights[node]

    partition_votes[:] = 0


def fennel(int[:,::] edges,
           float[::] edge_weights,
           float[::] node_weights,
           int num_partitions,
           int[::] partition,
           float[::] = None if False else None if False else None if False else None,
           float alpha=0,
           int debug=0) if False else None

In [4]:
%%cython
import numpy as np
import networkx as nx

cdef int UNMAPPED = -1

def get_votes(graph, int node, float[::] edge_weights, int num_partitions, int[::] partition):
    """
    Tally, per partition, the votes cast by the already-placed neighbours of `node`.

    graph: networkx graph queried with nx.all_neighbors.
    node: The node whose neighbourhood is inspected.
    edge_weights: Weights added to the tally.
    num_partitions: Length of the returned tally.
    partition: Current assignment of every node; UNMAPPED entries cast no vote.

    Returns: A float32 buffer with one accumulated vote per partition.

    NOTE(review): the weight added for a neighbour is edge_weights[neighbour],
    i.e. the array is indexed by the neighbour's node id rather than by an
    edge index -- confirm this is intended.
    """
    cdef float[::] tally = np.zeros(num_partitions, dtype=np.float32)

    # Each distinct neighbour votes once, in the order it is first reported.
    visited = set()
    for neighbour in list(nx.all_neighbors(graph, node)):
        if neighbour in visited:
            continue
        visited.add(neighbour)

        if partition[neighbour] != UNMAPPED:
            tally[partition[neighbour]] += edge_weights[neighbour]

    return tally

def get_assignment(int node,
                   float[::] node_weights,
                   int num_partitions,
                   int[::] partition,
                   float[::] partition_votes,
                   float alpha,
                   int debug):
    """
    Choose the partition for `node` by maximising votes - alpha * size.

    node: The node being placed.
    node_weights: Weight of every node; used to measure partition sizes.
    num_partitions: How many partitions exist.
    partition: Current assignment of every node; negative entries are unassigned.
    partition_votes: Votes per partition for `node` (see get_votes).
    alpha: Weight of the partition-size penalty.
    debug: Prints the score of every partition when non-zero.

    Returns: The index of the chosen partition. `partition` is NOT modified;
    the caller stores the result.
    """
    cdef int arg = 0
    cdef int max_arg = 0
    cdef float max_val = 0
    cdef float val = 0
    cdef int previous_assignment = 0
    cdef int p = 0

    assert partition is not None, "Blank partition passed"

    # Partition sizes are rebuilt from scratch on every call, measured in
    # node weight so that heavy nodes fill a partition faster.
    cdef float[::] partition_sizes = np.zeros(num_partitions, dtype=np.float32)
    for p in range(len(partition)):
        if partition[p] >= 0:
            partition_sizes[partition[p]] += node_weights[p]

    if debug:
        print("Assigning node {}".format(node))
        print("\tPn = Votes - Alpha x Size")

    # Remember placement of node in the previous assignment
    previous_assignment = partition[node]

    for arg in range(num_partitions):
        val = partition_votes[arg] - alpha * partition_sizes[arg]

        if debug:
            print("\tP{} = {} - {} x {} = {}".format(arg,
                                                     partition_votes[arg],
                                                     alpha,
                                                     partition_sizes[arg],
                                                     val))

        if previous_assignment == arg:
            # The node is removed from its current partition before we decide
            # whether to re-add it, so score that partition as if it were
            # smaller by the node's own weight.
            val += alpha * node_weights[node]

        # Seed with partition 0, then only replace on a strictly better score
        # (ties keep the lowest partition index).
        if arg == 0 or val > max_val:
            max_arg = arg
            max_val = val

    if debug:
        print("\tassigned to P{}".format(max_arg))

    return max_arg

def fennel_rework(graph,
                  float[::] edge_weights,
                  float[::] node_weights,
                  int num_partitions,
                  int[::] assignments,
                  int[::] fixed,
                  float alpha,
                  int debug):
    """
    Make one pass over nodes 0 .. graph.number_of_nodes() - 1 and (re)assign
    every node that has neighbours and is not fixed.

    graph: networkx graph queried for each node's neighbours.
    edge_weights: Forwarded to get_votes.
    node_weights: Forwarded to get_assignment.
    num_partitions: How many partitions exist.
    assignments: Current assignment per node; updated in place.
    fixed: Entries different from UNMAPPED mark nodes that are left alone.
    alpha: Forwarded to get_assignment.
    debug: Prints skipped nodes and is forwarded to get_assignment.

    Returns: `assignments` as a numpy array.
    """
    isolated = []

    for node_id in range(graph.number_of_nodes()):
        has_neighbours = bool(list(nx.all_neighbors(graph, node_id)))

        if not has_neighbours:
            # Nodes without neighbours are handled after the main pass.
            isolated.append(node_id)
        elif fixed[node_id] != UNMAPPED:
            if debug:
                print("Skipping node {}".format(node_id))
        else:
            votes = get_votes(graph, node_id, edge_weights, num_partitions, assignments)
            assignments[node_id] = get_assignment(node_id, node_weights, num_partitions,
                                                  assignments, votes, alpha, debug)

    # Neighbourless nodes that are still unassigned default to partition 0.
    for node_id in isolated:
        if assignments[node_id] == UNMAPPED:
            assignments[node_id] = 0

    return np.asarray(assignments)

In [5]:
# Number of passes of the prediction model
num_iterations = 1

# Number of shelters (partitions)
num_partitions = 4

# Order in which people arrive: node ids in ascending order
# (shuffling with random.shuffle(arrivals) is currently disabled)
arrivals = list(range(G.number_of_nodes()))

# Alpha used by the prediction model (calculated)
alpha_for_prediction = 0.011756

# One alpha per arrival plus one per prediction iteration.
# Alternatives tried previously, with n = len(arrivals) + num_iterations:
#   np.linspace(1.251608191745264e-07, 7.588951557309824e-05, n)
#   np.linspace(0.342722212852e-07, 19.25, n)
#   np.linspace(0.342722212852e-07, 0.5, n)
#   [0.342722212852] * n   (perfect alpha)
#   [0.5] * n
alphas = [alpha_for_prediction] * (len(arrivals) + num_iterations)

# Alpha used when nodes are assigned one at a time (restream_batches == 1)
one_shot_alpha = 0.5

# Fraction of the prediction model to keep before discarding the rest
prediction_model_cut_off = 0.10

# Number of arrivals to batch before recalculating alpha and restreaming;
# 1 means one-shot assignment using one_shot_alpha
restream_batches = 1

In [6]:
import shared

# Sentinel for "no partition assigned" / "not fixed".
UNMAPPED = -1

# reset: every node starts unassigned and unfixed
assignments = np.full(len(node_weights), UNMAPPED, dtype=np.int32)
fixed = np.full(len(node_weights), UNMAPPED, dtype=np.int32)

# run first pass - this is our prediction model
print("PREDICTION MODEL")
print("----------------\n")
print("WASTE\t\t\tCUT RATIO\t\tMISMATCH")
for i in range(num_iterations):
    alpha = alphas[i]
    assignments = fennel_rework(G, edge_weights, node_weights, num_partitions, assignments, fixed, alpha, 0)

    # Pass num_partitions explicitly, matching the later scoring calls.
    scores = shared.score(assignments, edges, num_partitions)
    print("{}\t{}\t{}".format(scores[0], scores[1], scores[2]))

print("\nAssignments:")
# Qualified with the module: fixed_width_print is never imported into the
# notebook namespace, so the bare name fails on a fresh kernel.
shared.fixed_width_print(assignments)

nodes_fixed = int(np.count_nonzero(fixed == 1))
print("\nFixed: {}".format(nodes_fixed))

shared.print_partitions(assignments, num_partitions, node_weights)

PREDICTION MODEL
----------------

WASTE			CUT RATIO		MISMATCH
0.03200000000000003	0.21776114324600204	1280

Assignments:
[ 0  1  2  3  0  1  2  3  0  1  2  3  0  1  1  2  3  0  2  3  0  1  2  3  0  1  3  1  2  0  0  2  3  1  2  3  0  1  2  0  3  0  2  1  0  3  1  3  3  2  0  1  2  0  1  2  3  0  1  2  3  2  0  1  3  0  1  2  3  2  0  3  3  1  0  3  1  2  0  1  2  0  1  0  1  2  3  2  3  0  1  2  3  0  1  2  3  0  0  1  1  2  3  2  3  0  1  3  2  0  2  1  3  2  0  1  2  3  2  0  1  0  0  3  1  1  3  2  3  0  0  2  1  0  0  3  1  2  3  1  1  2  3  3  3  2  1  2  0  0  3  1  1  2  1  0  1  3  2  0  1  3  2  0  0  2  3  2  2  0  0  1  0  3  2  3  0  3  1  1  0  0  0  2  1  0  3  1  1  2  3  2  2  3  0  3  1  2  3  0  2  1  0  3  2  0  2  1  2  3  1  0  3  0  3  0  1  1  2  3  3  0  1  0  2  2  1  2  1  2  3  3  1  1  0  0  3  1  0  3  2  2  2  3  0  2  2  0  1  3  0  2  1  3  3  0  1  0  1  1  3  1  2  0  2  1  3  2  3  0  1  0  2  3  0  1  0  1  2  0  2  3  3  2  0  3  2  2  0  1  1  3  

In [7]:
cut_off_value = int(prediction_model_cut_off * G.number_of_nodes())
print("Assign first {} arrivals using prediction model, then discard\n".format(cut_off_value))

# fix arrivals, in arrival order, until cut_off_value nodes are fixed.
# A running count replaces re-counting the whole array for every arrival.
num_fixed = int(np.count_nonzero(fixed == 1))
for a in arrivals:
    if fixed[a] != 1:
        fixed[a] = 1
        num_fixed += 1
    if num_fixed >= cut_off_value:
        break

# remove nodes not fixed, ie. discard prediction model
assignments[fixed == -1] = -1

print("WASTE\t\t\tCUT RATIO\t\tMISMATCH")
x = shared.score(assignments, edges, num_partitions)
print("{}\t{}\t{}".format(x[0], x[1], x[2]))

print("\nAssignments:")
# Qualified with the module: fixed_width_print is never imported into the
# notebook namespace, so the bare name fails on a fresh kernel.
shared.fixed_width_print(assignments)

nodes_fixed = int(np.count_nonzero(fixed == 1))
print("\nFixed: {}".format(nodes_fixed))

shared.print_partitions(assignments, num_partitions, node_weights)

Assign first 100 arrivals using prediction model, then discard

WASTE			CUT RATIO		MISMATCH
0.007999999999999997	0.18679823069071114	1098

Assignments:
[ 0  1  2  3  0  1  2  3  0  1  2  3  0  1  1  2  3  0  2  3  0  1  2  3  0  1  3  1  2  0  0  2  3  1  2  3  0  1  2  0  3  0  2  1  0  3  1  3  3  2  0  1  2  0  1  2  3  0  1  2  3  2  0  1  3  0  1  2  3  2  0  3  3  1  0  3  1  2  0  1  2  0  1  0  1  2  3  2  3  0  1  2  3  0  1  2  3  0  0  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -

In [8]:
print("Re-streaming in batches of {}".format(restream_batches))
print("--------------------------------\n")

batch_arrived = []
print("WASTE\t\tCUT RATIO\tMISMATCH\tALPHA")
for a in arrivals:
    # check if node is already arrived
    if fixed[a] == 1:
        continue

    # one-shot assigment: assign each node as it arrives
    if restream_batches == 1:
        alpha = one_shot_alpha
        partition_votes = get_votes(G, a, edge_weights, num_partitions, assignments)
        assignments[a] = get_assignment(a, node_weights, num_partitions, assignments, partition_votes, alpha, 0)
        fixed[a] = 1

        x = shared.score(assignments, edges, num_partitions)
        print("{0:.5f}\t\t{1:.10f}\t{2}\t\t{3:.10f}".format(x[0], x[1], x[2], alpha))
        continue

    batch_arrived.append(a)

    if restream_batches == len(batch_arrived):
        # Nodes fixed before this batch is placed.
        is_fixed = (fixed == 1)

        # make a subgraph of all arrived nodes: fixed nodes plus this batch,
        # in ascending node order
        arrived_mask = is_fixed.copy()
        arrived_mask[batch_arrived] = True
        nodes_arrived = np.flatnonzero(arrived_mask).tolist()
        Gsub = G.subgraph(nodes_arrived)

        # XXX: recalculate alpha
        nodes_fixed = int(np.count_nonzero(is_fixed))
        # edges whose two endpoints are both fixed
        edge_pairs = np.asarray(edges).reshape(-1, 2)
        edges_fixed = int(np.count_nonzero(is_fixed[edge_pairs[:, 0]] & is_fixed[edge_pairs[:, 1]]))
        # Intended formula, per the original note:
        #   alpha = (edges fixed + edges arrived) x (partitions / (nodes fixed + nodes arrived)^2)
        # NOTE(review): the code below only uses edges_fixed; the edges that
        # arrived in this batch are not added -- confirm which is intended.
        alpha = (edges_fixed) * (num_partitions / (nodes_fixed + restream_batches)**2)

        for n in batch_arrived:
            partition_votes = get_votes(Gsub, n, edge_weights, num_partitions, assignments)
            assignments[n] = get_assignment(n, node_weights, num_partitions, assignments, partition_votes, alpha, 0)
            fixed[n] = 1

        x = shared.score(assignments, edges, num_partitions)
        print("{0:.5f}\t\t{1:.10f}\t{2}\t\t{3:.10f}".format(x[0], x[1], x[2], alpha))
        batch_arrived = []

# remove nodes not fixed
assignments[fixed == -1] = -1

print("\nAssignments:")
# Qualified with the module: fixed_width_print is never imported into the
# notebook namespace, so the bare name fails on a fresh kernel.
shared.fixed_width_print(assignments)

nodes_fixed = int(np.count_nonzero(fixed == 1))
print("\nFixed: {}".format(nodes_fixed))

shared.print_partitions(assignments, num_partitions, node_weights)

Re-streaming in batches of 1
--------------------------------

WASTE		CUT RATIO	MISMATCH	ALPHA
0.00700		0.1874787343	1102		0.5000000000
0.00600		0.1902007486	1118		0.5000000000
0.00500		0.1915617557	1126		0.5000000000
0.00400		0.1939435182	1140		0.5000000000
0.00300		0.1953045253	1148		0.5000000000
0.00200		0.1980265396	1164		0.5000000000
0.00500		0.1976862879	1162		0.5000000000
0.00400		0.1980265396	1164		0.5000000000
0.00300		0.1983667914	1166		0.5000000000
0.00200		0.2010888057	1182		0.5000000000
0.00100		0.2031303164	1194		0.5000000000
0.00000		0.2044913236	1202		0.5000000000
0.00300		0.2072133379	1218		0.5000000000
0.00600		0.2085743450	1226		0.5000000000
0.00500		0.2095951004	1232		0.5000000000
0.00400		0.2112963593	1242		0.5000000000
0.00700		0.2129976182	1252		0.5000000000
0.00600		0.2146988772	1262		0.5000000000
0.00500		0.2157196325	1268		0.5000000000
0.00400		0.2164001361	1272		0.5000000000
0.00300		0.2167403879	1274		0.5000000000
0.00200		0.2184416468	1284		0.5000000000
0.0

In [9]:
# Tag every node with the partition it ended up in (stored as a string),
# then save the annotated graph as GML.
for node_id, assigned in enumerate(assignments):
    G.add_node(node_id, partition=str(assigned))

nx.write_gml(G, "test.gml")