In [1]:
import networkx as nx

DATA_FILENAME = '/home/sami/py-graph/data/lesmis.gml'

print("Loading graph data...")
U = nx.read_gml(DATA_FILENAME, label='id')
G = U.to_directed()

print("Nodes: {}".format(G.number_of_nodes()))
print("Edges: {}".format(G.number_of_edges()))

#edges, num_edges, num_nodes = get_clean_data(shuffle=True)

Loading graph data...
Nodes: 77
Edges: 508


In [2]:
%load_ext Cython
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
%%cython
import numpy as np
from shared import fixed_width_print

cdef int UNMAPPED = -1

def fennel(int[:,::] edges,
           float[::] edge_weights,
           float[::] node_weights,
           int num_partitions,
           int[::] partition,
           int[::] fixed,
           float alpha,
           int debug):
    """
    This algorithm favors a cluster if it has many neighbors of a node, but
    penalizes the cluster if it is close to capacity.
    
    partition: A previous partition of the nodes. Set to -1's if a node has not been assigned.
    """
    
    cdef int num_nodes = len(node_weights)
    cdef int[::] partition_sizes = None
    
    # The output partition
    if partition is None:
        partition = np.repeat(np.int32(UNMAPPED), num_nodes)
        partition_sizes = np.zeros(num_partitions, dtype=np.int32)
    else:
        partition_sizes = np.bincount(partition).astype(np.int32)
        
    if fixed is None:
        fixed = np.repeat(np.int32(UNMAPPED), num_nodes)
    
    cdef int[::] partition_votes = np.zeros(num_partitions, dtype=np.int32)
    
    cdef float partition_capacity = num_nodes / num_partitions
    
    cdef int last_left = edges[0,0]
    cdef int i = 0
    cdef int left = 0
    cdef int right = 0
    cdef int arg = 0
    cdef int max_arg = 0
    cdef float max_val = 0
    cdef float val = 0
    cdef int len_edges = len(edges)
    cdef int previous_assignment = 0

    for i in range(len_edges):
        left = edges[i,0]
        right = edges[i,1]
    
        if last_left != left:
            # New left node, so we have to assign last left

            if debug:
                print("Assigning node {}".format(last_left))
            
            # Remember placement of last_left in the previous assignment
            previous_assignment = partition[last_left]
            
            max_arg = 0
            max_val = partition_votes[0] - alpha * partition_sizes[0]
            if previous_assignment == 0:
                # We remove the node from its current partition before
                # deciding to re-add it, so subtract alpha to give
                # result of 1 lower partition size.
                max_val += alpha

            for arg in range(1, num_partitions):
                val = partition_votes[arg] - alpha * partition_sizes[arg]
                if previous_assignment == arg:
                    # See comment above
                    val += alpha
                if val > max_val:
                    max_arg = arg
                    max_val = val

            if max_arg != previous_assignment:
                partition[last_left] = max_arg
                partition_sizes[max_arg] += 1
                if previous_assignment != UNMAPPED:
                    partition_sizes[previous_assignment] -= 1

            partition_votes[:] = 0
            last_left = left
            
            if debug:
                fixed_width_print(np.asarray(partition))
                fixed_width_print(np.asarray(fixed))

        if partition[right] != -1:
            partition_votes[partition[right]] += 1
    
    # TODO: finish off this fencepost....
    if debug:
        print("last one")
    partition[left] = 0
    
    return (np.asarray(partition), np.asarray(fixed))

In [7]:
num_iterations = 5

edges = np.array(G.edges(), dtype=np.int32)
edge_weights = np.array([x[2]['weight'] for x in G.edges(data=True)], dtype=np.float32)
node_weights = np.array([x[1]['weight'] for x in G.nodes(data=True)], dtype=np.float32)

alphas = np.linspace(1.251608191745264e-07, 7.588951557309824e-05, num_iterations)

# run first pass - this is our initial seed
assignments = None
fixed = None
for i in range(1):
    alpha = alphas[i]
    assignments, fixed = fennel(edges, edge_weights, node_weights, 4, assignments, fixed, alpha, 0)

print("Initial assignment seed:")
fixed_width_print(assignments)
fixed_width_print(fixed)
print("\n")


Initial assignment seed:
[ 0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  0  0  2  2  2  2  2  2  2  2  2  2  0  2  3  3  2  0  0  0  0  0  0  2  0  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  0]
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]




In [5]:
from shared import score

def run_fennel_restreaming(edges, num_nodes, num_partitions, num_iterations):
    print('{} PARTITIONS'.format(num_partitions))
    assignments = None
    # We will explore this parameter in a future post. This is sub-optimal
    alphas = np.linspace(1.251608191745264e-07, 7.588951557309824e-05, num_iterations)
    waste_scores, edge_score_values = [], []
    print('ROUND\tALPHA\tWASTE\tSCORE\tCUT')
    for i in range(num_iterations):
        alpha = alphas[i]
        assignments = fennel(edges, num_nodes, num_partitions, assignments, alpha)
        (waste, edge_score, cut) = score(assignments, edges)
        print('{}\t{:.1e}\t{:0.3f}\t{:0.3f}\t{}'.format(i, alpha, waste, edge_score, cut))
        waste_scores.append(waste)
        edge_score_values.append(edge_score)
