In [1]:
import shared
import networkx as nx

# ---------------------------------------------------------------------------
# Input / output locations
# ---------------------------------------------------------------------------
DATA_FILENAME = "/home/sami/py-graph/data/oneshot_fennel_weights.txt"
OUTPUT_DIRECTORY = "/home/sami/py-graph/output"

# Path of a saved prediction model to load. When left empty, the prediction
# model is generated later in the notebook instead of being read from disk.
PREDICTION_MODEL = ""

# ---------------------------------------------------------------------------
# Tunable parameters
# ---------------------------------------------------------------------------
# Number of shelters (i.e. partitions)
num_partitions = 4

# How many passes are made over the graph when building the prediction model
num_iterations = 10

# Fraction of the prediction model that is kept before the rest is discarded.
# A value of 0 discards the prediction model entirely (useful for one-shot).
prediction_model_cut_off = 0.10

# Alpha used by one-shot assignment (only when restream_batches is 1)
one_shot_alpha = 0.5

# Arrivals collected per batch before alpha is recalculated and the batch is
# restreamed. A value of 1 selects one-shot mode with one_shot_alpha.
restream_batches = 10

# Whether virtual nodes are created from the prediction model
use_virtual_nodes = False

# Weights given to virtual nodes and to the edges attached to them
virtual_node_weight = 1.0
virtual_edge_weight = 1.0

# ---------------------------------------------------------------------------
# Graph loading
# ---------------------------------------------------------------------------
# Read the METIS file into a networkx graph plus its edge and node weights
G, edge_weights, node_weights = shared.load_metis_into_networkx_graph(DATA_FILENAME)

# People arrive in node-id order. To randomise the order, shuffle this list
# (e.g. random.shuffle(arrivals), which needs `import random` first).
arrivals = list(range(G.number_of_nodes()))

# Alpha used while building the prediction model: edges * (k / nodes^2)
prediction_model_alpha = G.number_of_edges() * (num_partitions / G.number_of_nodes()**2)

# Short summary of what was loaded
print("Graph loaded...")
print("Nodes: {}".format(G.number_of_nodes()))
print("Edges: {}".format(G.number_of_edges()))
print("Graph is directed" if G.is_directed() else "Graph is undirected")



Graph loaded...
Nodes: 1000
Edges: 2939
Graph is undirected


In [2]:
# Rescale every edge weight using the node weights (probability of
# displacement) of its two endpoints:
#     new_weight = (w_left * old_weight) * (w_right * old_weight)
# The edge attribute dict is modified in place, and the new value is derived
# from the current one, so running this cell twice compounds the scaling.
# NOTE(review): only the graph attributes change here; the `edge_weights`
# array returned by the loader is not refreshed by this cell - confirm that
# is intended.
for left, right, attrs in G.edges_iter(data=True):
    old_weight = attrs['weight']
    left_term = float(G.node[left]['weight']) * old_weight
    right_term = float(G.node[right]['weight']) * old_weight
    attrs['weight'] = left_term * right_term


In [3]:
# Load the Cython IPython extension so the %%cython cell magic is available.
%load_ext Cython
# Populate the interactive namespace from numpy and matplotlib.
# Later cells use `np` without importing it, presumably relying on this magic.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [4]:
%%cython
import numpy as np
import networkx as nx
from shared import bincount_assigned

# Sentinel partition id: an entry equal to UNMAPPED has not been assigned
# to any partition yet.
cdef int UNMAPPED = -1

def get_votes(graph, int node, float[::] edge_weights, int num_partitions, int[::] partition):
    """Tally, per partition, the votes cast by the already-placed neighbours of `node`.

    Every distinct neighbour of `node` (as reported by ``nx.all_neighbors``)
    whose entry in `partition` is not UNMAPPED adds ``edge_weights[neighbour]``
    to the vote total of the partition it currently sits in.

    Returns a float32 memoryview of length `num_partitions`.

    NOTE(review): `edge_weights` is indexed by the neighbour's node id here,
    not by an edge id - confirm this matches how callers build the array.
    """
    cdef float[::] partition_votes = np.zeros(num_partitions, dtype=np.float32)

    # Visit each neighbour once, in first-seen order, and count its vote
    # only when it has already been placed in a partition.
    visited = set()
    for n in nx.all_neighbors(graph, node):
        if n in visited:
            continue
        visited.add(n)

        if partition[n] == UNMAPPED:
            continue
        partition_votes[partition[n]] += edge_weights[n]

    return partition_votes

def get_assignment(int node,
                   float[::] node_weights,
                   int num_partitions,
                   int[::] partition,
                   float[::] partition_votes,
                   float alpha,
                   int debug):
    """Pick the partition for `node` that maximises ``votes - alpha * size``.

    Partition sizes come from ``bincount_assigned(partition, num_partitions,
    weights=node_weights)``. The partition the node currently occupies gets
    `alpha` added back to its score, because the node is treated as removed
    from it before being re-placed. On ties the lowest partition index wins
    (only a strictly greater score replaces the running best).

    When `debug` is truthy the per-partition scores are printed.

    Returns the chosen partition index.

    NOTE(review): the correction adds exactly `alpha`, i.e. it assumes the
    node contributes 1 to its partition's size - confirm this is intended
    when `node_weights` are not all 1.
    """
    cdef int p = 0
    cdef int best_part = 0
    cdef float best_score = 0
    cdef float score = 0
    cdef int current_part = 0
    cdef float[::] partition_sizes

    assert partition is not None, "Blank partition passed"

    # Current (weighted) size of every partition
    partition_sizes = np.fromiter(
        bincount_assigned(partition, num_partitions, weights=node_weights),
        dtype=np.float32)

    # One debug row per partition: index, votes, alpha, size, score
    row = "\tP{} = {} - {} x {} = {}"

    if debug:
        print("Assigning node {}".format(node))
        print("\tPn = Votes - Alpha x Size")

    # Where the node sat in the previous assignment (UNMAPPED if nowhere)
    current_part = partition[node]

    # Partition 0 seeds the running maximum
    best_part = 0
    best_score = partition_votes[0] - alpha * partition_sizes[0]
    if debug:
        print(row.format(0, partition_votes[0], alpha, partition_sizes[0], best_score))
    if current_part == 0:
        # The node is taken out of its current partition before deciding
        # whether to put it back, so that partition is scored as if it were
        # one unit smaller.
        best_score += alpha

    # Remaining partitions challenge the running maximum
    for p in range(1, num_partitions):
        score = partition_votes[p] - alpha * partition_sizes[p]
        if debug:
            print(row.format(p, partition_votes[p], alpha, partition_sizes[p], score))
        if current_part == p:
            # Same size correction as for partition 0
            score += alpha
        if score > best_score:
            best_part, best_score = p, score

    if debug:
        print("\tassigned to P{}".format(best_part))

    return best_part

def fennel_rework(graph,
                  float[::] edge_weights,
                  float[::] node_weights,
                  int num_partitions,
                  int[::] assignments,
                  int[::] fixed,
                  float alpha,
                  int debug):
    """Run one streaming pass over nodes ``0 .. graph.number_of_nodes() - 1``.

    Nodes with at least one neighbour that are not fixed (``fixed[n]`` is
    UNMAPPED) are (re)assigned via `get_votes` / `get_assignment`.
    Nodes without any neighbour are handled afterwards: each one that is
    still UNMAPPED goes to the partition that currently holds the fewest
    nodes (first such partition on ties).

    `assignments` is updated in place and also returned as a numpy array.
    """
    isolated = []
    for n in range(0, graph.number_of_nodes()):

        # Neighbourless nodes cannot collect votes; park them for later
        if not list(nx.all_neighbors(graph, n)):
            isolated.append(n)
            continue

        if fixed[n] == UNMAPPED:
            votes = get_votes(graph, n, edge_weights, num_partitions, assignments)
            assignments[n] = get_assignment(n, node_weights, num_partitions, assignments, votes, alpha, debug)
        elif debug:
            # Fixed nodes keep their current placement
            print("Skipping node {}".format(n))

    # Place the parked nodes one by one into the least populated partition
    for n in isolated:
        if assignments[n] != UNMAPPED:
            continue
        counts = bincount_assigned(assignments, num_partitions)
        assignments[n] = counts.index(min(counts))

    return np.asarray(assignments)

In [5]:
# Python-level copy of the "not assigned" sentinel used by the Cython cell
UNMAPPED = -1

# Reset state: nothing is assigned and nothing is fixed
assignments = np.repeat(np.int32(UNMAPPED), len(node_weights))
fixed = np.repeat(np.int32(UNMAPPED), len(node_weights))

print("PREDICTION MODEL")
print("----------------\n")
print("WASTE\t\tCUT RATIO\tMISMATCH")

if PREDICTION_MODEL:
    # Load a saved model: one integer partition id per line.
    # Blank lines (e.g. a trailing newline at the end of the file) are skipped.
    with open(PREDICTION_MODEL, "r") as inf:
        assignments = np.fromiter((int(line) for line in inf if line.strip()),
                                  dtype=np.int32)

    # A model of the wrong length would otherwise fail much later, or
    # silently mislabel nodes, so reject it up front.
    if len(assignments) != len(node_weights):
        raise ValueError(
            "Prediction model {!r} has {} entries but the graph has {} nodes".format(
                PREDICTION_MODEL, len(assignments), len(node_weights)))

    # Score with an explicit partition count, as every other cell does
    x = shared.score(assignments, G.edges(), num_partitions)
    print("{0:.5f}\t\t{1:.10f}\t{2}".format(x[0], x[1], x[2]))

else:
    # Build the model by streaming the whole graph num_iterations times.
    # alpha does not change between iterations, so it is set once.
    alpha = prediction_model_alpha
    for i in range(num_iterations):
        assignments = fennel_rework(G, edge_weights, node_weights, num_partitions, assignments, fixed, alpha, 0)

        x = shared.score(assignments, G.edges(), num_partitions)
        print("{0:.5f}\t\t{1:.10f}\t{2}".format(x[0], x[1], x[2]))

print("\nAssignments:")
shared.fixed_width_print(assignments)

# A node counts as fixed when its entry in `fixed` equals 1
nodes_fixed = int(np.count_nonzero(fixed == 1))
print("\nFixed: {}".format(nodes_fixed))

shared.print_partitions(assignments, num_partitions, node_weights)

PREDICTION MODEL
----------------

WASTE		CUT RATIO	MISMATCH
0.23600		0.2123171147	624
0.22000		0.1388227288	408
0.21200		0.1255529092	369
0.20800		0.1187478734	349
0.20000		0.1163661109	342
0.19600		0.1163661109	342
0.19200		0.1160258591	341
0.19200		0.1160258591	341
0.19200		0.1160258591	341
0.19200		0.1160258591	341

Assignments:
[ 0  1  2  3  0  2  3  2  0  2  0  2  0  2  2  1  0  2  0  2  0  2  0  2  0  2  2  2  0  0  0  2  3  0  0  0  2  0  2  1  0  2  0  1  2  2  3  0  0  0  3  2  3  0  2  2  0  2  3  2  2  3  3  2  3  0  0  0  3  0  2  2  2  3  0  2  3  2  2  0  2  0  3  0  3  0  1  2  3  0  3  0  0  0  0  3  0  1  2  2  3  3  0  3  2  2  1  3  0  3  3  3  0  0  2  2  2  3  2  3  3  2  3  2  3  0  2  3  2  0  0  0  3  0  3  3  2  2  3  1  3  2  0  2  2  3  2  3  0  1  0  3  2  0  2  2  3  2  3  2  1  3  3  1  2  3  3  0  3  1  1  3  0  2  2  2  2  0  3  3  2  3  2  0  2  2  1  3  0  3  0  3  3  2  0  0  0  3  3  3  2  0  2  3  0  1  2  1  3  0  2  0  3  3  0  2  0  3  0  0  1  

In [6]:
if use_virtual_nodes:
    print("Creating virtual nodes and assigning edges based on prediction model")

    # One virtual node per partition, numbered directly after the real nodes
    real_node_count = G.number_of_nodes()
    virtual_nodes = list(range(real_node_count, real_node_count + num_partitions))
    print("\nVirtual nodes:")
    print(virtual_nodes)

    # Tie every real node to the virtual node of its predicted partition.
    # NOTE(review): a node whose assignment is still -1 would index
    # virtual_nodes[-1], i.e. the last virtual node - confirm all nodes are
    # assigned before this cell runs.
    virtual_edges = [(n, virtual_nodes[assignments[n]]) for n in range(real_node_count)]

    # Virtual node k belongs to partition k and is marked as fixed (1)
    assignments = np.append(assignments, np.arange(num_partitions, dtype=np.int32))
    fixed = np.append(fixed, np.ones(num_partitions, dtype=np.int32))

    G.add_nodes_from(virtual_nodes, weight=virtual_node_weight)
    G.add_edges_from(virtual_edges, weight=virtual_edge_weight)

    # Rebuild the weight arrays from the extended graph
    edge_weights = np.array([attrs['weight'] for _u, _v, attrs in G.edges(data=True)],
                            dtype=np.float32)
    node_weights = np.array([attrs['weight'] for _node, attrs in G.nodes(data=True)],
                            dtype=np.float32)

    print("\nAssignments:")
    shared.fixed_width_print(assignments)
    print("Last {} nodes are virtual nodes.".format(num_partitions))

In [7]:
# Number of nodes that keep their prediction-model assignment
cut_off_value = int(prediction_model_cut_off * G.number_of_nodes())
if prediction_model_cut_off == 0:
    print("Discarding prediction model\n")
else:
    print("Assign first {} arrivals using prediction model, then discard\n".format(cut_off_value))

# Fix arrivals, in arrival order, until cut_off_value nodes are fixed.
# The fixed count is taken once and then maintained incrementally, instead
# of rescanning the whole `fixed` array for every arrival.
nodes_arrived = []
nodes_fixed = int(np.count_nonzero(fixed == 1))
print("WASTE\t\tCUT RATIO\tMISMATCH")
for a in arrivals:
    if nodes_fixed >= cut_off_value:
        break
    if fixed[a] != 1:
        # only newly fixed nodes raise the count (keeps re-runs consistent)
        nodes_fixed += 1
    fixed[a] = 1
    nodes_arrived.append(a)

    # make a subgraph of all arrived nodes
    Gsub = G.subgraph(nodes_arrived)

    x = shared.score(assignments, Gsub.edges(), num_partitions)
    print("{0:.5f}\t\t{1:.10f}\t{2}".format(x[0], x[1], x[2]))

# Discard the prediction model for every node that was not fixed.
# `assignments` and `fixed` are numpy arrays of equal length, so a boolean
# mask replaces the element-by-element loop.
assignments[fixed == UNMAPPED] = UNMAPPED

print("WASTE\t\tCUT RATIO\tMISMATCH")
x = shared.score(assignments, G.edges(), num_partitions)
print("{0:.5f}\t\t{1:.10f}\t{2}".format(x[0], x[1], x[2]))

print("\nAssignments:")
shared.fixed_width_print(assignments)

nodes_fixed = int(np.count_nonzero(fixed == 1))
print("\nFixed: {}".format(nodes_fixed))

shared.print_partitions(assignments, num_partitions, node_weights)

Assign first 100 arrivals using prediction model, then discard

WASTE		CUT RATIO	MISMATCH
0.19200		nan	0
0.19200		nan	0
0.19200		nan	0
0.19200		nan	0
0.19200		nan	0
0.19200		nan	0
0.19200		nan	0
0.19200		nan	0
0.19200		nan	0
0.19200		nan	0
0.19200		nan	0
0.19200		nan	0
0.19200		nan	0
0.19200		nan	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19200		0.0000000000	0
0.19

In [8]:
if restream_batches == 1:
    print("One-shot assignment mode")
    print("------------------------\n")
else:
    print("Re-streaming in batches of {}".format(restream_batches))
    print("--------------------------------\n")


def _restream_batch(batch):
    """Assign every node in `batch`, fix it, and print one score row.

    Alpha is recalculated from the subgraph induced by all previously
    arrived nodes plus the batch. `assignments`, `fixed` and `nodes_arrived`
    are updated in place.

    Returns the (subgraph, alpha) pair that was used.
    """
    # make a subgraph of all arrived nodes
    batch_graph = G.subgraph(nodes_arrived + batch)

    # recalculate alpha
    if batch_graph.is_directed():
        # as it's a directed graph, edges_arrived is actually double, so divide by 2
        edges_arrived = batch_graph.number_of_edges() / 2
    else:
        edges_arrived = batch_graph.number_of_edges()
    fixed_count = int(np.count_nonzero(fixed == 1))
    batch_alpha = edges_arrived * (num_partitions / (fixed_count + len(batch))**2)

    # restream
    for n in batch:
        partition_votes = get_votes(batch_graph, n, edge_weights, num_partitions, assignments)
        assignments[n] = get_assignment(n, node_weights, num_partitions, assignments, partition_votes, batch_alpha, 0)
        fixed[n] = 1
        nodes_arrived.append(n)

    batch_score = shared.score(assignments, batch_graph.edges(), num_partitions)
    print("{0:.5f}\t\t{1:.10f}\t{2}\t\t{3:.10f}".format(batch_score[0], batch_score[1], batch_score[2], batch_alpha))
    return batch_graph, batch_alpha


batch_arrived = []
print("WASTE\t\tCUT RATIO\tMISMATCH\tALPHA")
for a in arrivals:
    # check if node is already arrived
    if fixed[a] == 1:
        continue

    # one-shot assignment: assign each node as it arrives
    if restream_batches == 1:
        alpha = one_shot_alpha
        partition_votes = get_votes(G, a, edge_weights, num_partitions, assignments)
        assignments[a] = get_assignment(a, node_weights, num_partitions, assignments, partition_votes, alpha, 0)
        fixed[a] = 1
        nodes_arrived.append(a)

        # make a subgraph of all arrived nodes
        Gsub = G.subgraph(nodes_arrived)

        x = shared.score(assignments, Gsub.edges(), num_partitions)
        print("{0:.5f}\t\t{1:.10f}\t{2}\t\t{3:.10f}".format(x[0], x[1], x[2], alpha))
        continue

    batch_arrived.append(a)

    if restream_batches == len(batch_arrived):
        Gsub, alpha = _restream_batch(batch_arrived)
        batch_arrived = []

# Flush the final, partially filled batch. Without this, arrivals left over
# when the number of remaining nodes is not a multiple of restream_batches
# would never be assigned and would be wiped by the clean-up below.
if batch_arrived:
    Gsub, alpha = _restream_batch(batch_arrived)
    batch_arrived = []

# remove nodes not fixed
assignments[fixed == UNMAPPED] = UNMAPPED

print("\nAssignments:")
shared.fixed_width_print(assignments)

nodes_fixed = int(np.count_nonzero(fixed == 1))
print("\nFixed: {}".format(nodes_fixed))

shared.print_partitions(assignments, num_partitions, node_weights)

Re-streaming in batches of 10
--------------------------------

WASTE		CUT RATIO	MISMATCH	ALPHA
0.05400		0.0322580645	1		0.0102479339
0.05600		0.0526315789	2		0.0105555556
0.05800		0.0697674419	3		0.0101775148
0.06400		0.0588235294	3		0.0104081633
0.07400		0.0454545455	3		0.0117333333
0.07600		0.0519480519	4		0.0120312500
0.07000		0.0465116279	4		0.0119031142
0.07200		0.0700000000	7		0.0123456790
0.07800		0.0701754386	8		0.0126315789
0.08400		0.0737704918	9		0.0122000000
0.08600		0.0942028986	13		0.0125170068
0.09200		0.0915032680	14		0.0126446281
0.09400		0.0869565217	14		0.0121739130
0.09200		0.0877192982	15		0.0118750000
0.09800		0.0944444444	17		0.0115200000
0.10400		0.0932642487	18		0.0114201183
0.10200		0.0882352941	18		0.0111934156
0.11200		0.0852017937	19		0.0113775510
0.10600		0.1069958848	26		0.0115576694
0.11200		0.1050583658	27		0.0114222222
0.11000		0.1054545455	29		0.0114464100
0.11200		0.0990099010	30		0.0118359375
0.11400		0.0931677019	30		0.0118273646
0.11200		0.091988

In [9]:
if use_virtual_nodes:
    def _report_graph_size(heading):
        """Print `heading` followed by the current node and edge counts of G."""
        print(heading)
        print("Nodes: {}".format(G.number_of_nodes()))
        print("Edges: {}".format(G.number_of_edges()))

    print("Remove virtual nodes")
    _report_graph_size("\nCurrent graph:")

    # Drop the virtual nodes from the graph and their slots from both arrays
    G.remove_nodes_from(virtual_nodes)
    assignments, fixed = (np.delete(values, virtual_nodes) for values in (assignments, fixed))

    _report_graph_size("\nVirtual nodes removed:")

In [10]:
# Store each node's partition id on the node itself, as a string attribute
# named "partition" (node i receives str(assignments[i])).
G.add_nodes_from(
    (node_id, {'partition': str(part)}) for node_id, part in enumerate(assignments)
)

# From here on the graph is read-only; any further modification raises.
G = nx.freeze(G)

In [11]:
import os
import datetime

# Output files are named "<input file stem>-<HHMMSS>"
timestamp = datetime.datetime.now().strftime('%H%M%S')
data_filename = os.path.splitext(os.path.basename(DATA_FILENAME))[0] + "-" + timestamp
all_prefix = "{}-all".format(data_filename)

# CSV columns: the run configuration first, the computed metrics after it
config_fieldnames = [
    "file",
    "num_partitions",
    "num_iterations",
    "prediction_model_cut_off",
    "one_shot_alpha",
    "restream_batches",
    "use_virtual_nodes",
    "virtual_node_weight",
    "virtual_edge_weight",
]
metric_fieldnames = [
    "edges_cut",
    "waste",
    "cut_ratio",
    "communication_volume",
    "network_permanence",
    "Q",
    "NQ",
    "Qds",
    "intraEdges",
    "interEdges",
    "intraDensity",
    "modularity degree",
    "conductance",
    "expansion",
    "contraction",
    "fitness",
    "QovL",
]
graph_fieldnames = config_fieldnames + metric_fieldnames

# Start the record with the configuration of this run
graph_metrics = {
    "file": timestamp,
    "num_partitions": num_partitions,
    "num_iterations": num_iterations,
    "prediction_model_cut_off": prediction_model_cut_off,
    "one_shot_alpha": one_shot_alpha,
    "restream_batches": restream_batches,
    "use_virtual_nodes": use_virtual_nodes,
    "virtual_node_weight": virtual_node_weight,
    "virtual_edge_weight": virtual_edge_weight,
}

print("Complete graph with {} nodes".format(G.number_of_nodes()))
(file_maxperm, file_oslom) = shared.write_graph_files(OUTPUT_DIRECTORY, all_prefix, G)

# original scoring algorithm
scoring = shared.score(assignments, G.edges(), num_partitions)
graph_metrics["waste"] = scoring[0]
graph_metrics["cut_ratio"] = scoring[1]

# edges cut and communication volume
edges_cut, steps = shared.base_metrics(G)
graph_metrics["edges_cut"] = edges_cut
graph_metrics["communication_volume"] = steps

# MaxPerm
max_perm = shared.run_max_perm(file_maxperm)
graph_metrics["network_permanence"] = max_perm

# Community Quality metrics
community_metrics = shared.run_community_metrics(OUTPUT_DIRECTORY, all_prefix, file_oslom)
graph_metrics.update(community_metrics)

# Print the record in two sections, in column order
for heading, fieldnames in (("\nConfig", config_fieldnames), ("\nMetrics", metric_fieldnames)):
    print(heading)
    print("-------\n")
    for f in fieldnames:
        print("{}: {}".format(f, graph_metrics[f]))

# write metrics to CSV
metrics_filename = os.path.join(OUTPUT_DIRECTORY, "metrics.csv")
shared.write_metrics_csv(metrics_filename, graph_fieldnames, graph_metrics)

Complete graph with 1000 nodes
Writing GML file: /home/sami/py-graph/output/oneshot_fennel_weights-223925-all-graph.gml
Writing assignments: /home/sami/py-graph/output/oneshot_fennel_weights-223925-all-assignments.txt
Writing edge list (for MaxPerm): /home/sami/py-graph/output/oneshot_fennel_weights-223925-all-edges-maxperm.txt
Writing edge list (for OSLOM): /home/sami/py-graph/output/oneshot_fennel_weights-223925-all-edges-oslom.txt

Config
-------

file: 223925
num_partitions: 4
num_iterations: 10
prediction_model_cut_off: 0.1
one_shot_alpha: 0.5
restream_batches: 10
use_virtual_nodes: False
virtual_node_weight: 1.0
virtual_edge_weight: 1.0

Metrics
-------

edges_cut: 502
waste: 0.22799999999999998
cut_ratio: 0.17080639673358286
communication_volume: 583
network_permanence: 0.368160
Q: 0.8462205647005029
NQ: 1.0978523437647105
Qds: 0.17814689730502103
intraEdges: 62.766666666666666
interEdges: 11.921212121212122
intraDensity: 0.27338582913472464
modularity degree: 219.1567705374179


In [12]:
# Columns of the per-partition CSV, in output order
partition_fieldnames = [
    "file",
    "partition",
    "network_permanence",
    "Q",
    "NQ",
    "Qds",
    "intraEdges",
    "interEdges",
    "intraDensity",
    "modularity degree",
    "conductance",
    "expansion",
    "contraction",
    "fitness",
    "QovL",
]
partition_metrics = {}

# Each partition is exported as its own subgraph, measured, printed and
# written as one row of metrics-partitions.csv.
for p in range(0, num_partitions):
    partition_prefix = "{}-p{}".format(data_filename, p)

    # Subgraph induced by the nodes assigned to partition p
    nodes = [node_id for node_id, part in enumerate(assignments) if part == p]
    Gsub = G.subgraph(nodes)
    print("\nPartition {} with {} nodes".format(p, Gsub.number_of_nodes()))
    print("-----------------------------\n")

    (file_maxperm, file_oslom) = shared.write_graph_files(OUTPUT_DIRECTORY, partition_prefix, Gsub)

    # Fresh record for this partition
    partition_metrics = {"file": timestamp, "partition": p}

    # MaxPerm
    max_perm = shared.run_max_perm(file_maxperm)
    partition_metrics["network_permanence"] = max_perm

    # Community Quality metrics
    community_metrics = shared.run_community_metrics(OUTPUT_DIRECTORY, partition_prefix, file_oslom)
    partition_metrics.update(community_metrics)

    print("\nMetrics")
    for f in partition_fieldnames:
        print("{}: {}".format(f, partition_metrics[f]))

    # write metrics to CSV
    metrics_filename = os.path.join(OUTPUT_DIRECTORY, "metrics-partitions.csv")
    shared.write_metrics_csv(metrics_filename, partition_fieldnames, partition_metrics)


Partition 0 with 307 nodes
-----------------------------

Writing GML file: /home/sami/py-graph/output/oneshot_fennel_weights-223925-p0-graph.gml
Writing assignments: /home/sami/py-graph/output/oneshot_fennel_weights-223925-p0-assignments.txt
Writing edge list (for MaxPerm): /home/sami/py-graph/output/oneshot_fennel_weights-223925-p0-edges-maxperm.txt
Writing edge list (for OSLOM): /home/sami/py-graph/output/oneshot_fennel_weights-223925-p0-edges-oslom.txt

Metrics
file: 223925
partition: 0
network_permanence: 1.000000
Q: 0.8783985652117521
NQ: 1.88177647940695
Qds: 0.2900222738564561
intraEdges: 41.26315789473684
interEdges: 3.6842105263157894
intraDensity: 0.43075019292080946
modularity degree: 85.97510049655283
conductance: 0.03267463742163711
expansion: 0.19515820504331383
contraction: 4.720163494335568
fitness: 0.9397985472947868
QovL: 0.9566300411737596

Partition 1 with 110 nodes
-----------------------------

Writing GML file: /home/sami/py-graph/output/oneshot_fennel_weights-