In [1]:
import os
import shared
import networkx as nx

# Working directory of the notebook. os.getcwd() is the plain-Python
# equivalent of the IPython `%pwd` magic, so this cell is also valid Python
# outside of IPython.
pwd = os.getcwd()

DATA_FILENAME = os.path.join(pwd, "data", "oneshot_fennel_weights.txt")
OUTPUT_DIRECTORY = os.path.join(pwd, "output")

# Path of a file holding a ready-made prediction model. When left empty, a
# prediction model is generated with FENNEL instead.
PREDICTION_MODEL = ""

# File containing simulated arrivals. This is used in simulating nodes
# arriving at the shelter. Nodes are represented by line number; a value of
# 1 marks the node as arrived; a value of 0 marks the node as not arrived
# (i.e. not needing a shelter).
#SIMULATED_ARRIVAL_FILE = os.path.join(pwd, "data", "simulated_arrival.txt")
SIMULATED_ARRIVAL_FILE = ""

# Number of shelters
num_partitions = 4

# The number of iterations when making prediction model
num_iterations = 10

# Fraction (0.0 - 1.0) of the nodes that are assigned from the prediction
# model before it is discarded. When set to 0 the prediction model is
# discarded straight away, useful for one-shot.
prediction_model_cut_off = 0.10

# Alpha value used in one-shot (when restream_batches set to 1)
one_shot_alpha = 0.5

# Number of arrivals to batch before recalculating alpha and restreaming.
# When set to 1, one-shot is used with alpha value from above
restream_batches = 10

# Create virtual nodes based on prediction model
use_virtual_nodes = False

# Virtual nodes: weight given to the edges that connect a real node to the
# virtual node of its predicted partition
virtual_edge_weight = 1.0

# If set, the node weight is set to 100 if the node arrives at the shelter;
# otherwise its weight is set to 0 and the node is skipped (it is not
# removed from the graph). Also enables the edge weight recalculation
# function.
graph_modification_functions = False

# read METIS file
G = shared.read_metis(DATA_FILENAME)

# Alpha value used in prediction model
prediction_model_alpha = G.number_of_edges() * (num_partitions / G.number_of_nodes()**2)

# Order of nodes arriving (node ids in ascending order)
arrival_order = list(range(0, G.number_of_nodes()))
#random.shuffle(arrival_order)

if SIMULATED_ARRIVAL_FILE == "":
    # mark all nodes as needing a shelter
    simulated_arrival_list = [1]*G.number_of_nodes()
else:
    with open(SIMULATED_ARRIVAL_FILE, "r") as f:
        simulated_arrival_list = [int(line.rstrip('\n')) for line in f]

    # Later cells index this list by node id; fail here with a clear message
    # instead of an IndexError in the middle of the simulation.
    if len(simulated_arrival_list) < G.number_of_nodes():
        raise ValueError(
            "{} lists {} arrivals but the graph has {} nodes".format(
                SIMULATED_ARRIVAL_FILE,
                len(simulated_arrival_list),
                G.number_of_nodes()))

print("Graph loaded...")
print("Nodes: {}".format(G.number_of_nodes()))
print("Edges: {}".format(G.number_of_edges()))
if nx.is_directed(G):
    print("Graph is directed")
else:
    print("Graph is undirected")

Graph loaded...
Nodes: 1000
Edges: 2939
Graph is undirected


In [2]:
# NOTE(review): %pylab star-imports numpy and matplotlib names into the
# notebook namespace (see the "Populating the interactive namespace" message
# it prints) and can silently shadow existing names. Consider replacing it
# with `%matplotlib inline` plus explicit imports once it is confirmed that
# no later cell relies on the injected names.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
import pyximport; pyximport.install()
import fennel
import numpy as np

# Sentinel stored in `assignments` / `fixed` for a node that has no
# partition yet / has not been fixed.
UNMAPPED = -1

# reset: every node starts unassigned and not fixed
assignments = np.repeat(np.int32(UNMAPPED), G.number_of_nodes())
fixed = np.repeat(np.int32(UNMAPPED), G.number_of_nodes())

print("PREDICTION MODEL")
print("----------------\n")

if PREDICTION_MODEL:
    # Load a ready-made model; each line of the file is converted to one
    # int32 entry (presumably the partition id of the node on that line).
    with open(PREDICTION_MODEL, "r") as inf:
        assignments = np.fromiter(inf.readlines(), dtype=np.int32)

else:
    assignments = fennel.generate_prediction_model(G, num_iterations, num_partitions, assignments, fixed, prediction_model_alpha)

# Pass num_partitions explicitly, as every other scoring call in this
# notebook does, so all reported scores are computed the same way.
x = shared.score(G, assignments, num_partitions)
print("WASTE\t\tCUT RATIO\tMISMATCH")
print("{0:.5f}\t\t{1:.10f}\t{2}".format(x[0], x[1], x[2]))

print("\nAssignments:")
shared.fixed_width_print(assignments)

# number of nodes whose assignment is fixed (flag value 1)
nodes_fixed = int(np.count_nonzero(fixed == 1))
print("\nFixed: {}".format(nodes_fixed))

shared.print_partitions(G, assignments, num_partitions)

PREDICTION MODEL
----------------

WASTE		CUT RATIO	MISMATCH
0.00000		0.1224906431	360

Assignments:
[ 0  1  2  0  1  0  1  3  0  0  2  0  0  1  0  2  3  2  2  0  0  3  2  3  0  1  3  1  2  0  0  2  0  1  2  3  0  3  2  1  2  0  2  1  0  3  1  3  3  2  0  1  2  0  0  2  3  0  1  0  2  1  3  1  1  1  1  2  3  2  1  0  0  1  0  3  1  1  0  1  2  3  1  0  1  2  1  2  3  0  1  3  3  0  1  2  3  0  0  1  1  2  3  1  1  0  1  0  2  0  2  1  2  2  3  1  3  1  0  2  1  0  0  3  1  1  3  2  2  3  0  0  1  0  0  3  1  2  3  1  1  2  3  2  3  2  1  2  0  0  3  1  1  2  1  2  1  3  3  0  1  3  0  3  0  2  3  2  3  1  0  1  0  1  2  1  0  2  1  1  2  0  0  0  1  0  2  1  1  2  3  2  2  0  0  3  1  2  3  0  2  1  0  3  2  2  2  1  2  2  1  0  3  0  3  0  0  1  2  3  0  3  1  0  2  2  2  2  1  2  3  3  1  1  3  3  2  1  0  3  2  2  2  3  3  2  2  1  1  3  0  2  0  3  3  0  1  2  1  1  3  1  0  2  3  1  3  2  3  3  1  0  2  3  0  1  0  1  2  3  2  3  1  2  0  3  2  0  0  1  1  3  2  0  0  3  3  1  3  

In [4]:
if use_virtual_nodes:
    print("Creating virtual nodes and assigning edges based on prediction model")

    # Number of real nodes, captured before the graph is extended below.
    num_real_nodes = G.number_of_nodes()

    # create virtual nodes: one per partition, numbered after the real nodes
    virtual_nodes = list(range(num_real_nodes, num_real_nodes + num_partitions))
    print("\nVirtual nodes:")
    print(virtual_nodes)

    # create virtual edges: connect every real node to the virtual node of
    # the partition the prediction model assigned it to
    virtual_edges = [(n, virtual_nodes[assignments[n]]) for n in range(num_real_nodes)]

    # extend assignments: virtual node k belongs to partition k and is
    # marked as fixed (1)
    assignments = np.append(assignments, np.arange(num_partitions, dtype=np.int32))
    fixed = np.append(fixed, np.ones(num_partitions, dtype=np.int32))

    G.add_nodes_from(virtual_nodes, weight=1)
    G.add_edges_from(virtual_edges, weight=virtual_edge_weight)

    print("\nAssignments:")
    shared.fixed_width_print(assignments)
    print("Last {} nodes are virtual nodes.".format(num_partitions))

In [5]:
# Number of arrivals that keep the partition given by the prediction model.
cut_off_value = int(prediction_model_cut_off * G.number_of_nodes())
if prediction_model_cut_off == 0:
    print("Discarding prediction model\n")
else:
    print("Assign first {} arrivals using prediction model, then discard\n".format(cut_off_value))

# fix arrivals
nodes_arrived = []
print("WASTE\t\tCUT RATIO\tMISMATCH")
for a in arrival_order:
    # check if node needs a shelter
    if simulated_arrival_list[a] == 0:
        continue

    # Stop once enough arrivals have been fixed. Counting the arrivals
    # themselves (rather than every entry of `fixed` equal to 1) keeps
    # pre-fixed virtual nodes from using up part of the quota and avoids
    # rescanning `fixed` on every iteration.
    if len(nodes_arrived) >= cut_off_value:
        break
    fixed[a] = 1
    nodes_arrived.append(a)

    # make a subgraph of all arrived nodes
    Gsub = G.subgraph(nodes_arrived)

    # NOTE(review): the recorded output shows a cut ratio of nan for the
    # first rows, together with a division warning raised while scoring -
    # presumably because the subgraph has no edges yet.
    x = shared.score(Gsub, assignments, num_partitions)
    print("{0:.5f}\t\t{1:.10f}\t{2}".format(x[0], x[1], x[2]))

# remove nodes not fixed, ie. discard prediction model
for i in np.flatnonzero(fixed == UNMAPPED):
    assignments[i] = UNMAPPED

print("WASTE\t\tCUT RATIO\tMISMATCH")
x = shared.score(G, assignments, num_partitions)
print("{0:.5f}\t\t{1:.10f}\t{2}".format(x[0], x[1], x[2]))

print("\nAssignments:")
shared.fixed_width_print(assignments)

# number of entries flagged as fixed (includes virtual nodes, if any)
nodes_fixed = int(np.count_nonzero(fixed == 1))
print("\nFixed: {}".format(nodes_fixed))

shared.print_partitions(G, assignments, num_partitions)

Assign first 100 arrivals using prediction model, then discard

WASTE		CUT RATIO	MISMATCH
0.00300		nan	0
0.00200		nan	0
0.00100		nan	0
0.00400		nan	0
0.00300		nan	0
0.00600		nan	0
0.00500		nan	0
0.00400		nan	0
0.00700		nan	0
0.01000		nan	0
0.00900		nan	0
0.01200		nan	0
0.01500		nan	0
0.01400		nan	0
0.01700		0.0000000000	0
0.01600		0.0000000000	0
0.01500		0.0000000000	0
0.01400		0.0000000000	0
0.01300		0.0000000000	0
0.01600		0.0000000000	0
0.01900		0.0000000000	0
0.01800		0.0000000000	0
0.01700		0.0000000000	0
0.01600		0.0000000000	0
0.01900		0.0000000000	0
0.01800		0.0000000000	0
0.01700		0.0000000000	0
0.01600		0.0000000000	0
0.01500		0.0000000000	0
0.01800		0.0000000000	0
0.02100		0.0000000000	0
0.02000		0.0000000000	0
0.02300		0.0000000000	0
0.02200		0.0000000000	0
0.02100		0.0000000000	0
0.02000		0.0000000000	0
0.02300		0.0000000000	0
0.02200		0.0000000000	0
0.02100		0.0000000000	0
0.02000		0.0000000000	0
0.01900		0.0000000000	0
0.02200		0.0000000000	0
0.02100		0.0000000000	0
0.02

  cut_ratio = mismatch / len(graph.edges())


In [6]:
# Announce which strategy the assignment loop below uses: batches larger
# than one are restreamed, a batch size of exactly one means one-shot.
if restream_batches != 1:
    print("Assigning in batches of {}".format(restream_batches))
    print("--------------------------------\n")
else:
    print("One-shot assignment mode")
    print("------------------------\n")

def edge_expansion(G):
    """Rescale every edge weight by the weights of its two end nodes.

    For an edge (left, right) whose original weight is ``w`` the new weight
    is ``(weight(left) * w) * (weight(right) * w)``. The original weight is
    stored in the edge attribute ``weight_orig`` the first time an edge is
    processed, so repeated calls always scale from the original value
    instead of compounding.

    The function works with both the NetworkX 1.x API and the 2.x+ API
    (``edges_iter`` and ``G.node`` no longer exist in recent releases).

    Parameters
    ----------
    G : graph
        Graph whose nodes and edges all carry a numeric ``weight`` attribute.

    Returns
    -------
    The same graph object; edge attributes are modified in place.

    Raises
    ------
    KeyError
        If a node or a not-yet-processed edge has no ``weight`` attribute.
    """
    # `G.node` is the node-attribute mapping in older NetworkX releases;
    # newer ones only provide the subscriptable `G.nodes` view.
    node_attrs = G.node if hasattr(G, "node") else G.nodes

    # edges(data=True) hands out the live attribute dicts, so the updates
    # below are written straight into the graph.
    for left, right, attrs in G.edges(data=True):
        if 'weight_orig' not in attrs:
            # first visit: remember the untouched weight
            attrs['weight_orig'] = attrs['weight']
        edge_weight = attrs['weight_orig']

        # new edge weight
        attrs['weight'] = (float(node_attrs[left]['weight']) * edge_weight) * (float(node_attrs[right]['weight']) * edge_weight)

    return G
    
def _restream_batch(G, assignments, fixed, nodes_arrived, batch):
    """Restream one batch of arrivals and mark its nodes as fixed.

    Alpha is recalculated from the subgraph of all nodes that have arrived
    so far (including ``batch``), the assignments are regenerated with
    ``fennel.generate_prediction_model`` and one score line is printed.

    ``fixed`` and ``nodes_arrived`` are updated in place; the regenerated
    assignments are returned.
    """
    # make a subgraph of all arrived nodes
    Gsub = G.subgraph(nodes_arrived + batch)

    # recalculate alpha
    if Gsub.is_directed():
        # as it's a directed graph, edges_arrived is actually double, so divide by 2
        edges_arrived = Gsub.number_of_edges() / 2
    else:
        edges_arrived = Gsub.number_of_edges()
    nodes_fixed = int(np.count_nonzero(fixed == 1))
    alpha = (edges_arrived) * (num_partitions / (nodes_fixed + len(batch))**2)

    assignments = fennel.generate_prediction_model(G,
                                                   num_iterations,
                                                   num_partitions,
                                                   assignments,
                                                   fixed,
                                                   alpha)

    # the batch keeps the partitions it was just given
    for n in batch:
        fixed[n] = 1
        nodes_arrived.append(n)

    x = shared.score(Gsub, assignments, num_partitions)
    print("{0:.5f}\t\t{1:.10f}\t{2}\t\t{3:.10f}".format(x[0], x[1], x[2], alpha))
    return assignments


batch_arrived = []
print("WASTE\t\tCUT RATIO\tMISMATCH\tALPHA")
for a in arrival_order:

    # check if node is already arrived
    if fixed[a] == 1:
        continue

    # GRAPH MODIFICATION FUNCTIONS
    # NOTE(review): simulated_arrival_list is only consulted when this flag
    # is set; otherwise every remaining node is treated as an arrival -
    # confirm that this is intended.
    if graph_modification_functions:

        # `G.node` only exists in older NetworkX releases; newer ones
        # expose the same mapping as `G.nodes`.
        node_attrs = G.node if hasattr(G, "node") else G.nodes

        # Set high node weight for those that need a shelter, and reduce for those that don't
        if simulated_arrival_list[a] == 1:
            node_attrs[a]['weight'] = 100
        else:
            node_attrs[a]['weight'] = 0
            ## XXX remove node from graph
            continue

        G = edge_expansion(G)

    # one-shot assignment: assign each node as it arrives
    if restream_batches == 1:
        alpha = one_shot_alpha
        partition_votes = fennel.get_votes(G, a, num_partitions, assignments)
        assignments[a] = fennel.get_assignment(G, a, num_partitions, assignments, partition_votes, alpha, 0)
        fixed[a] = 1
        nodes_arrived.append(a)

        # make a subgraph of all arrived nodes
        Gsub = G.subgraph(nodes_arrived)

        x = shared.score(Gsub, assignments, num_partitions)
        print("{0:.5f}\t\t{1:.10f}\t{2}\t\t{3:.10f}".format(x[0], x[1], x[2], alpha))
        continue

    batch_arrived.append(a)

    if restream_batches == len(batch_arrived):
        assignments = _restream_batch(G, assignments, fixed, nodes_arrived, batch_arrived)
        batch_arrived = []

# Flush a trailing partial batch. Without this, arrivals left over when the
# number of remaining nodes is not a multiple of restream_batches would
# never be fixed and would lose their assignment in the clean-up below.
if batch_arrived:
    assignments = _restream_batch(G, assignments, fixed, nodes_arrived, batch_arrived)
    batch_arrived = []

# remove nodes not fixed
for i in np.flatnonzero(fixed == UNMAPPED):
    assignments[i] = UNMAPPED

print("\nAssignments:")
shared.fixed_width_print(assignments)

nodes_fixed = int(np.count_nonzero(fixed == 1))
print("\nFixed: {}".format(nodes_fixed))

shared.print_partitions(G, assignments, num_partitions)

Assigning in batches of 10
--------------------------------

WASTE		CUT RATIO	MISMATCH	ALPHA
0.01800		0.0000000000	0		0.0102479339
0.02000		0.0263157895	1		0.0105555556
0.01800		0.0232558140	1		0.0101775148
0.01600		0.0196078431	1		0.0104081633
0.01800		0.0303030303	2		0.0117333333
0.02400		0.0259740260	2		0.0120312500
0.01800		0.0348837209	3		0.0119031142
0.02400		0.0500000000	5		0.0123456790
0.02600		0.0614035088	7		0.0126315789
0.02400		0.0737704918	9		0.0122000000
0.02200		0.0724637681	10		0.0125170068
0.02800		0.0784313725	12		0.0126446281
0.02200		0.0807453416	13		0.0121739130
0.02400		0.0818713450	14		0.0118750000
0.02200		0.0833333333	15		0.0115200000
0.02000		0.0932642487	18		0.0114201183
0.01400		0.0931372549	19		0.0111934156
0.02000		0.0941704036	21		0.0113775510
0.01800		0.1193415638	29		0.0115576694
0.01600		0.1167315175	30		0.0114222222
0.02600		0.1127272727	31		0.0114464100
0.02000		0.1056105611	32		0.0118359375
0.01400		0.0993788820	32		0.0118273646
0.00800		0.094955489

In [7]:
if use_virtual_nodes:
    # Two-line size summary printed before and after the removal.
    size_report = "Nodes: {}\nEdges: {}"

    print("Remove virtual nodes")

    print("\nCurrent graph:")
    print(size_report.format(G.number_of_nodes(), G.number_of_edges()))

    # Drop the virtual nodes from the graph and the matching entries from
    # both per-node arrays.
    G.remove_nodes_from(virtual_nodes)
    assignments, fixed = (np.delete(per_node, virtual_nodes) for per_node in (assignments, fixed))

    print("\nVirtual nodes removed:")
    print(size_report.format(G.number_of_nodes(), G.number_of_edges()))

In [8]:
# Add partition attribute to nodes
for i in range(0, len(assignments)):
    G.add_nodes_from([i], partition=str(assignments[i]))

# Freeze Graph from further modification
G = nx.freeze(G)

In [9]:
import os
import datetime

# Every file written for this run is tagged with the current time (HHMMSS).
timestamp = datetime.datetime.now().strftime('%H%M%S')
data_filename = os.path.splitext(os.path.basename(DATA_FILENAME))[0]
data_filename = "{}-{}".format(data_filename, timestamp)

# Columns that describe the run configuration ...
config_fieldnames = [
    "file",
    "num_partitions",
    "num_iterations",
    "prediction_model_cut_off",
    "one_shot_alpha",
    "restream_batches",
    "use_virtual_nodes",
    "virtual_edge_weight",
]
# ... followed by the columns that hold the measured quality metrics.
metric_fieldnames = [
    "edges_cut",
    "waste",
    "cut_ratio",
    "communication_volume",
    "network_permanence",
    "Q",
    "NQ",
    "Qds",
    "intraEdges",
    "interEdges",
    "intraDensity",
    "modularity degree",
    "conductance",
    "expansion",
    "contraction",
    "fitness",
    "QovL",
]
graph_fieldnames = config_fieldnames + metric_fieldnames

graph_metrics = {
    "file": timestamp,
    "num_partitions": num_partitions,
    "num_iterations": num_iterations,
    "prediction_model_cut_off": prediction_model_cut_off,
    "one_shot_alpha": one_shot_alpha,
    "restream_batches": restream_batches,
    "use_virtual_nodes": use_virtual_nodes,
    "virtual_edge_weight": virtual_edge_weight,
}

# Base name shared by all files written for the complete graph.
all_nodes_label = "{}-all".format(data_filename)

print("Complete graph with {} nodes".format(G.number_of_nodes()))
file_maxperm, file_oslom = shared.write_graph_files(OUTPUT_DIRECTORY, all_nodes_label, G)

# original scoring algorithm
scoring = shared.score(G, assignments, num_partitions)
graph_metrics["waste"] = scoring[0]
graph_metrics["cut_ratio"] = scoring[1]

# edges cut and communication volume
edges_cut, steps = shared.base_metrics(G)
graph_metrics["edges_cut"] = edges_cut
graph_metrics["communication_volume"] = steps

# MaxPerm
max_perm = shared.run_max_perm(file_maxperm)
graph_metrics["network_permanence"] = max_perm

# Community Quality metrics
community_metrics = shared.run_community_metrics(OUTPUT_DIRECTORY, all_nodes_label, file_oslom)
graph_metrics.update(community_metrics)

# Report the configuration first, then the metrics.
for heading, fieldnames in (("Config", config_fieldnames), ("Metrics", metric_fieldnames)):
    print("\n{}".format(heading))
    print("-------\n")
    for f in fieldnames:
        print("{}: {}".format(f, graph_metrics[f]))

# write metrics to CSV
metrics_filename = os.path.join(OUTPUT_DIRECTORY, "metrics.csv")
shared.write_metrics_csv(metrics_filename, graph_fieldnames, graph_metrics)

Complete graph with 1000 nodes
Writing GML file: /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-215743-all-graph.gml
Writing assignments: /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-215743-all-assignments.txt
Writing edge list (for MaxPerm): /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-215743-all-edges-maxperm.txt
Writing edge list (for OSLOM): /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-215743-all-edges-oslom.txt

Config
-------

file: 215743
num_partitions: 4
num_iterations: 10
prediction_model_cut_off: 0.1
one_shot_alpha: 0.5
restream_batches: 10
use_virtual_nodes: False
virtual_edge_weight: 1.0

Metrics
-------

edges_cut: 341
waste: 0.0
cut_ratio: 0.11602585913576047
communication_volume: 435
network_permanence: 0.368160
Q: -0.0012827457612951344
NQ: -71.34113442306348
Qds: -0.9999999999999944
intraEdges: 0.0
interEdges: 5.925403225806452
intraDensity: 0.0
modularity

In [10]:
partition_metrics = {}

# Column order of the per-partition CSV and of the printed report.
partition_fieldnames = ["file", "partition", "network_permanence"]
partition_fieldnames += [
    "Q",
    "NQ",
    "Qds",
    "intraEdges",
    "interEdges",
    "intraDensity",
    "modularity degree",
    "conductance",
    "expansion",
    "contraction",
    "fitness",
    "QovL",
]

for p in range(num_partitions):
    # Base name shared by all files written for this partition.
    partition_label = "{}-p{}".format(data_filename, p)

    partition_metrics = {"file": timestamp, "partition": p}

    # Subgraph induced by the nodes assigned to partition p.
    nodes = [node_id for node_id, part in enumerate(assignments) if part == p]
    Gsub = G.subgraph(nodes)
    print("\nPartition {} with {} nodes".format(p, Gsub.number_of_nodes()))
    print("-----------------------------\n")

    file_maxperm, file_oslom = shared.write_graph_files(OUTPUT_DIRECTORY, partition_label, Gsub)

    # MaxPerm
    max_perm = shared.run_max_perm(file_maxperm)
    partition_metrics["network_permanence"] = max_perm

    # Community Quality metrics
    community_metrics = shared.run_community_metrics(OUTPUT_DIRECTORY, partition_label, file_oslom)
    partition_metrics.update(community_metrics)

    print("\nMetrics")
    for f in partition_fieldnames:
        print("{}: {}".format(f, partition_metrics[f]))

    # write metrics to CSV (every partition goes to the same file)
    metrics_filename = os.path.join(OUTPUT_DIRECTORY, "metrics-partitions.csv")
    shared.write_metrics_csv(metrics_filename, partition_fieldnames, partition_metrics)


Partition 0 with 250 nodes
-----------------------------

Writing GML file: /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-215743-p0-graph.gml
Writing assignments: /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-215743-p0-assignments.txt
Writing edge list (for MaxPerm): /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-215743-p0-edges-maxperm.txt
Writing edge list (for OSLOM): /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-215743-p0-edges-oslom.txt

Metrics
file: 215743
partition: 0
network_permanence: 1.000000
Q: -0.005343396263151615
NQ: -18.614258427451404
Qds: -0.999999999999999
intraEdges: 0.0
interEdges: 5.211382113821138
intraDensity: 0.0
modularity degree: -1282.0
conductance: 1.0
expansion: 5.211382113821138
contraction: 0.0
fitness: 0.0
QovL: -8.829724805260795E-8

Partition 1 with 250 nodes
-----------------------------

Writing GML file: /home/sami/repos/smbwebs/graph-pa