In [1]:
import os
import shared
import networkx as nx

pwd = %pwd

# Input graph (read with shared.read_metis) and the directory that receives
# every generated graph/metrics file.
DATA_FILENAME = os.path.join(pwd, "data", "oneshot_fennel_weights.txt")
OUTPUT_DIRECTORY = os.path.join(pwd, "output")

# Optional file holding a precomputed prediction model (read line by line
# into an int32 array). When left empty, the prediction model is generated
# with FENNEL instead.
PREDICTION_MODEL = ""

# File containing simulated arrivals. This is used to simulate nodes
# arriving at the shelter. Each node is represented by its line number:
# a value of 1 marks the node as arrived; a value of 0 marks the node as
# not arrived / not needing a shelter.
# When left empty, every node is marked as needing a shelter.
#SIMULATED_ARRIVAL_FILE = os.path.join(pwd, "data", "simulated_arrival.txt")
SIMULATED_ARRIVAL_FILE = ""

# File containing the geographic location of each node.
# Only passed to shared.gam_predict (see alter_node_weight_to_gam_prediction).
POPULATION_LOCATION_FILE = os.path.join(pwd, "data", "population_location.csv")

# Number of shelters (= number of partitions)
num_partitions = 4

# The number of iterations when making the prediction model
num_iterations = 10

# Fraction (0.0 - 1.0) of the nodes that are assigned straight from the
# prediction model before the model is discarded.
# When set to 0, the prediction model is discarded entirely; useful for one-shot.
prediction_model_cut_off = 0.10

# Alpha value used in one-shot mode (i.e. when restream_batches is set to 1)
one_shot_alpha = 0.5

# Number of arrivals to batch before recalculating alpha and restreaming.
# When set to 1, one-shot is used with the alpha value from above.
restream_batches = 10

# Create one virtual node per partition and connect every node to the
# virtual node of the partition the prediction model assigned it to
use_virtual_nodes = False

# Virtual nodes: weight given to each node-to-virtual-node edge
virtual_edge_weight = 1.0


####
# GRAPH MODIFICATION FUNCTIONS

# Master switch: preserves the original node/edge weights, removes nodes
# whose simulated arrival value is 0, and enables the edge weight
# calculation function (edge_expansion).
graph_modification_functions = True

# If set, the node weight is set to 100 when the node arrives at the shelter.
# (Removal of nodes that do not arrive is controlled by
# graph_modification_functions, not by this flag.)
alter_arrived_node_weight_to_100 = False

# Uses generalized additive models from R to generate a prediction for nodes
# that have not arrived. This sets the node weight of unarrived nodes to the
# prediction given by a GAM.
# Needs POPULATION_LOCATION_FILE to be set.
alter_node_weight_to_gam_prediction = False

# Multiplier applied to recalculated edge weights as a way to change the
# emphasis of the prediction model; 1.0 leaves the weights untouched.
# NOTE(review): this was described as targeting nodes that haven't arrived,
# but edge_expansion applies it to edges touching nodes that HAVE arrived --
# confirm which of the two is intended.
prediction_model_emphasis = 1.0





# Load the population graph from its METIS description
G = shared.read_metis(DATA_FILENAME)

# FENNEL alpha for the prediction model: edges * (partitions / nodes^2)
prediction_model_alpha = G.number_of_edges() * (num_partitions / G.number_of_nodes()**2)

# Nodes arrive in index order: 0, 1, ..., N-1
arrival_order = list(range(G.number_of_nodes()))

# Arrival order should not be shuffled if using GAM to alter node weights
#random.shuffle(arrival_order)

if SIMULATED_ARRIVAL_FILE != "":
    # one integer flag per line; the line number is the node id
    with open(SIMULATED_ARRIVAL_FILE, "r") as f:
        simulated_arrival_list = [int(row.rstrip('\n')) for row in f]
else:
    # no arrival file supplied: every node needs a shelter
    simulated_arrival_list = [1] * G.number_of_nodes()

# Short summary of what was loaded
print("Graph loaded...")
print("Nodes: {}".format(G.number_of_nodes()))
print("Edges: {}".format(G.number_of_edges()))
print("Graph is directed" if nx.is_directed(G) else "Graph is undirected")

Graph loaded...
Nodes: 1000
Edges: 2939
Graph is undirected


In [2]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
import pyximport; pyximport.install()
import fennel
import numpy as np

# Sentinel stored in `assignments` / `fixed` for "no value yet"
UNMAPPED = -1

# Start from a clean slate: nothing assigned, nothing fixed
assignments = np.full(G.number_of_nodes(), UNMAPPED, dtype=np.int32)
fixed = np.full(G.number_of_nodes(), UNMAPPED, dtype=np.int32)

print("PREDICTION MODEL")
print("----------------\n")

if not PREDICTION_MODEL:
    # no model supplied: build one with FENNEL
    assignments = fennel.generate_prediction_model(G, num_iterations, num_partitions, assignments, fixed, prediction_model_alpha)

else:
    # a model file was supplied: read it line by line into an int32 array
    with open(PREDICTION_MODEL, "r") as inf:
        assignments = np.fromiter(inf.readlines(), dtype=np.int32)

# Score the model and report it
x = shared.score(G, assignments)
print("WASTE\t\tCUT RATIO\tMISMATCH")
print("{0:.5f}\t\t{1:.10f}\t{2}".format(x[0], x[1], x[2]))

print("\nAssignments:")
shared.fixed_width_print(assignments)

# Number of entries currently flagged as fixed
nodes_fixed = sum(1 for flag in fixed if flag == 1)
print("\nFixed: {}".format(nodes_fixed))

shared.print_partitions(G, assignments, num_partitions)

PREDICTION MODEL
----------------

WASTE		CUT RATIO	MISMATCH
0.00000		0.1224906431	360

Assignments:
[ 0  1  2  0  1  0  1  3  0  0  2  0  0  1  0  2  3  2  2  0  0  3  2  3  0  1  3  1  2  0  0  2  0  1  2  3  0  3  2  1  2  0  2  1  0  3  1  3  3  2  0  1  2  0  0  2  3  0  1  0  2  1  3  1  1  1  1  2  3  2  1  0  0  1  0  3  1  1  0  1  2  3  1  0  1  2  1  2  3  0  1  3  3  0  1  2  3  0  0  1  1  2  3  1  1  0  1  0  2  0  2  1  2  2  3  1  3  1  0  2  1  0  0  3  1  1  3  2  2  3  0  0  1  0  0  3  1  2  3  1  1  2  3  2  3  2  1  2  0  0  3  1  1  2  1  2  1  3  3  0  1  3  0  3  0  2  3  2  3  1  0  1  0  1  2  1  0  2  1  1  2  0  0  0  1  0  2  1  1  2  3  2  2  0  0  3  1  2  3  0  2  1  0  3  2  2  2  1  2  2  1  0  3  0  3  0  0  1  2  3  0  3  1  0  2  2  2  2  1  2  3  3  1  1  3  3  2  1  0  3  2  2  2  3  3  2  2  1  1  3  0  2  0  3  3  0  1  2  1  1  3  1  0  2  3  1  3  2  3  3  1  0  2  3  0  1  0  1  2  3  2  3  1  2  0  3  2  0  0  1  1  3  2  0  0  3  3  1  3  

In [4]:
# Optionally anchor every node to a "virtual" node that stands for the
# partition the prediction model put it in.
if use_virtual_nodes:
    print("Creating virtual nodes and assigning edges based on prediction model")

    # create virtual nodes: one per partition, numbered directly after the
    # real nodes so that they line up with the extended arrays below
    num_real_nodes = G.number_of_nodes()
    virtual_nodes = list(range(num_real_nodes, num_real_nodes + num_partitions))
    print("\nVirtual nodes:")
    # The header used to be printed with nothing underneath it.
    print(virtual_nodes)

    # create virtual edges: connect each real node to the virtual node of
    # its predicted partition
    virtual_edges = [(n, virtual_nodes[assignments[n]]) for n in range(num_real_nodes)]

    # extend assignments: virtual node k belongs to partition k and is
    # flagged as fixed from the start
    assignments = np.append(assignments, np.array(list(range(0, num_partitions)), dtype=np.int32))
    fixed = np.append(fixed, np.array([1] * num_partitions, dtype=np.int32))

    G.add_nodes_from(virtual_nodes, weight=1)
    G.add_edges_from(virtual_edges, weight=virtual_edge_weight)

    print("\nAssignments:")
    shared.fixed_width_print(assignments)
    print("Last {} nodes are virtual nodes.".format(num_partitions))

In [5]:
# Number of arrivals that are assigned straight from the prediction model.
cut_off_value = int(prediction_model_cut_off * G.number_of_nodes())
if prediction_model_cut_off == 0:
    print("Discarding prediction model\n")
else:
    print("Assign first {} arrivals using prediction model, then discard\n".format(cut_off_value))

# fix arrivals
nodes_arrived = []
# Count the already-fixed entries once and keep a running tally, instead of
# rescanning the whole `fixed` array on every loop iteration.
nodes_fixed = sum(1 for o in fixed if o == 1)
for a in arrival_order:
    # check if node needs a shelter
    if simulated_arrival_list[a] == 0:
        continue

    # set 100% node weight for those that need a shelter
    if alter_arrived_node_weight_to_100:
        G.node[a]['weight'] = 100

    # stop as soon as the cut-off has been reached
    if nodes_fixed >= cut_off_value:
        break
    if fixed[a] != 1:
        nodes_fixed += 1
    fixed[a] = 1
    nodes_arrived.append(a)

# remove nodes not fixed, ie. discard prediction model
for i in range(0, len(assignments)):
    if fixed[i] == UNMAPPED:
        assignments[i] = UNMAPPED

print("WASTE\t\tCUT RATIO\tMISMATCH")
x = shared.score(G, assignments, num_partitions)
print("{0:.5f}\t\t{1:.10f}\t{2}".format(x[0], x[1], x[2]))

print("\nAssignments:")
shared.fixed_width_print(assignments)

# the running tally already equals the number of fixed entries
print("\nFixed: {}".format(nodes_fixed))

shared.print_partitions(G, assignments, num_partitions)

Assign first 100 arrivals using prediction model, then discard

WASTE		CUT RATIO	MISMATCH
0.02400		0.1861177271	547

Assignments:
[ 0  1  2  0  1  0  1  3  0  0  2  0  0  1  0  2  3  2  2  0  0  3  2  3  0  1  3  1  2  0  0  2  0  1  2  3  0  3  2  1  2  0  2  1  0  3  1  3  3  2  0  1  2  0  0  2  3  0  1  0  2  1  3  1  1  1  1  2  3  2  1  0  0  1  0  3  1  1  0  1  2  3  1  0  1  2  1  2  3  0  1  3  3  0  1  2  3  0  0  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

In [6]:
# Announce which assignment strategy the loop below will use
if restream_batches != 1:
    print("Assigning in batches of {}".format(restream_batches))
    print("--------------------------------\n")
else:
    print("One-shot assignment mode")
    print("------------------------\n")

def edge_expansion(G):
    """Recalculate every edge weight of ``G`` from the current node weights.

    For an edge (left, right) with preserved weight ``w`` (the edge's
    ``weight_orig`` attribute) the new weight is::

        (weight(left) * w) * (weight(right) * w)

    If at least one endpoint is in the global ``nodes_arrived`` list, the
    result is additionally multiplied by the global
    ``prediction_model_emphasis``.

    Parameters:
        G: graph exposing ``edges_iter(data=True)`` and a ``node`` attribute
           mapping; every edge must carry ``weight_orig`` and both endpoints
           must carry ``weight`` (a KeyError is raised otherwise).

    Returns:
        The same graph object, with each edge's ``weight`` updated in place.
    """
    # Snapshot the arrived nodes as a set so each membership test is O(1)
    # rather than a scan of the list for every single edge.
    arrived = set(nodes_arrived)

    # Update edge weights for nodes that have an assigned probability of displacement
    for left, right, attrs in G.edges_iter(data=True):
        edge_weight = attrs['weight_orig']

        # new edge weight
        new_weight = (float(G.node[left]['weight']) * edge_weight) * (float(G.node[right]['weight']) * edge_weight)

        if left in arrived or right in arrived:
            # change the emphasis of the prediction model
            # NOTE(review): the configuration describes this factor as applying
            # to nodes that have NOT arrived -- confirm the condition's polarity.
            new_weight = new_weight * prediction_model_emphasis

        attrs['weight'] = new_weight

    return G

# Keep a copy of every node and edge weight under 'weight_orig' so that the
# graph modification functions can always start from the unmodified values.
if graph_modification_functions:
    node_weights = {node_id: attrs['weight'] for node_id, attrs in G.nodes_iter(data=True)}
    nx.set_node_attributes(G, 'weight_orig', node_weights)

    edge_weights = {(left, right): attrs['weight'] for left, right, attrs in G.edges_iter(data=True)}
    nx.set_edge_attributes(G, 'weight_orig', edge_weights)

    
# Stream the remaining arrivals. Depending on `restream_batches` each node is
# either assigned immediately (one-shot) or collected into a batch; a full
# batch triggers an alpha recalculation and a FENNEL restream.
# NOTE(review): a trailing batch holding fewer than `restream_batches` nodes is
# never restreamed, so those nodes end up unassigned -- confirm this is intended.
batch_arrived = []
print("WASTE\t\tCUT RATIO\tMISMATCH\tALPHA")
for a in arrival_order:

    # check if node is already arrived
    if fixed[a] == 1:
        continue

    # GRAPH MODIFICATION FUNCTIONS
    if graph_modification_functions:

        # remove nodes that don't need a shelter
        if simulated_arrival_list[a] == 0:
            G.remove_node(a)
            continue

        # set 100% node weight for those that need a shelter
        if alter_arrived_node_weight_to_100:
            G.node[a]['weight'] = 100

    # one-shot assigment: assign each node as it arrives
    if restream_batches == 1:
        alpha = one_shot_alpha
        partition_votes = fennel.get_votes(G, a, num_partitions, assignments)
        assignments[a] = fennel.get_assignment(G, a, num_partitions, assignments, partition_votes, alpha, 0)
        fixed[a] = 1
        nodes_arrived.append(a)

        # make a subgraph of all arrived nodes
        Gsub = G.subgraph(nodes_arrived)

        x = shared.score(Gsub, assignments, num_partitions)
        print("{0:.5f}\t\t{1:.10f}\t{2}\t\t{3:.10f}".format(x[0], x[1], x[2], alpha))
        continue

    batch_arrived.append(a)

    if restream_batches == len(batch_arrived):

        # GRAPH MODIFICATION FUNCTIONS
        if graph_modification_functions:

            # set node weight to prediction generated from a GAM
            if alter_node_weight_to_gam_prediction:
                # Build the list BEFORE testing its length: it used to be read
                # in the condition ahead of its first assignment, which raised
                # NameError as soon as the GAM option was enabled.
                # NOTE(review): `a` is already the last element of
                # batch_arrived, so it is counted twice here; kept as-is so the
                # count handed to shared.gam_predict does not change -- confirm.
                total_arrived = nodes_arrived + batch_arrived + [a]

                # the GAM is only consulted once 160 arrivals have been seen
                # (hard-coded threshold)
                if len(total_arrived) >= 160: # XXX implement >=
                    gam_weights = shared.gam_predict(POPULATION_LOCATION_FILE, len(total_arrived))

                    # set lookup instead of scanning the list for every node
                    arrived_lookup = set(total_arrived)
                    for node in G.nodes_iter():
                        if alter_arrived_node_weight_to_100 and node in arrived_lookup:
                            # arrived nodes keep their weight of 100
                            continue
                        G.node[node]['weight'] = int(gam_weights[node] * 100)

            G = edge_expansion(G)

        # make a subgraph of all arrived nodes
        Gsub = G.subgraph(nodes_arrived + batch_arrived)

        # recalculate alpha
        if Gsub.is_directed():
            # as it's a directed graph, edges_arrived is actually double, so divide by 2
            edges_arrived = Gsub.number_of_edges() / 2
        else:
            edges_arrived = Gsub.number_of_edges()
        nodes_fixed = len([o for o in fixed if o == 1])
        alpha = (edges_arrived) * (num_partitions / (nodes_fixed + len(batch_arrived))**2)

        if alter_node_weight_to_gam_prediction:
            # justification: the gam learns the entire population, so run fennal on entire population
            assignments = fennel.generate_prediction_model(G,
                                                           num_iterations,
                                                           num_partitions,
                                                           assignments,
                                                           fixed,
                                                           alpha)
        else:
            # use the information we have, those that arrived
            assignments = fennel.generate_prediction_model(Gsub,
                                                           num_iterations,
                                                           num_partitions,
                                                           assignments,
                                                           fixed,
                                                           alpha)


        # assign nodes to prediction model
        for n in batch_arrived:
            fixed[n] = 1
            nodes_arrived.append(n)

        x = shared.score(Gsub, assignments, num_partitions)
        print("{0:.5f}\t\t{1:.10f}\t{2}\t\t{3:.10f}".format(x[0], x[1], x[2], alpha))
        batch_arrived = []

# Any entry that was never fixed loses its assignment
for i in range(len(assignments)):
    if fixed[i] != -1:
        continue
    assignments[i] = -1

print("\nAssignments:")
shared.fixed_width_print(assignments)

# Report how many entries ended up fixed
nodes_fixed = sum(1 for flag in fixed if flag == 1)
print("\nFixed: {}".format(nodes_fixed))

shared.print_partitions(G, assignments, num_partitions)

Assigning in batches of 10
--------------------------------

WASTE		CUT RATIO	MISMATCH	ALPHA
0.01800		0.0000000000	0		0.0102479339
0.01200		0.0263157895	1		0.0105555556
0.01000		0.0232558140	1		0.0101775148
0.00800		0.0196078431	1		0.0104081633
0.00200		0.0151515152	1		0.0117333333
0.00400		0.0129870130	1		0.0120312500
0.00200		0.0232558140	2		0.0119031142
0.00400		0.0300000000	3		0.0123456790
0.00600		0.0438596491	5		0.0126315789
0.00000		0.0655737705	8		0.0122000000
0.00200		0.0724637681	10		0.0125170068
0.00000		0.0915032680	14		0.0126446281
0.00600		0.0931677019	15		0.0121739130
0.00000		0.0935672515	16		0.0118750000
0.00200		0.0944444444	17		0.0115200000
0.00000		0.0984455959	19		0.0114201183
0.00200		0.0931372549	19		0.0111934156
0.00000		0.0986547085	22		0.0113775510
0.00600		0.1152263374	28		0.0115576694
0.00400		0.1167315175	30		0.0114222222
0.01400		0.1163636364	32		0.0114464100
0.00400		0.1122112211	34		0.0118359375
0.00600		0.1086956522	35		0.0118273646
0.00800		0.106824925

In [7]:
# Undo the virtual-node step: drop the virtual nodes from the graph and trim
# their entries from the bookkeeping arrays.
if use_virtual_nodes:
    print("Remove virtual nodes")

    # graph size while the virtual nodes are still present
    print("\nCurrent graph:")
    print("Nodes: {}".format(G.number_of_nodes()))
    print("Edges: {}".format(G.number_of_edges()))

    # virtual node ids double as their indices in both arrays
    fixed = np.delete(fixed, virtual_nodes)
    assignments = np.delete(assignments, virtual_nodes)
    G.remove_nodes_from(virtual_nodes)

    # graph size once they are gone
    print("\nVirtual nodes removed:")
    print("Nodes: {}".format(G.number_of_nodes()))
    print("Edges: {}".format(G.number_of_edges()))

In [8]:
# Add partition attribute to nodes
for i in range(0, len(assignments)):
    # Only annotate nodes that are still in the graph. Nodes removed earlier
    # with G.remove_node() used to be silently re-created here, because
    # add_nodes_from() adds any node it does not find (and without a weight).
    if i in G:
        G.node[i]['partition'] = str(assignments[i])

# Remove original node/edge weights (only present when the graph
# modification functions preserved them)
for node in G.nodes_iter(data=True):
    node[1].pop('weight_orig', None)
for edge in G.edges_iter(data=True):
    edge[2].pop('weight_orig', None)

# Freeze Graph from further modification
G = nx.freeze(G)

In [9]:
import os
import datetime

# Every file written by this run is tagged with a time-of-day stamp (HHMMSS).
timestamp = datetime.datetime.now().strftime('%H%M%S')
# Index the splitext() result instead of unpacking the extension into `_`:
# binding `_` at the top level of a cell shadows IPython's last-output variable.
data_filename = os.path.splitext(os.path.basename(DATA_FILENAME))[0]
data_filename += "-" + timestamp

# Run configuration; the metric values are added further down.
graph_metrics = {
    "file": timestamp,
    "num_partitions": num_partitions,
    "num_iterations": num_iterations,
    "prediction_model_cut_off": prediction_model_cut_off,
    "one_shot_alpha": one_shot_alpha,
    "restream_batches": restream_batches,
    "use_virtual_nodes": use_virtual_nodes,
    "virtual_edge_weight": virtual_edge_weight,
}
# The configuration keys above are exactly the leading entries of
# graph_fieldnames; derive the split point from the dict rather than
# hard-coding the index.
num_config_fields = len(graph_metrics)

# Column order of the metrics CSV: configuration first, then metrics.
graph_fieldnames = [
    "file",
    "num_partitions",
    "num_iterations",
    "prediction_model_cut_off",
    "one_shot_alpha",
    "restream_batches",
    "use_virtual_nodes",
    "virtual_edge_weight",
    "edges_cut",
    "waste",
    "cut_ratio",
    "communication_volume",
    "network_permanence",
    "Q",
    "NQ",
    "Qds",
    "intraEdges",
    "interEdges",
    "intraDensity",
    "modularity degree",
    "conductance",
    "expansion",
    "contraction",
    "fitness",
    "QovL",
]

print("Complete graph with {} nodes".format(G.number_of_nodes()))
(file_maxperm, file_oslom) = shared.write_graph_files(OUTPUT_DIRECTORY, "{}-all".format(data_filename), G)

# original scoring algorithm
scoring = shared.score(G, assignments, num_partitions)
graph_metrics.update({
    "waste": scoring[0],
    "cut_ratio": scoring[1],
})

# edges cut and communication volume
edges_cut, steps = shared.base_metrics(G)
graph_metrics.update({
    "edges_cut": edges_cut,
    "communication_volume": steps,
})

# MaxPerm
max_perm = shared.run_max_perm(file_maxperm)
graph_metrics.update({"network_permanence": max_perm})

# Community Quality metrics
community_metrics = shared.run_community_metrics(OUTPUT_DIRECTORY,
                                                 "{}-all".format(data_filename),
                                                 file_oslom)
graph_metrics.update(community_metrics)

print("\nConfig")
print("-------\n")
for f in graph_fieldnames[:num_config_fields]:
    print("{}: {}".format(f, graph_metrics[f]))

print("\nMetrics")
print("-------\n")
for f in graph_fieldnames[num_config_fields:]:
    print("{}: {}".format(f, graph_metrics[f]))

# write metrics to CSV
metrics_filename = os.path.join(OUTPUT_DIRECTORY, "metrics.csv")
shared.write_metrics_csv(metrics_filename, graph_fieldnames, graph_metrics)

Complete graph with 1000 nodes
Writing GML file: /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-091137-all-graph.gml
Writing assignments: /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-091137-all-assignments.txt
Writing edge list (for MaxPerm): /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-091137-all-edges-maxperm.txt
Writing edge list (for OSLOM): /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-091137-all-edges-oslom.txt

Config
-------

file: 091137
num_partitions: 4
num_iterations: 10
prediction_model_cut_off: 0.1
one_shot_alpha: 0.5
restream_batches: 10
use_virtual_nodes: False
virtual_edge_weight: 1.0

Metrics
-------

edges_cut: 571
waste: 0.03600000000000003
cut_ratio: 0.19428376998979244
communication_volume: 649
network_permanence: 0.368160
Q: -0.0012827457612951344
NQ: -71.34113442306348
Qds: -0.9999999999999944
intraEdges: 0.0
interEdges: 5.925403225806452
intraDensity

In [10]:
partition_metrics = {}
# Column order of the per-partition metrics CSV
partition_fieldnames = [
    "file",
    "partition",
    "network_permanence",
    "Q",
    "NQ",
    "Qds",
    "intraEdges",
    "interEdges",
    "intraDensity",
    "modularity degree",
    "conductance",
    "expansion",
    "contraction",
    "fitness",
    "QovL",
]

# Evaluate each partition on its own induced subgraph
for p in range(num_partitions):
    # nodes whose assignment equals this partition
    nodes = [idx for idx, part in enumerate(assignments) if part == p]
    Gsub = G.subgraph(nodes)
    print("\nPartition {} with {} nodes".format(p, Gsub.number_of_nodes()))
    print("-----------------------------\n")

    # fresh record for this partition
    partition_metrics = {
        "file": timestamp,
        "partition": p
    }

    (file_maxperm, file_oslom) = shared.write_graph_files(OUTPUT_DIRECTORY, "{}-p{}".format(data_filename, p), Gsub)

    # MaxPerm
    max_perm = shared.run_max_perm(file_maxperm)
    partition_metrics["network_permanence"] = max_perm

    # Community Quality metrics
    community_metrics = shared.run_community_metrics(OUTPUT_DIRECTORY,
                                                     "{}-p{}".format(data_filename, p),
                                                     file_oslom)
    partition_metrics.update(community_metrics)

    print("\nMetrics")
    for f in partition_fieldnames:
        print("{}: {}".format(f, partition_metrics[f]))

    # append this partition's row to the CSV
    metrics_filename = os.path.join(OUTPUT_DIRECTORY, "metrics-partitions.csv")
    shared.write_metrics_csv(metrics_filename, partition_fieldnames, partition_metrics)


Partition 0 with 245 nodes
-----------------------------

Writing GML file: /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-091137-p0-graph.gml
Writing assignments: /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-091137-p0-assignments.txt
Writing edge list (for MaxPerm): /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-091137-p0-edges-maxperm.txt
Writing edge list (for OSLOM): /home/sami/repos/smbwebs/graph-partitioning/output/oneshot_fennel_weights-091137-p0-edges-oslom.txt

Metrics
file: 091137
partition: 0
network_permanence: 1.000000
Q: -0.005610837744881655
NQ: -20.033273788121495
Qds: -0.9999999999999989
intraEdges: 0.0
interEdges: 4.821576763485477
intraDensity: 0.0
modularity degree: -1162.0
conductance: 1.0
expansion: 4.821576763485477
contraction: 0.0
fitness: 0.0
QovL: -9.660366978670604E-8

Partition 1 with 259 nodes
-----------------------------

Writing GML file: /home/sami/repos/smbwebs/graph-p