In [1]:
import csv
import os
import platform

import networkx as nx
import numpy as np
import pandas as pd

from graph_partitioning import GraphPartitioning, utils

# NOTE(review): `run_metrics` is not read by any cell visible in this notebook;
# presumably a leftover toggle -- confirm before removing.
run_metrics = True

# Column labels for the metric rows returned by the GraphPartitioning calls
# below; later cells pass them as `columns=` when tabulating results.
cols = ["WASTE", "CUT RATIO", "EDGES CUT", "TOTAL COMM VOLUME", "MODULARITY", "LONELINESS", "NETWORK PERMANENCE", "NORM. MUTUAL INFO", "EDGE CUT WEIGHT", "FSCORE", "FSCORE RELABEL IMPROVEMENT"]
# IPython magic: the kernel's current working directory. Every data, output and
# library path in `config` is built relative to it.
pwd = %pwd

# Settings dictionary handed to GraphPartitioning(config). All file paths are
# resolved relative to `pwd` (the notebook's working directory).
config = {
    # Input network file (variant with random edge weights). The commented-out
    # line below points at a sibling "network_1.txt" one directory up.
    "DATA_FILENAME": os.path.join(pwd, "data", "predition_model_tests", "network", "rand_edge_weights", "network_1.txt"),
    #"DATA_FILENAME": os.path.join(pwd, "data", "predition_model_tests", "network", "network_1.txt"),
    "OUTPUT_DIRECTORY": os.path.join(pwd, "output"),

    # Set which algorithm is run for the PREDICTION MODEL.
    # Originally documented options: 'FENNEL' or 'SCOTCH'. This notebook uses
    # 'PATOH' (see the PATOH_* settings further down).
    "PREDICTION_MODEL_ALGORITHM": "PATOH",

    # Alternatively, read input file for prediction model.
    # Set to empty to generate prediction model using algorithm value above.
    "PREDICTION_MODEL": "",

    # Algorithm used for the partitioner; 'PATOH' here, matching the
    # prediction model algorithm above.
    "PARTITIONER_ALGORITHM": "PATOH",

    # File containing simulated arrivals. This is used in simulating nodes
    # arriving at the shelter. Nodes represented by line number; value of
    # 1 represents a node as arrived; value of 0 represents the node as not
    # arrived or needing a shelter.
    "SIMULATED_ARRIVAL_FILE": os.path.join(pwd,
                                           "data",
                                           "predition_model_tests",
                                           "dataset_1_shift_rotate",
                                           "simulated_arrival_list",
                                           "percentage_of_prediction_correct_100",
                                           "arrival_100_1.txt"
                                          ),
    
    # File containing the prediction of a node arriving. This is different to the
    # simulated arrivals, the values in this file are known before the disaster.
    "PREDICTION_LIST_FILE": os.path.join(pwd,
                                         "data",
                                         "predition_model_tests",
                                         "dataset_1_shift_rotate",
                                         "prediction_list",
                                         "prediction_1.txt"
                                        ),

    # File containing the geographic location of each node, in "x,y" format.
    "POPULATION_LOCATION_FILE": os.path.join(pwd,
                                             "data",
                                             "predition_model_tests",
                                             "coordinates",
                                             "coordinates_1.txt"
                                            ),

    # Number of shelters
    "num_partitions": 4,

    # The number of iterations when making prediction model
    "num_iterations": 1,

    # Percentage of prediction model to use before discarding
    # When set to 0, prediction model is discarded, useful for one-shot
    "prediction_model_cut_off": 1.0,

    # Alpha value used in one-shot (when restream_batches set to 1)
    "one_shot_alpha": 0.5,

    # Number of arrivals to batch before recalculating alpha and restreaming.
    # When set to 1, one-shot is used with alpha value from above
    "restream_batches": 1000,

    # When the batch size is reached: if set to True, each node is assigned
    # individually as first in first out. If set to False, the entire batch
    # is processed and emptied before working on the next batch.
    "sliding_window": False,

    # Create virtual nodes based on prediction model
    "use_virtual_nodes": False,

    # Virtual nodes: edge weight
    "virtual_edge_weight": 1.0,

    # Loneliness score parameter. Used when scoring a partition by how many
    # lonely nodes exist.
    "loneliness_score_param": 1.2,

    ####
    # GRAPH MODIFICATION FUNCTIONS

    # Also enables the edge calculation function.
    "graph_modification_functions": True,

    # If set, the node weight is set to 100 if the node arrives at the shelter,
    # otherwise the node is removed from the graph.
    "alter_arrived_node_weight_to_100": False,

    # Uses generalized additive models from R to generate prediction of nodes not
    # arrived. This sets the node weight on unarrived nodes to the prediction
    # given by a GAM.
    # Needs POPULATION_LOCATION_FILE to be set.
    "alter_node_weight_to_gam_prediction": False,

    # Enables edge expansion when graph_modification_functions is set to true
    "edge_expansion_enabled": True,

    # The value of 'k' used in the GAM will be the number of nodes arrived until
    # it reaches this max value.
    "gam_k_value": 100,

    # Alter the edge weight for nodes that haven't arrived. This is a way to
    # de-emphasise the prediction model for the unknown nodes.
    "prediction_model_emphasis": 1.0,
    
    # This applies the prediction_list_file node weights onto the nodes in the graph
    # when the prediction model is being computed and then removes the weights
    # for the cutoff and batch arrival modes
    "apply_prediction_model_weights": True,

    # Path to the SCOTCH shared library: the bundled macOS dylib when
    # platform.system() reports 'Darwin', otherwise the system-wide .so.
    "SCOTCH_LIB_PATH": os.path.join(pwd, "libs/scotch/macOS/libscotch.dylib")
    if 'Darwin' in platform.system()
    else "/usr/local/lib/libscotch.so",
    
    # Path to the PaToH shared library (bundled macOS or Linux build, chosen
    # the same way as for SCOTCH above).
    "PATOH_LIB_PATH": os.path.join(pwd, "libs/patoh/lib/macOS/libpatoh.dylib")
    if 'Darwin' in platform.system()
    else os.path.join(pwd, "libs/patoh/lib/linux/libpatoh.so"),
    
    # NOTE(review): presumably the number of PaToH runs per partitioning call;
    # confirm against the graph_partitioning package.
    "PATOH_ITERATIONS": 5,
        
    # Expansion modes: 'no_expansion', 'avg_node_weight', 'total_node_weight', 'smallest_node_weight'
    # 'largest_node_weight', 'product_node_weight'
    # add '_squared' or '_sqrt' at the end of any of the above for ^2 or sqrt(weight)
    # add '_complete' for applying the complete algorithm
    #    for hyperedge with weights: A, B, C, D
    #      new weights are computed
    #       (A*B)^2 = H0
    #       (A*C)^2 = H1, ... Hn-1
    #      then normal hyperedge expansion computed on H0...Hn-1
    # e.g. 'avg_node_weight_squared'
    # NOTE: the experiment cell further down overwrites this key with each
    # entry of its `modes` list before constructing GraphPartitioning.
    "PATOH_HYPEREDGE_EXPANSION_MODE": 'total_node_weight_sqrt_complete',

    # Alters how much information to print. Keep it at 1 for this notebook.
    # 0 - will print nothing, useful for batch operations.
    # 1 - prints basic information on assignments and operations.
    # 2 - prints more information as it batches arrivals.
    "verbose": 1
}

# Disabled: a single shared GraphPartitioning instance. The experiment cell
# below constructs its own `gp` per iteration instead, so the later
# single-step cells only work after that cell has run.
#gp = GraphPartitioning(config)

# Optional: shuffle the order of nodes arriving
# Arrival order should not be shuffled if using GAM to alter node weights
#random.shuffle(gp.arrival_order)

# Enables inline figures and populates the namespace from numpy and matplotlib.
# NOTE(review): the plotting cell at the end uses `plt` without any explicit
# import, so it presumably relies on this magic -- confirm before replacing it
# with `%matplotlib inline`.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
#gp.load_network()

In [3]:
# Repeat the prediction-model and cut-off assignment steps `iterations` times for
# each hyperedge expansion mode, then print one CSV line per metric and phase:
#     <label>,<mode>,<mean>,<std>,<value_0>,...,<value_n-1>
# Labels: EC = edges cut, TCV = total communication volume;
#         _PM = prediction model, _BM = result of assign_cut_off().
iterations = 50
#modes = ['largest_node_weight_complete']
#modes = ['avg_node_weight_complete', 'total_node_weight_complete', 'smallest_node_weight_complete','largest_node_weight_complete']
modes = ['no_expansion']

# Positions of the two summarised metrics inside a metrics row, looked up by
# name in `cols` rather than hard-coded (they resolve to 2 and 3).
EDGES_CUT_COL = cols.index("EDGES CUT")
TOTAL_COMM_VOLUME_COL = cols.index("TOTAL COMM VOLUME")


def format_summary_row(label, mode, values):
    """Return 'label,mode,mean,std,v0,v1,...' for one metric series.

    Parameters:
        label:  row identifier, e.g. 'EC_PM'.
        mode:   hyperedge expansion mode the values were produced with.
        values: sequence of numeric metric values, one per iteration.

    The mean and standard deviation come from NumPy. The `scipy.mean` and
    `scipy.std` aliases used previously were deprecated in SciPy 1.4 and are
    not available in newer SciPy releases.
    """
    fields = [label, mode, str(np.mean(values)), str(np.std(values))]
    # An empty series still yields a trailing comma, matching the old output.
    fields.append(','.join(str(value) for value in values))
    return ','.join(fields)


for mode in modes:
    # GraphPartitioning reads the mode from the shared config dict, so the
    # dict is updated in place (it keeps the last mode after the loop).
    config['PATOH_HYPEREDGE_EXPANSION_MODE'] = mode

    metricsDataPrediction = []
    metricsDataAssign = []

    for i in range(iterations):
        print('Mode', mode, 'Iteration', str(i))
        # A fresh instance per iteration; `gp` from the final iteration is
        # what the single-step cells below operate on.
        gp = GraphPartitioning(config)
        gp.verbose = 0
        gp.load_network()
        gp.init_partitioner()

        # Only the first metrics row of each phase is kept.
        m = gp.prediction_model()
        metricsDataPrediction.append(m[0])

        m = gp.assign_cut_off()
        metricsDataAssign.append(m[0])

    dataEC = [row[EDGES_CUT_COL] for row in metricsDataPrediction]
    dataTCV = [row[TOTAL_COMM_VOLUME_COL] for row in metricsDataPrediction]
    dataECB = [row[EDGES_CUT_COL] for row in metricsDataAssign]
    dataTCVB = [row[TOTAL_COMM_VOLUME_COL] for row in metricsDataAssign]

    ec = format_summary_row('EC_PM', mode, dataEC)
    tcv = format_summary_row('TCV_PM', mode, dataTCV)
    ecB = format_summary_row('EC_BM', mode, dataECB)
    tcvB = format_summary_row('TCV_BM', mode, dataTCVB)

    print(ec)
    print(tcv)
    print(ecB)
    print(tcvB)

    

Mode no_expansion Iteration 0
Mode no_expansion Iteration 1
Mode no_expansion Iteration 2
Mode no_expansion Iteration 3
Mode no_expansion Iteration 4
Mode no_expansion Iteration 5
Mode no_expansion Iteration 6
Mode no_expansion Iteration 7
Mode no_expansion Iteration 8
Mode no_expansion Iteration 9
Mode no_expansion Iteration 10
Mode no_expansion Iteration 11
Mode no_expansion Iteration 12
Mode no_expansion Iteration 13
Mode no_expansion Iteration 14
Mode no_expansion Iteration 15
Mode no_expansion Iteration 16
Mode no_expansion Iteration 17
Mode no_expansion Iteration 18
Mode no_expansion Iteration 19
Mode no_expansion Iteration 20
Mode no_expansion Iteration 21
Mode no_expansion Iteration 22
Mode no_expansion Iteration 23
Mode no_expansion Iteration 24
Mode no_expansion Iteration 25
Mode no_expansion Iteration 26
Mode no_expansion Iteration 27
Mode no_expansion Iteration 28
Mode no_expansion Iteration 29
Mode no_expansion Iteration 30
Mode no_expansion Iteration 31
Mode no_expansion 

In [None]:
# Show the expansion mode held by `gp`, i.e. the instance left over from the
# last iteration of the experiment loop above (no other cell creates `gp`).
print(gp.PATOH_HYPEREDGE_EXPANSION_MODE)

In [None]:
# Single-step walkthrough: initialise the partitioner again on the `gp`
# instance created by the experiment loop above.
gp.init_partitioner()

In [None]:
# Run the prediction model step; `m` holds the returned metrics rows, which
# the next cell tabulates.
m = gp.prediction_model()

In [None]:
# Tabulate the prediction-model metrics: one row per entry of `m`, numbered
# from 1, with the metric names in `cols` as column headers.
rows = [position + 1 for position in range(len(m))]
df = pd.DataFrame(data=m, columns=cols, index=rows)
print(df)

In [None]:
# Run the cut-off assignment step on `gp`; `m` is replaced by the metrics rows
# it returns, which the next cell tabulates.
m = gp.assign_cut_off()

In [None]:
# Tabulate the cut-off assignment metrics: one row per entry of `m`, numbered
# from 1, with the metric names in `cols` as column headers.
rows = [position + 1 for position in range(len(m))]
df = pd.DataFrame(data=m, columns=cols, index=rows)
print(df)

In [None]:
# Run the batch arrival step on `gp`; `m` is replaced by the metrics rows it
# returns, which the next cell tabulates and plots.
m = gp.batch_arrival()

In [None]:
# Tabulate the batch-arrival metrics (one row per entry of `m`, numbered from 1,
# values cast to float) and, when there is more than one row, chart them.
rows = [position + 1 for position in range(len(m))]
df = pd.DataFrame(data=m, columns=cols, index=rows).astype(float)
print(df)

if len(df) <= 1:
    print("\n\nNot enough data points to plot charts. There is only one row.")
else:
    # Edges cut and total communication volume share one chart.
    df.plot(y=['EDGES CUT', 'TOTAL COMM VOLUME'], xticks=rows, figsize=(5,4))

    # The remaining metrics each get a small panel in a single row of six.
    fig, axs = plt.subplots(1,6)
    panels = [
        ('CUT RATIO', 'Cut ratio'),
        ('MODULARITY', 'Modularity'),
        ('LONELINESS', 'Loneliness'),
        ('NETWORK PERMANENCE', 'Network permanence'),
        ('NORM. MUTUAL INFO', 'Norm. Mutual Info'),
        ('FSCORE', 'Fscore'),
    ]
    for ax, (column, title) in zip(axs, panels):
        df.plot(y=[column], title=title, xticks=rows, figsize=(12,2), legend=False, ax=ax)

## 