In [9]:
import os
import csv
import platform
import pandas as pd
import networkx as nx
from graph_partitioning import GraphPartitioning, utils

run_metrics = True

cols = ["WASTE", "CUT RATIO", "EDGES CUT", "TOTAL COMM VOLUME", "Qds", "CONDUCTANCE", "MAXPERM", "RBSE", "NMI", "FSCORE", "FSCORE RELABEL IMPROVEMENT", "LONELINESS"]

pwd = %pwd

config = {

    "DATA_FILENAME": os.path.join(pwd, "data", "predition_model_tests", "network", "network_$$.txt"),
    "OUTPUT_DIRECTORY": os.path.join(pwd, "output"),

    # Set which algorithm is run for the PREDICTION MODEL.
    # Either: 'FENNEL' or 'SCOTCH'
    "PREDICTION_MODEL_ALGORITHM": "FENNEL",

    # Alternativly, read input file for prediction model.
    # Set to empty to generate prediction model using algorithm value above.
    "PREDICTION_MODEL": "",

    
    "PARTITIONER_ALGORITHM": "FENNEL",

    # File containing simulated arrivals. This is used in simulating nodes
    # arriving at the shelter. Nodes represented by line number; value of
    # 1 represents a node as arrived; value of 0 represents the node as not
    # arrived or needing a shelter.
    "SIMULATED_ARRIVAL_FILE": os.path.join(pwd,
                                           "data",
                                           "predition_model_tests",
                                           "dataset_1_shift_rotate",
                                           "simulated_arrival_list",
                                           "percentage_of_prediction_correct_££",
                                           "arrival_££_$$.txt"
                                          ),

    # File containing the prediction of a node arriving. This is different to the
    # simulated arrivals, the values in this file are known before the disaster.
    "PREDICTION_LIST_FILE": os.path.join(pwd,
                                         "data",
                                         "predition_model_tests",
                                         "dataset_1_shift_rotate",
                                         "prediction_list",
                                         "prediction_$$.txt"
                                        ),

    # File containing the geographic location of each node, in "x,y" format.
    "POPULATION_LOCATION_FILE": os.path.join(pwd,
                                             "data",
                                             "predition_model_tests",
                                             "coordinates",
                                             "coordinates_$$.txt"
                                            ),

    # Number of shelters
    "num_partitions": 4,

    # The number of iterations when making prediction model
    "num_iterations": 1,

    # Percentage of prediction model to use before discarding
    # When set to 0, prediction model is discarded, useful for one-shot
    "prediction_model_cut_off": .0,

    # Alpha value used in one-shot (when restream_batches set to 1)
    "one_shot_alpha": 0.5,
    
    "use_one_shot_alpha" : False,
    
    # Number of arrivals to batch before recalculating alpha and restreaming.
    "restream_batches": 50,

    # When the batch size is reached: if set to True, each node is assigned
    # individually as first in first out. If set to False, the entire batch
    # is processed and empty before working on the next batch.
    "sliding_window": False,

    # Create virtual nodes based on prediction model
    "use_virtual_nodes": False,

    # Virtual nodes: edge weight
    "virtual_edge_weight": 1.0,
    
    # Loneliness score parameter. Used when scoring a partition by how many
    # lonely nodes exist.
    "loneliness_score_param": 1.2,


    ####
    # GRAPH MODIFICATION FUNCTIONS

    # Also enables the edge calculation function.
    "graph_modification_functions": True,

    # If set, the node weight is set to 100 if the node arrives at the shelter,
    # otherwise the node is removed from the graph.
    "alter_arrived_node_weight_to_100": False,

    # Uses generalized additive models from R to generate prediction of nodes not
    # arrived. This sets the node weight on unarrived nodes the the prediction
    # given by a GAM.
    # Needs POPULATION_LOCATION_FILE to be set.
    "alter_node_weight_to_gam_prediction": False,
    
    # Enables edge expansion when graph_modification_functions is set to true
    "edge_expansion_enabled": True,

    # The value of 'k' used in the GAM will be the number of nodes arrived until
    # it reaches this max value.
    "gam_k_value": 100,

    # Alter the edge weight for nodes that haven't arrived. This is a way to
    # de-emphasise the prediction model for the unknown nodes.
    "prediction_model_emphasis": 1.0,
    
    # This applies the prediction_list_file node weights onto the nodes in the graph
    # when the prediction model is being computed and then removes the weights
    # for the cutoff and batch arrival modes
    "apply_prediction_model_weights": True,

    "SCOTCH_LIB_PATH": os.path.join(pwd, "libs/scotch/macOS/libscotch.dylib")
    if 'Darwin' in platform.system()
    else "/usr/local/lib/libscotch.so",
    
    # Path to the PaToH shared library
    "PATOH_LIB_PATH": os.path.join(pwd, "libs/patoh/lib/macOS/libpatoh.dylib")
    if 'Darwin' in platform.system()
    else os.path.join(pwd, "libs/patoh/lib/linux/libpatoh.so"),
    
    "PATOH_ITERATIONS": 5,
        
    # Expansion modes: 'avg_node_weight', 'total_node_weight', 'smallest_node_weight'
    # 'largest_node_weight'
    # add '_squared' or '_sqrt' at the end of any of the above for ^2 or sqrt(weight)
    # i.e. 'avg_node_weight_squared
    "PATOH_HYPEREDGE_EXPANSION_MODE": 'no_expansion',
    
    # Edge Expansion: average, total, minimum, maximum, product, product_squared, sqrt_product
    "EDGE_EXPANSION_MODE" : 'total',
    
    # Whether nodes should be reordered using a centrality metric for optimal node assignments in batch mode
    # This is specific to FENNEL and at the moment Leverage Centrality is used to compute new noder orders
    "FENNEL_NODE_REORDERING_ENABLED": False,
    
    # Whether the Friend of a Friend scoring system is active during FENNEL partitioning.
    # FOAF employs information about a node's friends to determine the best partition when
    # this node arrives at a shelter and no shelter has friends already arrived
    "FENNEL_FRIEND_OF_A_FRIEND_ENABLED": False,
    
    # Alters how much information to print. Keep it at 1 for this notebook.
    # 0 - will print nothing, useful for batch operations.
    # 1 - prints basic information on assignments and operations.
    # 2 - prints more information as it batches arrivals.
    "verbose": 1
}

gp = GraphPartitioning(config)

# Optional: shuffle the order of nodes arriving
# Arrival order should not be shuffled if using GAM to alter node weights
#random.shuffle(gp.arrival_order)

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [10]:
import scipy
from copy import deepcopy
import time

# numpy is imported locally so this cell does not depend on the names that
# `%pylab inline` injects, nor on the NumPy aliases (scipy.mean / scipy.std)
# that SciPy deprecated and later dropped from its top-level namespace.
import numpy as np

# Number of test networks evaluated for every cut-off value; the network
# files are numbered 1..iterations.
iterations = 100

# Value substituted for the '££' placeholder in the simulated-arrival path.
correctedness = 80

# Prediction-model cut-off sweep: 21 values, i * 0.05 for i = 0..20.
pm_cutoff = [i * 0.05 for i in range(0, 21)]

# Config entries whose path contains the '$$' network-number placeholder.
NETWORK_PATH_KEYS = (
    "DATA_FILENAME",
    "SIMULATED_ARRIVAL_FILE",
    "PREDICTION_LIST_FILE",
    "POPULATION_LOCATION_FILE",
)


def _csv_row(label, tag, values):
    """Format one metric as a CSV line: label, tag, mean, std, then each value.

    Args:
        label: metric name written in the first field (e.g. 'CUT_RATIO').
        tag: experiment identifier written in the second field.
        values: sequence of per-network values for this metric.

    Returns:
        The comma-separated line as a string. The standard deviation is
        np.std with its default ddof=0, i.e. the same function the former
        scipy.std alias pointed to.
    """
    samples = ','.join(str(value) for value in values)
    return ','.join([label, tag, str(np.mean(values)), str(np.std(values)), samples])


# batches of 50 - use 10 restreaming iterations on FENNEL
# (loop-invariant, so it is set once before the sweep; this mutates the
# shared `config` dict exactly as before).
config['num_iterations'] = 10

for mode in pm_cutoff:

    # One metrics row (the last row returned by batch_arrival) per network.
    metricsDataPrediction = []

    # Placeholders that this cell never fills; kept so that any other cell
    # referencing these names still finds them.
    metricsDataAssign = []
    dataQdsOv = []
    dataCondOv = []
    qdsOv = ''
    condOv = ''

    config['prediction_model_cut_off'] = mode

    print('Mode', mode)
    elapsed_times = []
    for i in range(0, iterations):
        # Progress marker every 20 networks.
        if (i % 20) == 0:
            print('Mode', mode, 'Iteration', str(i))

        # Work on a private copy so the '$$'/'££' templates in `config`
        # stay intact for the next network.
        conf = deepcopy(config)
        network_id = str(i + 1)
        for key in NETWORK_PATH_KEYS:
            conf[key] = conf[key].replace('$$', network_id)
        conf["SIMULATED_ARRIVAL_FILE"] = conf["SIMULATED_ARRIVAL_FILE"].replace('££', str(correctedness))

        with GraphPartitioning(conf) as gp:
            gp.verbose = 0

            start_time = time.time()

            gp.load_network()
            gp.init_partitioner()

            # The three stages run in sequence; only the result of the
            # final stage is used below.
            gp.prediction_model()
            gp.assign_cut_off()
            m = gp.batch_arrival()

            elapsed_time = time.time() - start_time
            elapsed_times.append(elapsed_time)

            # Keep only the last metrics row of the run.
            metricsDataPrediction.append(m[-1])

    print('Average Elapsed Time =', np.mean(elapsed_times))

    # Transpose the collected rows into one list per metric. The index of
    # each metric within a row follows the order of the names below.
    (dataWaste, dataCutRatio, dataEC, dataTCV, dataQDS, dataCOND,
     dataMAXPERM, dataRBSE, dataNMI, dataFscore, dataFscoreImprove,
     dataLonliness) = ([row[k] for row in metricsDataPrediction] for k in range(12))

    # Label for this sweep step as a whole percentage. round() guards
    # against float error: int() alone truncates, and e.g. 0.29 * 100
    # evaluates to 28.999999999999996.
    tag = 'pm_cutoff_' + str(int(round(100 * config['prediction_model_cut_off'])))

    waste = _csv_row('WASTE', tag, dataWaste)
    cutratio = _csv_row('CUT_RATIO', tag, dataCutRatio)
    ec = _csv_row('EC', tag, dataEC)
    tcv = _csv_row('TCV', tag, dataTCV)
    lonliness = _csv_row('LONELINESS', tag, dataLonliness)
    qds = _csv_row('QDS', tag, dataQDS)
    conductance = _csv_row('CONDUCTANCE', tag, dataCOND)
    maxperm = _csv_row('MAXPERM', tag, dataMAXPERM)
    rbse = _csv_row('RBSE', tag, dataRBSE)
    nmi = _csv_row('NMI', tag, dataNMI)
    fscore = _csv_row('FSCORE', tag, dataFscore)
    fscoreimprove = _csv_row('FSCORE_IMPROVE', tag, dataFscoreImprove)

    # NOTE(review): the WASTE row is built but has never been printed by
    # this cell; the printed set is left unchanged so the output keeps its
    # existing shape. Confirm whether WASTE should be emitted as well.
    for line in (cutratio, ec, tcv, lonliness, qds, conductance,
                 maxperm, rbse, nmi, fscore, fscoreimprove):
        print(line)

Mode 0.0
Mode 0.0 Iteration 0
Mode 0.0 Iteration 20
Mode 0.0 Iteration 40
Mode 0.0 Iteration 60
Mode 0.0 Iteration 80
Average Elapsed Time = 17.9519121361
CUT_RATIO,pm_cutoff_0,0.177430170804,0.0240368275651,0.173310225303,0.18044077135,0.1984,0.192358366271,0.187898089172,0.189723320158,0.17472698908,0.189306358382,0.179381443299,0.13058419244,0.174174174174,0.173469387755,0.199351701783,0.172789115646,0.132192846034,0.209090909091,0.181208053691,0.156302521008,0.157514450867,0.158478605388,0.156773211568,0.196825396825,0.224318658281,0.206060606061,0.143270622287,0.191616766467,0.164102564103,0.1703125,0.206654991243,0.186170212766,0.225757575758,0.1696,0.168384879725,0.154471544715,0.158016147636,0.163297045101,0.168302945302,0.190036900369,0.177215189873,0.179620034542,0.182061579652,0.222222222222,0.156794425087,0.185964912281,0.14623655914,0.167872648336,0.181311018131,0.16577540107,0.17130620985,0.124361158433,0.157980456026,0.183976261128,0.182948490231,0.170984455959,0.2030769

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


Mode 0.05 Iteration 20


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


Mode 0.05 Iteration 40


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


Mode 0.05 Iteration 60
Mode 0.05 Iteration 80


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


Average Elapsed Time = 15.7166816688
CUT_RATIO,pm_cutoff_5,0.169772297911,0.0277002330833,0.197573656846,0.219008264463,0.1584,0.177865612648,0.194267515924,0.189723320158,0.170046801872,0.14450867052,0.164948453608,0.164948453608,0.204204204204,0.195578231293,0.189627228525,0.141496598639,0.143079315708,0.141818181818,0.159395973154,0.18487394958,0.17774566474,0.164817749604,0.155251141553,0.242857142857,0.199161425577,0.177272727273,0.150506512301,0.197604790419,0.131623931624,0.1796875,0.138353765324,0.166666666667,0.186363636364,0.1936,0.180412371134,0.159349593496,0.145328719723,0.225505443235,0.182328190743,0.208487084871,0.169303797468,0.141623488774,0.151271753681,0.168209876543,0.160278745645,0.166666666667,0.0795698924731,0.172214182344,0.238493723849,0.179144385027,0.156316916488,0.163543441227,0.14983713355,0.170623145401,0.207815275311,0.160621761658,0.215384615385,0.174438687392,0.193700787402,0.194486983155,0.152108433735,0.162686567164,0.137614678899,0.147826086957,0.18

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


Mode 0.1 Iteration 20
Mode 0.1 Iteration 40
Mode 0.1 Iteration 60
Mode 0.1 Iteration 80
Average Elapsed Time = 16.4379676962
CUT_RATIO,pm_cutoff_10,0.150207121467,0.0233565777264,0.175043327556,0.192837465565,0.1488,0.177865612648,0.148089171975,0.140316205534,0.13104524181,0.125722543353,0.173195876289,0.0962199312715,0.162162162162,0.207482993197,0.192868719611,0.144217687075,0.158631415241,0.165454545455,0.139261744966,0.183193277311,0.14450867052,0.10618066561,0.132420091324,0.174603174603,0.121593291405,0.140909090909,0.163531114327,0.194610778443,0.14358974359,0.1875,0.15761821366,0.170212765957,0.174242424242,0.1264,0.166666666667,0.108943089431,0.140715109573,0.143079315708,0.136044880785,0.147601476015,0.186708860759,0.131260794473,0.132530120482,0.12962962963,0.156794425087,0.152631578947,0.12688172043,0.112879884226,0.163179916318,0.145721925134,0.141327623126,0.151618398637,0.133550488599,0.137982195846,0.197158081705,0.13298791019,0.156923076923,0.162348877375,0.1023622047