In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.legend_handler import HandlerTuple
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import scipy.stats as stats
from scipy.stats import mannwhitneyu
import os
from fuzzywuzzy import process



```df``` is the dataframe with the columns:
- ```graphname```: the name of the graph (24 graphs in total)
- ```vertices```: the number of vertices in the graph  
- ```edges```: the number of edges in the graph
- ```MDG```: result of the Maximum Degree (MDG) algorithm
- ```reverseMDG```: result of the Minimum Degree (reverseMDG) algorithm
- ```bestBRKGA```: best result of the BRKGA algorithm
- ```avgBRKGA```: average result of the BRKGA algorithm
- ```BRKGAtime```: average time of the BRKGA algorithm (we didn't use this column in the paper)
- ```bestBRKGArev```: best result of the BRKGArev algorithm 
- ```avgBRKGArev```: average result of the BRKGArev algorithm 
- ```bestfastBRKGA```: best result of the fastBRKGA algorithm 
- ```avgfastBRKGA```: average result of the fastBRKGA algorithm 
- ```fastBRKGAtime```: average time of the fastBRKGA algorithm (we didn't use this column in the paper)
- ```bestfastBRKGArev```: best result of the fastBRKGArev algorithm
- ```avgfastBRKGArev```: average result of the fastBRKGArev algorithm
- ```bestMMAS```: best result of the MMAS algorithm
- ```avgMMAS```: average result of the MMAS algorithm
- ```bestMMASlearn```: best result of the MMASlearn algorithm 
- ```avgMMASlearn```: average result of the MMASlearn algorithm

Note that all data for the MMAS and MMASlearn algorithms are taken from the paper by López Serrano and Blum. Except for MDG and reverseMDG, which are deterministic algorithms, all other algorithms are stochastic. Therefore, we ran each algorithm 10 times and report the best and average results.

In [15]:
# load the results table (one row per graph) from csv
df = pd.read_csv('results.csv')
display(df)

Unnamed: 0,graphname,vertices,edges,MDG,reverseMDG,bestBRKGA,avgBRKGA,BRKGAtime,bestBRKGArev,avgBRKGArev,bestfastBRKGA,avgfastBRKGA,fastBRKGAtime,bestfastBRKGArev,avgfastBRKGArev,bestMMAS,avgMMAS,bestMMASlearn,avgMMASlearn
0,Dolphins,62,159,8,7,6,6.0,0.01,6,6.0,6,6.0,0.01,6,6.0,6,6.0,6,6.0
1,Football,115,613,31,28,22,23.4,10.2,22,23.4,22,23.5,21.3,22,23.5,23,23.0,22,23.0
2,Karate,34,78,3,3,3,3.0,0.01,3,3.0,3,3.0,0.01,3,3.0,3,3.0,3,3.0
3,Jazz,198,2742,29,24,20,21.1,12.3,20,21.1,20,21.1,5.8,20,21.1,20,20.0,20,20.0
4,CA-AstroPh,18772,198050,1638,1381,1428,1438.0,162.2,1375,1384.9,1427,1438.4,176.3,1375,1385.6,1405,1412.5,1405,1413.0
5,CA-GrQc,5242,14484,1031,889,928,930.7,51.3,891,895.7,924,930.6,52.1,892,897.0,898,900.1,897,899.4
6,CA-HepPh,12008,118489,1524,1257,1343,1347.8,102.9,1280,1286.0,1338,1349.2,84.9,1278,1285.7,1289,1297.2,1289,1298.4
7,CA-HepTh,9877,25973,1388,1154,1234,1242.4,88.2,1155,1160.2,1237,1242.4,82.3,1157,1160.6,1179,1186.2,1182,1189.2
8,CA-CondMat,23133,93439,2933,2326,2563,2592.8,225.8,2350,2360.4,2580,2599.5,226.2,2354,2364.1,2416,2422.3,2419,2428.0
9,Email-Enron,36692,183831,2882,2676,2670,2679.1,328.9,2635,2643.5,2664,2671.7,334.0,2633,2639.1,2679,2686.0,2692,2699.4


In [16]:
def extractBRKGA(filename):
    """
    Extracts the result of every run from a BRKGA output file (BRKGA.txt).

    Only lines whose first whitespace-separated token is "Finish" are used;
    such a line is read as ``Finish <time> <result> <generation>``.
    Blank lines and all other lines are ignored.

    filename -- path of the BRKGA output file

    Returns the list of ``result`` values (int), one per "Finish" line, in
    file order. Raises IndexError / ValueError if a "Finish" line has no
    integer in its third field.
    """
    res = []
    # the context manager closes the file even if parsing raises
    with open(filename, "r") as fh:
        for line in fh:
            x = line.split()
            # a blank line splits to [], so check for emptiness before x[0]
            if not x or x[0] != "Finish":
                continue
            # x[1] (time) and x[3] (generation) are not needed by any caller
            res.append(int(x[2]))
    # return just the result of each run
    return res

def extractReverse(filename):
    """
    Extracts the result of every run from a BRKGArev output file (BRKGAreverse.txt).

    Every non-blank line contributes one run; its first whitespace-separated
    token is parsed as the integer result. Blank lines (e.g. a trailing empty
    line at the end of the file) are ignored.

    filename -- path of the BRKGArev output file

    Returns the list of results (int) in file order. Raises ValueError if the
    first token of a non-blank line is not an integer.
    """
    res = []
    # the context manager closes the file even if parsing raises
    with open(filename, "r") as fh:
        for line in fh:
            x = line.split()
            # a blank line splits to [], which has no x[0]
            if not x:
                continue
            res.append(int(x[0]))
    # return the result of each run
    return res
    

# per-run results of the four BRKGA variants on the amazon0505 instance
BRKGA = extractBRKGA("benchmark/amazon0505/BRKGA.txt")
fastBRKGA = extractBRKGA("benchmark/amazon0505/fastBRKGA.txt")
BRKGArev = extractReverse("benchmark/amazon0505/BRKGAreverse.txt")
fastBRKGArev = extractReverse("benchmark/amazon0505/fastBRKGAreverse.txt")
# only two hard-coded numbers per MMAS variant, not the individual runs
# NOTE(review): these look like the [best, avg] values reported for Amazon0505 - confirm
MMAS_learn = [26801, 26871.2]
MMAS = [26945, 27000.9]
print(BRKGA)
print(BRKGArev)
# test statistic
# one-sided Mann-Whitney U test: are the MMAS_learn values greater than the BRKGArev runs?
# NOTE(review): with a 2-element sample against 10 runs, an exact test cannot give a
# p-value below 1/66 (about 0.0152), so this is the strongest result obtainable here
U1, p = mannwhitneyu(MMAS_learn, BRKGArev, alternative="greater")
p

[31763, 31780, 31776, 31768, 31799, 31786, 31784, 31789, 31770, 31770]
[24125, 24134, 24144, 24170, 24115, 24135, 24127, 24128, 24132, 24136]


0.015151515151515152

In [17]:
def get_closest_match(query, choices):
    """Returns the entry of choices that process.extractOne picks as best match for query."""
    # extractOne yields a (match, score) pair; the score is not needed
    best, _score = process.extractOne(query, choices)
    return best
# go through all files in the directory benchmark
# (os.listdir returns the entry names only, in arbitrary order)
directory = "benchmark/"
files = os.listdir(directory)

# create dataframe with the columns graphname and p-value of algo1 vs. algo2
dfres = pd.DataFrame(columns=['graphname', 'p-value'])

# algo1: per-run results are read from benchmark/<entry>/<algo1>.txt
# algo2: values are looked up in df through the columns "best" + algo2 and "avg" + algo2
algo1 = "fastBRKGAreverse"
algo2 = "MMASlearn"

for f in files:
    # if directory + f + "/" + algo1 + ".txt" is not a file, skip
    filepath = directory + f + "/" + algo1 + ".txt"
    if not os.path.isfile(filepath): continue
    # note: this overwrites the module-level BRKGArev on every iteration
    BRKGArev = extractReverse(filepath)
    # match f with closest graphname in dataframe
    closest_match = get_closest_match(f, df["graphname"].to_list())
    # manual override: the directory "fb" is mapped by hand instead of by fuzzy matching
    if f == "fb":   closest_match = "ego-facebook"
   
    # extract the best and the average value of algo2 from the dataframe;
    # the sample built here is [best, avg] of the matched row(s), not the individual runs
    MMAS_learn = df[df["graphname"] == closest_match]["best" + algo2], df[df["graphname"] == closest_match]["avg" + algo2]
    MMAS_learn = MMAS_learn[0].to_list() + MMAS_learn[1].to_list()
    
    # one-sided Mann-Whitney U test: are the algo1 runs smaller than the algo2 values?
    U1, p = mannwhitneyu(BRKGArev, MMAS_learn, alternative="less")
    print(closest_match, MMAS_learn, BRKGArev, p)
    # add row to dataframe
    dfres.loc[len(dfres)] = [closest_match, p]

display(dfres)

loc-gowalla_edges [5155, 5177.6] [4774, 4782, 4772, 4762, 4779, 4787, 4792, 4787, 4774, 4780] 0.020275044761546
com-dblp [32364, 32397.7] [29240, 29271, 29218, 29212, 29225, 29268, 29221] 0.027777777777777776
socfb-Mich67 [177, 179.3] [172, 173, 166, 173, 165, 168, 169, 171, 163, 160] 0.020451850368734037
Karate [3, 3.0] [3, 3, 3, 3, 3, 3, 3, 3, 3, 3] 1.0
Dolphins [6, 6.0] [6, 6, 6, 6, 6, 6, 6, 6, 6, 6] 1.0
Amazon0312 [26186, 26201.2] [23542, 23568, 23571, 23546, 23570, 23564, 23543, 23566, 23557, 23567] 0.015151515151515152
Football [22, 23.0] [23, 23, 23, 23, 24, 26, 23, 24, 24, 24, 24, 23, 24, 23, 23, 24, 22, 24, 23, 24] 0.9647331331111013
deezer_HR [2240, 2255.9] [1888, 1877, 1901, 1890, 1896, 1878, 1926, 1873, 1876, 1917] 0.015151515151515152
soc-gplus [61, 61.9] [61, 62, 61, 61, 61, 61, 61, 61, 61, 61] 0.16087516911944588
ego-facebook [478, 481.9] [467, 472, 466, 467, 468, 467, 460, 468, 463, 469] 0.019746877552104688
Amazon0505 [26801, 26871.2] [24136, 24148, 24120, 24139, 24130

Unnamed: 0,graphname,p-value
0,loc-gowalla_edges,0.020275
1,com-dblp,0.027778
2,socfb-Mich67,0.020452
3,Karate,1.0
4,Dolphins,1.0
5,Amazon0312,0.015152
6,Football,0.964733
7,deezer_HR,0.015152
8,soc-gplus,0.160875
9,ego-facebook,0.019747


In [7]:
def get_closest_match(query, choices):
    """Returns the entry of choices that process.extractOne picks as best match for query."""
    # extractOne yields a (match, score) pair; the score is not needed
    best, _score = process.extractOne(query, choices)
    return best
# go through all files in the directory benchmark
# (os.listdir returns the entry names only, in arbitrary order)
directory = "benchmark/"
files = os.listdir(directory)

def runComparison(algo1, algo2, directory, files, alternative="less"):
    """
    Runs a Mann-Whitney U test between two algorithms on every benchmark entry.

    For each entry f of files the per-run results are read from
    <directory>/<f>/<algo1>.txt and <directory>/<f>/<algo2>.txt. Entries that
    lack either file are skipped silently; entries where a file contains no
    results are skipped with a printed notice. Files of an algorithm whose name
    contains "reverse" are parsed with extractReverse, all others with
    extractBRKGA. One line per tested entry is printed (entry name, both
    samples, p-value, U statistic) and finally a dataframe with the columns
    graphname and p-value is displayed.

    algo1, algo2 -- algorithm names, i.e. the result file names without ".txt"
    directory    -- directory holding one sub-directory per benchmark entry
                    (with or without a trailing slash)
    files        -- names of the sub-directories to look at
    alternative  -- alternative hypothesis handed to mannwhitneyu; the default
                    "less" asks whether algo1's results tend to be smaller
                    than algo2's

    Returns nothing; the table is only displayed.
    """
    def read_runs(algo, path):
        # the output format depends on whether this is a "reverse" variant
        if "reverse" in algo:
            return extractReverse(path)
        return extractBRKGA(path)

    rows = []
    for f in files:
        filepath1 = os.path.join(directory, f, algo1 + ".txt")
        filepath2 = os.path.join(directory, f, algo2 + ".txt")
        # only entries that have a result file for both algorithms are compared
        if not (os.path.isfile(filepath1) and os.path.isfile(filepath2)):
            continue
        res1 = read_runs(algo1, filepath1)
        res2 = read_runs(algo2, filepath2)
        # the test is undefined for an empty sample; report it instead of failing the whole loop
        if not res1 or not res2:
            print(f, "skipped: no results found")
            continue
        U1, p = mannwhitneyu(res1, res2, alternative=alternative)
        print(f, res1, res2, p, U1)
        rows.append([f, p])
    # build the table once instead of growing it row by row
    dfres = pd.DataFrame(rows, columns=['graphname', 'p-value'])
    display(dfres)

# compare fastBRKGAreverse against BRKGA on every benchmark entry that has both result files
runComparison("fastBRKGAreverse", "BRKGA", directory, files)

loc-gowalla [4774, 4782, 4772, 4762, 4779, 4787, 4792, 4787, 4774, 4780] [5483, 5452, 5465, 5450, 5461, 5450, 5475, 5481, 5465, 5470] 8.930724418684081e-05 0.0
com-dblp [29240, 29271, 29218, 29212, 29225, 29268, 29221] [36917, 36892, 36929, 36916, 36926, 36911, 36875, 36944, 36897, 36963] 5.141916906622789e-05 0.0
Mich67 [172, 173, 166, 173, 165, 168, 169, 171, 163, 160] [167, 173, 171, 170, 166, 170, 166, 171, 163, 168] 0.48483929431005784 49.0
karate [3, 3, 3, 3, 3, 3, 3, 3, 3, 3] [3, 3, 3, 3, 3, 3, 3, 3, 3, 3] 1.0 50.0
dolphin [6, 6, 6, 6, 6, 6, 6, 6, 6, 6] [6, 6, 6, 6, 6, 6, 6, 6, 6, 6] 1.0 50.0
amazon0312 [23542, 23568, 23571, 23546, 23570, 23564, 23543, 23566, 23557, 23567] [31062, 31082, 31051, 31078, 31060, 31082, 31080, 31079, 31096, 31082] 8.930724418684081e-05 0.0
football [23, 23, 23, 23, 24, 26, 23, 24, 24, 24, 24, 23, 24, 23, 23, 24, 22, 24, 23, 24] [25, 24, 24, 23, 23, 23, 24, 23, 24, 24, 24, 23, 22, 24, 23, 24, 23, 24, 22, 22] 0.6269602851204208 210.5
deezer [1888, 1877

Unnamed: 0,graphname,p-value
0,loc-gowalla,8.9e-05
1,com-dblp,5.1e-05
2,Mich67,0.484839
3,karate,1.0
4,dolphin,1.0
5,amazon0312,8.9e-05
6,football,0.62696
7,deezer,9.1e-05
8,gplus,2.8e-05
9,fb,0.023492


In [5]:
def get_closest_match(query, choices):
    """Returns the entry of choices that process.extractOne picks as best match for query."""
    # extractOne yields a (match, score) pair; the score is not needed
    best, _score = process.extractOne(query, choices)
    return best
# go through all files in the directory benchmark
# (os.listdir returns the entry names only, in arbitrary order)
directory = "benchmark/"
files = os.listdir(directory)

# create dataframe with the columns graphname and p-value of BRKGAreverse vs. fastBRKGAreverse
dfres = pd.DataFrame(columns=['graphname', 'p-value'])

for f in files:
    # if directory + f + "/fastBRKGAreverse.txt" is not a file, skip
    if not os.path.isfile(directory + f + "/fastBRKGAreverse.txt"):
        continue
    # if directory + f + "/BRKGAreverse.txt" is not a file, skip
    if not os.path.isfile(directory + f + "/BRKGAreverse.txt"):
        continue
    # despite their names, BRKGA and fastBRKGA hold the runs of the *reverse* variants here
    BRKGA = extractReverse(directory + f + "/BRKGAreverse.txt")
    fastBRKGA = extractReverse(directory + f + "/fastBRKGAreverse.txt")
    # no `alternative` is passed, so SciPy's default hypothesis is used
    # NOTE(review): the default is two-sided in current SciPy releases - confirm for the installed version
    U1, p = mannwhitneyu(BRKGA, fastBRKGA)
    print(f, BRKGA, fastBRKGA, p)
    # add row to dataframe
    dfres.loc[len(dfres)] = [f, p]

display(dfres)

loc-gowalla [4784, 4773, 4777, 4766, 4760, 4779, 4777, 4776, 4798, 4782] [4774, 4782, 4772, 4762, 4779, 4787, 4792, 4787, 4774, 4780] 0.6225213993195784
com-dblp [29221, 29212, 29231, 29216, 29170, 29176, 29239, 29210, 29261, 29222] [29240, 29271, 29218, 29212, 29225, 29268, 29221] 0.1565436217809416
Mich67 [167, 173, 171, 170, 166, 170, 165, 171, 163, 168] [172, 173, 166, 173, 165, 168, 169, 171, 163, 160] 1.0
karate [3, 3, 3, 3, 3, 3, 3, 3, 3, 3] [3, 3, 3, 3, 3, 3, 3, 3, 3, 3] 1.0
dolphin [6, 6, 6, 6, 6, 6, 6, 6, 6, 6] [6, 6, 6, 6, 6, 6, 6, 6, 6, 6] 1.0
amazon0312 [23569, 23580, 23532, 23527, 23529, 23559, 23559, 23576, 23542, 23523] [23542, 23568, 23571, 23546, 23570, 23564, 23543, 23566, 23557, 23567] 0.3253873197470524
football [25, 24, 24, 23, 23, 23, 24, 23, 24, 24, 24, 23, 22, 24, 23, 24, 23, 24, 22, 22] [23, 23, 23, 23, 24, 26, 23, 24, 24, 24, 24, 23, 24, 23, 23, 24, 22, 24, 23, 24] 0.7684708523006007
deezer [1917, 1883, 1889, 1900, 1918, 1874, 1933, 1923, 1891, 1917] [1888, 1

Unnamed: 0,graphname,p-value
0,loc-gowalla,0.622521
1,com-dblp,0.156544
2,Mich67,1.0
3,karate,1.0
4,dolphin,1.0
5,amazon0312,0.325387
6,football,0.768471
7,deezer,0.185214
8,gplus,0.36812
9,fb,0.110364
