# Reparation techniques
This notebook reproduces the results of Section 4.2, "Reparation techniques selection".

In [10]:
# Set the working directory. Relative paths used in this notebook
# (e.g. "data/...") are resolved against it.
# The location can be overridden with the DCD_HOST_DATA_DIR environment
# variable; when it is not set, the original path is used.
import os
os.chdir(os.environ.get("DCD_HOST_DATA_DIR", '/home/angel/host_data/'))

import pymongo

# MongoDB
from sources.mongo_connection.mongo_connector import MongoDBConnection

# Loader
from sources.gloaders.dancer_loader import DancerLoader

# Open the MongoDB connection ('db' / 27017 are presumably host and port;
# confirm against MongoDBConnection.initialize_connection).
MongoDBConnection.initialize_connection('db', 27017)

# Load benchmark names
# Fixed nodes: `names` holds the benchmark base names and `nikcs` the short
# label for the entry at the same position in `names`.
names = [
    "nodos_fijo_pierde_arista_mas_claro",
    "nodos_fijo_pierde_arista_mas_difuso",
    "nodos_fijo_pierde_arista_equilibrio",
    "nodos_fijo_gana_arista_mas_claro",
    "nodos_fijo_gana_arista_mas_difuso",
    "nodos_fijo_gana_arista_equilibrio",
    "nodos_fijo_mixto_mas_claro",
    "nodos_fijo_mixto_mas_difuso",
    "nodos_fijo_mixto_equilibrio",
    "nodos_fijos_split",
    "nodos_fijos_merge",
    "nodos_fijos_split_merge",
    "nodos_fijos_split_merge__merge_mas",
    "nodos_fijos_split_merge_split_mas"
]

nikcs = [
    "nf_pa_mc",
    "nf_pa_md",
    "nf_pa_eq",
    "nf_ga_mc",
    "nf_ga_md",
    "nf_ga_eq",
    "nf_mx_mc",
    "nf_mx_md",
    "nf_mx_eq",
    "nf_sp",
    "nf_mg",
    "nf_sm_eq",
    "nf_sm_mm",
    "nf_sm_sm",
]

# Suffixes appended to every base name and every short label.
subfixes = ["_01", "_02", "_03"]

# `names` and `nikcs` are parallel lists. Fail loudly if they drift apart
# instead of silently producing misaligned dataset names and labels.
if len(names) != len(nikcs):
    raise ValueError(
        "names ({0}) and nikcs ({1}) must have the same length".format(len(names), len(nikcs)))

dataset_nf = []        # left empty by this cell
labels_nf = []         # short labels, e.g. "nf_pa_mc_01"
dataset_names_nf = []  # full names, e.g. "nodos_fijo_pierde_arista_mas_claro_01"

# dataset_names_nf[k] and labels_nf[k] always describe the same benchmark.
for name, nick in zip(names, nikcs):
    for subfix in subfixes:
        dataset_names_nf.append(name + subfix)
        labels_nf.append(nick + subfix)

# Variable nodes: `names` holds the benchmark base names and `nikcs` the
# short label for the entry at the same position in `names`.
names = [
    "pierde_nodos",
    "gana_nodos",
    "mixto",
    "split",
    "merge",
    "split_merge_equilibrio",
    "split_merge_merge_mas",
    "split_merge_split_mas",
    "todo_mixto"
]

nikcs = [
    "pd",
    "gn",
    "mx",
    "sp",
    "mg",
    "sm_eq",
    "sm_mm",
    "sm_sm",
    "todo"
]

# Suffixes appended to every base name and every short label.
subfixes = ["_01", "_02", "_03"]

# `names` and `nikcs` are parallel lists. Fail loudly if they drift apart
# instead of silently producing misaligned dataset names and labels.
if len(names) != len(nikcs):
    raise ValueError(
        "names ({0}) and nikcs ({1}) must have the same length".format(len(names), len(nikcs)))

datasets = []       # left empty by this cell
labels = []         # short labels, e.g. "pd_01"
dataset_names = []  # full names, e.g. "pierde_nodos_01"

# dataset_names[k] and labels[k] always describe the same benchmark.
for name, nick in zip(names, nikcs):
    for subfix in subfixes:
        dataset_names.append(name + subfix)
        labels.append(nick + subfix)

# Load benchmarks

## Fixed number of nodes

In [3]:
# Build a DancerLoader for every fixed-node benchmark directory and store it
# in the database under the benchmark's name.
for dataset_name in dataset_names_nf:
    dancer1 = DancerLoader(dataset_directory="data/synthetic_benchmarks/{0}".format(dataset_name))
    try:
        dancer1.store_or_update_db(dataset_name)
    except Exception as e:
        # Continue with the remaining benchmarks, but report which one failed.
        print("Error in dataset {0}: {1}".format(dataset_name, e))

listo!


## Variable number of nodes

In [5]:
# Build a DancerLoader for every variable-node benchmark directory and store
# it in the database under the benchmark's name.
for dataset_name in dataset_names:
    dancer1 = DancerLoader(dataset_directory="data/synthetic_benchmarks/{0}".format(dataset_name))
    try:
        dancer1.store_or_update_db(dataset_name)
    except Exception as e:
        # Continue with the remaining benchmarks, but report which one failed.
        print("Error in dataset {0}: {1}".format(dataset_name, e))

listo!


## Travian

In [15]:
from sources.gloaders.trevian_loader import TrevianLoader

# (stored dataset name, directory holding its "graphml" and "communities" folders)
# Note: the directories are spelled "trevian_*", the stored names "travian_*".
travian_sources = [
    ("travian_market", "data/travian/trevian_market"),      # market
    ("travian_messages", "data/travian/trevian_messages"),  # messages
]

# Load each network, store it in the database and show its summary.
for dataset_name, base_directory in travian_sources:
    dataset = TrevianLoader(graphml_directory=base_directory + "/graphml",
                            communities_directory=base_directory + "/communities")
    dataset.store_or_update_db(dataset_name)
    dataset.summary()

{'graphml_directory': 'data/travian/trevian_market/graphml', 'communities_directory': 'data/travian/trevian_market/communities', 'snapshot_count': 30, 'n_nodes': [1091, 1065, 1028, 1013, 1067, 1079, 1090, 1026, 1039, 1042, 974, 965, 1006, 1040, 1002, 981, 1007, 958, 909, 870, 853, 877, 876, 821, 847, 805, 846, 850, 817, 796], 'n_edges': [2478, 2305, 2258, 2263, 2460, 2551, 2598, 2351, 2408, 2440, 2430, 2401, 2491, 2506, 2475, 2542, 2464, 2364, 2272, 2084, 2091, 2191, 2024, 1949, 1947, 2055, 2178, 2115, 2054, 1985], 'ground_truth': True, 'n_communites': [107, 96, 92, 93, 93, 92, 94, 85, 85, 81, 78, 79, 81, 81, 88, 78, 83, 79, 72, 70, 74, 74, 70, 75, 81, 83, 79, 81, 73, 74]}
{'graphml_directory': 'data/travian/trevian_messages/graphml', 'communities_directory': 'data/travian/trevian_messages/communities', 'snapshot_count': 30, 'n_nodes': [1789, 1931, 1711, 1717, 1657, 1730, 1836, 1750, 1771, 1560, 1641, 1681, 1647, 1564, 1604, 1649, 1475, 1617, 1568, 1401, 1461, 1495, 1527, 1492, 1439, 1

# Configure experiments

In [4]:
from sources.gas.dynamic_ga_configuration import DynamicGaConfiguration
from sources.gas.dynamic_ga_immigrants_config import DynamicGaImmigrantsConfiguration as DCDImmigrantsGAConfig
from sources.gas.dcd_gas_immigrants_combine_reparators_config import DCDGasImmigrantsCombineReparatorsConfig
from sources.gas.nsga2_config import NSGAIIConfig

from sources.reparators.greedy_reparator import GreedyReparator
from sources.reparators.walk_reparator import RandomWalkReparator

# Every configuration below is stored in the database under the name that the
# "Run experiments" cells later pass to the executor.

# Rate value shared by all the immigrants-based configurations.
rates = 0.0

# Standard: DynamicGaConfiguration built from a default NSGAIIConfig.
d_config = DynamicGaConfiguration(NSGAIIConfig())
d_config.store_or_update_db("standard")

# Label Propagation: immigrants configuration using the greedy reparator.
ga_configs, reparator = NSGAIIConfig(), GreedyReparator("greedy")
d_config = DCDImmigrantsGAConfig(ga_configs, rates, reparator)
d_config.store_or_update_db("label_propagation")

# Random walks: immigrants configuration using the random-walk reparator.
ga_configs, reparator = NSGAIIConfig(), RandomWalkReparator("rw", 5, 6)
d_config = DCDImmigrantsGAConfig(ga_configs, rates, reparator)
d_config.store_or_update_db("random_walks")

# Hybrid: configuration combining the random-walk and greedy reparators.
ga_configs = NSGAIIConfig()
r1, r2 = RandomWalkReparator("rw", 5, 6), GreedyReparator("greedy")
d_config = DCDGasImmigrantsCombineReparatorsConfig(ga_configs, rates, r1, r2)
d_config.store_or_update_db("greedy_random_walks_combine")

# Run experiments

## Standard method

In [None]:
# Experiments
import sources.experiment.experiment_execution as executor

# Run the "standard" settings on the fixed-node benchmarks first, then on the
# variable-node ones. A failure on one dataset is reported and the remaining
# datasets are still processed.
for group in (dataset_names_nf, dataset_names):
    for dataset_name in group:
        try:
            print(dataset_name)
            executor.add_iterations_if_needed(dataset_name, "standard", 20)
        except Exception as error:
            print("Error in dataset {0}: {1}".format(dataset_name, error))

## Label propagation method

In [None]:
# Experiments
import sources.experiment.experiment_execution as executor

# Run the "label_propagation" settings on the fixed-node benchmarks first,
# then on the variable-node ones. A failure on one dataset is reported and the
# remaining datasets are still processed.
for group in (dataset_names_nf, dataset_names):
    for dataset_name in group:
        try:
            print(dataset_name)
            executor.add_iterations_if_needed(dataset_name, "label_propagation", 20)
        except Exception as error:
            print("Error in dataset {0}: {1}".format(dataset_name, error))

## Random walks method

In [None]:
# Experiments
import sources.experiment.experiment_execution as executor

# synthetic: fixed-node benchmarks first, then variable-node ones. A failure on
# one dataset is reported and the remaining datasets are still processed.
for group in (dataset_names_nf, dataset_names):
    for dataset_name in group:
        try:
            print(dataset_name)
            executor.add_iterations_if_needed(dataset_name, "random_walks", 20)
        except Exception as error:
            print("Error in dataset {0}: {1}".format(dataset_name, error))

# real: the Travian datasets are run without error handling, so a failure
# here stops the cell.
for real_dataset in ("travian_market", "travian_messages"):
    executor.add_iterations_if_needed(real_dataset, "random_walks", 20)

## Hybrid method

In [None]:
# Experiments
import sources.experiment.experiment_execution as executor

# Run the "greedy_random_walks_combine" settings on the fixed-node benchmarks
# first, then on the variable-node ones. A failure on one dataset is reported
# and the remaining datasets are still processed.
for group in (dataset_names_nf, dataset_names):
    for dataset_name in group:
        try:
            print(dataset_name)
            executor.add_iterations_if_needed(dataset_name, "greedy_random_walks_combine", 20)
        except Exception as error:
            print("Error in dataset {0}: {1}".format(dataset_name, error))

# Generate tables

## Table 3

In [None]:
from sources.experiment.experiment_loader import ExperimentLoader
from sources.experiment.experiment_tables import save_max_sum_hv_table_csv, save_max_sum_hv_kruskall_hypothesis_table_csv

# Stored configuration names and the display names used for them in the tables.
settings_list_nf = ["label_propagation", "random_walks", "greedy_random_walks_combine"]
settings_nick_nf = ["label propagation", "Random Walks", "Hybrid"]

# One row per settings name, one ExperimentLoader per fixed-node dataset.
exp_matrix_nf = [
    [ExperimentLoader(dataset, settings) for dataset in dataset_names_nf]
    for settings in settings_list_nf
]

# Write both CSV files of Table 3.
save_max_sum_hv_table_csv(
    "tables/table_3_mean_std.csv", exp_matrix_nf, settings_nick_nf, labels_nf)
save_max_sum_hv_kruskall_hypothesis_table_csv(
    "tables/table_3_kruskall.csv", exp_matrix_nf, settings_nick_nf, labels_nf)

## Table 4

In [None]:
from sources.experiment.experiment_loader import ExperimentLoader
from sources.experiment.experiment_tables import save_max_sum_hv_table_csv, save_max_sum_hv_kruskall_hypothesis_table_csv

# Stored configuration names and the display names used for them in the tables.
settings_list = ["label_propagation", "random_walks", "greedy_random_walks_combine"]
settings_nick = ["label propagation", "Random Walks", "Hybrid"]

# One row per settings name, one ExperimentLoader per variable-node dataset.
exp_matrix = [
    [ExperimentLoader(dataset, settings) for dataset in dataset_names]
    for settings in settings_list
]

# Write both CSV files of Table 4.
save_max_sum_hv_table_csv(
    "tables/table_4_mean_std.csv", exp_matrix, settings_nick, labels)
save_max_sum_hv_kruskall_hypothesis_table_csv(
    "tables/table_4_kruskall.csv", exp_matrix, settings_nick, labels)

## Tables 5/6

In [None]:
from sources.experiment.experiment_loader import ExperimentLoader
from sources.experiment.experiment_tables import save_avg_sqr_error_table_csv, save_avg_sqr_error_kruskall_hypothesis_table_csv

# Short labels of the benchmarks used in these tables: the first six belong to
# the fixed-node set, the last six to the variable-node set.
lbs = [
    "nf_mx_eq_01", "nf_mx_eq_02", "nf_mx_eq_03",
    "nf_sm_eq_01", "nf_sm_eq_02", "nf_sm_eq_03",
    "mx_01", "mx_02", "mx_03",
    "sm_eq_01", "sm_eq_02", "sm_eq_03",
]

# Translate every short label back to its full dataset name, looking it up in
# the label list of the set it belongs to.
fixed_lbs, variable_lbs = lbs[:6], lbs[6:]
d_names = (
    [dataset_names_nf[labels_nf.index(lb)] for lb in fixed_lbs]
    + [dataset_names[labels.index(lb)] for lb in variable_lbs]
)

settings_list = ["standard", "random_walks"]
settings_nick = ["Standard", "Immigrant's"]

# One row per settings name, one ExperimentLoader per selected dataset.
exp_matrix = [
    [ExperimentLoader(dataset, settings) for dataset in d_names]
    for settings in settings_list
]

save_avg_sqr_error_table_csv(
    "tables/table_5-6_mean_std.csv", exp_matrix, settings_nick, lbs)
save_avg_sqr_error_kruskall_hypothesis_table_csv(
    "tables/table_5_kruskall.csv", exp_matrix, settings_nick, lbs)

## Table 7

In [None]:
from sources.experiment.experiment_loader import ExperimentLoader
from sources.experiment.experiment_tables import save_mni_table_csv

# Table 7 is built from the "random_walks" experiment on the Travian messages dataset.
messages_experiments = [ExperimentLoader("travian_messages", "random_walks")]
save_mni_table_csv("tables/table_7_mean_std.csv", messages_experiments, ["Immigrant's"])

## Table 8

In [None]:
from sources.experiment.experiment_loader import ExperimentLoader
from sources.experiment.experiment_tables import save_mni_table_csv

# Table 8 is built from the "random_walks" experiment on the Travian market dataset.
market_experiments = [ExperimentLoader("travian_market", "random_walks")]
save_mni_table_csv("tables/table_8_mean_std.csv", market_experiments, ["Immigrant's"])

# Generate figures

## Figure 9

In [None]:
from sources.experiment.experiment_loader import ExperimentLoader
from sources.experiment.experiment_plotting import compare_times_taken, generations_total_comparison

# Short labels of the benchmarks shown in the figure: the first six belong to
# the fixed-node set, the last six to the variable-node set.
lbs = ["nf_mx_eq_01", "nf_mx_eq_02", "nf_mx_eq_03",
       "nf_sm_eq_01", "nf_sm_eq_02", "nf_sm_eq_03",
       "mx_01", "mx_02", "mx_03",
       "sm_eq_01", "sm_eq_02", "sm_eq_03"
        ]

# Translate every short label back to its full dataset name, looking it up in
# the label list of the set it belongs to.
d_names = (
    [dataset_names_nf[labels_nf.index(lb)] for lb in lbs[:6]]
    + [dataset_names[labels.index(lb)] for lb in lbs[6:]]
)

settings_list = ["standard", "random_walks"]
settings_nick = ["Standard", "Immigrant's"]

# One row per settings name, one ExperimentLoader per selected dataset.
exp_matrix = []
for settings in settings_list:
    exp_matrix.append([ExperimentLoader(dataset, settings) for dataset in d_names])

# Ticks every 5 minutes from 0 to 30 (60 axis units per minute, so the axis
# is presumably in seconds). The labels are derived from the ticks so they
# cannot drift apart and are all formatted alike ("10 minutes", not
# "10minutes").
yticks = [minutes * 60 for minutes in range(0, 31, 5)]
ylabels = ["{0} minutes".format(tick // 60) for tick in yticks]

compare_times_taken("figures/figure_9_b.png", exp_matrix, settings_nick, lbs,
                    yticks=yticks,
                    ylabels=ylabels)
generations_total_comparison("figures/figure_9_a.png", exp_matrix, settings_nick, lbs)