In [8]:
import igraph as ig
from igraph import Graph

import pickle
import random
import pandas as pd
import numpy as np

from sklearn.metrics import normalized_mutual_info_score
import community as community_louvain
import networkx as nx

from graph_modification import remove_edges, remove_vertices 

#https://igraph.org/python/doc/api/igraph.Graph.html

In [None]:
# Embedding imports
from gem.embedding.gf       import GraphFactorization
from gem.embedding.hope     import HOPE
from gem.embedding.lap      import LaplacianEigenmaps
from gem.embedding.lle      import LocallyLinearEmbedding
from gem.embedding.node2vec import node2vec
from gem.embedding.sdne     import SDNE

from time import time

In [9]:
# Loading serialized datasets
filename = 'datasets.data'

# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# this file must come from a trusted source.
# The context manager closes the file even when unpickling raises.
with open(filename, 'rb') as infile:
    datasets = pickle.load(infile)

In [33]:
# Baseline: run every community-detection method on each unmodified graph and
# record four metrics per method as long-format rows:
#   P           number of detected communities
#   P*/P        ground-truth community count divided by P
#   Modularity  modularity of the detected partition on the graph
#   NMI         normalized mutual information against the ground-truth labels
base_rows = []

for dataset in datasets:
    graph = dataset['graph']
    # NOTE(review): assumes the rows of dataset['vertices'] are ordered by igraph
    # vertex index, so labels and memberships line up -- confirm.
    truth = dataset['vertices']['community']
    p_size = float(truth.nunique())

    # (method name, membership list, number of communities), in reporting order.
    detected = []

    # Louvain (igraph implementation): keep the last level returned.
    levels = ig.Graph.community_multilevel(graph, return_levels=True)
    louvian = levels[len(levels)-1]
    detected.append(('Louvain-igraph', louvian.membership, len(set(louvian.membership))))

    # FastGreedy: cut the dendrogram at its optimal count.
    fg_dendrogram = ig.Graph.community_fastgreedy(graph)
    p_fg = fg_dendrogram.optimal_count
    fg = fg_dendrogram.as_clustering()
    detected.append(('Fastgreedy', fg.membership, p_fg))

    # Infomap
    infomap = ig.Graph.community_infomap(graph)
    detected.append(('Infomap', infomap.membership, len(set(infomap.membership))))

    # Label Propagation
    lp = ig.Graph.community_label_propagation(graph)
    detected.append(('Label Propagation', lp.membership, len(set(lp.membership))))

    # Louvain (python-louvain implementation) runs on a networkx copy of the graph.
    nxG = nx.Graph()
    nxG.add_nodes_from([vertex.index for vertex in graph.vs])
    nxG.add_edges_from([edge.tuple for edge in graph.es])

    lv_partition = community_louvain.best_partition(nxG)
    # Align the node -> community dict explicitly with igraph vertex order instead
    # of relying on the iteration order of the returned dict.
    lv_membership = [lv_partition[vertex.index] for vertex in graph.vs]
    detected.append(('Louvain', lv_membership, len(set(lv_membership))))

    for method, membership, p_found in detected:
        metrics = [
            ('P', p_found),
            ('P*/P', p_size/p_found),
            ('Modularity', graph.modularity(membership)),
            ('NMI', normalized_mutual_info_score(truth, membership)),
        ]
        for metric, value in metrics:
            base_rows.append({'dataset':dataset['name'], 'method':method, 'metric':metric,
                              'value':value})


In [None]:
# Values that will vary during the cycles

# Fractions 0.1, 0.2, ..., 0.9. Dividing by 10 (instead of multiplying by 0.1)
# avoids floating-point artefacts such as 0.30000000000000004 ending up in the
# 'percentage' column, which would break equality filters and group-bys.
pcts = [x / 10 for x in range(1, 10)]
strategies = ['random','betweenness']

rows = []

# Repeat every baseline row once per modification that is going to be run, tagged
# with percentage 0.0, so each modification series has a starting point.
for base_row in base_rows:
    for strat in strategies:
        # 'random' has no meaningful ordering, so it is only run once.
        orders = ['asc'] if strat == 'random' else ['asc','desc']

        for order in orders:
            # Build a new dict rather than mutating the entry stored in base_rows.
            rows.append({**base_row, 'modification': strat+' '+order, 'percentage': 0.0})

In [36]:
# Edge-removal experiment: for every dataset, strategy, percentage and order,
# modify the graph, run every community-detection method and record the metrics
# P, P*/P, Modularity and NMI as long-format rows.
#
# The rows are collected in a list local to this cell (instead of appending to
# `rows`) so that re-running the cell does not duplicate every measurement.
experiment_rows = []

for dataset in datasets:
    # NOTE(review): assumes the rows of dataset['vertices'] are ordered by igraph
    # vertex index, so labels and memberships line up -- confirm.
    truth = dataset['vertices']['community']
    p_size = float(truth.nunique())

    for strat in strategies:
        # 'random' has no meaningful ordering, so it is only run once.
        orders = ['asc'] if strat == 'random' else ['asc','desc']

        for pct in pcts:
            for order in orders:
                modification = strat+' '+order

                # modify the graph
                if pct == 0:
                    graph = dataset['graph']
                else:
                    graph = remove_edges(dataset['graph'], pct=pct, deep_copy=True, strategy=strat, order=order)

                # (method name, membership list, number of communities), in reporting order.
                detected = []

                # Louvain (igraph implementation): keep the last level returned.
                levels = ig.Graph.community_multilevel(graph, return_levels=True)
                louvian = levels[len(levels)-1]
                detected.append(('Louvain-igraph', louvian.membership, len(set(louvian.membership))))

                # FastGreedy: cut the dendrogram at its optimal count.
                fg_dendrogram = ig.Graph.community_fastgreedy(graph)
                p_fg = fg_dendrogram.optimal_count
                fg = fg_dendrogram.as_clustering()
                detected.append(('Fastgreedy', fg.membership, p_fg))

                # Infomap
                infomap = ig.Graph.community_infomap(graph)
                detected.append(('Infomap', infomap.membership, len(set(infomap.membership))))

                # Label Propagation
                lp = ig.Graph.community_label_propagation(graph)
                detected.append(('Label Propagation', lp.membership, len(set(lp.membership))))

                # Louvain (python-louvain implementation) runs on a networkx copy of the graph.
                nxG = nx.Graph()
                nxG.add_nodes_from([vertex.index for vertex in graph.vs])
                nxG.add_edges_from([edge.tuple for edge in graph.es])

                lv_partition = community_louvain.best_partition(nxG)
                # Align the node -> community dict explicitly with igraph vertex
                # order instead of relying on the iteration order of the dict.
                lv_membership = [lv_partition[vertex.index] for vertex in graph.vs]
                detected.append(('Louvain', lv_membership, len(set(lv_membership))))

                for method, membership, p_found in detected:
                    metrics = [
                        ('P', p_found),
                        ('P*/P', p_size/p_found),
                        ('Modularity', graph.modularity(membership)),
                        ('NMI', normalized_mutual_info_score(truth, membership)),
                    ]
                    for metric, value in metrics:
                        experiment_rows.append({'dataset':dataset['name'], 'modification':modification,
                                                'percentage':pct, 'method':method, 'metric':metric,
                                                'value':value})

# Percentage-0 rows first, followed by the measurements from this cell.
experiment_results = pd.DataFrame(rows + experiment_rows)

In [None]:
# Graph-embedding models from GEM: https://github.com/palash1992/GEM
#
# NOTE(review): `G`, `gr`, `viz` and `plt` are used below but are not defined or
# imported in any cell visible in this notebook -- confirm where they come from.

# Remove or comment out an entry to skip that method.
models = [
    # GraphFactorization (disabled): embedding dimension (d), maximum iterations
    # (max_iter), learning rate (eta), regularization coefficient (regu).
    #GraphFactorization(d=2, max_iter=100000, eta=1*10**-4, regu=1.0, data_set='karate'),
    # HOPE: embedding dimension (d), decay factor (beta).
    HOPE(d=4, beta=0.01),
    # Laplacian Eigenmaps: embedding dimension (d).
    LaplacianEigenmaps(d=2),
    # Locally Linear Embedding: embedding dimension (d).
    LocallyLinearEmbedding(d=2),
    # node2vec: embedding dimension (d), maximum iterations (max_iter), random
    # walk length (walk_len), number of random walks (num_walks), context size
    # (con_size), return weight (ret_p), in-out weight (inout_p).
    node2vec(d=2, max_iter=1, walk_len=80, num_walks=10, con_size=10, ret_p=1, inout_p=1),
    # SDNE: embedding dimension (d), seen-edge reconstruction weight (beta),
    # first-order proximity weight (alpha), lasso (nu1) and ridge (nu2)
    # regularization coefficients, number of hidden layers (K), size of each
    # layer (n_units), number of iterations (n_iter), learning rate (xeta),
    # batch size (n_batch), and where the model and weight files are saved.
    SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3, n_units=[50, 15,], n_iter=50, xeta=0.01, n_batch=500,
         modelfile=['enc_model.json', 'dec_model.json'],
         weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']),
]

for embedding in models:
    node_count = G.number_of_nodes()
    edge_count = G.number_of_edges()
    print ('Num nodes: %d, num edges: %d' % (node_count, edge_count))

    # Train on G and time the training call.
    t1 = time()
    Y, t = embedding.learn_embedding(graph=G, edge_f=None, is_weighted=True, no_python=True)
    elapsed = time() - t1
    print (embedding._method_name+':\n\tTraining time: %f' % elapsed)

    # Score the embedding on graph reconstruction.
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(G, embedding, Y, None)
    report_template = "\tMAP: {} \t precision curve: {}\n\n\n\n"+'-'*100
    print(report_template.format(MAP,prec_curv[:5]))

    # Plot the learned embedding in 2D.
    viz.plot_embedding2D(embedding.get_embedding(), di_graph=G, node_colors=None)
    plt.show()

In [None]:
# Graph-embedding models from GEM: https://github.com/palash1992/GEM
#
# NOTE(review): this cell is a verbatim duplicate of the preceding cell; running
# both trains every model twice. Consider deleting one of them.
# NOTE(review): `G`, `gr`, `viz` and `plt` are used below but are not defined or
# imported in any cell visible in this notebook -- confirm where they come from.

models = []
# You can comment out the methods you don't want to run
# GF takes embedding dimension (d), maximum iterations (max_iter), learning rate (eta),
# regularization coefficient (regu) as inputs
#models.append(GraphFactorization(d=2, max_iter=100000, eta=1*10**-4, regu=1.0, data_set='karate'))
# HOPE takes embedding dimension (d) and decay factor (beta) as inputs
models.append(HOPE(d=4, beta=0.01))
# LE takes embedding dimension (d) as input
models.append(LaplacianEigenmaps(d=2))
# LLE takes embedding dimension (d) as input
models.append(LocallyLinearEmbedding(d=2))
# node2vec takes embedding dimension (d), maximum iterations (max_iter), random walk length (walk_len),
# number of random walks (num_walks), context size (con_size), return weight (ret_p), inout weight (inout_p) as inputs
models.append(node2vec(d=2, max_iter=1, walk_len=80, num_walks=10, con_size=10, ret_p=1, inout_p=1))
# SDNE takes embedding dimension (d), seen edge reconstruction weight (beta), first order proximity weight (alpha),
# lasso regularization coefficient (nu1), ridge regression coefficient (nu2), number of hidden layers (K),
# size of each layer (n_units), number of iterations (n_iter), learning rate (xeta), size of batch (n_batch),
# location of modelfile and weightfile save (modelfile and weightfile) as inputs
models.append(SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3, n_units=[50, 15,], n_iter=50, xeta=0.01, n_batch=500,
                modelfile=['enc_model.json', 'dec_model.json'],
                weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))

# Train, evaluate and plot each model in turn.
for embedding in models:
    print ('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    Y, t = embedding.learn_embedding(graph=G, edge_f=None, is_weighted=True, no_python=True)
    # The elapsed time printed here is measured around the learn_embedding call above.
    print (embedding._method_name+':\n\tTraining time: %f' % (time() - t1))
    # Evaluate on graph reconstruction
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(G, embedding, Y, None)
    #---------------------------------------------------------------------------------
    # Report MAP and the first five entries of the precision curve.
    print(("\tMAP: {} \t precision curve: {}\n\n\n\n"+'-'*100).format(MAP,prec_curv[:5]))
    #---------------------------------------------------------------------------------
    # Visualize
    viz.plot_embedding2D(embedding.get_embedding(), di_graph=G, node_colors=None)
    plt.show()

In [38]:
# Smoke test: walk to the first (dataset, strategy, percentage, order)
# combination, build the networkx copy of its graph, print a marker and stop.
# Every loop level ends in `break`, so the body runs at most once.
for dataset in datasets:
    for strat in strategies:
        for pct in pcts:
            # 'random' has no meaningful ordering, so it only gets one order.
            orders = ['asc'] if strat == 'random' else ['asc','desc']

            for order in orders:

                p_size = float(dataset['vertices']['community'].nunique())

                # remove_edges is not applied in this cell: the unmodified graph
                # is used whatever the value of pct (both branches of the former
                # `if pct == 0` assigned the same graph).
                graph = dataset['graph']

                # Creating nx Graph for the other Louvain implementation
                nxG = nx.Graph()
                nxG.add_nodes_from([vertex.index for vertex in graph.vs])
                nxG.add_edges_from([edge.tuple for edge in graph.es])

                print('Hola')
                break
            break
        break
    break

Hola


In [35]:
# Preview the first rows of the combined results table (long format: one row
# per dataset / method / metric / modification / percentage).
experiment_results.head()

Unnamed: 0,dataset,method,metric,value,modification,percentage
0,Dancer 01,Louvain-igraph,P,9.0,betweenness desc,0.0
1,Dancer 01,Louvain-igraph,P,9.0,betweenness desc,0.0
2,Dancer 01,Louvain-igraph,P,9.0,betweenness desc,0.0
3,Dancer 01,Louvain-igraph,P*/P,0.666667,betweenness desc,0.0
4,Dancer 01,Louvain-igraph,P*/P,0.666667,betweenness desc,0.0


In [7]:
# Write the results table to CSV; index=False leaves the DataFrame index out of the file.
experiment_results.to_csv('experiment_results.csv', index=False)