# Comparative analysis description

# Input data description

In [1]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import inspect
from IPython.display import clear_output
# The relegy imports are done with the parent directory as the working
# directory — presumably the package lives there and is not installed;
# TODO confirm. The order of these statements matters.
os.chdir("..")
import relegy.embeddings as rle
import relegy.metrics as rlm
import relegy.graphs as rlr
# Switch into the "Analysis" sub-directory of the parent for the rest of the
# notebook (assumes that is where this notebook is stored — verify).
os.chdir("Analysis")

In [2]:
def get_embedding_methods_iterable():
    """Iterate over the public members of the ``relegy.embeddings`` module.

    The module is looked up in ``sys.modules``, so it must already have been
    imported. Members are produced as ``(name, value)`` pairs in the order
    given by ``inspect.getmembers``; every member whose name starts with a
    double underscore is left out.

    Returns:
        A lazy ``filter`` iterator of ``(name, value)`` tuples.
    """
    def _is_not_dunder(member):
        member_name = member[0]
        return member_name[:2] != "__"

    embeddings_module = sys.modules['relegy.embeddings']
    return filter(_is_not_dunder, inspect.getmembers(embeddings_module))

In [45]:
# Directory holding the BA-1_10_60-L5 input files. It can be overridden with
# the RELEGY_DATA_DIR environment variable so the notebook runs on machines
# other than the author's; the default is the original hard-coded location.
DATA_DIR = os.environ.get("RELEGY_DATA_DIR", "C:\\Users\\Lukasz\\Desktop\\temp")

# Both files are parsed with numpy's default settings (whitespace-delimited
# floats).
node_labels = np.genfromtxt(os.path.join(DATA_DIR, "BA-1_10_60-L5.node_labels"))
edges = np.genfromtxt(os.path.join(DATA_DIR, "BA-1_10_60-L5.edges"))

# Column 0 is used as the node identifiers and column 1 as the labels;
# both are cast to integers.
nodes = node_labels[:, 0].astype(int)
labels1 = node_labels[:, 1].astype(int)

In [46]:
# Build an undirected graph from the `nodes` and `edges` arrays.
G1 = nx.Graph()
G1.add_nodes_from(nodes)
G1.add_edges_from(edges)
# Relabel the nodes of G1 as consecutive integers starting at 0.
# The original passed `G` here, a name that is never defined at notebook
# scope: on a fresh kernel it raises NameError, and with leftover kernel state
# it would silently replace G1 with an unrelated graph.
G1 = nx.convert_node_labels_to_integers(G1, first_label=0)

In [47]:
# Second test graph: whatever relegy's get_karate_graph helper returns,
# unpacked as a graph and a collection of per-node label records.
G2, labels2_ = rlr.get_karate_graph()
# Keep only column 1 of the label records — presumably these are
# (node, club) pairs; TODO confirm against relegy.graphs.
labels2_ = np.array(labels2_)[:, 1]
# Binary target: 1 where the label equals 'Mr. Hi', 0 otherwise.
labels2 = (labels2_ == 'Mr. Hi').astype(int)

In [50]:
def get_results_from_all_viable_methods(d, graphs, graph_names=None):
    """Embed every graph with every usable method from ``relegy.embeddings``.

    Methods are discovered through ``get_embedding_methods_iterable`` and run
    via their ``fast_embed`` entry point. A progress line is printed before
    each method runs, and the cell output is cleared once it finishes.

    Special cases:
      * "GCN", "GNN" and "Struc2Vec" are skipped.
      * "GraphWave", "HOPE" and "LINE" are called with ``d // 2``
        (presumably because they return twice the requested width —
        TODO confirm).
      * "HARP" is run twice: once with its default settings (stored as
        "HARP_Deepwalk") and once with ``method="Node2Vec"`` (stored as
        "HARP_Node2Vec").

    Args:
        d: Target embedding dimension.
        graphs: Sequence of graphs to embed.
        graph_names: Optional sequence of strings, parallel to ``graphs``,
            used only in the progress message.

    Returns:
        A list with one dictionary per graph, in input order, mapping the
        method name to whatever ``fast_embed`` returned.
    """
    skipped = ("GCN", "GNN", "Struc2Vec")
    halved = ("GraphWave", "HOPE", "LINE")

    per_graph = [None] * len(graphs)
    for idx, graph in enumerate(graphs):
        embeddings = {}
        for name, class_handle in get_embedding_methods_iterable():
            message = "Currently processing " + str(name) + ", d: " + str(d)
            if graph_names is not None:
                message += ", graph: " + graph_names[idx]

            if name in skipped:
                continue

            dim = d // 2 if name in halved else d
            print(message)

            if name == "HARP":
                embeddings["HARP_Deepwalk"] = class_handle.fast_embed(graph, d=dim)
                embeddings["HARP_Node2Vec"] = class_handle.fast_embed(graph, d=dim, method="Node2Vec")
            else:
                embeddings[name] = class_handle.fast_embed(graph, d=dim)
            clear_output()
        per_graph[idx] = embeddings
    return per_graph

In [51]:
# Two-dimensional embeddings of both graphs from every usable method.
results_d2 = get_results_from_all_viable_methods(
    d=2,
    graphs=[G1, G2],
    graph_names=["G1", "G2"],
)

In [52]:
# Display the raw result: a list with one {method name: embedding array}
# dictionary per graph. NOTE(review): this prints every array; consider
# showing only shapes to keep the output short.
results_d2

[{'DNGR': array([[0.52801013, 0.50776   ],
         [0.5522349 , 0.4667893 ],
         [0.53306603, 0.51268363],
         ...,
         [0.11852121, 0.08446684],
         [0.12768799, 0.12157556],
         [0.07877666, 0.13896063]], dtype=float32),
  'DeepWalk': array([[ 0.55350703,  0.09130004],
         [ 0.48132488,  0.19263336],
         [ 0.5158226 ,  0.14041042],
         ...,
         [-0.15041147,  0.42749006],
         [-0.094973  ,  0.44671202],
         [-0.1489519 ,  0.32385814]], dtype=float32),
  'GraRep': array([[ 3.0863500e-06, -1.7061816e-06],
         [ 0.0000000e+00,  1.8280517e-06],
         [ 0.0000000e+00, -2.4374023e-07],
         ...,
         [-9.3592966e-01,  3.2202265e-01],
         [-9.6523881e-01,  2.4182777e-01],
         [-9.4719601e-01, -3.7471110e-01]], dtype=float32),
  'GraphFactorization': array([[-0.80162185,  0.60082185],
         [-0.7559792 ,  0.65717846],
         [-0.7738935 ,  0.6351023 ],
         ...,
         [-0.82270974,  0.5692047 ],
   

In [60]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# Node-classification check on the first graph: for each embedding method,
# fit a default logistic regression on a fixed train/test split
# (random_state=0) and report the accuracy on both parts.
names = results_d2[0].keys()
labels = labels1
for name in names:
    Z = results_d2[0][name]
    X_train, X_test, y_train, y_test = train_test_split(Z, labels, random_state=0)
    logreg = LogisticRegression()
    logreg.fit(X_train, y_train)
    print(name)
    train_accuracy = logreg.score(X_train, y_train)
    print('Accuracy of Logistic regression classifier on training set: {:.2f}'.format(train_accuracy))
    test_accuracy = logreg.score(X_test, y_test)
    print('Accuracy of Logistic regression classifier on test set: {:.2f}'.format(test_accuracy))

DNGR
Accuracy of Logistic regression classifier on training set: 0.21
Accuracy of Logistic regression classifier on test set: 0.19
DeepWalk
Accuracy of Logistic regression classifier on training set: 0.23
Accuracy of Logistic regression classifier on test set: 0.20
GraRep
Accuracy of Logistic regression classifier on training set: 0.21
Accuracy of Logistic regression classifier on test set: 0.19
GraphFactorization
Accuracy of Logistic regression classifier on training set: 0.21
Accuracy of Logistic regression classifier on test set: 0.18
GraphWave
Accuracy of Logistic regression classifier on training set: 0.20
Accuracy of Logistic regression classifier on test set: 0.18
HARP_Deepwalk
Accuracy of Logistic regression classifier on training set: 0.26
Accuracy of Logistic regression classifier on test set: 0.20
HARP_Node2Vec
Accuracy of Logistic regression classifier on training set: 0.23
Accuracy of Logistic regression classifier on test set: 0.18
HOPE
Accuracy of Logistic regression cla