# Comparative analysis description

# Input data description

In [1]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import inspect
from IPython.display import clear_output
os.chdir("..")
import relegy.embeddings as rle
import relegy.metrics as rlm
import relegy.graphs as rlr
os.chdir("Analysis")

In [2]:
def get_embedding_methods_iterable():
    return filter(lambda x: x[0][:2] != "__", inspect.getmembers(sys.modules['relegy.embeddings']))

In [3]:
node_labels = np.genfromtxt("C:\\Users\\Lukasz\\Desktop\\temp\\internet-industry-partnerships.node_labels")
edges = np.genfromtxt("C:\\Users\\Lukasz\\Desktop\\temp\\internet-industry-partnerships.edges", delimiter=",").astype(int)
labels1 = node_labels.astype(int)

In [4]:
edges

array([[  3, 216],
       [203, 215],
       [153, 215],
       ...,
       [  1, 219],
       [ 30, 219],
       [ 69, 219]])

In [5]:
G1 = nx.Graph()
G1.add_edges_from(edges)
G1 = nx.convert_node_labels_to_integers(G1, first_label=0)

In [6]:
G2, labels2_ = rlr.get_karate_graph()
labels2_ = np.array(labels2_)[:, 1]
labels2 = (labels2_ == 'Mr. Hi').astype(int)

In [7]:
def get_results_from_all_viable_methods(d, graphs, graph_names=None):
    results = [None] * len(graphs)
    for i, G in enumerate(graphs):
        results_dictionary = {}
        for name, class_handle in get_embedding_methods_iterable():
            info = "Currently processing " + str(name) + ", d: " + str(d)
            if graph_names is not None:
                info += ", graph: " + graph_names[i]
            if not (name == "GCN" or name == "GNN"):
                if name in ["GraphWave", "HOPE", "LINE"]:
                    current_d = d // 2
                else:
                    current_d = d
                print(info)
                if not (name == "HARP"):
                    results_dictionary[name] = class_handle.fast_embed(G, d=current_d)
                    clear_output()
                else:
                    temp_name = "HARP_Deepwalk"
                    results_dictionary[temp_name] = class_handle.fast_embed(G, d=current_d)
                    temp_name = "HARP_Node2Vec"
                    results_dictionary[temp_name] = class_handle.fast_embed(G, d=current_d, method = "Node2Vec")
                    clear_output()
        results[i] = results_dictionary
    return results

In [None]:
results_d2 = get_results_from_all_viable_methods(d=2, graphs=[G1, G2], graph_names=["G1", "G2"])

Currently processing Struc2Vec, d: 2, graph: G1


In [77]:
results_d2

[{'DNGR': array([[0.6624781 , 0.3656663 ],
         [0.634946  , 0.412911  ],
         [0.44179124, 0.2548797 ],
         [0.44557843, 0.24104708],
         [0.4470369 , 0.2888    ],
         [0.43609497, 0.10573334],
         [0.35469532, 0.10868034],
         [0.31616312, 0.15798822],
         [0.27253196, 0.15614092],
         [0.48617625, 0.21279117],
         [0.4982917 , 0.16402945],
         [0.5072297 , 0.1829763 ],
         [0.51524043, 0.1637106 ],
         [0.49935126, 0.19241151],
         [0.44877252, 0.19619656],
         [0.54395866, 0.26943645],
         [0.4128883 , 0.12024364],
         [0.4370497 , 0.19854265],
         [0.44543582, 0.17120695],
         [0.40527028, 0.08450049],
         [0.42560756, 0.17471284],
         [0.41023347, 0.11306486],
         [0.43773526, 0.25457585],
         [0.90693945, 0.7442    ],
         [0.84353137, 0.67991066],
         [0.8460631 , 0.6893118 ],
         [0.7545821 , 0.55881095],
         [0.7382039 , 0.5414554 ],
         [0.

In [78]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
names = results_d2[0].keys()
labels = labels1
for i, name in enumerate(names):
    Z = results_d2[0][name]
    X_train, X_test, y_train, y_test = train_test_split(Z, labels, random_state=0)
    logreg = LogisticRegression()
    logreg.fit(X_train, y_train)
    print(name)
    print('Accuracy of Logistic regression classifier on training set: {:.2f}'
     .format(logreg.score(X_train, y_train)))
    print('Accuracy of Logistic regression classifier on test set: {:.2f}'
         .format(logreg.score(X_test, y_test)))

DNGR
Accuracy of Logistic regression classifier on training set: 0.61
Accuracy of Logistic regression classifier on test set: 0.60
DeepWalk
Accuracy of Logistic regression classifier on training set: 0.60
Accuracy of Logistic regression classifier on test set: 0.60
GraRep
Accuracy of Logistic regression classifier on training set: 0.61
Accuracy of Logistic regression classifier on test set: 0.60
GraphFactorization
Accuracy of Logistic regression classifier on training set: 0.61
Accuracy of Logistic regression classifier on test set: 0.60
GraphWave
Accuracy of Logistic regression classifier on training set: 0.61
Accuracy of Logistic regression classifier on test set: 0.60
HARP_Deepwalk
Accuracy of Logistic regression classifier on training set: 0.62
Accuracy of Logistic regression classifier on test set: 0.60
HARP_Node2Vec
Accuracy of Logistic regression classifier on training set: 0.60
Accuracy of Logistic regression classifier on test set: 0.60
HOPE
Accuracy of Logistic regression cla