<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [None]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from node2vec import Node2Vec
from node2vec.edges import HadamardEmbedder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix,classification_report,roc_auc_score
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import decomposition, datasets
from sklearn.manifold import TSNE
import os
import multiprocessing as mp
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")

In [None]:
class getNode2Vec:
    """Build node2vec edge embeddings for a labelled edge list.

    Two files sharing the prefix ``path`` are read: ``<path>.net`` (a Pajek
    graph) and ``<path>.raw_data`` (a tab-separated table with ``from_id``,
    ``to_id`` and ``label`` columns).
    """

    def __init__(self, path, dim):
        """Load the graph and the labelled edge table.

        Args:
            path: Path prefix shared by the ``.net`` and ``.raw_data`` files.
            dim: Number of embedding dimensions handed to Node2Vec.
        """
        # NOTE(review): nx.Graph stores extra keyword arguments as graph
        # attributes, so `nodetype=int` most likely does not convert node
        # ids; the methods below look nodes up by str(id) accordingly.
        self.G = nx.Graph(nx.read_pajek(path + '.net'), nodetype=int)
        raw_data = pd.read_csv(path + '.raw_data', sep='\t')
        # Rows labelled 1 are the positive edges removed before training.
        self.pos_e = raw_data[raw_data['label']==1][['from_id','to_id']].values
        self.e = raw_data[['from_id','to_id']].values
        self.labels = raw_data[['label']]
        self.dim = dim

    def remove_edges(self):
        """Remove every positive (``label == 1``) edge from ``self.G``.

        ``remove_edges_from`` skips edges that are not in the graph, so a
        pair listed twice (or in both directions) and repeated calls do not
        raise.
        """
        self.G.remove_edges_from(
            [(str(src), str(dst)) for src, dst in self.pos_e])

    def gen_node2vec(self):
        """Train node2vec on the graph and embed every edge of the table.

        The positive edges are removed from the graph first. Each edge is
        embedded with ``HadamardEmbedder`` and becomes one row with columns
        ``d0``, ``d1``, ...

        Returns:
            A DataFrame with the embedding columns followed by ``label``.
        """
        # Remove positive edges from graph:
        self.remove_edges()

        # Node2Vec expects the temp folder to exist already; exist_ok keeps
        # this safe when several processes reach this point at once.
        temp_folder = './temp/'
        os.makedirs(temp_folder, exist_ok=True)

        node2vec = Node2Vec(self.G,
                            dimensions=self.dim,
                            walk_length=100,
                            num_walks=18,
                            workers=5,
                            p=0.13,
                            q=0.13,
                            temp_folder=temp_folder)  # Use temp_folder for big graphs
        # Embed nodes (node2vec delegates the training to gensim Word2Vec).
        model = node2vec.fit()
        edges_embs = HadamardEmbedder(keyed_vectors=model.wv)

        # Edges: collect all vectors and build the frame once.
        # DataFrame.append copied the whole frame per row and no longer
        # exists in pandas >= 2.0.
        vectors = [edges_embs[(str(e[0]), str(e[1]))] for e in self.e]
        n2v_df = pd.DataFrame(vectors).add_prefix('d')

        return pd.concat([n2v_df, self.labels], axis=1)

In [None]:
# Collect the network names: every sub-directory of ./data/ is one network.
# Plain files (e.g. .DS_Store) are skipped because each entry is expanded to
# ./data/<name>/<name> further down, which only works for directories.
dirs = [x for x in os.listdir('./data/')
        if os.path.isdir(os.path.join('./data/', x))]
for x in dirs:
    print(x)


def ww(p):
    """Embed the network at path prefix ``p`` and save the result as CSV.

    Builds a 100-dimensional ``getNode2Vec`` for ``p``, generates the edge
    embeddings and writes them to ``p + 'emb.csv'``, then prints a
    completion message.
    """
    getNode2Vec(p, 100).gen_node2vec().to_csv(p + 'emb.csv')
    print('FINISHED WITH:', p)


# Run `ww` for every network in its own process and wait for all of them.
# No multiprocessing.Pool is created here: the previous one was never used or
# closed (leaking idle workers) and `cpu_count() - 1` is 0 on a single-core
# machine, which Pool rejects.
# The __main__ guard stops child processes from launching the whole fan-out
# again when they re-import this module under the "spawn" start method.
if __name__ == '__main__':
    proc = []
    for net in dirs:
        path = "./data/{}/{}".format(net, net)
        p = mp.Process(target=ww, args=(path,))
        p.start()
        proc.append(p)
    for p in proc:
        p.join()