In [1]:
'''
  NN classifier (keras.models.Sequential)
  A Sequential model is appropriate for a plain stack of layers where each layer has exactly one input tensor and one output tensor.
'''

'\n  NN classifier (keras.models.Sequential)\n  A Sequential model is appropriate for a plain stack of layers where each layer has exactly one input tensor and one output tensor.\n'

## Import Libraries

In [16]:
import pandas as pd
import numpy as np

## Pre-processing
import scipy.sparse as sp
from scipy import sparse

## Logistic Regression
from sklearn.linear_model import LogisticRegression

## NN Classifier
## Sequential model: Plain stack of layers where each layer has exactly one input tensor and one output tensor
import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense,Activation,Dropout
from keras import callbacks ## Early Stopping

from sklearn.utils import shuffle

from sklearn.metrics import roc_auc_score, average_precision_score

from sklearn.preprocessing import normalize ## Scale input vectors individually to unit norm (vector length)

# import umap ## Dimensionality reduction

In [3]:
# Protein lookup table; read_embedding_matrix uses its row count as the number of proteins.
protein_list = pd.read_csv('protein_list.csv')
# protein_list.columns = ['idx', 'Protein1_ID']
## protein_list

In [4]:
protein_list

Unnamed: 0.1,Unnamed: 0,Protein1_ID
0,0,P03428
1,1,P03431
2,2,P03433
3,3,P03452
4,4,P03466
...,...,...
15680,15680,Q6NUS8
15681,15681,P12018
15682,15682,Q96IU2
15683,15683,Q6PEW1


## Utils

### Normalize

In [5]:
def normalize_row(mx): ## Sum of each row = 1
    """Row-normalize a matrix so that every row with a non-zero sum sums to 1.

    Parameters
    ----------
    mx : scipy sparse matrix or 2-D numpy array
        Matrix whose rows are rescaled. Integer dtypes are accepted.

    Returns
    -------
    Same container kind as ``mx`` (sparse in, sparse out; ndarray in, ndarray out)
        ``diag(1 / rowsum) . mx``. Rows whose sum is exactly zero are
        returned as all zeros instead of inf/nan.

    Notes
    -----
    Rows are divided by their *signed* sum, so a row whose entries sum to a
    negative value comes out with every sign flipped.
    """
    # Cast to float first: np.power(<integer array>, -1) raises ValueError.
    rowsum = np.asarray(mx.sum(1), dtype=float).flatten()
    # 1/0 is expected for empty rows and is zeroed right below, so silence
    # only that specific warning.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -1)
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(mx)

### Metrics

In [6]:
## Add F1, ROC-AUC and PR-AUC

def calculate_metrics(y_label, y_pred, y_pred_f):
    """Score binary predictions against the true labels.

    Parameters
    ----------
    y_label : indexable sequence
        True labels; the value 1 is treated as the positive class.
    y_pred : indexable sequence
        Hard (0/1) predictions, read at the same positions as ``y_label``.
    y_pred_f : sequence
        Continuous scores handed to ``roc_auc_score`` and
        ``average_precision_score``.

    Returns
    -------
    list
        ``[accuracy, sensitivity, specificity, precision, F1, ROC_AUC, PR_AUC]``

    Raises
    ------
    ZeroDivisionError
        If there are no samples, no positive labels (sensitivity) or no
        negative labels (specificity); only precision/F1 are guarded.
    """
    print(y_pred)

    # Confusion-matrix tally for the binary case.
    tally = {'TP': 0, 'TN': 0, 'FP': 0, 'FN': 0}
    for idx in range(len(y_label)):
        truth, guess = y_label[idx], y_pred[idx]
        if truth == guess:
            cell = 'TP' if truth == 1 else 'TN'
        else:
            cell = 'FP' if guess == 1 else 'FN'
        tally[cell] += 1
    TP, TN, FP, FN = tally['TP'], tally['TN'], tally['FP'], tally['FN']

    print("[TP TN FP FN]")
    print(TP, TN, FP, FN)

    accuracy = (TP + TN) / float(TP + TN + FP + FN)
    sensitivity = TP / float(TP + FN)
    specificity = TN / float(TN + FP)

    # No predicted positives (or precision + sensitivity == 0) makes these
    # undefined; report both as 0.0 in that case.
    try:
        precision = TP / float(TP + FP)
        F1 = (2 * precision * sensitivity) / (precision + sensitivity)
    except ZeroDivisionError as e:
        print(e)
        precision, F1 = 0.0, 0.0

    # Threshold-free metrics are computed from the continuous scores.
    ROC_AUC = roc_auc_score(y_label, y_pred_f)
    PR_AUC = average_precision_score(y_label, y_pred_f)

    print("[accuracy, sensitivity, specificity, precision, F1, ROC_AUC, PR_AUC]")
    return [accuracy, sensitivity, specificity, precision, F1, ROC_AUC, PR_AUC]

### Input data

In [7]:
def create_fold(test_path='edges/Experimentally_Verified_Test/testset_3.csv',
                pos_path='edges/Sept_2022_new/balanced/idx/b_pos_idx.csv',
                neg_path='edges/Sept_2022_new/balanced/idx/b_neg_idx.csv',
                val_frac=0.1,
                random_state=None):
    """Load the edge lists and draw a fresh train/validation split.

    Called with no arguments this behaves exactly as before: the hold-out test
    set is ``testset_3.csv`` and 10% of the positive and of the negative edges
    are sampled (without replacement) as the validation set.

    Parameters
    ----------
    test_path : str
        CSV with a ``label`` column (1 = positive edge, 0 = negative edge).
        Other test sets previously used here:
          - 'edges/Sept_2022_new/balanced/idx/b_test_idx.csv'  (network reconstruction)
          - 'edges/Experimentally_Verified_Test/testset_1.csv'
          - 'edges/Experimentally_Verified_Test/expt_test_new_dec2022.csv'    (changed pos label)
          - 'edges/Experimentally_Verified_Test/expt_test_final_dec2022.csv'  (changed all pos samples)
    pos_path, neg_path : str
        CSVs holding the positive / negative edges used for training and
        validation.
    val_frac : float
        Fraction of each edge list held out for validation (0 < val_frac < 1).
    random_state : int, numpy RandomState or None
        Seed forwarded to the shuffle and sampling calls. ``None`` (default)
        keeps the original behaviour of a different split on every call.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(pos_train, neg_train, pos_val, neg_val, pos_test, neg_test)``

    Raises
    ------
    ValueError
        If ``val_frac`` is not strictly between 0 and 1.
    """
    if not 0 < val_frac < 1:
        raise ValueError("val_frac must be in (0, 1), got %r" % (val_frac,))

    ## Hold-out test set
    test_df = pd.read_csv(test_path)
    pos_test = test_df[test_df['label'] == 1]
    neg_test = test_df[test_df['label'] == 0]

    ## Training and validation pool
    pos = shuffle(pd.read_csv(pos_path), random_state=random_state)
    neg = shuffle(pd.read_csv(neg_path), random_state=random_state)

    ## Train : Val = (1 - val_frac) : val_frac, split by original row index
    pos_val = pos.sample(frac=val_frac, replace=False, random_state=random_state)
    pos_train = pos[~pos.index.isin(pos_val.index)]

    neg_val = neg.sample(frac=val_frac, replace=False, random_state=random_state)
    neg_train = neg[~neg.index.isin(neg_val.index)]

    print('--Sampled new data--')

    return pos_train, neg_train, pos_val, neg_val, pos_test, neg_test

In [8]:
## Read embeddings of all proteins

def read_embedding_matrix(emb_type, emb_mtd):
    """Load the embedding of every protein and row-normalize it.

    Parameters
    ----------
    emb_type : str
        'graph' reads the space-separated text file
        ``Embeddings/Graph/Best/Nov_2022/<emb_mtd>.emb``;
        'combination', 'protein' and 'feat_selection' read a scipy sparse
        ``<emb_mtd>.npz`` file from their respective directories.
    emb_mtd : str
        File stem of the embedding to load.

    Returns
    -------
    Output of ``normalize_row`` applied to the dense embedding matrix.
    For 'graph' the matrix has at least ``len(protein_list)`` rows.

    Raises
    ------
    ValueError
        If ``emb_type`` is not one of the four supported values.

    Notes
    -----
    Graph embedding methods tried so far: deepwalk, node2vec_nw8_wl32,
    struc2vec, SDNE, LINE (order 1/2/3), GraRep, VAE_epochs200/VAE_epochs5,
    ripple2vec, node2vec_plus_nw8_wl32 (SparseOTF).
    """
    protein_list_len = len(protein_list)

    if emb_type == 'graph':

        edit_data_path = 'Embeddings/Graph/Best/Nov_2022/'

        ## Alternative loaders, depending on the file format of the method:
        ##   Deepwalk, node2vec, struc2vec:
        ##     pd.read_csv(edit_data_path + emb_mtd + '.csv', skiprows=1, header=None)
        ##   SDNE, GraRep, VAE, ripple2vec, (LINE):
        ##     pd.read_csv(edit_data_path + emb_mtd + '.txt', sep=' ', skiprows=1, header=None)

        ## node2vec+ : first line is skipped, column 0 becomes the row index
        emb = (pd.read_csv(edit_data_path + emb_mtd + '.emb', sep=' ', skiprows=1, header=None)
                 .sort_values(by=[0])
                 .set_index([0]))

        ## Proteins without an embedding get the column-wise mean embedding.
        ## The mean is computed once (appending the mean does not change it)
        ## and all filler rows are appended in a single concat.
        missing = np.setdiff1d(np.arange(protein_list_len), emb.index.values)
        if missing.size:
            mean_vec = np.sum(emb.values, axis=0) / emb.values.shape[0]
            filler = pd.DataFrame(np.tile(mean_vec, (missing.size, 1)),
                                  index=missing, columns=emb.columns)
            emb = pd.concat([emb, filler])
        features_def = emb.sort_index().values

    ### Read sparse matrix directly
    else:

        if emb_type == 'combination':
            ### Combination of graph embeddings
            ### (Protein + Graph embeddings live in data_path + 'concat_embeddings/')
            edit_data_path = 'Embeddings/Graph/Best/Concat/'

        elif emb_type == 'protein':
            # NOTE(review): `data_path` is not defined in the visible notebook
            # cells; this branch needs it as a global - confirm where it is set.
            edit_data_path = data_path + 'protein_embeddings/'

        elif emb_type == 'feat_selection':
            # NOTE(review): same dependency on the global `data_path`.
            edit_data_path = data_path + 'protein_embeddings/feature_selection/'

        else:
            raise ValueError(
                "Unknown emb_type %r; expected 'graph', 'combination', "
                "'protein' or 'feat_selection'" % (emb_type,))

        read_emb = sparse.load_npz(edit_data_path + emb_mtd + '.npz')
        features_def = read_emb.toarray()

    # NOTE(review): normalize_row divides by the signed row sum, so rows of a
    # signed embedding whose entries sum to a negative value come out
    # sign-flipped - confirm this is intended (sklearn's `normalize` scales by
    # the vector norm instead).
    features = normalize_row(features_def) ## Row-normalize features
#     features = normalize(features_def) ## from sklearn.preprocessing

    print("---Read Embeddings---")
    print(features.shape)
    print(features_def)
    print(features)

    return features

### Retrieve embeddings

In [9]:
def _edge_products(emb, edges):
    """Return the element-wise product of the two endpoint embeddings of every edge."""
    if len(edges) == 0:
        return np.empty((0, emb.shape[1]))
    heads = edges['Protein1_ID'].to_numpy()
    tails = edges['Protein2_ID'].to_numpy()
    # Fancy indexing pulls all endpoint rows at once, in DataFrame row order.
    return np.multiply(emb[heads], emb[tails])


def hadamard_emb(emb, posEdges, negEdges):
    """Build edge features (Hadamard product of endpoint embeddings) and labels.

    Parameters
    ----------
    emb : 2-D numpy array
        Embedding matrix; row ``i`` is the embedding of protein index ``i``.
    posEdges, negEdges : pandas.DataFrame
        Edge lists with integer columns ``Protein1_ID`` and ``Protein2_ID``
        that index rows of ``emb``.

    Returns
    -------
    X : numpy.ndarray, shape (len(posEdges) + len(negEdges), emb.shape[1]), float64
        Positive-edge features first, then negative-edge features, each in the
        row order of its DataFrame.
    Y : numpy.ndarray, shape (len(posEdges) + len(negEdges), 2)
        One-hot labels: ``[0, 1]`` for positive edges, ``[1, 0]`` for negative.
    """
    posNum = posEdges.shape[0]
    negNum = negEdges.shape[0]

    # Rows are looked up by position, so a DataFrame index with gaps or
    # duplicate labels cannot mis-select an edge.
    X = np.empty((posNum + negNum, emb.shape[1])) ## All embeddings
    X[:posNum] = _edge_products(emb, posEdges)
    X[posNum:] = _edge_products(emb, negEdges)

    Y_pos = np.full((posNum, 2), [0, 1])
    Y_neg = np.full((negNum, 2), [1, 0])
    Y = np.vstack((Y_pos, Y_neg))

    return X, Y

## trainNN

In [10]:
def train_nn(X_train, Y_train, X_val, Y_val, X_test, Y_test):
    """Train a dense softmax classifier and score it on the hold-out test set.

    The network is Dense(128) -> Dense(64) -> Dense(32), each ReLU layer
    followed by Dropout(0.5), with a 2-way softmax output. Training uses Adam
    with categorical cross-entropy for at most 200 epochs (batch size 128) and
    stops early when ``val_loss`` has not improved for 20 epochs, restoring the
    best weights.

    Parameters
    ----------
    X_train, X_val, X_test : 2-D arrays of edge features.
    Y_train, Y_val, Y_test : 2-column one-hot label arrays; column 1 is the
        positive class.

    Returns
    -------
    list
        The metric list produced by ``calculate_metrics`` on the test set.
    """
    print("---TrainNN---")

    model = Sequential()
    # First hidden layer also declares the input width.
    model.add(Dense(128, activation='relu', input_dim=X_train.shape[1]))
    model.add(Dropout(0.5))
    for units in (64, 32):
        model.add(Dense(units, activation='relu'))
        model.add(Dropout(0.5))
    model.add(Dense(2, activation='softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    # Show the optimizer's learning rate.
    print(K.eval(model.optimizer.lr))

    # Early stopping is driven by the validation split.
    stopper = callbacks.EarlyStopping(
        monitor='val_loss',
        mode="min",
        patience=20,
        restore_best_weights=True,
    )

    model.fit(
        X_train,
        Y_train,
        epochs=200,
        batch_size=128,
        validation_data=(X_val, Y_val),
        callbacks=[stopper],
    )

    # Evaluate on the hold-out test set.
    class_probs = model.predict(X_test)
    hard_labels = class_probs.argmax(axis=1)   # binary prediction
    positive_score = class_probs[:, 1]         # probability of class label '1'
    true_labels = Y_test[:, 1]                 # positive-class column of the one-hot labels

    return calculate_metrics(true_labels, hard_labels, positive_score)

## main

In [11]:
def train(input_type, input_mtd):
    """Run one full experiment: sample a split, build features, train, evaluate.

    Parameters
    ----------
    input_type : str
        Embedding family, forwarded to ``read_embedding_matrix``.
    input_mtd : str
        Embedding file stem, forwarded to ``read_embedding_matrix``.

    Returns
    -------
    list
        Test-set metrics as returned by ``train_nn``.
    """
    # Fresh train / validation split plus the fixed hold-out test set.
    pos_tr, neg_tr, pos_va, neg_va, pos_te, neg_te = create_fold()

    for pos_part, neg_part in ((pos_tr, neg_tr), (pos_va, neg_va), (pos_te, neg_te)):
        print(pos_part.shape, neg_part.shape)

    # Embeddings of all proteins.
    # (UMAP dimensionality reduction was tried here: umap.UMAP().fit_transform(...))
    emb = read_embedding_matrix(input_type, input_mtd)

    # Edge features for each split.
    X_train, Y_train = hadamard_emb(emb, pos_tr, neg_tr)
    X_val, Y_val = hadamard_emb(emb, pos_va, neg_va)
    X_test, Y_test = hadamard_emb(emb, pos_te, neg_te)

    # Final softmax classifier.
    return train_nn(X_train, Y_train, X_val, Y_val, X_test, Y_test)

## Run program here

In [12]:
def run_prog(input_graph_emb, n_runs=5,
             results_path='Eval_Results/Testset2_Experimentally_Verified.txt'):
    """Repeat the train/evaluate cycle and report mean / max / min metrics.

    Parameters
    ----------
    input_graph_emb : str
        File stem of the graph embedding to evaluate (passed to ``train`` with
        ``input_type='graph'``; use 'combination' there for concatenated
        embeddings).
    n_runs : int
        Number of independent repetitions (default 5, the original value).
    results_path : str
        Text file the mean metrics are appended to. Its directory is created
        if missing so a finished run is not lost to a FileNotFoundError.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1.
    """
    import os

    if n_runs < 1:
        raise ValueError("n_runs must be >= 1, got %r" % (n_runs,))

    eval_metrics = []

    for i in range(n_runs):

        print('Iteration(train): ', (i+1))

        acc = train(input_type = 'graph', input_mtd=input_graph_emb)
        print(acc)

        eval_metrics.append(acc)

    print("===================== " + input_graph_emb + " =====================")
    print(eval_metrics)

    # One row per run, one column per metric; aggregate column-wise.
    scores = np.array(eval_metrics)

    mean_scores = np.round(scores.mean(axis=0), 4)
    print('Mean: ' + str(mean_scores)[1:-1])

    # Named *_scores so the builtins max()/min() are not shadowed.
    max_scores = np.round(scores.max(axis=0), 4)
    print('Max: ' + str(max_scores)[1:-1])

    min_scores = np.round(scores.min(axis=0), 4)
    print('Min: ' + str(min_scores)[1:-1])
    print("=======================================================")

    # NOTE(review): the default file name says "Testset2" while create_fold
    # loads testset_3.csv by default - confirm the intended pairing.
    out_dir = os.path.dirname(results_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(results_path, "a") as f:
        f.write(input_graph_emb + '(mean):' + str(mean_scores) + '\n')
#         f.write(input_graph_emb + '(max):' + str(max_scores) + '\n')
#         f.write(input_graph_emb + '(min):' + str(min_scores) + '\n')

In [13]:
# mtd_arr = ['deepwalk_nw16_wl32', 'node2vec_nw8_wl32_p0.25_q0.5', 'struc2vec_nw128_wl16']
# mtd_arr = ['ripple2vec_nw8_wl8', 'vae_h256_128', 'grarep_k2']
# mtd_arr = ['LINE_order3_epochs10', 'sdne_a0_b10', 'ripple2vec_nw8_wl8']

# for i in range(len(mtd_arr)):
#     run_prog(mtd_arr[i])

In [17]:
# File stem of the graph embedding to evaluate; read_embedding_matrix appends
# '.emb' and loads it from its graph-embedding directory.
input_g_mtd = 'n2vplus_nw8_wl32_p0.25_q0.5'

run_prog(input_g_mtd)

Iteration(train):  1
--Sampled new data--
(3613, 3) (3613, 3)
(401, 3) (401, 3)
(446, 3) (446, 3)
---Read Embeddings---
(15685, 128)
[[ 0.05438794  0.15712513  0.02526474 ...  0.06337012 -0.06831192
   0.05740243]
 [ 0.0920557   0.12854572  0.07927883 ... -0.04355874 -0.02950621
   0.01127648]
 [ 0.10265665  0.04119997  0.01358521 ... -0.07648639  0.01284278
   0.09610651]
 ...
 [-0.06547894  0.03663263  0.12268668 ... -0.04719101 -0.08261108
  -0.08350137]
 [-0.00914692 -0.00912238  0.11259656 ... -0.02135581 -0.1477403
  -0.11278752]
 [ 0.02491135 -0.00382214  0.14209396 ...  0.0124865  -0.13294958
  -0.08357669]]
[[ 0.06255813  0.18072855  0.02906002 ...  0.07288961 -0.07857377
   0.06602545]
 [ 0.18997227  0.26527551  0.16360507 ... -0.08989072 -0.06089097
   0.0232709 ]
 [-0.20810853 -0.08352177 -0.02754034 ...  0.15505543 -0.02603526
  -0.19482991]
 ...
 [ 0.07148592 -0.03999328 -0.13394184 ...  0.05152027  0.09018974
   0.09116171]
 [ 0.01469648  0.01465706 -0.18091049 ...  0.03

Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200


Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1 1 0 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0
 0 1 0 1 1 0 1 0

Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200


Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 0 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1
 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0

Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200


Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200


Epoch 143/200
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 0 1 1 0 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 1 1 1 1 1 1 1 0 1 1 0 0 1 0 1 0 1 0 0 0
 0 1 0 1 1 1 1 0 0 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 0 0 1 1 1 

Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200


Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200


Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 1 1 0 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 

Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200


Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 1 1 1 1 

In [None]:
## Hyper-params

# SDNE
# a_arr = [0, 0.1, 0.2, 0.3, 0.4]
# b_arr = [0, 10, 20, 30]

# ## struc2vec
# a_arr = [8, 16, 32, 64, 128] ## num_walks
# b_arr = [8, 16, 32, 64, 128] ## walk_length

## node2vec, node2vec+
# a_arr = [0.25, 0.5, 1, 2, 4]
# b_arr = [0.25, 0.5, 1, 2, 4]

## deepwalk
# a_arr = [8, 16, 32, 64, 128, 256] ## num_walks
# b_arr = [8, 16, 32, 64, 128, 256] ## walk_length

# ## ripple2vec
# a_arr = [8, 16, 32, 64]
# b_arr = [8, 16, 32, 64]

In [None]:
# for i in range (len(a_arr)):
    
#     for j in range(len(b_arr)):
        
#         a_str = str(a_arr[i])
#         b_str = str(b_arr[j])
        
# #         input_g_mtd_str = 'a' + a_str + '_b' + b_str ## SDNE
# #         input_g_mtd_str = 'p' + a_str + '_q' + b_str ## node2vec
#         input_g_mtd_str = 'n2v_plus_p' + a_str + '_q' + b_str ## node2vec+
# #         input_g_mtd_str = 'nw' + a_str + '_wl' + b_str ## deepwalk, struc2vec, ripple2vec
        
#         print(input_g_mtd_str)
        
#         run_prog(input_g_mtd_str) 

In [None]:
# a_arr = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50] ## LINE
# a_arr = [1, 2, 4, 8] ## GraRep

# ## GAE
# a_arr = [16, 32, 64, 128, 256, 512] ## hidden1
# b_arr = [8, 16, 32, 64, 128, 256] ## hidden2

# for i in range (len(a_arr)):
    
#     input_g_mtd_str = 'epochs_' + str(a_arr[i]) ## LINE

# #     input_g_mtd_str = 'h' + str(a_arr[i]) + '_' + str(b_arr[i]) ## VAE
#     print(input_g_mtd_str)
    
#     run_prog(input_g_mtd_str) 

In [None]:
# # ripple2vec additional

# str_arr = [8, 16, 32, 64, 128]

# ## Fixed num_walks
# for i in range (len(str_arr)):
#     get_str = str(str_arr[i])
        
#     input_g_mtd_str = 'nw' + str(128) + '_wl' + str(str_arr[i])

#     print(input_g_mtd_str)

#     run_prog(input_g_mtd_str) 

# ## Fixed walk_length
# for i in range (len(str_arr)):
#     get_str = str(str_arr[i])
        
#     input_g_mtd_str = 'nw' + str(str_arr[i]) + '_wl' + str(128)

#     print(input_g_mtd_str)

#     run_prog(input_g_mtd_str) 

In [None]:
# # Struc2vec additional

# ## num_walks=256
# str_arr = [8, 16, 32]

# for i in range (len(str_arr)):
#     get_str = str(str_arr[i])
        
#     input_g_mtd_str = 'nw' + str(256) + '_wl' + str(str_arr[i])

#     print(input_g_mtd_str)

#     run_prog(input_g_mtd_str) 

# for i in range (len(str_arr)):
#     get_str = str(str_arr[i])
        
#     input_g_mtd_str = 'nw' + str(str_arr[i]) + '_wl' + str(256)

#     print(input_g_mtd_str)

#     run_prog(input_g_mtd_str) 

In [None]:
# input_g_mtd_arr = ['dw_nw8_wl64', 'n2v_p0.25_q0.5', 's2v_nw256_wl8', 'line_epochs40']
# input_g_mtd_arr = ['grarep_k2', 'sdne_a0_b10']
# input_g_mtd_arr = ['dw-n2v-grarep-sdne', 'dw-n2v-grarep-line-sdne', 's2v-grarep-sdne', 'dw-n2v-s2v-grarep-sdne']

# input_g_mtd_arr = ['GraRep_default', 'SDNE_default']


# for i in range(len(input_g_mtd_arr)):
    
#     print(input_g_mtd_arr[i])
    
#     run_prog(input_g_mtd_arr[i]) 

In [None]:
# def run_prog(input_graph_emb, get_input_neg_prop):
    
#     eval_metrics = []
    
#     for i in range(0, 5):
        
#         print('Iteration(train): ', (i+1))
        
#         acc = train(input_type = 'graph', input_mtd=input_graph_emb, input_neg_prop=get_input_neg_prop)
#         print(acc)
        
#         eval_metrics.append(acc)
        
#     print("===================== " + input_graph_emb + " =====================")
#     print(eval_metrics)
    
#     mean = np.array(eval_metrics).mean(axis=0) # Take the mean of each column
#     mean = np.round(mean, 4)
#     print('Mean: ' + str(mean)[1:-1])
          
#     max = np.array(eval_metrics).max(axis=0)
#     max = np.round(max, 4)
#     print('Max: ' + str(max)[1:-1])
          
#     min = np.array(eval_metrics).min(axis=0)
#     min = np.round(min, 4)
#     print('Min: ' + str(min)[1:-1])
#     print("=======================================================")
          
#     with open('NN_NEW_Unbalanced_dataset_results.txt', "a") as f:
#         f.write(get_input_neg_prop + '--' + input_graph_emb + ': ' + str(mean) + '\n')

In [None]:
# input_g_mtd_arr = ['line_epochs40', 's2v_nw256_wl8']
# input_neg_prog = ['1_10', '1_3', '1_1']

# input_g_mtd_arr = ['s2v-grarep-line-sdne']
# input_neg_prog = ['1_35', '1_25', '1_10', '1_3', '1_1']

In [None]:
# for i in range(len(input_neg_prog)):
    
#     for j in range(len(input_g_mtd_arr)):
        
#         print(input_g_mtd_arr[j])
#         run_prog(input_g_mtd_arr[j], input_neg_prog[i])
    
#     print("************************************************** DONE " + str(i+1) + " **************************************************")

In [None]:
# for i in range(len(input_g_mtd_arr)):
    
#     print(input_g_mtd_arr[i])
    
#     run_prog(input_g_mtd_arr[i])
    
#     print("************************************************** DONE " + str(i+1) + " **************************************************")

In [None]:
## Check dimensions

# input_g_mtd_arr = ['dw-n2v', 'dw-n2v-s2v', 'dw-n2v-grarep',
#                    's2v-grarep', 'dw-n2v-s2v-grarep',
#                    'line-sdne', 'line-grarep', 'sdne-grarep', 'line-sdne-grarep',
#                    'dw-n2v-line', 'dw-n2v-sdne', 'dw-n2v-line-sdne',
#                    's2v-line', 's2v-sdne', 's2v-line-sdne',
#                    'dw-n2v-grarep-line', 'dw-n2v-grarep-sdne', 'dw-n2v-grarep-line-sdne',
#                    's2v-grarep-line', 's2v-grarep-sdne', 's2v-grarep-line-sdne',
#                    'dw-n2v-s2v-grarep-line', 'dw-n2v-s2v-grarep-sdne', 'dw-n2v-s2v-grarep-line-sdne']

# print(len(input_g_mtd_arr))

In [None]:
# edit_data_path = 'Embeddings/Graph/Best/Concat/'

# for i in range(len(input_g_mtd_arr)):
    
#     expected_dim = len(input_g_mtd_arr[i].split('-')) * 128
    
#     read_emb = sparse.load_npz(edit_data_path + input_g_mtd_arr[i] + '.npz')
#     features_def = read_emb.toarray()

#     print(features_def.shape)
    
#     if expected_dim == features_def.shape[1]:
#         print("True")
#     else:
#         print(input_g_mtd_arr[i])

In [None]:
# import numpy as np

# print(np.__version__)