# Import libraries and external notebooks

In [1]:
import import_ipynb
from datasets.Downloader import *

import pandas as pd
import networkx as nx
import numpy as np
import random
import math
from numpy import linalg as LA

import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras import layers, Model
import tensorflow.keras.backend as K
from sklearn.neural_network import MLPClassifier
from IPython import display
print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("GPU is", "available" if tf.config.experimental.list_physical_devices("GPU") else "NOT AVAILABLE")

importing Jupyter notebook from /media/santaris/HighSpeedStorage/LabsWorkspace/Notebooks/LocalityGroups/Graph Representation Learning/Matrix Factorization/SocioDim/datasets/Downloader.ipynb
Version:  2.0.0
Eager mode:  True
GPU is available


# Download datasets

In [2]:
download_dataset('http://socialcomputing.asu.edu/uploads/1283153973/BlogCatalog-dataset.zip', 'datasets/BlogCatalog-dataset.zip', 'datasets')
# download_dataset('http://socialcomputing.asu.edu/uploads/1283157931/Flickr-dataset.zip', 'datasets/Flickr-dataset.zip', 'datasets')


Size of file: 976987


datasets/BlogCatalog-dataset.zip: 100%|##########| 954k/954k [00:05<00:00, 193kB/s]  


# Import Data

In [3]:
G = nx.read_edgelist('datasets/BlogCatalog-dataset/data/edges.csv', delimiter=',', nodetype=int)

In [4]:
print(f"Number of nodes {G.number_of_nodes()}")
print(f"Number of edges {G.number_of_edges()}")

Number of nodes 10312
Number of edges 333983


In [5]:
adj_matrix = nx.adjacency_matrix(G)
dense_matrix = np.array(adj_matrix.toarray(),dtype=np.float64)
adj_tensor = tf.constant(dense_matrix)

In [6]:
labels = pd.read_csv('datasets/BlogCatalog-dataset/data/group-edges.csv', sep=',', names=['Node','Group'])

In [7]:
labels.head()

Unnamed: 0,Node,Group
0,28,1
1,32,1
2,36,1
3,37,1
4,84,1


In [8]:
vectorized_labels = np.zeros((G.number_of_nodes(), 39), dtype=int)
for index, row in labels.iterrows():
    node = row['Node']
    group = int(row['Group'])
    vectorized_labels[node - 1][group - 1] = 1
    

# Step 1. Extract Latent social dimensions based on network connectivity

In [9]:

def modularity(adj_matrix):
    
    # Degrees of nodes
    d = tf.reduce_sum(adj_matrix, 1)
    
    two_m = tf.reduce_sum(d)
    
    nom = tf.math.multiply(tf.transpose(d), d)
    
    B = tf.math.subtract(adj_matrix, tf.math.divide(nom, two_m))
    
    e,v = tf.linalg.eigh(B)
        
    return e,v


In [10]:
w, v = modularity(adj_tensor)

In [11]:
eigs_sort = tf.argsort(w, direction='DESCENDING')

In [12]:
v_latent = []
for i in range(500):
    v_latent.append(v[:][eigs_sort[i]].numpy())
v_latent = np.array(np.transpose(v_latent), dtype=np.float64)

# Step 2 Construct Discriminative Classifier

In [13]:
def construct_Train_Test_Sets(ratio=0.1):
    indexes = np.arange(G.number_of_nodes())
    np.random.shuffle(indexes)
    training_indexes = math.floor(G.number_of_nodes() * ratio)
    training_nodes = indexes[:training_indexes]
    testing_nodes = indexes[training_indexes+1:]
    
    X = []
    Y = []
    for node in training_nodes:
        X.append(v_latent[node - 1])
        Y.append(vectorized_labels[node - 1])


    X_train = np.array(X)
    X_train_tensor = tf.constant(X_train)
    Y_train = np.array(Y)
    Y_train_tensor = tf.constant(Y_train)

    X = []
    Y = []
    for node in testing_nodes:
        X.append(v_latent[node - 1].real)
        Y.append(vectorized_labels[node - 1])
    X_train.shape

    X_test = np.array(X)
    X_test_tensor = tf.constant(X_test)
    Y_test = np.array(Y)
    Y_test_tensor = tf.constant(Y_test)
    
    return X_train_tensor, Y_train_tensor, X_test_tensor, Y_test_tensor
    

In [39]:
def constructModel():
    model = tf.keras.Sequential()
    # Adds a densely-connected layer with 64 units to the model:
    model.add(layers.Dense(1000, activation='relu'))
    
    model.add(layers.Dense(1000, activation='relu'))

    # Add a sigmoid layer with 10 output units:
    model.add(layers.Dense(39, activation='sigmoid', kernel_regularizer=tf.keras.regularizers.l2(1e-4)))
    
    model.compile(optimizer=tf.keras.optimizers.Adam(0.01),
              loss='binary_crossentropy',
              metrics=['accuracy'])
    
    return model

# Step 3 Evaluate

In [40]:
micro_f1 = []
macro_f1 = []
for ratio in np.arange(0.1, 1, 0.1):
    
    X_train, Y_train, X_test, Y_test = construct_Train_Test_Sets(ratio)
    
    model = constructModel()
    
    model.fit(X_train, Y_train, epochs=100, batch_size=64, verbose=0)
    
    predict = model.predict(X_test)
    predic_tensor = tf.constant(predict)
    condition = tf.math.greater_equal(predic_tensor, 0.5)
    bool_predictions = tf.where(condition, 1, 0)
    
    output_macro = tfa.metrics.F1Score(num_classes=39, average='macro')
    output_macro.update_state(Y_test, bool_predictions)
    
    macro_f1.append(output_macro.result().numpy())
    
    output_micro = tfa.metrics.F1Score(num_classes=39, average='micro')
    output_micro.update_state(Y_test, bool_predictions)
    
    micro_f1.append(output_micro.result().numpy())
    
    print("Ratio {}: Micro-F1 (%) {}: Macro-F1 (%) {}".format(ratio, output_micro.result().numpy(), output_micro.result().numpy()))
    

Ratio 0.1: Micro-F1 (%) 0.055673837661743164: Macro-F1 (%) 0.055673837661743164
Ratio 0.2: Micro-F1 (%) 0.03856603056192398: Macro-F1 (%) 0.03856603056192398
Ratio 0.30000000000000004: Micro-F1 (%) 0.0350453145802021: Macro-F1 (%) 0.0350453145802021
Ratio 0.4: Micro-F1 (%) 0.042208634316921234: Macro-F1 (%) 0.042208634316921234
Ratio 0.5: Micro-F1 (%) 0.04339886084198952: Macro-F1 (%) 0.04339886084198952
Ratio 0.6: Micro-F1 (%) 0.03641679883003235: Macro-F1 (%) 0.03641679883003235
Ratio 0.7000000000000001: Micro-F1 (%) 0.039753735065460205: Macro-F1 (%) 0.039753735065460205
Ratio 0.8: Micro-F1 (%) 0.04753199219703674: Macro-F1 (%) 0.04753199219703674
Ratio 0.9: Micro-F1 (%) 0.03729355335235596: Macro-F1 (%) 0.03729355335235596
