# Import libraries and external notebooks

In [1]:
import import_ipynb
from datasets.Downloader import *

import pandas as pd
import networkx as nx
import numpy as np
import random
import math
from numpy import linalg as LA

import tensorflow as tf
from tensorflow.keras import layers, Model
import tensorflow.keras.backend as K
from sklearn.neural_network import MLPClassifier
from IPython import display
# Report the TensorFlow version, whether eager execution is enabled, and
# whether at least one physical GPU device is listed.
print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("GPU is", "available" if tf.config.experimental.list_physical_devices("GPU") else "NOT AVAILABLE")

importing Jupyter notebook from /media/santaris/HighSpeedStorage/LabsWorkspace/Notebooks/LocalityGroups/Graph Representation Learning/Matrix Factorization/SocioDim/datasets/Downloader.ipynb
Version:  2.0.0
Eager mode:  True
GPU is available


# Download datasets

In [2]:
# Fetch the BlogCatalog archive. The arguments are a source URL, a local zip
# path and a directory; `download_dataset` comes from datasets/Downloader and
# its exact behavior is not visible here (presumably download + extract — confirm).
download_dataset('http://socialcomputing.asu.edu/uploads/1283153973/BlogCatalog-dataset.zip', 'datasets/BlogCatalog-dataset.zip', 'datasets')
# Alternative dataset (Flickr), currently disabled:
# download_dataset('http://socialcomputing.asu.edu/uploads/1283157931/Flickr-dataset.zip', 'datasets/Flickr-dataset.zip', 'datasets')


Size of file: 976987


datasets/BlogCatalog-dataset.zip: 100%|##########| 954k/954k [00:04<00:00, 200kB/s]  


# Import Data

In [3]:
# Build the graph from the comma-separated edge list, parsing node ids as ints.
G = nx.read_edgelist('datasets/BlogCatalog-dataset/data/edges.csv', delimiter=',', nodetype=int)

In [4]:
# Sanity check: report the size of the loaded graph.
print(f"Number of nodes {G.number_of_nodes()}")
print(f"Number of edges {G.number_of_edges()}")

Number of nodes 10312
Number of edges 333983


In [5]:
# Pin the row/column order to ascending node id. Without an explicit
# `nodelist`, networkx orders the matrix by G.nodes(), i.e. the order in which
# nodes first appear in the edge file, while the label matrix and the
# train/test code address rows as `node_id - 1`. Sorting keeps row i aligned
# with node id i + 1 (assumes ids are the consecutive integers 1..N — confirm).
node_order = sorted(G.nodes())
adj_matrix = nx.adjacency_matrix(G, nodelist=node_order)
# Dense float64 copy of the sparse adjacency matrix, then wrapped as a constant tensor.
dense_matrix = np.array(adj_matrix.toarray(), dtype=np.float64)
adj_tensor = tf.constant(dense_matrix)

In [6]:
# Load the (node, group) membership pairs; column names are supplied explicitly.
labels = pd.read_csv('datasets/BlogCatalog-dataset/data/group-edges.csv', sep=',', names=['Node','Group'])

In [7]:
# Preview the first rows of the membership table.
labels.head()

Unnamed: 0,Node,Group
0,28,1
1,32,1
2,36,1
3,37,1
4,84,1


In [8]:
# Multi-hot label matrix: one row per node, one column per group, with a 1
# wherever the membership table lists that (node, group) pair.
# The number of groups is taken from the data instead of being hard-coded, so
# the cell also works for datasets with a different number of groups.
n_groups = int(labels['Group'].max())
vectorized_labels = np.zeros((G.number_of_nodes(), n_groups), dtype=int)

# Node and group ids are 1-based in the file; shift both to 0-based positions.
node_rows = labels['Node'].to_numpy().astype(int) - 1
group_cols = labels['Group'].to_numpy().astype(int) - 1

# Single vectorized assignment instead of a Python loop over DataFrame rows.
vectorized_labels[node_rows, group_cols] = 1
    

# Step 1. Extract latent social dimensions based on network connectivity

In [11]:

def modularity(adj_matrix):
    """Eigen-decompose the modularity matrix of a graph.

    The modularity matrix is ``B = A - (d d^T) / (2m)``, where ``A`` is the
    adjacency matrix, ``d`` holds the row sums of ``A`` (node degrees) and
    ``2m`` is the sum of all degrees.

    Args:
        adj_matrix: square 2-D float tensor with the adjacency matrix.
            ``tf.linalg.eigh`` expects a symmetric matrix, so the graph is
            assumed to be undirected.

    Returns:
        The pair ``(e, v)`` produced by ``tf.linalg.eigh(B)``: ``e`` holds the
        eigenvalues and column ``v[:, i]`` is the eigenvector for ``e[i]``
        (the TensorFlow docs state eigenvalues are in non-decreasing order).
    """
    # Degrees of nodes
    d = tf.reduce_sum(adj_matrix, 1)

    # Sum of all degrees (twice the number of edges for an undirected graph).
    two_m = tf.reduce_sum(d)

    # Outer product d d^T via broadcasting. `d` is 1-D, so transposing it is a
    # no-op and a plain elementwise product would only yield d_i ** 2.
    nom = d[:, tf.newaxis] * d[tf.newaxis, :]

    B = tf.math.subtract(adj_matrix, tf.math.divide(nom, two_m))

    e, v = tf.linalg.eigh(B)

    return e, v


In [12]:
# Eigen-decompose the modularity matrix of the full graph
# (w: eigenvalues, v: eigenvectors, as returned by tf.linalg.eigh).
w, v = modularity(adj_tensor)

In [15]:
# Inspect the shape of the eigenvector matrix.
v.shape

TensorShape([10312, 10312])

In [None]:
# Keep the eigenvectors belonging to the largest eigenvalues as the latent
# social dimensions of every node.
n_latent = 500

eigenvalues = np.array(w)
eigenvectors = np.array(v)

# Rank eigenpairs from largest to smallest eigenvalue.
# NOTE(review): `ind` was not defined in any visible cell; this assumes the
# intent was "top eigenvectors of the modularity matrix" — confirm.
ind = np.argsort(eigenvalues)[::-1]

# Eigenvectors are the COLUMNS of `v`, so select columns (not rows). The
# result has one row per node and one column per latent dimension.
V = eigenvectors[:, ind[:n_latent]]
V.shape

# Evaluate

In [None]:
# Classifier used for evaluation: an MLP with a hidden-layer size of 5000,
# trained with Adam for at most 1000 iterations.
# NOTE(review): scikit-learn documents `learning_rate` as only used with
# solver='sgd' — confirm whether 'adaptive' has any effect with solver='adam'.
# NOTE(review): no `random_state` is set, so training is not reproducible.
mlp = MLPClassifier(verbose=0,solver='adam', learning_rate='adaptive',hidden_layer_sizes=5000, max_iter=1000)


In [None]:
# Random 10% / 90% split over 0-based row positions.
# NOTE(review): no random seed is set, so the split differs on every run.
indexes = np.arange(G.number_of_nodes())
np.random.shuffle(indexes)

# Number of training rows (despite its name this is a count, not an index array).
training_indexes = math.floor(G.number_of_nodes() * 0.1)

training_nodes = indexes[:training_indexes]
# The test slice starts exactly where the training slice ends; starting at
# `training_indexes + 1` would silently drop one node from both sets.
testing_nodes = indexes[training_indexes:]
training_nodes.shape

In [None]:
# Gather features and labels for each split with fancy indexing.
# `training_nodes` / `testing_nodes` already hold 0-based row positions
# (they are drawn from np.arange), so they index V and vectorized_labels
# directly. Subtracting 1 would wrap position 0 around to the last row.
# `.real` keeps only the real part of the features.
X_train = V[training_nodes].real
Y_train = vectorized_labels[training_nodes]

X_test = V[testing_nodes].real
Y_test = vectorized_labels[testing_nodes]