#### Import Required Modules

In [61]:
import numpy as np
import tensorflow as tf
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE

from tensorflow.keras import layers, optimizers, losses, Model

import matplotlib as plt
from model import *

## Loading & Preprocessing Data
#### Feature Extraction - Load data from the Facebook dataset

In [133]:
def load_data(path='facebook.npz'):
    """Load the graph archive and build a dense adjacency matrix with self-loops.

    Parameters:
        path: location of the ``.npz`` archive. It must contain the arrays
            'edges', 'target' and 'features'. Defaults to 'facebook.npz'.
    Returns:
        features: node feature array exactly as stored in the archive
        A_tensorMatrix: (n, n) float32 tensor (n = number of labels) holding
            ones on the diagonal and a one at [row[0], row[1]] for every
            row of 'edges'
        labels: label array exactly as stored in the archive
    """
    # np.load on an .npz returns a lazily-read archive that holds the file
    # open; the context manager closes it once the arrays have been read.
    with np.load(path) as data:
        edges = data['edges']
        labels = data['target']
        features = data['features']

    # adjacency matrix (identity supplies the self-loops)
    # NOTE: this is dense, i.e. n * n * 4 bytes of float32.
    n = len(labels)
    A = np.eye(n, dtype=np.float32)
    if len(edges):
        # One vectorised write instead of a Python loop over every edge.
        # Entries are written only in the stored direction; the "/2" in the
        # edge count below suggests the archive lists each edge both ways
        # -- TODO confirm.
        A[edges[:, 0], edges[:, 1]] = 1

    # check loaded properly
    print("num of classes: " + str(len(np.unique(labels))))
    print("num of nodes: " + str(features.shape[0]))
    print("num of edges: " + str(len(edges)/2))
    print("num of features: " + str(features.shape[1]))

    A_tensorMatrix = tf.constant(A)
    return features, A_tensorMatrix, labels

# Load once at notebook level: later cells read X_features, adj_matrix and
# labels as globals.
X_features, adj_matrix, labels = load_data()
# Sanity check: one label per node is expected here.
print(labels.shape)

num of classes: 4
num of nodes: 22470
num of edges: 171002.0
num of features: 128
(22470,)


#### Normalise Adjacency Matrix

In [94]:
### delete when function is in py

def normalise_adj(adj_matrix, symmetric=False):
    """
    Degree-normalise an adjacency matrix.

    Parameters:
        adj_matrix: square adjacency matrix to be normalised, in form of a
            dense floating-point tensor
        symmetric: if False (default) return D^-1 A, i.e. every row divided
            by its row sum. This is the value of the product
            D^-1/2 . D^-1/2 . A. If True return D^-1/2 A D^-1/2 instead.
    Returns:
        normalised adjacency matrix, same shape as adj_matrix. Rows whose
        degree is zero come back as all zeros rather than inf/NaN.
    """
    # degree of each node = sum over its row
    total_neighbours = tf.math.reduce_sum(adj_matrix, 1)

    if symmetric:
        # d^-1/2 per node; divide_no_nan maps a zero degree to 0 instead of inf
        inv_sqrt_deg = tf.math.divide_no_nan(
            tf.ones_like(total_neighbours), tf.math.sqrt(total_neighbours)
        )
        # Scale rows and columns by broadcasting; no n x n diagonal matrix
        # and no dense matmul are needed.
        return (
            adj_matrix
            * inv_sqrt_deg[:, tf.newaxis]
            * inv_sqrt_deg[tf.newaxis, :]
        )

    # Left-multiplying by a diagonal matrix only rescales rows, so divide
    # each row by its degree directly.
    return tf.math.divide_no_nan(adj_matrix, total_neighbours[:, tf.newaxis])
    

In [96]:
# Normalise the adjacency matrix built by load_data().
A = normalise_adj(adj_matrix)

# Propagate the node features through the normalised adjacency (A . X).
# No weight matrix is applied in this cell.
# NOTE(review): the saved output below has shape (22470, 22470), which does
# not match A . X with 128 features -- it looks stale; re-run to confirm.
tf.matmul(A, X_features)

tf.Tensor(
[[0.4999999  0.         0.         ... 0.         0.         0.        ]
 [0.         0.02857143 0.         ... 0.         0.         0.        ]
 [0.         0.         0.07692308 ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.05555555 0.         0.        ]
 [0.         0.         0.         ... 0.         0.3333334  0.        ]
 [0.         0.         0.         ... 0.         0.         0.12499999]], shape=(22470, 22470), dtype=float32)


## Split Training, Validation, and Testing

In [138]:
##### SPLIT #####
def split_datasets(dataset, dataset_size, train_split=0.2, val_split=0.2):
    """ Partitions the dataset into training, validation, and testing splits.

    The default ratio is 0.2 : 0.2 : 0.6 (train : val : test) since the task
    is semi-supervised. Elements are NOT shuffled: the first block becomes the
    training split, the next block the validation split and everything that
    remains the test split.

    Parameters:
        dataset: data to be partitioned; it is sliced along its first axis by
            tf.data.Dataset.from_tensor_slices
        dataset_size: number of elements in dataset
        train_split: fraction of elements used for training
        val_split: fraction of elements used for validation; the remaining
            fraction (1 - train_split - val_split) is the test split
    Returns:
        (train_dataset, val_dataset, test_dataset) as tf.data.Dataset objects
    Raises:
        ValueError: if a fraction is negative or the two fractions exceed 1
    """
    if train_split < 0 or val_split < 0 or train_split + val_split > 1:
        raise ValueError(
            "train_split and val_split must be non-negative and sum to at most 1"
        )

    # number of elements in the first two splits; the test split is the rest
    train_size = int(train_split * dataset_size)
    val_size = int(val_split * dataset_size)

    data = tf.data.Dataset.from_tensor_slices(dataset)

    train_dataset = data.take(train_size)
    val_dataset = data.skip(train_size).take(val_size)
    test_dataset = data.skip(train_size + val_size)

    # The unfinished boolean-mask draft that used to sit here referenced
    # undefined names (and the global `labels`), so the function raised
    # before reaching its return; its results were never returned anyway.
    return train_dataset, val_dataset, test_dataset
    

In [None]:
# Fixed seed so the shuffled split is identical on every Restart & Run All.
train_x, test_x = train_test_split(X_features, random_state=42)

In [140]:
# One label per node, so this is the number of elements to partition.
data_size = len(labels)
# NOTE: despite the x_ prefix these three datasets hold slices of `labels`,
# not of the feature matrix.
x_train, x_val, x_test = split_datasets(labels, data_size)
print(x_train)

<TakeDataset shapes: (), types: tf.int64>


## Building & Training GCN

In [145]:
# NOTE(review): `epochs` is not assigned in any visible cell, so this line
# depends on leftover kernel state -- define it before this cell.
epochs

22470


In [143]:
# Use the `optimizers` namespace imported from tensorflow.keras at the top of
# the notebook, so this cell does not rely on `Adam` leaking in through a
# wildcard import. Default hyper-parameters are kept.
optimizer = optimizers.Adam()

21470


## Results

## TSNE Plot

In [None]:
# Two components so the embedding can be drawn as a 2-D scatter plot.
tsne = TSNE(n_components=2)
def plot_TSNE(target)