#### Import Required Modules

In [21]:
import numpy as np
import tensorflow as tf
import pandas as pd
import random

from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

from tensorflow.keras import layers, optimizers, losses, Model
from tensorflow.keras.utils import to_categorical

import matplotlib as plt
from model import *

## Loading & Preprocessing Data
#### Feature Extraction - Load data from facebook dataset

In [25]:
X_features, adj_matrix, labels = load_data()
print(labels)

num of classes: 4
num of nodes: 22470
num of edges: 171002.0
num of features: 128
[0 2 1 ... 2 1 0]


#### Normalise Adjacency Matrix

In [5]:
# normalise
A = normalise_adj(adj_matrix)

# multiply by features and weight 
tf.matmul(A, X_features)

<tf.Tensor: shape=(22470, 128), dtype=float32, numpy=
array([[-0.26257637, -0.27648258, -0.2623502 , ..., -0.21514013,
        -0.37590313, -0.22383553],
       [-0.13679378,  0.02753453, -0.26121682, ..., -0.06020843,
        -0.1114152 ,  0.08116659],
       [ 0.6967648 , -0.2716224 , -0.26235026, ..., -0.21514018,
        -0.37590325, -0.22176178],
       ...,
       [-0.25792482, -0.27252948, -0.11360406, ..., -0.1976151 ,
        -0.37236634, -0.21744584],
       [-0.2625765 , -0.27484787, -0.26235032, ..., -0.20976022,
        -0.36968994, -0.22021428],
       [-0.2360464 , -0.27409634, -0.25757593, ..., -0.19312643,
        -0.3753811 , -0.22299477]], dtype=float32)>

#### Split Training, Validation, and Testing

In [6]:
def split(data):
    """ Partitions the dataset into training, validation, and testing splits
        of 0.2 : 0.2 : 0.6 since semi-supervised.
    Parameters:
        data: data to be split
    Returns:
        Training set, Validation, and Test set
    """
    # training split
    size = len(data)*0.2
    train_set = random.sample(data, k = size)
    
    # split remainder of set
    remainder = set(data).difference(train_set)
    
    val_set = random.sample(remainder, k = size)
    test_set = set(remainder).difference(val_set)
    
    return train_set, val_set, test_set

In [9]:
train_set, val_set, test_set = split(labels)
print(train_set)

TypeError: Population must be a sequence.  For dicts or sets, use sorted(d).

In [20]:
# One-hot encoding
def encode(labels):
    encoder = LabelEncoder()
    labels = encoder.fit_transform(labels) # returns encoded labels
    encoded_labels = to_categorical(labels)
    return encoded_labels, encoder.classes_

encoded_labels, classes = encode(labels)

print(encoded_labels)
print(classes)

[[1. 0. 0. 0.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]
 ...
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]]


## Building & Training GCN

In [None]:
# Parameters
channels = 16 #num for first layer
dropout = 0.5 #rate
l2_reg = 5e-4 # regularisation rate
l_rate = 1e-2 #learning rate
epochs = 200 #number of epochs
input_channels = X_features.shape[0]

In [None]:
# Create and Compile
model = GCN_Model(len(np.unique(labels)), channels, 
                  dropout, l2_reg, input_channels)

model.compile(optimizer = Adam(learning_rate = l_rate),
                               loss = 'categorical_crossentropy',
                              metrics = [acc])
model.summary()

In [None]:
# Train
val_mask = val_test #add mask
def train():
    validation_data = ([X_features, A], encoded_labels, val_mask)
    
    model.fit([X_features, A], encoded_labels,
             sample_weight =,
             epochs = epochs,
             batch_size = X_features.shape[0],
             validation_data = validation_data,
              shuffle = False)
    

## Results

In [None]:
X_test= X_features
A_test = A
y_test = encoded_labels # add mask

# Evaluation
y_predictions = model.predict([X_test, A_test], 
                            batch_size = X_features.shape[0])

y_actual = y_test
report = classification_report(y_actual, y_predictions, 
                               target_names = classes)

print(report)

## TSNE Plot

Each point is a node representing the facebook page. The colours represent the four possible categories.

In [None]:
## TEST SCRIPT ##
output = output of first gcn layer

tsne = TSNE(n_components = 2).fit_transform(output)
plt.figre(figsize = (10,10))

colour_map = np.argmax(encoded_labels, axis = 1)
for i in range(num_classes):
    indices = np.where(color_map == i)
    
    plt.scatter(tsne[indices[0], 0], tsne[indices[0],1], label = i)
    
plt.title('tSNE Plot')
plt.legend()
plt.show()