#### Import Required Modules

In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd
import random

from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

from tensorflow.keras import layers, optimizers, losses, Model
from tensorflow.keras.utils import to_categorical

import matplotlib as plt
from model import *

## Loading & Preprocessing Data
#### Feature Extraction - Load data from facebook dataset

In [2]:
# load data
X_features, labels, edges = load_data()

# check loaded properly
num_classes = len(np.unique(labels))
num_nodes = X_features.shape[0]
num_features = X_features.shape[1]
num_edges = len(edges)/2

print("num of classes: " + str(num_classes))
print("num of nodes: " + str(num_nodes))
print("num of features: " + str(num_features))
print("num of edges: "+ str(num_edges))

# adjacency matrix
A = get_adj_matrix(labels, edges)

num of classes: 4
num of nodes: 22470
num of features: 128
num of edges: 171002.0


#### Normalise Adjacency Matrix

In [3]:
# normalise
A = normalise_adj(adj_matrix)

NameError: name 'adj_matrix' is not defined

#### Split Training, Validation, and Testing

In [None]:
## in py file ###
def split_index(data):
    """ Partitions the dataset into training, validation, and testing splits
        of 0.2 : 0.2 : 0.6 since semi-supervised.
    Parameters:
        data: data to be split
    Returns:
        Indices of Training set, Validation, and Test set
    """
    size = int(len(data)*0.2)
    indices = [i for i in range(len(data))]

    # training split
    train_set = random.sample(indices, k = size)
    
    # split remainder of set
    remainder = set(indices).difference(train_set)

    val_set = random.sample(remainder, k = size)
    test_set = list(set(remainder).difference(val_set))
    
    return train_set, val_set, test_set

In [None]:
# Get indices for splitting set
train_idx, val_idx, test_idx = split_index(labels)

# Apply mask
train_mask = np.zeros((num_nodes,), dtype = bool)
val_mask = np.zeros((num_nodes,), dtype = bool)
test_mask = np.zeros((num_nodes,), dtype = bool)

train_mask[train_idx] = True
val_mask[val_idx] = True
test_mask[test_idx] = True

In [None]:
# One-hot encoding
def encode(labels):
    encoder = LabelEncoder()
    labels = encoder.fit_transform(labels) # returns encoded labels
    encoded_labels = to_categorical(labels)
    return encoded_labels, encoder.classes_

encoded_labels, classes = encode(labels)

print(encoded_labels)
print(classes)

## Building & Training GCN

In [None]:
# Parameters
channels = 16 #num for first layer
dropout = 0.5 #rate
l2_reg = 5e-4 # regularisation rate
l_rate = 1e-2 #learning rate
epochs = 200 #number of epochs
input_channels = num_nodes

In [None]:
# Create and Compile
model = GCN_Model(num_classes, channels, 
                  dropout, l2_reg, input_channels)

model.compile(optimizer = Adam(learning_rate = l_rate),
                               loss = 'categorical_crossentropy',
                              metrics = [acc])
model.summary()

In [None]:
# Train

def train():
    validation_data = ([X_features, A], encoded_labels, val_mask)
    
    model.fit([X_features, A], 
              encoded_labels,
             sample_weight = train_mask,
             epochs = epochs,
             batch_size = num_nodes,
             validation_data = validation_data,
              shuffle = False)
    

## Results

In [None]:
X_test = X_features[test_mask]
A_test = A[test_mask, :][:,test_mask]
y_test = encoded_labels[tesk_mask]

# Evaluation
y_predictions = model.predict([X_test, A_test], 
                            batch_size = num_nodes)

report = classification_report(y_test, y_predictions, 
                               target_names = classes)

print(report)

## TSNE Plot

Each point is a node representing the facebook page. The colours represent the four possible categories.

In [None]:
## TEST SCRIPT ##
output = output of first gcn layer

tsne = TSNE(n_components = 2).fit_transform(output)
plt.figre(figsize = (10,10))

colour_map = np.argmax(encoded_labels, axis = 1)
for i in range(num_classes):
    indices = np.where(color_map == i)
    
    plt.scatter(tsne[indices[0], 0], tsne[indices[0],1], label = i)
    
plt.title('tSNE Plot')
plt.legend()
plt.show()