In [None]:
!pip install -e ..

In [None]:
from utils.history import plot_history
from mapper.full_batch_generators import RelationalFullBatchNodeGenerator
from layer.rgcn_reproduced import RGCN
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model

import sklearn
from sklearn import model_selection
from collections import Counter
import datasets

import matplotlib.pyplot as plt

In [None]:
# Instantiate the AIFB dataset wrapper from the `datasets` module.
dataset = datasets.AIFB()
# `load()` returns the graph `G` and the `affiliation` targets. Per the original
# note the targets are one-hot encoded affiliation labels -- TODO confirm against
# the `datasets.AIFB` implementation.
G, affiliation = dataset.load()

In [None]:
# Optional sanity check -- uncomment to inspect the loaded graph:
# print(G.info())

In [None]:
# Split the labelled nodes into train (80%) and test (20%) sets.
# A fixed `random_state` makes the split -- and therefore every metric and plot
# computed from it -- reproducible across "Restart Kernel -> Run All".
train_targets, test_targets = model_selection.train_test_split(
    affiliation, train_size=0.8, test_size=None, random_state=42
)

# Full-batch data generator for relational graphs (used with RGCN); adjacency
# handling is requested in sparse form via `sparse=True`.
generator = RelationalFullBatchNodeGenerator(G, sparse=True)

# One flow per split, keyed by the node ids in each target frame's index.
# NOTE(review): `test_gen` is also passed as `validation_data` when the model is
# fitted, so the reported test metrics do not come from a truly held-out set.
train_gen = generator.flow(train_targets.index, targets=train_targets)
test_gen = generator.flow(test_targets.index, targets=test_targets)

In [None]:
# Hyperparameters for the RGCN stack.
#
# Alternative "AIFB best" setting, kept for reference:
#     layer_sizes=[32, 32], activations=["relu", "relu"],
#     num_bases=20, dropout=0.5
#
# The active setting below is the "AIFB original" one.
rgcn_config = dict(
    layer_sizes=[16, 16],
    activations=["relu", "relu"],
    num_bases=0,
    dropout=0.0,
)

# Build the RGCN model on top of the full-batch generator, with bias terms enabled.
rgcn = RGCN(generator=generator, bias=True, **rgcn_config)

In [None]:
# Input and output tensors of the RGCN stack.
x_in, x_out = rgcn.in_out_tensors()

# Softmax classification head with one unit per target column.
n_classes = train_targets.shape[-1]
softmax_head = Dense(n_classes, activation="softmax")
predictions = softmax_head(x_out)

# Assemble the end-to-end classifier and configure training.
model = Model(inputs=x_in, outputs=predictions)
adam = keras.optimizers.Adam(0.01)
model.compile(optimizer=adam, loss="categorical_crossentropy", metrics=["acc"])

# Train for 50 epochs; the test flow is used as validation data here.
history = model.fit(train_gen, epochs=50, validation_data=test_gen)

In [None]:
# Visualize the training run recorded in `history` using the helper imported
# from `utils.history` (presumably per-epoch loss/accuracy curves -- TODO confirm).
plot_history(history)

In [None]:
# Evaluate the trained model on the test flow.
test_metrics = model.evaluate(test_gen)

# Report every metric by name, formatted to four decimal places.
print("\nTest Set Metrics:")
metric_row = "\t{}: {:0.4f}"
for metric_name, metric_value in zip(model.metrics_names, test_metrics):
    print(metric_row.format(metric_name, metric_value))


In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Get embeddings for all people nodes: reuse the trained RGCN layers but stop
# at `x_out`, i.e. before the softmax classification head.
all_gen = generator.flow(affiliation.index, targets=affiliation)
embedding_model = Model(inputs=x_in, outputs=x_out)
emb = embedding_model.predict(all_gen)

# Drop the leading axis (must be of size 1) so rows correspond to nodes.
X = emb.squeeze(0)
# Class label per node: the target column holding the row maximum.
y = affiliation.idxmax(axis="columns").astype("category")

# Reduce to 2-D for plotting. `transform_name` is set in BOTH branches because
# the plot title needs it; previously the name was only defined in the TSNE
# branch, which raised NameError when the embeddings were already 2-D.
if X.shape[1] > 2:
    transform = TSNE
    transform_name = transform.__name__
    # Fixed seed so the t-SNE layout is reproducible between runs.
    trans = transform(n_components=2, random_state=42)
    points_2d = trans.fit_transform(X)
else:
    # Embeddings are already low-dimensional; plot them as they are.
    transform_name = "Raw"
    points_2d = X

# Columns are the integer labels 0 and 1 (pandas default for array input).
emb_transformed = pd.DataFrame(points_2d, index=affiliation.index)
emb_transformed["label"] = y

# Scatter plot of the embeddings, coloured by class code.
fig, ax = plt.subplots(figsize=(7, 7))
ax.scatter(
    emb_transformed[0],
    emb_transformed[1],
    c=emb_transformed["label"].cat.codes,
    cmap="jet",
    alpha=0.7,
)
ax.set(aspect="equal", xlabel="$X_1$", ylabel="$X_2$")
ax.set_title(
    "{} visualization of RGCN embeddings for AIFB dataset".format(transform_name)
)
plt.show()
