In [33]:
import tensorflow as tf
import pandas as pd
import numpy as np
import stellargraph as sg
from sklearn.model_selection import train_test_split
from sklearn import preprocessing as pre
from stellargraph.layer.gcn import GraphConvolution, GatherIndices
import scipy.sparse as sp
from tensorflow.keras.layers import Input, Layer, Lambda, Dropout, Reshape, Dense
from tensorflow.keras import layers, optimizers, losses, metrics, Model
from stellargraph.mapper import FullBatchNodeGenerator
from stellargraph.layer import GCN
from tensorflow.keras.callbacks import EarlyStopping

In [34]:
# Read the raw edge list and the per-page metadata from the MUSAE Facebook CSV files.
edges = pd.read_csv(filepath_or_buffer="musae_facebook_edges.csv")
target = pd.read_csv(filepath_or_buffer="musae_facebook_target.csv")

In [35]:
# Preview the edge list (id_1 / id_2 pairs); the rendered table is truncated in the middle.
edges

Unnamed: 0,id_1,id_2
0,0,18427
1,1,21708
2,1,22208
3,1,22171
4,1,6829
...,...,...
170997,20188,20188
170998,22340,22383
170999,22348,22348
171000,5563,5563


In [36]:
# Preview the page metadata (id, facebook_id, page_name, page_type); the rendered table is truncated in the middle.
target

Unnamed: 0,id,facebook_id,page_name,page_type
0,0,145647315578475,The Voice of China 中国好声音,tvshow
1,1,191483281412,U.S. Consulate General Mumbai,government
2,2,144761358898518,ESET,company
3,3,568700043198473,Consulate General of Switzerland in Montreal,government
4,4,1408935539376139,Mark Bailey MP - Labor for Miller,politician
...,...,...,...,...
22465,22465,1379955382222841,Kurt Wiegel MdL,politician
22466,22466,1651527995097082,dubdub Stories,company
22467,22467,155369444540412,Ministerio del Interior - Paraguay,government
22468,22468,175067819212798,Tottus Perú,company


In [37]:
# Load the pre-built NumPy arrays: edge list, node feature matrix and label vector.
np_edges, np_features, np_target = (
    np.load(path) for path in ("edges.npy", "features.npy", "target.npy")
)

# Cell result: the distinct page categories found in the CSV metadata.
target["page_type"].unique()

array(['tvshow', 'government', 'company', 'politician'], dtype=object)

In [38]:
# Print the shape of the edge and feature arrays, one per line.
print(np_edges.shape, np_features.shape, sep="\n")
# Print the label vector's shape together with its (abbreviated) contents.
print(np_target.shape, np_target)
# Cell result: the label vector itself.
np_target

(342004, 2)
(22470, 128)
(22470,) [0 2 1 ... 2 1 0]


array([0, 2, 1, ..., 2, 1, 0])

In [39]:
# Wrap the raw arrays in DataFrames; the row position doubles as the node id.
df_features = pd.DataFrame(np_features)
df_edges = pd.DataFrame(np_edges, columns=["source", "target"])
df_targets = pd.DataFrame(np_target, columns=["target"])

# Build the graph from node features plus the source/target edge table.
# NOTE(review): edges.npy has 342004 rows, roughly twice the row count of
# musae_facebook_edges.csv. If it lists every edge in both directions, this
# undirected graph stores each connection twice -- confirm that is intended.
mat = sg.StellarGraph(df_features, df_edges)
print(mat.info())

StellarGraph: Undirected multigraph
 Nodes: 22470, Edges: 342004

 Node types:
  default: [22470]
    Features: float32 vector, length 128
    Edge types: default-default->default

 Edge types:
    default-default->default: [342004]
        Weights: all 1 (default)
        Features: none


In [40]:
# Fixed seed so that the train/validation/test membership -- and therefore
# every metric reported further down -- is the same on each run.
SPLIT_SEED = 42

# Take 500 labelled nodes for training, then 500 of the remainder for
# validation; everything left over becomes the test set. Stratifying on the
# label keeps the class proportions of each subset close to those of the full
# data set, which matters for a training sample this small.
train_data, holdout_data = train_test_split(
    df_targets,
    train_size=500,
    stratify=df_targets["target"],
    random_state=SPLIT_SEED,
)
val_data, test_data = train_test_split(
    holdout_data,
    train_size=500,
    stratify=holdout_data["target"],
    random_state=SPLIT_SEED,
)
print("train data: ",train_data.shape, "validation data: ",val_data.shape, 
      "test data: ",test_data.shape)
# Cell result: the node ids (DataFrame index) selected for training.
train_data.index

train data:  (500, 1) validation data:  (500, 1) test data:  (21470, 1)


Int64Index([ 7973, 19616,  7155, 16444,  6506, 19883, 11578,   795, 11862,
            18875,
            ...
            19284,  7568,  4011, 11839, 11663,  3112, 10311, 18025,  2779,
            10935],
           dtype='int64', length=500)

In [41]:
# One-hot encode the integer class labels.
# The encoder is fitted on the complete label column instead of the 500-row
# training sample, so the set (and order) of output columns never depends on
# which rows the random split happened to pick. When the training sample
# already contains every class the result is identical to fitting on it.
one_hot_target = pre.LabelBinarizer()
one_hot_target.fit(df_targets['target'])
train_targets = one_hot_target.transform(train_data['target'])
val_targets = one_hot_target.transform(val_data['target'])
test_targets = one_hot_target.transform(test_data['target'])

# Sanity check: all three target matrices must have the same number of columns.
print (test_targets.shape, val_targets.shape, train_targets.shape)


(21470, 4) (500, 4) (500, 4)


In [47]:
# Full-batch node generator over the graph, using the GCN preprocessing method.
generator = FullBatchNodeGenerator(mat, method="gcn")

# Two 32-unit ReLU graph-convolution layers with a dropout rate of 0.2.
gcn = GCN(
    layer_sizes=[32, 32],
    activations=["relu", "relu"],
    generator=generator,
    dropout=0.2,
)
x_inp, x_out = gcn.in_out_tensors()

# Softmax head with one unit per one-hot label column.
predictions = Dense(units=train_targets.shape[1], activation="softmax")(x_out)

# Training sequence: node ids come from the training split's index.
train_gen = generator.flow(train_data.index, train_targets)

Using GCN (local pooling) filters...


In [50]:
# Validation sequence, produced by the same full-batch generator as training.
val_gen = generator.flow(val_data.index, val_targets)

# Connect the GCN inputs to the softmax head and compile for multi-class
# classification on one-hot targets.
model = Model(inputs=x_inp, outputs=predictions)
model.compile(
    loss=losses.categorical_crossentropy,
    optimizer=optimizers.Adam(learning_rate=0.01),
    # The metric is registered under the name "acc"; the validation copy is
    # therefore logged as "val_acc".
    metrics=["acc"],
)

In [None]:
# Stop when validation accuracy has not improved for 50 epochs and roll the
# model back to the best weights seen.
# NOTE(review): the recorded log already starts at loss ~0.06 / acc ~0.98 in
# epoch 1, which suggests the model had been fitted before this cell last ran.
# Restart the kernel and run all cells to obtain a clean training history.
es_callback = EarlyStopping(
    monitor="val_acc",
    patience=50,
    restore_best_weights=True,
)

# Each epoch is a single full-graph batch (the log shows 1/1 steps), so
# shuffling is switched off.
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=200,
    callbacks=[es_callback],
    shuffle=False,
    verbose=2,
)

Epoch 1/200
1/1 - 1s - loss: 0.0640 - acc: 0.9780 - val_loss: 0.8085 - val_acc: 0.8440
Epoch 2/200
1/1 - 0s - loss: 0.0992 - acc: 0.9600 - val_loss: 0.8024 - val_acc: 0.8540
Epoch 3/200
1/1 - 0s - loss: 0.0620 - acc: 0.9840 - val_loss: 0.8026 - val_acc: 0.8600
Epoch 4/200
1/1 - 0s - loss: 0.0656 - acc: 0.9860 - val_loss: 0.8344 - val_acc: 0.8520
Epoch 5/200
1/1 - 0s - loss: 0.0833 - acc: 0.9740 - val_loss: 0.8315 - val_acc: 0.8540
Epoch 6/200
1/1 - 0s - loss: 0.0826 - acc: 0.9700 - val_loss: 0.8162 - val_acc: 0.8580
Epoch 7/200
1/1 - 0s - loss: 0.0582 - acc: 0.9840 - val_loss: 0.8191 - val_acc: 0.8520
Epoch 8/200
1/1 - 0s - loss: 0.0471 - acc: 0.9820 - val_loss: 0.8317 - val_acc: 0.8560
Epoch 9/200
1/1 - 0s - loss: 0.0398 - acc: 0.9880 - val_loss: 0.8360 - val_acc: 0.8480
Epoch 10/200
1/1 - 0s - loss: 0.0520 - acc: 0.9860 - val_loss: 0.8323 - val_acc: 0.8560
Epoch 11/200
1/1 - 0s - loss: 0.0579 - acc: 0.9780 - val_loss: 0.8269 - val_acc: 0.8560
Epoch 12/200
1/1 - 0s - loss: 0.0470 - ac

Epoch 95/200
1/1 - 0s - loss: 0.0221 - acc: 0.9940 - val_loss: 1.1961 - val_acc: 0.8620
Epoch 96/200
1/1 - 0s - loss: 0.0173 - acc: 0.9960 - val_loss: 1.1960 - val_acc: 0.8620
Epoch 97/200
1/1 - 0s - loss: 0.0459 - acc: 0.9920 - val_loss: 1.1932 - val_acc: 0.8620
Epoch 98/200
1/1 - 0s - loss: 0.0052 - acc: 1.0000 - val_loss: 1.1960 - val_acc: 0.8640
Epoch 99/200
1/1 - 0s - loss: 0.0210 - acc: 0.9940 - val_loss: 1.1991 - val_acc: 0.8600
Epoch 100/200
1/1 - 0s - loss: 0.0118 - acc: 0.9960 - val_loss: 1.2001 - val_acc: 0.8580
Epoch 101/200
1/1 - 0s - loss: 0.0148 - acc: 0.9940 - val_loss: 1.1995 - val_acc: 0.8680
Epoch 102/200
1/1 - 0s - loss: 0.0189 - acc: 0.9940 - val_loss: 1.2026 - val_acc: 0.8620
Epoch 103/200
1/1 - 0s - loss: 0.0217 - acc: 0.9940 - val_loss: 1.2031 - val_acc: 0.8660
Epoch 104/200
1/1 - 0s - loss: 0.0170 - acc: 0.9920 - val_loss: 1.2071 - val_acc: 0.8640
Epoch 105/200
1/1 - 0s - loss: 0.0059 - acc: 1.0000 - val_loss: 1.2123 - val_acc: 0.8620
Epoch 106/200
1/1 - 0s - l