In [33]:
import tensorflow as tf
import pandas as pd
import numpy as np
import stellargraph as sg
from sklearn.model_selection import train_test_split
from sklearn import preprocessing as pre
from stellargraph.layer.gcn import GraphConvolution, GatherIndices
import scipy.sparse as sp
from tensorflow.keras.layers import Input, Layer, Lambda, Dropout, Reshape, Dense
from tensorflow.keras import layers, optimizers, losses, metrics, Model
from stellargraph.mapper import FullBatchNodeGenerator
from stellargraph.layer import GCN
from tensorflow.keras.callbacks import EarlyStopping

In [34]:
# get the data into dataframes
edges = pd.read_csv("musae_facebook_edges.csv")
target = pd.read_csv("musae_facebook_target.csv")

In [35]:
edges

Unnamed: 0,id_1,id_2
0,0,18427
1,1,21708
2,1,22208
3,1,22171
4,1,6829
...,...,...
170997,20188,20188
170998,22340,22383
170999,22348,22348
171000,5563,5563


In [36]:
target

Unnamed: 0,id,facebook_id,page_name,page_type
0,0,145647315578475,The Voice of China 中国好声音,tvshow
1,1,191483281412,U.S. Consulate General Mumbai,government
2,2,144761358898518,ESET,company
3,3,568700043198473,Consulate General of Switzerland in Montreal,government
4,4,1408935539376139,Mark Bailey MP - Labor for Miller,politician
...,...,...,...,...
22465,22465,1379955382222841,Kurt Wiegel MdL,politician
22466,22466,1651527995097082,dubdub Stories,company
22467,22467,155369444540412,Ministerio del Interior - Paraguay,government
22468,22468,175067819212798,Tottus Perú,company


In [37]:
np_edges = np.load("edges.npy")
np_features = np.load("features.npy")
np_target = np.load("target.npy")
target.page_type.unique()

array(['tvshow', 'government', 'company', 'politician'], dtype=object)

In [38]:
print(np_edges.shape)
print(np_features.shape)
print(np_target.shape, np_target)
np_target

(342004, 2)
(22470, 128)
(22470,) [0 2 1 ... 2 1 0]


array([0, 2, 1, ..., 2, 1, 0])

In [39]:
df_features = pd.DataFrame(np_features)
df_edges = pd.DataFrame(np_edges)
df_targets = pd.DataFrame(np_target)
df_edges.columns = ["source", "target"]
df_targets.columns = ["target"]
mat = sg.StellarGraph(df_features, df_edges)
print (mat.info())

StellarGraph: Undirected multigraph
 Nodes: 22470, Edges: 342004

 Node types:
  default: [22470]
    Features: float32 vector, length 128
    Edge types: default-default->default

 Edge types:
    default-default->default: [342004]
        Weights: all 1 (default)
        Features: none


In [40]:
train_data, test_data = train_test_split(df_targets, train_size=500)
val_data, test_data = train_test_split(test_data, train_size=500)
print("train data: ",train_data.shape, "validation data: ",val_data.shape, 
      "test data: ",test_data.shape)
train_data.index

train data:  (500, 1) validation data:  (500, 1) test data:  (21470, 1)


Int64Index([ 7973, 19616,  7155, 16444,  6506, 19883, 11578,   795, 11862,
            18875,
            ...
            19284,  7568,  4011, 11839, 11663,  3112, 10311, 18025,  2779,
            10935],
           dtype='int64', length=500)

In [41]:
one_hot_target = pre.LabelBinarizer()
train_targets = one_hot_target.fit_transform(train_data['target'])
val_targets = one_hot_target.transform(val_data['target'])
test_targets = one_hot_target.transform(test_data['target'])

print (test_targets.shape, val_targets.shape, train_targets.shape)


(21470, 4) (500, 4) (500, 4)


In [42]:
model = Model(inputs=x_inp, outputs=predictions)
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.01),
    loss=losses.categorical_crossentropy,
    metrics=["acc"],
)
val_gen = generator.flow(val_data.index, val_targets)

In [43]:
es_callback = EarlyStopping(monitor="val_acc", patience=50, restore_best_weights=True)
history = model.fit(
    train_gen,
    epochs=200,
    validation_data=val_gen,
    verbose=2,
    shuffle=False, 
    callbacks=[es_callback],
)

Epoch 1/200
1/1 - 1s - loss: 1.0759 - acc: 0.5260 - val_loss: 1.5653 - val_acc: 0.2960
Epoch 2/200
1/1 - 0s - loss: 1.0875 - acc: 0.5080 - val_loss: 1.5794 - val_acc: 0.2780
Epoch 3/200
1/1 - 0s - loss: 1.0758 - acc: 0.5020 - val_loss: 1.5792 - val_acc: 0.2880
Epoch 4/200
1/1 - 0s - loss: 1.0473 - acc: 0.5480 - val_loss: 1.5858 - val_acc: 0.2980
Epoch 5/200
1/1 - 0s - loss: 1.0685 - acc: 0.5240 - val_loss: 1.5872 - val_acc: 0.2880
Epoch 6/200
1/1 - 0s - loss: 1.0580 - acc: 0.5540 - val_loss: 1.5856 - val_acc: 0.3000
Epoch 7/200
1/1 - 0s - loss: 1.0500 - acc: 0.5380 - val_loss: 1.5808 - val_acc: 0.3040
Epoch 8/200
1/1 - 0s - loss: 1.0606 - acc: 0.5160 - val_loss: 1.5828 - val_acc: 0.2980
Epoch 9/200
1/1 - 0s - loss: 1.0425 - acc: 0.5440 - val_loss: 1.5863 - val_acc: 0.2960
Epoch 10/200
1/1 - 0s - loss: 1.0857 - acc: 0.5020 - val_loss: 1.5950 - val_acc: 0.2980
Epoch 11/200
1/1 - 0s - loss: 1.0593 - acc: 0.5420 - val_loss: 1.6075 - val_acc: 0.3080
Epoch 12/200
1/1 - 0s - loss: 1.0741 - ac