In [70]:
import tensorflow as tf
import pandas as pd
import numpy as np
import stellargraph as sg
from sklearn.model_selection import train_test_split
from sklearn import preprocessing as pre
from stellargraph.layer.gcn import GraphConvolution, GatherIndices
import scipy.sparse as sp
from tensorflow.keras.layers import Input, Layer, Lambda, Dropout, Reshape, Dense
from tensorflow.keras import layers, optimizers, losses, metrics, Model

In [3]:
# get the data into dataframes
edges = pd.read_csv("musae_facebook_edges.csv")
target = pd.read_csv("musae_facebook_target.csv")

In [5]:
edges

Unnamed: 0,id_1,id_2
0,0,18427
1,1,21708
2,1,22208
3,1,22171
4,1,6829
...,...,...
170997,20188,20188
170998,22340,22383
170999,22348,22348
171000,5563,5563


In [6]:
target

Unnamed: 0,id,facebook_id,page_name,page_type
0,0,145647315578475,The Voice of China 中国好声音,tvshow
1,1,191483281412,U.S. Consulate General Mumbai,government
2,2,144761358898518,ESET,company
3,3,568700043198473,Consulate General of Switzerland in Montreal,government
4,4,1408935539376139,Mark Bailey MP - Labor for Miller,politician
...,...,...,...,...
22465,22465,1379955382222841,Kurt Wiegel MdL,politician
22466,22466,1651527995097082,dubdub Stories,company
22467,22467,155369444540412,Ministerio del Interior - Paraguay,government
22468,22468,175067819212798,Tottus Perú,company


In [9]:
np_edges = np.load("edges.npy")
np_features = np.load("features.npy")
np_target = np.load("target.npy")

In [23]:
print(np_edges.shape)
print(np_features.shape)
print(np_target.shape)

(342004, 2)
(22470, 128)
(22470,)


In [52]:
df_features = pd.DataFrame(np_features)
df_edges = pd.DataFrame(np_edges)
df_targets = pd.DataFrame(np_target)
df_edges.columns = ["source", "target"]
df_targets.columns = ["target"]
mat = sg.StellarGraph(df_features, df_edges)
print (mat.info())

StellarGraph: Undirected multigraph
 Nodes: 22470, Edges: 342004

 Node types:
  default: [22470]
    Features: float32 vector, length 128
    Edge types: default-default->default

 Edge types:
    default-default->default: [342004]
        Weights: all 1 (default)
        Features: none


In [56]:
train_data, test_data = train_test_split(df_targets, train_size=500)
val_data, test_data = train_test_split(test_data, train_size=500)
print("train data: ",train_data.shape, "validation data: ",val_data.shape, 
      "test data: ",test_data.shape)

val_data

train data:  (500, 1) validation data:  (500, 1) test data:  (21470, 1)


Unnamed: 0,target
7939,2
14367,2
16212,2
5683,2
8640,2
...,...
9362,1
15824,2
20175,2
15060,3


In [57]:
one_hot_target = pre.LabelBinarizer()
train_targets = one_hot_target.fit_transform(train_data['target'])
val_targets = one_hot_target.transform(val_data['target'])
test_targets = one_hot_target.transform(test_data['target'])

#df_targets
#df_edges
#df_features


Unnamed: 0,target
0,0
1,2
2,1
3,2
4,3
...,...
22465,3
22466,1
22467,2
22468,1


In [59]:

A = mat.to_adjacency_matrix(weighted=False)
A_t = A + sp.diags(np.ones(A.shape[0]) - A.diagonal())
D_t = sp.diags(np.power(np.array(A.sum(1)), -0.5).flatten(), 0)
A_norm = A.dot(D_t).transpose().dot(D_t).todense()

In [60]:
def get_node_indices(G, ids):
    node_ids = np.asarray(ids)
    flat_node_ids = node_ids.reshape(-1)

    flat_node_indices = G.node_ids_to_ilocs(flat_node_ids)
    node_indices = flat_node_indices.reshape(1, len(node_ids))
    return node_indices

train_indices = get_node_indices(mat, train_data.index)
val_indices = get_node_indices(mat, val_data.index)
test_indices = get_node_indices(mat, test_data.index)

In [62]:
features_input = np.expand_dims(np_features, 0)
A_input = np.expand_dims(A_norm, 0)

y_train = np.expand_dims(train_targets, 0)
y_val = np.expand_dims(val_targets, 0)
y_test = np.expand_dims(test_targets, 0)

In [64]:
kernel_initializer="glorot_uniform"
bias = True
bias_initializer="zeros"
n_layers = 2
layer_sizes = [32, 32]
dropout = 0.5
n_features = features_input.shape[2]
n_nodes = features_input.shape[1]

In [67]:
x_features = Input(batch_shape=(1, n_nodes, n_features))
x_indices = Input(batch_shape=(1, None), dtype="int32")
x_adjacency = Input(batch_shape=(1, n_nodes, n_nodes))
x_inp = [x_features, x_indices, x_adjacency]

In [68]:


x = Dropout(0.5)(x_features)
x = GraphConvolution(32, activation='relu', 
                     use_bias=True,
                     kernel_initializer=kernel_initializer,
                     bias_initializer=bias_initializer)([x, x_adjacency])
x = Dropout(0.5)(x)
x = GraphConvolution(32, activation='relu', 
                     use_bias=True,
                     kernel_initializer=kernel_initializer,
                     bias_initializer=bias_initializer)([x, x_adjacency])

x = GatherIndices(batch_dims=1)([x, x_indices])
output = Dense(1, activation='sigmoid')(x)



In [74]:

model = Model(inputs=[x_features, x_indices, x_adjacency], outputs=output)
model.compile(
    optimizer=optimizers.Adam(lr=0.01),
    loss=losses.binary_crossentropy,
    metrics=["acc"],
)

In [78]:

print( features_input.shape, val_indices.shape, A_input.shape)
history = model.fit(
    x = [features_input, train_indices, A_input],
    y = y_train,
    batch_size = 32,
    epochs=200,
    validation_data=([features_input, val_indices, A_input], y_val),
    verbose=1,
    shuffle=False
)



(1, 22470, 128) (1, 500) (1, 22470, 22470)
Epoch 1/200


ValueError: in user code:

    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/keras/engine/training.py:853 train_function  *
        return step_function(self, iterator)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/keras/engine/training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/keras/engine/training.py:835 run_step  **
        outputs = model.train_step(data)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/keras/engine/training.py:788 train_step
        loss = self.compiled_loss(
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/keras/engine/compile_utils.py:201 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/keras/losses.py:141 __call__
        losses = call_fn(y_true, y_pred)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/keras/losses.py:245 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/keras/losses.py:1809 binary_crossentropy
        backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/keras/backend.py:5000 binary_crossentropy
        return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/tensorflow/python/ops/nn_impl.py:245 sigmoid_cross_entropy_with_logits_v2
        return sigmoid_cross_entropy_with_logits(
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/pentaflouride/anaconda3/lib/python3.8/site-packages/tensorflow/python/ops/nn_impl.py:132 sigmoid_cross_entropy_with_logits
        raise ValueError("logits and labels must have the same shape (%s vs %s)" %

    ValueError: logits and labels must have the same shape ((1, 500, 1) vs (None, 500, 4))
