In [None]:
import numpy as np
from utils import preprocess_adj,plot_embeddings,load_data_v1

import tensorflow as tf
import networkx as nx
from tensorflow.python.keras.callbacks import ModelCheckpoint
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.layers import Lambda
from tensorflow.python.keras.models import  Model

In [4]:
from graphsage import sample_neighs,GraphSAGE

In [5]:
 # Load the 'cora' dataset via the project helper. By their names and later use
 # these are the adjacency matrix, the node features, the label arrays and the
 # train/validation/test masks.
 (A, features,
  y_train, y_val, y_test,
  train_mask, val_mask, test_mask) = load_data_v1('cora')

Dataset has 2708 nodes, 5429 edges, 1433 features.


In [6]:
# Show the repr of the loaded feature matrix as the cell output.
features

<2708x1433 sparse matrix of type '<class 'numpy.float32'>'
	with 3880564 stored elements in Compressed Sparse Row format>

In [7]:
# Row-normalise the features: sum(axis=1) adds up each row, and every row is
# divided by its own sum so that each node's feature vector sums to 1.
# Rows that sum to zero get a divisor of 1, so they stay all-zero instead of
# turning into NaN (0/0).
row_sums = np.asarray(features.sum(axis=1)).reshape(-1, 1)
row_sums[row_sums == 0] = 1
# Plain division (not `/=`): a SciPy sparse matrix cannot be divided by a dense
# array in place, so the result is a new dense matrix bound to `features`.
features = features / row_sums

In [8]:
# Convert the adjacency matrix into a directed graph structure.
# NetworkX 3.0 removed `from_scipy_sparse_matrix`; its replacement
# `from_scipy_sparse_array` (added in 2.7) takes the same arguments, so use it
# when available and fall back to the old name on older installs.
_graph_from_sparse = (getattr(nx, "from_scipy_sparse_array", None)
                      or nx.from_scipy_sparse_matrix)
G = _graph_from_sparse(A, create_using=nx.DiGraph())
# Peek at the neighbours (and edge attributes) of node 1000 as a sanity check.
G[1000]

AtlasView({569: {'weight': 1.0}, 931: {'weight': 1.0}, 1185: {'weight': 1.0}})

In [9]:
# Add self-connections and normalise the adjacency matrix so that nodes with
# large values do not dominate (per the original note; preprocess_adj lives in
# utils and is not shown here).
# NOTE(review): this rebinds A, so re-running the cell would feed the already
# processed matrix through preprocess_adj a second time.
A = preprocess_adj(A)

In [10]:
# One integer id per row of the adjacency matrix.
indexs = np.arange(A.shape[0])
neigh_number = [10, 25]   # number of neighbour nodes to sample, one entry per sampling pass
neigh_maxlen = []         # filled by the sampling loop in the next cell

# Base model inputs: the features followed by the node ids.
# `np.int` was only an alias for the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly.
model_input = [features, np.asarray(indexs, dtype=int)]

In [11]:
# model_input starts out as [features, indexs]; this cell appends one sampled
# neighbour array per entry of neigh_number. Drop anything a previous run of
# this cell appended first, so re-running it does not stack extra inputs
# (both resets are no-ops on a fresh kernel).
del model_input[2:]
neigh_maxlen.clear()

for num in neigh_number:
    # sample_neighs returns the sampled neighbours and a per-node length sequence.
    sample_neigh, sample_neigh_len = sample_neighs(
        G, indexs, num, self_loop=False)
    model_input.append(sample_neigh)
    # The largest length is kept; it is later passed to GraphSAGE as neighbor_num.
    neigh_maxlen.append(max(sample_neigh_len))

In [11]:
# Inspect the assembled inputs: features, node ids, then one sampled-neighbour
# array per entry of neigh_number.
model_input

[matrix([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 array([   0,    1,    2, ..., 2705, 2706, 2707]),
 array([[ 544,  544,  435, ...,   14,  544,   14],
        [ 344,  344,  344, ...,  344,  344,  344],
        [ 552,  410,  552, ...,  410,  471,  410],
        ...,
        [1840, 1839, 1840, ..., 1784, 2216, 1839],
        [1640, 1640, 1640, ..., 1046, 1752, 1752],
        [2344, 2344, 2344, ..., 1389, 1389, 1389]], dtype=int32),
 array([[ 544,    8,  258, ...,  258,   14,  544],
        [ 344,  344,  344, ...,  344,  344,  344],
        [ 410,  552,  552, ...,  552,  565,  552],
        ...,
        [1839, 2216, 2216, ..., 1784, 1839, 1839],
        [1640, 1138, 1138, ..., 1138, 1640, 1752],
        [2344,  774, 2344, ..., 2344,  774, 2344]], dtype=int32)]

In [12]:
# Show the maximum sampled-neighbour length recorded for each sampling pass.
neigh_maxlen

[10, 25]

In [21]:
# Validation tuple handed to fit() later: inputs, targets and the validation
# mask (presumably used as per-sample weights, mirroring sample_weight=train_mask).
val_data = (model_input, y_val, val_mask)

# Build the GraphSAGE model. The input width comes from the feature matrix,
# the class count from the label array, and the neighbour counts from the
# sampling step.
model = GraphSAGE(
    feature_dim=features.shape[1],
    neighbor_num=neigh_maxlen,
    n_hidden=16,
    n_classes=y_train.shape[1],
    use_bias=True,
    activation=tf.nn.relu,
    aggregator_type='mean',
    dropout_rate=0.5,
    l2_reg=2.5e-4,
)

# Adam optimiser with categorical cross-entropy loss; the metrics are declared
# as weighted metrics.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    weighted_metrics=['categorical_crossentropy', 'acc'],
)

In [22]:
 # Checkpoint callback: monitors the weighted validation cross-entropy and
 # writes weights only (not the full model) to ./log/best_model.h5, keeping
 # only the best result.
 mc_callback = ModelCheckpoint(
     filepath='./log/best_model.h5',
     monitor='val_weighted_categorical_crossentropy',
     save_best_only=True,
     save_weights_only=True,
 )

In [23]:
# Take the backend from the Keras bundled with TensorFlow, matching the other
# tensorflow.python.keras imports in this notebook, instead of the standalone
# `keras` package (a separate install with its own floatx setting).
from tensorflow.python.keras import backend as K

# Cast the training labels to Keras' configured float dtype (floatx).
y_train = K.cast_to_floatx(y_train)

In [None]:
print("start training")

# One batch covers every row of A (batch_size=A.shape[0]) and shuffling is off.
# train_mask is passed as the per-sample weight; the checkpoint callback runs
# during training.
model.fit(
    x=model_input,
    y=y_train,
    batch_size=A.shape[0],
    epochs=20,
    verbose=2,
    shuffle=False,
    sample_weight=train_mask,
    validation_data=val_data,
    callbacks=[mc_callback],
)

