# 使用 TensorFlow 2 实现 GAT

In [1]:
import os 
import numpy as np
import matplotlib.pyplot as plt
import scipy.sparse as sp
import networkx as nx

import tensorflow as tf
from tensorflow import keras
import warnings

In [17]:
tf.config.list_physical_devices('GPU')

[]

In [18]:
# tf.test.is_gpu_available() is deprecated; report whether any physical GPU is visible instead.
len(tf.config.list_physical_devices('GPU')) > 0

False

# 1. 数据导入

依旧使用 Cora 数据集，与 GCN 的实现相同，所以这里只是把 GCN 实现中的数据处理代码搬了过来

In [19]:
# Locations of the raw Cora files and of the directory that holds them.
data_dir = '../datasets/cora'
content_path = '../datasets/cora/cora.content'
cite_path = '../datasets/cora/cora.cites'
# Show, as a tuple, whether each raw file is present on disk.
tuple(os.path.exists(path) for path in (content_path, cite_path))

(True, True)

In [5]:
# Read the raw text lines of the content file and of the citation file.
with open(content_path, 'r') as f:
    contents = f.readlines()
with open(cite_path, 'r') as f:
    cites = f.readlines()

# Split every tab-separated line into its fields and stack the rows into string arrays.
contents = np.array([line.strip().split('\t') for line in contents])
cites_raw = np.array([line.strip().split('\t') for line in cites])

In [6]:
# Split each row into: first column (paper id), middle columns (features), last column (class name).
papers_raw, features_raw, labels_raw = np.split(contents, [1, -1], axis=1)
features = features_raw.astype(np.float32)

# Map raw paper ids and class names to consecutive integer indices.
paper_dict = {paper_id: idx for idx, paper_id in enumerate(np.squeeze(papers_raw))}
label_dict = {name: idx for idx, name in enumerate(np.unique(np.squeeze(labels_raw)))}

# Column vectors holding the integer index of every paper / class.
papers = np.array([[paper_dict[paper_id]] for paper_id in papers_raw.reshape(-1)])
labels = np.array([[label_dict[name]] for name in labels_raw.reshape(-1)])

# Re-express each citation pair with the integer paper indices.
cites = np.array([[paper_dict[pair[0]], paper_dict[pair[1]]] for pair in cites_raw])

# Basic dataset statistics.
node_num = papers.shape[0]
edge_num = cites.shape[0]
feature_dim = features.shape[1]
label_num = len(label_dict)

# Summary report of the graph data.
print('{:=^30}'.format('图数据信息'))
print('节点数量：', node_num)
print('边数量：', edge_num)
print('特征维数：', feature_dim)
print('标签类别数量：', label_num)
print('标签类别：')
for label in label_dict:
    print('{: <5}- {:<}'.format('', label))

节点数量： 2708
边数量： 5429
特征维数： 1433
标签类别数量： 7
标签类别：
     - Case_Based
     - Genetic_Algorithms
     - Neural_Networks
     - Probabilistic_Methods
     - Reinforcement_Learning
     - Rule_Learning
     - Theory


In [39]:
label_dict

{'Case_Based': 0,
 'Genetic_Algorithms': 1,
 'Neural_Networks': 2,
 'Probabilistic_Methods': 3,
 'Reinforcement_Learning': 4,
 'Rule_Learning': 5,
 'Theory': 6}

In [38]:
features

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [40]:
# Save the processed data to disk so it can be reused later without re-parsing the raw files.
import pickle

data = {'papers': papers, 'labels': labels, 'cites': cites, 'features': features,
        'paper_dict': paper_dict, 'label_dict': label_dict, 'node_num': node_num,
        'edge_num': edge_num, 'label_num': label_num, 'feature_dim': feature_dim}

# A context manager guarantees the file is flushed and closed; the previous
# `pickle.dump(data, open(...))` left the handle open until garbage collection.
with open(os.path.join(data_dir, 'cora_data.pkl'), 'wb') as pkl_file:
    pickle.dump(data, pkl_file)

In [60]:
papers.shape, labels.shape, cites.shape, features.shape

((2708, 1), (2708, 1), (5429, 2), (2708, 1433))

In [61]:
node_num, edge_num, label_num, feature_dim

(2708, 5429, 7, 1433)

# 2. 构造邻接矩阵

In [7]:
# Undirected graph with one node per paper index and one edge per citation pair.
G = nx.Graph()
G.add_nodes_from(range(papers.shape[0]))
G.add_edges_from(cites)

# Adjacency matrix of the graph as returned by networkx.
adj_matrix = nx.adjacency_matrix(G)

In [8]:
adj_matrix.shape, type(adj_matrix)

((2708, 2708), scipy.sparse.csr.csr_matrix)

In [65]:
# Convert the sparse adjacency matrix to a dense array and add self-loops
# (an identity matrix), giving the adjacency matrix with self-connections.
A = adj_matrix.toarray() + np.eye(adj_matrix.shape[0])
A

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [9]:
print(f'节点数：{node_num}')

节点数：2708


# 3. 数据集分割

数据集分割方式：
- 训练集：**每个类别中取出20个**作为训练集（共140）
- 验证集：剩下数据集中取出**500个**
- 测试集：剩下数据集中取出**1000个**

由于计算时是需要所有的图数据和特征，所以数据集分割采用的是mask数组，**只在算损失和准确率时使用**

In [68]:
labels_ = labels.ravel()

array([2, 5, 4, ..., 1, 0, 2])

In [None]:
# Fixed-size split matching the description in this section: 20 nodes per class
# for training, then 500 of the remaining nodes for validation and the next
# 1000 for testing. (The previous code used a sequential 60/20/20 split, which
# contradicted that description.) The selection is deterministic: nodes are
# always taken in index order.
TRAIN_PER_CLASS, VAL_SIZE, TEST_SIZE = 20, 500, 1000

node_labels = labels.ravel()

# First TRAIN_PER_CLASS node indices of every class form the training set.
train_idx = np.concatenate(
    [np.flatnonzero(node_labels == class_id)[:TRAIN_PER_CLASS] for class_id in range(label_num)])

# Validation and test nodes are taken, in index order, from the nodes not used for training.
remaining_idx = np.setdiff1d(np.arange(node_num), train_idx)
val_idx = remaining_idx[:VAL_SIZE]
test_idx = remaining_idx[VAL_SIZE:VAL_SIZE + TEST_SIZE]

# Boolean masks over all nodes; the full graph is always fed to the model and
# the masks only select which nodes contribute to loss / accuracy.
train_mask = np.zeros(node_num, dtype=bool)
val_mask = np.zeros(node_num, dtype=bool)
test_mask = np.zeros(node_num, dtype=bool)
train_mask[train_idx] = True
val_mask[val_idx] = True
test_mask[test_idx] = True

train_mask = tf.cast(train_mask, tf.bool)
val_mask = tf.cast(val_mask, tf.bool)
test_mask = tf.cast(test_mask, tf.bool)

In [48]:
labels_ = np.squeeze(labels)

In [56]:
labels_.shape

(2708,)

In [58]:
np.arange(len(labels_))[labels_==1]

array([  11,   22,   38,   42,   53,   55,   65,  100,  120,  129,  139,
        141,  145,  163,  168,  188,  189,  191,  209,  219,  228,  237,
        240,  243,  265,  273,  290,  309,  329,  343,  357,  361,  372,
        380,  390,  395,  402,  403,  415,  422,  448,  475,  493,  523,
        530,  537,  538,  546,  602,  606,  658,  681,  689,  712,  714,
        717,  721,  727,  728,  743,  744,  755,  757,  758,  764,  765,
        769,  780,  781,  789,  793,  800,  803,  811,  813,  815,  833,
        844,  846,  854,  856,  860,  880,  900,  902,  910,  934,  935,
        938,  940,  941,  942,  943,  953,  956,  958,  959,  961,  964,
        965,  966,  970,  972,  974,  982,  995, 1009, 1016, 1018, 1030,
       1034, 1035, 1036, 1054, 1060, 1063, 1065, 1069, 1070, 1077, 1096,
       1098, 1099, 1106, 1110, 1113, 1114, 1117, 1127, 1130, 1131, 1134,
       1136, 1139, 1140, 1142, 1143, 1153, 1159, 1167, 1175, 1178, 1184,
       1204, 1205, 1206, 1207, 1216, 1218, 1219, 12

GAT实验设置：

- 参数初始化：Glorot
- 优化算法：Adam
- 学习率：0.005
- 提早停止：True
- epochs：100
- 层数：2
- 第一层：8 个注意力头（heads），每个头 $F' = 8$，拼接后共 64 个输出特征，ELU 作激活函数
- 第二层：1 个注意力头，$F' = classes$，共 $classes$ 个输出特征，softmax 激活函数
- 正则化：L2，$\lambda = 0.0005$
- Dropout：0.6，用在每层输入和**规范化注意力系数**

GCN实验设置：

计算公式：$$Z = \mathrm{softmax}\left(\hat A \, \mathrm{ReLU}\left(\hat A X W^{(0)}\right) W^{(1)}\right)$$

- Dropout：0.5，所有层
- 正则化：L2，$\lambda = 0.0005$，第一层
- 隐层数：1
- 隐层神经元数：16

# 4. 模型构建

In [None]:
class GAT_conv(keras.layers.Layer):
    """Graph propagation layer computing ``activation(A_hat @ X @ W + b)``.

    NOTE(review): despite its name, this layer does not compute attention
    coefficients yet. ``node_num`` and ``coef_dropout`` are only stored so the
    attention step can be added later without changing the constructor.

    Args:
        units: Number of output features per node.
        A_hat: Default propagation matrix, used when ``call`` is not given one.
            Presumably the (N, N) adjacency matrix with self-loops -- confirm
            against the caller.
        node_num: Number of graph nodes (stored, currently unused).
        activation: Anything accepted by ``keras.activations.get``; ``None``
            yields the identity (linear) activation.
        dropout_rate: Dropout rate applied to the layer input while training.
        use_bias: Whether a learned bias vector is added.
        kernel_initializer: Initializer for the weight matrix.
        bias_initializer: Initializer for the bias vector.
        coef_dropout: Dropout rate reserved for attention coefficients
            (stored, currently unused).
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, units, A_hat, node_num, activation=None, dropout_rate=0.0,
                 use_bias=True, kernel_initializer='glorot_uniform',
                 bias_initializer='zeros', coef_dropout=0.0, **kwargs):
        # The original signature declared `activation` twice (a SyntaxError),
        # misspelled `True` and read an undefined name `bias`.
        super().__init__(**kwargs)
        self.units = units
        self.A_hat = A_hat
        self.node_num = node_num
        self.activation = keras.activations.get(activation)
        self.dropout_rate = dropout_rate
        self.use_bias = use_bias
        self.kernel_initializer = keras.initializers.get(kernel_initializer)
        self.bias_initializer = keras.initializers.get(bias_initializer)
        self.coef_dropout = coef_dropout

    def build(self, input_shape):
        """Create the weight matrix (and the bias, if enabled) for the given input shape."""
        # Use the configured initializers instead of a hard-coded 'random_normal'.
        self.W = self.add_weight(name='kernel',
                                 shape=(input_shape[-1], self.units),
                                 initializer=self.kernel_initializer,
                                 trainable=True)
        if self.use_bias:
            self.b = self.add_weight(name='bias',
                                     shape=(self.units,),
                                     initializer=self.bias_initializer,
                                     trainable=True)
        else:
            self.b = None

    def call(self, inputs, A_hat=None, training=None):
        """Propagate node features through the graph.

        Args:
            inputs: Node feature matrix; its last dimension must match the
                input dimension the layer was built with.
            A_hat: Propagation matrix; falls back to the one given to the
                constructor when omitted.
            training: When truthy, input dropout is applied.

        Returns:
            ``activation(A_hat @ inputs @ W [+ b])``.
        """
        if A_hat is None:
            A_hat = self.A_hat
        if training and self.dropout_rate > 0.0:
            inputs = tf.nn.dropout(inputs, rate=self.dropout_rate)
        # Project the features first so the propagation matrix multiplies the
        # narrower (nodes x units) matrix; mathematically equal to (A_hat @ X) @ W.
        support = tf.matmul(inputs, self.W)
        # Cast so a float64 NumPy adjacency matrix can be combined with float32 features.
        outputs = tf.matmul(tf.cast(A_hat, support.dtype), support)
        if self.use_bias:
            outputs = outputs + self.b
        return self.activation(outputs)

In [None]:
def masked_sparse_cross_entropy(preds, labels, mask):
    """Mean cross-entropy over the nodes selected by ``mask``.

    Args:
        preds: Per-node class probabilities (not logits); after masking it is
            reduced along axis 1, so it must be 2-D (nodes x classes).
        labels: Integer class index of every node; any shape that flattens to
            one label per node (e.g. ``(N,)`` or ``(N, 1)``).
        mask: Boolean mask over the node axis selecting the nodes to score.

    Returns:
        Scalar tensor: the average negative log-probability of the true class.
    """
    # Take the class count from the predictions instead of hard-coding 7.
    num_classes = preds.shape[-1]
    # tf.boolean_mask accepts both tensors and NumPy arrays, unlike `.ravel()`.
    selected_preds = tf.boolean_mask(preds, mask)
    selected_labels = tf.reshape(tf.boolean_mask(labels, mask), [-1])
    # Clip to avoid log(0).
    log_probs = tf.math.log(tf.clip_by_value(selected_preds, 1e-7, 1.0))
    targets = tf.one_hot(selected_labels, num_classes, dtype=log_probs.dtype)
    per_node_loss = -tf.reduce_sum(log_probs * targets, axis=1)
    # Equivalent alternative:
    # keras.losses.SparseCategoricalCrossentropy(from_logits=False)(labels[mask], preds[mask])
    return tf.reduce_mean(per_node_loss)

def masked_accuracy(preds, labels, mask):
    """Classification accuracy over the nodes selected by ``mask``.

    Args:
        preds: Per-node class scores; the predicted class is the argmax along axis 1.
        labels: Integer class index of every node; any shape that flattens to
            one label per node (e.g. ``(N,)`` or ``(N, 1)``).
        mask: Boolean mask over the node axis selecting the nodes to score.

    Returns:
        Scalar float32 tensor: the fraction of selected nodes predicted correctly.
    """
    predicted = tf.argmax(tf.boolean_mask(preds, mask), axis=1)
    expected = tf.reshape(tf.boolean_mask(labels, mask), [-1])
    # tf.equal needs matching dtypes; argmax yields int64 while labels may be int32.
    expected = tf.cast(expected, predicted.dtype)
    hits = tf.cast(tf.equal(predicted, expected), tf.float32)
    return tf.reduce_mean(hits)