# 用Pytorch实现GCN

In [212]:
import os 
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import scipy.sparse as sp
import networkx as nx

import torch
import torch.nn.functional as F
import tensorflow as tf
from tensorflow import keras

## 1. 导入数据

采用Cora数据集。该数据集是一个论文图，共2708个节点，每个节点都是一篇论文，所有样本点被分为7类别：

1. Case_Based
2. Genetic_Algorithms
3. Neural_Networks
4. Probabilistic_Methods
5. Reinforcement_Learning
6. Rule_Learning
7. Theory

每篇论文都由一个1433维的词向量表示，即节点特征维度为1433。词向量的每个特征都对应一个词，取0表示该特征对应的词不在论文中，取1则表示在论文中。每篇论文都至少引用了一篇其他论文，或者被其他论文引用，这是一个连通图，不存在孤立点。

In [18]:
content_path = '../datasets/cora/cora.content'
cite_path = '../datasets/cora/cora.cites'

In [19]:
os.path.exists(content_path), os.path.exists(cite_path)

(True, True)

原始数据分别存于变量`contents`, `cites`

In [47]:
with open(content_path, 'r') as f:
    contents = f.readlines()
with open(cite_path, 'r') as f:
    cites = f.readlines()
contents = np.array([l.strip().split('\t') for l in contents])
cites_raw = np.array([i.strip().split('\t') for i in cites])

In [29]:
contents, cites

(array([['31336', '0', '0', ..., '0', '0', 'Neural_Networks'],
        ['1061127', '0', '0', ..., '0', '0', 'Rule_Learning'],
        ['1106406', '0', '0', ..., '0', '0', 'Reinforcement_Learning'],
        ...,
        ['1128978', '0', '0', ..., '0', '0', 'Genetic_Algorithms'],
        ['117328', '0', '0', ..., '0', '0', 'Case_Based'],
        ['24043', '0', '0', ..., '0', '0', 'Neural_Networks']],
       dtype='<U22'),
 array([['35', '1033'],
        ['35', '103482'],
        ['35', '103515'],
        ...,
        ['853118', '1140289'],
        ['853155', '853118'],
        ['954315', '1155073']], dtype='<U7'))

## 2. 数据预处理

对原始数据进行预处理，提取有效信息

In [158]:
# 分割数据，第二个参数是分割点，也可以使用np.hsplit，就不用指定axis
papers_raw, features, labels_raw = np.split(contents, [1, -1], axis=1)

In [159]:
papers_raw.shape, features.shape, labels_raw.shape

((2708, 1), (2708, 1433), (2708, 1))

建立一个**映射字典**，值为自然序数，键为论文代号

同样建立一个字典，值为自然序数，键为论文类别

In [160]:
paper_dict = {key:value for value, key in enumerate(np.squeeze(papers_raw))}
label_dict = {key:value for value, key in enumerate(np.unique(np.squeeze(labels_raw)))}

将论文和论文类别通过字典进行映射，同时将引用中的论文也通过字典进行映射

In [170]:
papers = np.array([[paper_dict[key]] for key in papers_raw.reshape(-1)])
labels = np.array([[label_dict[key]] for key in labels_raw.reshape(-1)])

In [213]:
cites = np.array([[paper_dict[i[0]], paper_dict[i[1]]] for i in cites_raw])

引用数据为单向的，为了构建**双向边**，将引用数据反转一次边后连接起来，此时边数变为了原来的两倍

In [77]:
# cites = np.concatenate((cites, cites[:, ::-1]), axis=0)

In [214]:
node_num = len(papers)
label_num = len(label_dict.keys())
feature_dim = features.shape[1]
edge_num = len(cites)

In [215]:
print('{:=^30}'.format('数据信息'))
print('节点数量：', node_num)
print('边数量：', edge_num)
print('特征维数：', feature_dim)
print('标签种类数量：', label_num)
print('标签种类：')
for label in label_dict.keys():
    print('{: <5}- {:<}'.format('', label))

节点数量： 2708
边数量： 5429
特征维数： 1433
标签种类数量： 7
标签种类：
     - Case_Based
     - Genetic_Algorithms
     - Neural_Networks
     - Probabilistic_Methods
     - Reinforcement_Learning
     - Rule_Learning
     - Theory


通过networx构建无向图，并构建**邻接矩阵**。

In [220]:
G = nx.Graph()
G.add_nodes_from(range(len(papers)))
G.add_edges_from(cites)
adj_matrix = nx.adjacency_matrix(G)

In [227]:
adj_matrix.shape

(2708, 2708)

In [237]:
np.array(adj_matrix.sum(axis=0)).flatten()

array([5, 1, 4, ..., 4, 4, 3], dtype=int64)

邻接矩阵**正规化函数**，返回值对应论文中的：
$$
\hat A = D^{-1/2}(A+I)D^{-1/2}
$$

In [285]:
def sparse_to_tuple(sparse_mx):
    """
    Convert sparse matrix to tuple representation.
    """
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        coords = np.vstack((mx.row, mx.col)).transpose()
        values = mx.data
        shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx


def normalize_adjacent(adj):
    adj += sp.eye(adj.shape[0])
    degree = np.array(adj.sum(axis=1))
    d_hat = sp.diags(np.power(degree, -0.5).flatten())
    norm_adj = d_hat @ adj @ d_hat
    return norm_adj


def preprocess_adj(adj):
    adj_normalized = normalize_adjacent(adj + sp.eye(adj.shape[0]))
    return sparse_to_tuple(adj_normalized)

In [260]:
A_hat = normalize_adjacent(adj_matrix)

## 3. 数据分割

按8：2进行数据分割，分为训练集和测试集

In [283]:
train_mask = np.zeros(node_num, dtype=np.uint8)
val_mask = np.zeros(node_num, dtype=np.uint8)
test_mask = np.zeros(node_num, dtype=np.uint8)
pivot = int(node_num * 0.6)
train_mask[:pivot] = 1
pivot1 = int(node_num * 0.8)
val_mask[pivot: pivot1] = 1 
test_mask[pivot1: ] = 1

In [151]:
train_mask.sum(), test_mask.sum()

(tensor(2166), tensor(542))

In [287]:
train_mask = tf.convert_to_tensor(train_mask)
val_mask = tf.convert_to_tensor(val_mask)
test_mask = tf.convert_to_tensor(test_mask)
# features = tf.SparseTensor(sparse_to_tuple(features))
A_hat = tf.SparseTensor(sparse_to_tuple(A_hat))

TypeError: __init__() missing 2 required positional arguments: 'values' and 'dense_shape'

## 4. 模型层

计算：
状态转移方程
$$
H^n = \sigma(\hat A H^{n-1} W^{n-1})
$$
其中$\hat A$为正规化邻接矩阵，计算如下：
$$
\hat A = D^{-1/2}(A+I)D^{-1/2}
$$

In [243]:
tf.test.is_gpu_available(), tf.config.list_physical_devices('GPU')

(True,
 [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
  PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')])

In [278]:
class GCNConv(keras.layers.Layer):
    def __init__(self, input_dim, output_dim, placeholders, dropout=False, 
                 activation=None, with_bias=False, **kwargs):
        super().__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.activation = activation
        self.A_hat = placeholders['A_hat']
        if dropout:
            self.dropout = placeholders['dropout']
        else:
            self.dropout = 0
        self.with_bias = with_bias
    
    def build(self):
        self.kernel = self.add_weight(
            name='kernel', shape=[self.input_dim, self.output_dim], 
            initializer=self.kernel_initializer)
        if self.with_bias:
            self.bias = self.add_weight(
                name='bias', shape=[self.output_dim], initializer='zeros')
    
    def call(self, X):
        output = self.A_hat @ X @ self.kernel
        if self.with_bias:
            output += self.bias
        return self.activation(output)

In [272]:
def masked_softmax_cross_entropy(preds, labels, mask):
    """Softmax cross-entropy loss with masking."""
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=labels)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    loss *= mask
    return tf.reduce_mean(loss)

def masked_accuracy(preds, labels, mask):
    """Accuracy with masking."""
    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(labels, 1))
    accuracy_all = tf.cast(correct_prediction, tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    accuracy_all *= mask
    return tf.reduce_mean(accuracy_all)

In [292]:
class GCN(keras.Model):
    def __init__(self, placeholders, **kwargs):
        super().__init__(**kwargs)
        self.inputs = placeholders['features']
        self.input_dim = placeholders['input_dim']
        self.hidden_dim = placeholders['hidden_dim']
        self.output_dim = placeholders['output_dim']
        self.placeholders = placeholders
        
        self.slayers = []
        self.slayers.append(GCNConv(self.input_dim, self.hidden_dim, placeholders,
                                   activation=tf.nn.relu))
        self.slayers.append(GCNConv(self.hidden_dim, self.output_dim, placeholders,
                                  activation=lambda x:x))
        
    def call(self, inputs, training=True):
        x, label, mask = inputs
        output = x
        for layer in self.slayers:
            output = layer(output)
#         loss = masked_softmax_cross_entropy(output[-1], self.placeholders['labels'],
#                                            self.placeholders['labels_mask'])
#         self.add_loss(loss)
        loss += masked_softmax_cross_entropy(output, label, mask)

        acc = masked_accuracy(output, label, mask)
        return loss, acc
    
#     def predict(self):
#         return tf.nn.softmax(self.outputs)
#     def compute_output_shape(self, input_shape):
#         shape = tf.TensorShape(input_shape).as_list()
#         shape[-1] = self.num_

In [279]:
placeholders = {
    'A_hat': A_hat,
    'features': features,
    'input_dim': feature_dim,
    'hidden_dim': 32,
    'output_dim': label_num,
}
model = GCN(placeholders)

In [293]:
optimizer = keras.optimizers.Adam(lr=1e-2)
for epoch in range(200):

    with tf.GradientTape() as tape:
        loss, acc = model(features)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    _, val_acc = model(features, training=False)


    if epoch % 20 == 0:

        print(epoch, float(loss), float(acc), '\tval:', float(val_acc))

TypeError: build() takes 1 positional argument but 2 were given

In [None]:
model.fit(features, labels, validation_data=())

In [None]:
class GCNmodel(keras.Model):
    def __init__(self, input_dim, hidden_dim, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.input = keras.layers.Input(shape=(input_dim,))
        self.conv1 = GCNConv(hidden_dim, activation='relu')
        self.conv2 = GCNConv(output_dim, activation='softmax')
    
    def call(self, inputs):
        Z = self.conv1(inputs)
        Z = self.conv2(Z)
        return Z

## 5. 模型训练和评测