### HAN网络代码理解(使用DGL)

In [4]:
!pip install dgl-cu101 # install deep graph library



In [0]:
import torch
import dgl
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GATConv   # call dgl library
import os

###**Question**:Does GAT layer is the same as part of node-level attention of HAN?
***
**------------------node-level attention in HAN-------------------**

**01 Type specific transformation of node feature**:

$\mathbf{h}_{i}^{\prime}=\mathbf{M}_{\phi_{i}} \cdot \mathbf{h}_{i}$

**02 weight coefficient of node:**

$\alpha_{i j}^{\Phi}=\operatorname{softmax}_{j}\left(e_{i j}^{\Phi}\right)=\frac{\exp \left(\sigma\left(\mathbf{a}_{\Phi}^{\mathrm{T}} \cdot\left[\mathbf{h}_{i}^{\prime} \| \mathbf{h}_{j}^{\prime}\right]\right)\right)}{\sum_{k \in \mathcal{N}_{i}^{\oplus}} \exp \left(\sigma\left(\mathbf{a}_{\Phi}^{\mathrm{T}} \cdot\left[\mathbf{h}_{i}^{\prime} \| \mathbf{h}_{k}^{\prime}\right]\right)\right)}$

**03 the meta-path based embedding of node**

$\mathbf{z}_{i}^{\Phi}=\sigma\left(\sum_{j \in \mathcal{N}_{i}^{\phi}} \alpha_{i j}^{\Phi} \cdot \mathbf{h}_{j}^{\prime}\right)$

**04 concatenate the learned embeddings as the semantic-specific embedding**

$\mathbf{z}_{i}^{\Phi}=\prod_{k=1}^{K} \sigma\left(\sum_{j \in \mathcal{N}_{i}^{\phi}} \alpha_{i j}^{\Phi} \cdot \mathbf{h}_{j}^{\prime}\right)$
***
**总结:异质图通过元路径转化为多张图，假设异质图中定义了3种元路径，则产生3张图，对于3张图，每个图的特征提取过程都如上所示，使用了self-attention,masked-attention,multi-head attention。**
***
注意：从GAT论文与HAN论文的描述来看，以上四个步骤与GAT论文中node-level attention的描述是一致的，**区别在于HAN的将其应用到通过元路径产生的多张图，作为node-level attention，这就解释了dgl库中直接将GATConv进行封装**。


### semantic-level attention
***
**importance of each meta-path**:
$w_{\Phi_{i}}=\frac{1}{|\mathcal{V}|} \sum_{i \in \mathcal{V}} \mathbf{q}^{\mathrm{T}} \cdot \tanh \left(\mathbf{W} \cdot \mathbf{z}_{i}^{\Phi}+\mathbf{b}\right)$

$W$ is the weight matrix, $b$ is the bias vector, $q$ is the semantic- level attention vector
***



In [0]:
# learn the importance of each meta-path and assign proper weights to them
# z:(node_num,num_meta_paths,in_size)
class SemanticAttention(nn.Module):
    def __init__(self, in_size, hidden_size=128):
        super(SemanticAttention, self).__init__()

        self.project = nn.Sequential(
            nn.Linear(in_size, hidden_size), # (node_num,num_meta_paths,hidden_size)
            nn.Tanh(),
            nn.Linear(hidden_size, 1, bias=False) #(node_num,num_meta_paths,1) 
            # here, use linear transformation as dot q and average
        )
# (node_num,num_meta_paths,1)*(node_num,num_meta_paths,in_size)==(node_num,num_meta_paths,in_size)
    def forward(self, z):
        w = self.project(z) # w:(node_num,num_meta_paths,1)
        print(w.shape)
        beta = torch.softmax(w, dim=1) # beta:(node_num,num_meta_paths,1)
        # beta*z:(node_num,num_meta_paths,in_size),this is element-wise product(with broadcast)
        return (beta * z).sum(1)  # 这里tensor相乘用到了广播机制，最终效果就是实现加权求和

In [17]:
test_z = torch.randn(100,3,256)
test_layer = SemanticAttention(256,128)
out = test_layer(test_z)
print(out.shape)

torch.Size([100, 3, 1])
torch.Size([100, 256])


### HANLayer
异质图中定义了多少元路径，就需要多少GAT_layer并行处理图来node-level attention。

dgl中GATConv比还有其他参数，可以看官方手册。

In [0]:
class HANLayer(nn.Module):
  def __init__(self,num_meta_paths,in_size,out_size,num_heads,dropout):
    super(HANLayer,self).__init__()
    self.gat_layers=nn.ModuleList()
    # node-level attention
    for tmp in range(num_meta_paths):
      self.gat_layers.append(GATConv(in_size,out_size,num_heads,
                    dropout,dropout,activation=F.elu))
    # semantic-level attention
    self.semantic_attention = SemanticAttention(num_heads*out_size)
    self.num_meta_paths = num_meta_paths
  def forward(self,graph_list,node_feature):
    semantic_embeddings=[]
    for i,graph in enumerate(graph_list): 
      # 这里由于利用了dgl官方的函数,所以必须进行使用官方的图类型graph
      semantic_embeddings.append(self.gat_layers[i](graph,node_feature).flatten(1))
    # [N,H,D_out]--------flatten------->[N,H*D_out]
    semantic_embedding=torch.stack(semantic_embeddings,dim=1)
    # [N,num_meat_paths,H*D_out]
    return self.semantic_attention(semantic_embedding) # [N,H*D_out]

In [0]:
from scipy.sparse import coo_matrix
import networkx as nx

In [28]:
# coo_matrix:(row,column,value)
# -----------------way to creat graph-----------------------------
spmat = coo_matrix(([1,1,1], ([0, 0, 1], [2, 3, 2])), shape=(10, 10))
# print(spmat.todense()) # todense()可以看到所创建的矩阵
test_graph1 = dgl.graph(spmat)
test_graph2 = dgl.graph(spmat)
features=torch.randn(10,100)
in_size=100
out_size=8
num_heads=8
dropout=0.6
test_model_1 = GATConv(in_size,out_size,num_heads,dropout,dropout,activation=F.elu)
out = test_model_1(test_graph1,features)

AttributeError: ignored

In [0]:
# 这里其实将看是否需要叠加多层HAN
class HAN(nn.Module):
  # out_size: number of node class 
  def __init__(self,num_meta_paths,in_size,hidden_size,out_size,num_head_list,dropout):
    super(HAN,self).__init__()
    self.layers = nn.ModuleList()
    self.layers.append(HANLayer(num_meta_paths,in_size,hidden_size,num_head_list[0],dropout))
    # 第一层的原始特征要单独拿出来
    for i in range(1,len(num_head_list)):
      self.layers.append(HANLayer(num_meta_paths,hidden_size*num_head_list[i],
                    hidden_size,num_head_list[i],dropout))
    self.predict = nn.Linear(hidden_size*num_head_list[-1],out_size) # x线性变换用于分类 
  def forward(self,graph_list,h):
    for layer in self.layers:
      h = layer(graph_list,h)
    return self.predict(h)


### 加载ACM数据集(dgl版本)

In [0]:
import pickle
import dgl
from scipy import sparse # 这个常用于存储矩阵
from scipy import io as sio

In [0]:
# 下载dgl提供的HAN的ACM数据集
# dgl ACM dataset:https://data.dgl.ai/dataset/ACM3025.pkl
import urllib
data_path = '/content/drive/My Drive/GNN/data/ACM_HAN/'
url = 'https://data.dgl.ai/dataset/ACM3025.pkl'
urllib.request.urlretrieve(url, data_path+"ACM3025.pkl")
print("finish downloading")

finish downloading


In [0]:
# 这里的self-loop是指每个结点与自己本身是否有边相连
# 该数据加载中调用了dgl libray用于建立使用元路径定义的同质图

def get_binary_mask(total_size, indices):
    mask = torch.zeros(total_size)
    mask[indices] = 1
    return mask.byte() # convert to torch.uint8

def load_acm(remove_self_loop,data_path):
    with open(data_path, 'rb') as f:
        data = pickle.load(f)  # type(data): class dict
    # type(data['label']): <class 'scipy.sparse.csr.csr_matrix'>
    # crs: Compressed Sparse Row matrix
    # todense(): Return a dense matrix representation of this matrix.
    # todense():将采用特殊格式存储的数据转化为numpy数组即（dense matrix）
    # 获取结点标签以及特征
    labels, features = torch.from_numpy(data['label'].todense()).long(), \
                       torch.from_numpy(data['feature'].todense()).float()
    # print(labels.shape)       # torch.Size([3025, 3])
    # print(features.shape)      # torch.Size([3025, 1870])
    num_classes = labels.shape[1] # 论文三分类(Database, Wireless Communication, Data Mining)
    # print(labels[0,:])        # tensor([1, 0, 0]),标签采用的one-hot编码
    # print(labels.nonzero().shape)  # torch.Size([3025, 2]),每行存储的是非0元素的索引
    labels = labels.nonzero()[:, 1] # 本质上这句将one-hot变成用0，1，2表示标签
    
    if remove_self_loop:
        num_nodes = data['label'].shape[0]
        data['PAP'] = sparse.csr_matrix(data['PAP'] - np.eye(num_nodes))
        data['PLP'] = sparse.csr_matrix(data['PLP'] - np.eye(num_nodes))

    # Adjacency matrices for meta path based neighbors
    # (Mufei): I verified both of them are binary adjacency matrices with self loops
    # 这里直接建立了使用元路径从异质图转化来的2个同质图
    # 这里PLP就是原论文中PSP
    # ntype是结点名称，etype是边的名称
    author_g = dgl.graph(data['PAP'], ntype='paper', etype='author')
    subject_g = dgl.graph(data['PLP'], ntype='paper', etype='subject')

    gs = [author_g, subject_g]

    train_idx = torch.from_numpy(data['train_idx']).long().squeeze(0)
    val_idx = torch.from_numpy(data['val_idx']).long().squeeze(0)
    test_idx = torch.from_numpy(data['test_idx']).long().squeeze(0)

    num_nodes = author_g.number_of_nodes()
    train_mask = get_binary_mask(num_nodes, train_idx)
    val_mask = get_binary_mask(num_nodes, val_idx)
    test_mask = get_binary_mask(num_nodes, test_idx)

    print('dataset loaded')
    # print({
    #     'dataset': 'ACM',
    #     'train': train_mask.sum().item() / num_nodes,
    #     'val': val_mask.sum().item() / num_nodes,
    #     'test': test_mask.sum().item() / num_nodes
    # })

    return gs, features, labels, num_classes, train_idx, val_idx, test_idx, \
           train_mask, val_mask, test_mask

In [0]:
def get_device():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    if torch.cuda.is_available():
        print("device is cuda, # cuda is: ", n_gpu)
    else:
        print("device is cpu, not recommend")
    return device, n_gpu
device, n_gpu = get_device()
print(device)

device is cuda, # cuda is:  1
cuda:0


In [0]:
# The configuration below is from the paper.
default_configure = {
    'lr': 0.005,          # Learning rate
    'num_heads': [8],        # Number of attention heads for node-level attention
    'hidden_units': 8,
    'dropout': 0.6,
    'weight_decay': 0.001,
    'num_epochs': 200,
    'patience': 100
}

sampling_configure = {
    'batch_size': 20
}

### 模型训练

使用GPU进行训练图网络时，最好将模型与特征都送入到GPU中

In [0]:
# graph_list:列表中同质图的邻接矩阵,有self-loop,该图的类型是dgl库中特有的类型
# features: In ACM dataset, features of paper (node_num,dim)-->([3025, 1870])
# labels:  labels of nodes (node_num)-->([3025])
# num_classes: number of node class --> 3
# train_idx: ([600])   tensor
# val_idx:  ([300])   tensor
# test_idx： ([2125])   tensor
# 上面3行数目加起来正好是3025
# train_mask、val_mask、test_mask：torch.uint8,[3025]
# mask张量大小与节点总的数目一致
# print(type(graph_list[0])) # dgl.heterograph.DGLHeteroGraph
data_path = '/content/drive/My Drive/GNN/data/ACM_HAN/'
data_path = data_path+"ACM3025.pkl"
graph_list,features,labels,num_classes,train_idx,val_idx,test_idx, \
train_mask,val_mask,test_mask=load_acm(remove_self_loop=False,data_path=data_path)

dataset loaded


In [0]:
# 将数据送入到GPU
features = features.to(device)
labels = labels.to(device)
train_mask = train_mask.to(device)
val_mask = val_mask.to(device)
test_mask = test_mask.to(device)

In [0]:
# model = HAN(num_meta_paths=len(g),in_size=features.shape[1],hidden_size=args['hidden_units'],
#         out_size=num_classes,num_heads=args['num_heads'],dropout=args['dropout']).to(device)
model=HAN(num_meta_paths=len(graph_list),
       in_size=features.shape[1],
       hidden_size=8,
       out_size=num_classes,
       num_head_list=[8],
       dropout=0.6)
model.to(device)
output = model(graph_list,features)
print(output.shape)

AttributeError: ignored