In [1]:
from __future__ import division

import torch
import torch.nn as nn
import torch.nn.functional as F

import math
import pdb
import pickle
import argparse
import numpy as np
import matplotlib.pyplot as plt

### 评估函数

In [2]:
def true_positive(pred, target, num_classes):
    out = []
    for i in range(num_classes):
        out.append(((pred == i) & (target == i)).sum())

    return torch.tensor(out)

In [3]:
def true_negative(pred, target, num_classes):
    out = []
    for i in range(num_classes):
        out.append(((pred != i) & (target != i)).sum())

    return torch.tensor(out)

In [4]:
def false_positive(pred, target, num_classes):
    out = []
    for i in range(num_classes):
        out.append(((pred == i) & (target != i)).sum())

    return torch.tensor(out)

In [5]:
def false_negative(pred, target, num_classes):
    out = []
    for i in range(num_classes):
        out.append(((pred != i) & (target == i)).sum())

    return torch.tensor(out)

In [6]:
def precision(pred, target, num_classes):
    tp = true_positive(pred, target, num_classes).to(torch.float)
    fp = false_positive(pred, target, num_classes).to(torch.float)

    out = tp / (tp + fp)
    out[torch.isnan(out)] = 0

    return out

In [7]:
def recall(pred, target, num_classes):
    tp = true_positive(pred, target, num_classes).to(torch.float)
    fn = false_negative(pred, target, num_classes).to(torch.float)

    out = tp / (tp + fn)
    out[torch.isnan(out)] = 0

    return out

In [8]:
def f1_score(pred, target, num_classes):
    prec = precision(pred, target, num_classes)
    rec = recall(pred, target, num_classes)

    score = 2 * (prec * rec) / (prec + rec)
    score[torch.isnan(score)] = 0

    return score

### 参数设置

In [9]:
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, help='Dataset')
parser.add_argument('--epoch', type=int, default=40, help='Training Epochs')
parser.add_argument('--node_dim', type=int, default=64, help='Node dimension')
parser.add_argument('--num_channels', type=int, default=2, help='number of channels')
parser.add_argument('--lr', type=float, default=0.005, help='learning rate')
parser.add_argument('--weight_decay', type=float, default=0.001, help='l2 reg')
parser.add_argument('--num_layers', type=int, default=2, help='number of layer')
parser.add_argument('--norm', type=str, default='true', help='normalization')
parser.add_argument('--adaptive_lr', type=str, default='false', help='adaptive learning rate')

args = parser.parse_args(args=[])

In [10]:
args.dataset = 'ACM'
args.num_layers = 2
args.adaptive_lr = True

In [11]:
args

Namespace(adaptive_lr=True, dataset='ACM', epoch=40, lr=0.005, node_dim=64, norm='true', num_channels=2, num_layers=2, weight_decay=0.001)

In [12]:
epochs = args.epoch
node_dim = args.node_dim
num_channels = args.num_channels
lr = args.lr
weight_decay = args.weight_decay
num_layers = args.num_layers
norm = args.norm
adaptive_lr = args.adaptive_lr

### 查看数据集

In [13]:
# 节点特征
with open(r"C:\Users\sss\Desktop\Graph_Transformer_Networks\data/" + args.dataset + "/node_features.pkl", "rb") as f:
    node_features = pickle.load(f)

In [14]:
# 链关系
with open(r"C:\Users\sss\Desktop\Graph_Transformer_Networks\data/" + args.dataset + "/edges.pkl", "rb") as f:
    edges = pickle.load(f)

In [15]:
# 节点标签
with open(r"C:\Users\sss\Desktop\Graph_Transformer_Networks\data/" + args.dataset + "/labels.pkl", "rb") as f:
    labels = pickle.load(f)

In [16]:
[node_features.shape, len(edges), len(labels)]

[(8994, 1902), 4, 3]

In [17]:
num_nodes = edges[0].shape[0]

In [18]:
# 将edges组合成矩阵
for i, edge in enumerate(edges):
    if i == 0:
        A = torch.from_numpy(edge.todense()).type(torch.FloatTensor).unsqueeze(-1)
    else:
        A = torch.cat([A, torch.from_numpy(edge.todense()).type(torch.FloatTensor).unsqueeze(-1)], dim=-1)

In [19]:
A = torch.cat([A, torch.eye(num_nodes).type(torch.FloatTensor).unsqueeze(-1)], dim=-1)  # 添加一个单位对角阵

In [20]:
node_features = torch.from_numpy(node_features).type(torch.FloatTensor)

In [21]:
train_node = torch.from_numpy(np.array(labels[0])[:,0]).type(torch.LongTensor)  # train
train_target = torch.from_numpy(np.array(labels[0])[:,1]).type(torch.LongTensor)  
valid_node = torch.from_numpy(np.array(labels[1])[:,0]).type(torch.LongTensor)  # val
valid_target = torch.from_numpy(np.array(labels[1])[:,1]).type(torch.LongTensor)
test_node = torch.from_numpy(np.array(labels[2])[:,0]).type(torch.LongTensor)  # test
test_target = torch.from_numpy(np.array(labels[2])[:,1]).type(torch.LongTensor)

In [22]:
# train | valid | test
[labels[0].shape, labels[1].shape, labels[2].shape]

[(600, 2), (300, 2), (2125, 2)]

In [23]:
[node_features.shape, train_node.shape, train_target.shape, valid_node.shape, valid_target.shape, test_node.shape, test_target.shape]

[torch.Size([8994, 1902]),
 torch.Size([600]),
 torch.Size([600]),
 torch.Size([300]),
 torch.Size([300]),
 torch.Size([2125]),
 torch.Size([2125])]

In [24]:
num_classes = torch.max(train_target).item() + 1

### 构建模型

In [25]:
class GTConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GTConv, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels, 1, 1))
        self.bias = None
        self.scale = nn.Parameter(torch.Tensor([0, 1]), requires_grad=False)
        self.reset_parameters()
        
    def reset_parameters(self):
        n = self.in_channels
        nn.init.constant_(self.weight, 0.1)
        if self.bias is not None:
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            nn.init.uniform_(self.bias, -bound, bound)
            
    def forward(self, A):
        '''
        0) 对weight(conv)进行softmax
        1) 对每个节点在每个edgeType上进行[2, 5, 1, 1]的卷积操作;
        2) 对每个edgeType进行加权求和，加权是通过0)softmax
        '''
        # F.softmax(self.weight, dim=1) 对self.weight做softmax：[2, 5, 1, 1]
        # A：[1, 5, 8994, 8994]：带有edgeType的邻接矩阵
        # [1, 5, 8994, 8994] * [2, 5, 1, 1] => [2, 5, 8994, 8994]
        # sum：[2, 8994, 8994]
        A = torch.sum(A * F.softmax(self.weight, dim=1), dim=1)
        return A

In [26]:
class GTLayer(nn.Module):
    # 不同edge类型的组合
    def __init__(self, in_channels, out_channels, first=True):
        super(GTLayer, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.first = first
        if self.first == True:
            self.conv1 = GTConv(in_channels, out_channels)  # W1
            self.conv2 = GTConv(in_channels, out_channels)  # W2
        else:
            self.conv1 = GTConv(in_channels, out_channels)  
    
    def forward(self, A, H_=None):  # A:[1,edgeType,N,N]
        if self.first == True:
            a = self.conv1(A)  # GTConv=>[2, N, N] #Q1
            b = self.conv2(A)  # Q2
            # 第一次矩阵相乘，得到A1
            H = torch.bmm(a, b)  # torch.bmm(a,b),tensor a 的size为(b,h,w),tensor b的size为(b,w,h),注意两个tensor的维度必须为3;
            W = [(F.softmax(self.conv1.weight, dim=1)).detach(),(F.softmax(self.conv2.weight, dim=1)).detach()]  # conv-softmax: 是为了下一次直接使用
        else:
            a = self.conv1(A)  # 第二层只有一个conv1; output:Conv输出归一化edge后的结果
            H = torch.bmm(H_,a)  # H_上一层的输出矩阵A1; 输出这一层后的结果A2;
            W = [(F.softmax(self.conv1.weight, dim=1)).detach()]
        return H, W   # H = A(1) ... A(l); W = 归一化后的权重矩阵

In [27]:
class GTN(nn.Module):
    def __init__(self, num_edge, num_channels, w_in, w_out, num_class,num_layers,norm):
        super(GTN, self).__init__()
        self.num_edge = num_edge
        self.num_channels = num_channels
        self.w_in = w_in
        self.w_out = w_out
        self.num_class = num_class
        self.num_layers = num_layers
        self.is_norm = norm
        layers = []
        for i in range(num_layers):  # layers多个GTLayer组成的; 多头channels
            if i == 0:
                layers.append(GTLayer(num_edge, num_channels, first=True))  # 第一个GT层,edge类别构建的矩阵
            else:
                layers.append(GTLayer(num_edge, num_channels, first=False))
        self.layers = nn.ModuleList(layers)
        self.weight = nn.Parameter(torch.Tensor(w_in, w_out))  # GCN
        self.bias = nn.Parameter(torch.Tensor(w_out))
        self.loss = nn.CrossEntropyLoss()
        self.linear1 = nn.Linear(self.w_out * self.num_channels, self.w_out)
        self.linear2 = nn.Linear(self.w_out, self.num_class)
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.weight)
        nn.init.zeros_(self.bias)

    def gcn_conv(self,X,H):  # 自己写了一个GCN
        X = torch.mm(X, self.weight)  # X-features; self.weight-weight
        H = self.norm(H, add=True)  # H-第i个channel下邻接矩阵;
        return torch.mm(H.t(),X)

    def normalization(self, H):
        for i in range(self.num_channels):
            if i==0:
                H_ = self.norm(H[i,:,:]).unsqueeze(0)  # Q1
            else:
                H_ = torch.cat((H_,self.norm(H[i,:,:]).unsqueeze(0)), dim=0)  # Q2
        return H_

    def norm(self, H, add=False):
        H = H.t()   # t
        if add == False:
            H = H * ((torch.eye(H.shape[0])==0).type(torch.FloatTensor))  # 建立一个对角阵; 除了自身节点，对应位置相乘。Degree(排除本身)
        else:
            H = H * ((torch.eye(H.shape[0])==0).type(torch.FloatTensor)) + torch.eye(H.shape[0]).type(torch.FloatTensor)
        deg = torch.sum(H, dim=1)  # 按行求和, 即每个节点的dgree的和
        deg_inv = deg.pow(-1)  # deg-1 归一化操作
        deg_inv[deg_inv == float('inf')] = 0
        deg_inv = deg_inv * torch.eye(H.shape[0]).type(torch.FloatTensor)  # 转换成n*n的矩阵
        H = torch.mm(deg_inv,H)  # 矩阵内积
        H = H.t()
        return H

    def forward(self, A, X, target_x, target):
        A = A.unsqueeze(0).permute(0,3,1,2)   # A.unsqueeze(0)=[1,N,N,edgeType]=>[1,edgeType,N,N]; 卷积输出的channel数量
        Ws = []
        for i in range(self.num_layers):  # 两层GTLayer:{edgeType}
            if i == 0:
                H, W = self.layers[i](A)  # GTN0:两层GTConv; A:edgeType的邻接矩阵; output: H(A(l)), W:归一化的Conv
            else:
                H = self.normalization(H)   # Conv矩阵，D-1*A的操作
                H, W = self.layers[i](A, H)  # 第一层计算完了A(原始矩阵), H(上一次计算后的A(l)); output: A2, W(第二层Conv1)
            Ws.append(W)
        
        #H,W1 = self.layer1(A)
        #H = self.normalization(H)
        #H,W2 = self.layer2(A, H)
        #H = self.normalization(H)
        #H,W3 = self.layer3(A, H)
        for i in range(self.num_channels):   # conv的channel数量
            if i == 0:
                X_ = F.relu(self.gcn_conv(X,H[i]))  # X-features; H[i]-第i个channel输出的邻接矩阵Al[i]; gcn_conv:Linear
            else:
                X_tmp = F.relu(self.gcn_conv(X,H[i]))
                X_ = torch.cat((X_,X_tmp), dim=1)  # X_拼接之后输出
        X_ = self.linear1(X_)
        X_ = F.relu(X_)
        y = self.linear2(X_[target_x])
        loss = self.loss(y, target)
        return loss, y, Ws

In [28]:
model = GTN(
    num_edge = A.shape[-1],  # edge类别的数量; 还有一个单位阵;
    num_channels = num_channels,
    w_in = node_features.shape[1],
    w_out = node_dim,
    num_class = num_classes,
    num_layers = num_layers,  # GTLayer
    norm = norm
)

In [29]:
model

GTN(
  (layers): ModuleList(
    (0): GTLayer(
      (conv1): GTConv()
      (conv2): GTConv()
    )
    (1): GTLayer(
      (conv1): GTConv()
    )
  )
  (loss): CrossEntropyLoss()
  (linear1): Linear(in_features=128, out_features=64, bias=True)
  (linear2): Linear(in_features=64, out_features=3, bias=True)
)

In [30]:
if adaptive_lr == "false":
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=0.001)
else:
    optimizer = torch.optim.Adam([
        {"params": model.weight},
        {"params": model.linear1.parameters()},
        {"params": model.linear2.parameters()},
        {"params": model.layers.parameters(), "lr":0.5}
    ], lr=0.005, weight_decay=0.001)
    
loss = nn.CrossEntropyLoss()

In [31]:
final_f1 = 0

# Train & Valid & Test
best_val_loss = 10000
best_test_loss = 10000
best_train_loss = 10000
best_train_f1 = 0
best_val_f1 = 0
best_test_f1 = 0

In [32]:
for i in range(epochs):
    for param_group in optimizer.param_groups:
        if param_group['lr'] > 0.005:
            param_group['lr'] = param_group['lr'] * 0.9
            
    print('Epoch:  ',i + 1)
    model.zero_grad()
    model.train()  # A:[8994, 8994, 5],5个edgeType; node_features;
    
    loss, y_train,Ws = model(A, node_features, train_node, train_target)
    train_f1 = torch.mean(f1_score(torch.argmax(y_train.detach(), dim=1), train_target, num_classes=num_classes)).cpu().numpy()
    print('Train - Loss: {}, Macro_F1: {}'.format(loss.detach().cpu().numpy(), train_f1))
    
    loss.backward()
    optimizer.step()
    
    model.eval()
    # Valid
    with torch.no_grad():
        val_loss, y_valid, _ = model.forward(A, node_features, valid_node, valid_target)
        val_f1 = torch.mean(f1_score(torch.argmax(y_valid, dim=1), valid_target, num_classes=num_classes)).cpu().numpy()
        print('Valid - Loss: {}, Macro_F1: {}'.format(val_loss.detach().cpu().numpy(), val_f1))
        
        test_loss, y_test, W = model.forward(A, node_features, test_node, test_target)
        test_f1 = torch.mean(f1_score(torch.argmax(y_test, dim=1), test_target, num_classes=num_classes)).cpu().numpy()
        print('Test - Loss: {}, Macro_F1: {}\n'.format(test_loss.detach().cpu().numpy(), test_f1))
        
    if val_f1 > best_val_f1:
        best_val_loss = val_loss.detach().cpu().numpy()
        best_test_loss = test_loss.detach().cpu().numpy()
        best_train_loss = loss.detach().cpu().numpy()
        best_train_f1 = train_f1
        best_val_f1 = val_f1
        best_test_f1 = test_f1 
        
print('---------------Best Results--------------------')
print('Train - Loss: {}, Macro_F1: {}'.format(best_train_loss, best_train_f1))
print('Valid - Loss: {}, Macro_F1: {}'.format(best_val_loss, best_val_f1))
print('Test - Loss: {}, Macro_F1: {}'.format(best_test_loss, best_test_f1))
final_f1 += best_test_f1

RuntimeError: [enforce fail at ..\c10\core\CPUAllocator.cpp:75] data. DefaultCPUAllocator: not enough memory: you tried to allocate 3235681440 bytes. Buy new RAM!