### 使用 PyTorch Geometric 框架实现 GCN / GAT / GraphSAGE

In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import torch
import torch.nn.functional as F
import torch.nn as nn
import warnings
warnings.filterwarnings("ignore")

from torch_geometric.data import Data
from torch_geometric.nn import GCNConv,GATConv,SAGEConv
from torch_geometric.datasets import Planetoid

#### 测试

In [2]:
# Show the installed PyTorch version string.
torch.__version__

'1.8.1+cpu'

In [3]:
# Report whether PyTorch can use a CUDA device in this environment.
torch.cuda.is_available()

False

In [4]:
# Toy path graph 0 - 1 - 2: each connection is listed once per direction.
edge_pairs = [(0, 1), (1, 0), (1, 2), (2, 1)]
# Transpose the (source, target) pairs into the 2 x num_edges layout.
edge_index = torch.tensor(edge_pairs, dtype=torch.long).t().contiguous()

# A single scalar feature for each of the three nodes, shape (3, 1).
x = torch.tensor([-1, 0, 1], dtype=torch.float).unsqueeze(1)

In [5]:
# Bundle the node features and the edge list into a torch_geometric Data object.
data = Data(x=x, edge_index=edge_index)

In [6]:
# Display the Data summary (the shape of each stored tensor).
data

Data(edge_index=[2, 4], x=[3, 1])

### 数据预处理

In [7]:
# Keep the dataset under a relative directory so the notebook runs on any
# machine instead of depending on one user's absolute Windows path.
DATA_ROOT = "data"

dataset = Planetoid(
    root=DATA_ROOT,  # root folder that holds the dataset files
    name="Cora"      # name of the dataset (sub-folder under the root)
)

In [21]:
# `device` has to exist before the graph is moved to it; defining it here keeps
# this cell runnable on a fresh kernel (Restart & Run All).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Take the item at index 0 of the dataset and move it to the selected device.
data = dataset[0].to(device)

### 构建GCN网络

In [30]:
class GCN(nn.Module):
    """Two-layer GCNConv network that outputs per-node log-probabilities.

    Args:
        feature: Size of each input node feature vector.
        hidden: Output size of the first GCNConv layer.
        classes: Output size of the second GCNConv layer (one score per class).
        dropout: Probability passed to ``F.dropout`` between the two layers.
            Defaults to 0.5, which is the value ``F.dropout`` uses when no
            probability is given, so three-argument callers behave as before.
    """

    def __init__(self, feature, hidden, classes, dropout=0.5):
        super().__init__()
        self.conv1 = GCNConv(feature, hidden)
        self.conv2 = GCNConv(hidden, classes)
        self.dropout = dropout

    def forward(self, data):
        """Run both layers and return ``log_softmax`` over dimension 1.

        Args:
            data: Object exposing ``x`` and ``edge_index`` attributes.

        Returns:
            Tensor of log-probabilities with one row per row of ``data.x``.
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

In [31]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [32]:
# Dataset dimensions, in order: node-feature count, class count,
# edge-feature count, and feature count.
[dataset.num_node_features, dataset.num_classes, dataset.num_edge_features, dataset.num_features]

[1433, 7, 0, 1433]

- 查看接口的属性

In [33]:
# [attribute for attribute in dir(data) if attribute[0].islower()]

In [34]:
# [attribute for attribute in dir(dataset) if attribute[0].islower()]
# The string below keeps a list of attribute names for reference; presumably it
# is the recorded result of the expression above (TODO confirm).

"""
['collate', 'copy', 'data', 'download', 'get', 'index_select',
 'indices', 'len', 'name', 'num_classes', 'num_edge_features',
 'num_features', 'num_node_features', 'pre_filter',
 'pre_transform', 'process', 'processed_dir', 'processed_file_names',
 'processed_paths', 'raw_dir', 'raw_file_names', 'raw_paths', 'root',
 'shuffle', 'slices', 'split', 'transform','url']
"""

"\n['collate', 'copy', 'data', 'download', 'get', 'index_select',\n 'indices', 'len', 'name', 'num_classes', 'num_edge_features',\n 'num_features', 'num_node_features', 'pre_filter',\n 'pre_transform', 'process', 'processed_dir', 'processed_file_names',\n 'processed_paths', 'raw_dir', 'raw_file_names', 'raw_paths', 'root',\n 'shuffle', 'slices', 'split', 'transform','url']\n"

In [35]:
# Build the GCN with 16 hidden units; input and output sizes come from the dataset.
model = GCN(
    feature=dataset.num_node_features,
    hidden=16,
    classes=dataset.num_classes,
)
# Move the parameters to the selected device.
model = model.to(device)

In [36]:
# Display the module's layer summary.
model

GCN(
  (conv1): GCNConv(1433, 16)
  (conv2): GCNConv(16, 7)
)

### 优化器

In [37]:
# Adam over all parameters of the current model, learning rate 0.01.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-2)

### 训练

In [41]:
# Switch the model to training mode before optimizing.
model.train()

# The mask and the labels it selects do not change between epochs.
train_mask = data.train_mask
train_labels = data.y[train_mask]

for epoch in range(1, 201):
    optimizer.zero_grad()
    out = model(data)
    # Negative log-likelihood on the rows selected by the training mask.
    loss = F.nll_loss(out[train_mask], train_labels)
    loss.backward()
    optimizer.step()
    # Report the training loss on every 10th epoch.
    if not epoch % 10:
        print(epoch, loss.item())

10 0.005470830947160721
20 0.018912168219685555
30 0.0010761326411738992
40 0.011110368184745312
50 0.002732601249590516
60 0.00457771634683013
70 0.0018456758698448539
80 0.009233931079506874
90 0.005894849542528391
100 0.0015394255751743913
110 0.0027924850583076477
120 0.004691712092608213
130 0.011004002764821053
140 0.005540255457162857
150 0.001749680028297007
160 0.0007692690123803914
170 0.0012144498759880662
180 0.009550421498715878
190 0.0051167733035981655
200 0.0016093036392703652


### 构建GAT网络

In [47]:
class GAT(nn.Module):
    """Two-layer GATConv network that outputs per-node log-probabilities.

    Args:
        feature: Size of each input node feature vector.
        hidden: Output size argument of the first GATConv layer.
        classes: Output size of the second GATConv layer (one score per class).
        heads: Number of attention heads in the first layer. The second layer
            is built with ``hidden * heads`` input features.
        dropout: Probability passed to ``F.dropout`` between the two layers.
            Defaults to 0.5, which is the value ``F.dropout`` uses when no
            probability is given, so existing callers behave as before.
    """

    def __init__(self, feature, hidden, classes, heads=1, dropout=0.5):
        super().__init__()
        self.gat1 = GATConv(feature, hidden, heads=heads)
        self.gat2 = GATConv(hidden * heads, classes)
        self.dropout = dropout

    def forward(self, data):
        """Run both layers and return ``log_softmax`` over dimension 1.

        Args:
            data: Object exposing ``x`` and ``edge_index`` attributes.

        Returns:
            Tensor of log-probabilities with one row per row of ``data.x``.
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.gat1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.gat2(x, edge_index)
        return F.log_softmax(x, dim=1)

In [48]:
# Build the GAT with hidden size 8 and 4 heads in the first layer;
# input and output sizes come from the dataset.
model = GAT(
    feature=dataset.num_node_features,
    hidden=8,
    classes=dataset.num_classes,
    heads=4,
)
# Move the parameters to the selected device.
model = model.to(device)

In [50]:
# Display the module's layer summary.
model

GAT(
  (gat1): GATConv(1433, 8, heads=4)
  (gat2): GATConv(32, 7, heads=1)
)

### 优化器

In [51]:
# Fresh Adam optimizer bound to the current model's parameters, learning rate 0.01.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-2)

### 训练

In [53]:
# Switch the model to training mode before optimizing.
model.train()

# The mask and the labels it selects do not change between epochs.
train_mask = data.train_mask
train_labels = data.y[train_mask]

for epoch in range(1, 201):
    optimizer.zero_grad()
    out = model(data)
    # Negative log-likelihood on the rows selected by the training mask.
    loss = F.nll_loss(out[train_mask], train_labels)
    loss.backward()
    optimizer.step()
    # Report the training loss on every 10th epoch.
    if not epoch % 10:
        print(epoch, loss.item())

10 0.4876019060611725
20 0.09008347243070602
30 0.047045957297086716
40 0.010428261011838913
50 0.014142460189759731
60 0.003901344956830144
70 0.002425303915515542
80 0.023129096254706383
90 0.0018718438223004341
100 0.0033128205686807632
110 0.0023456811904907227
120 0.0011621762532740831
130 0.004032126162201166
140 0.004625489469617605
150 0.0009360900730825961
160 0.0010325731709599495
170 0.0010962964734062552
180 0.002259148983284831
190 0.00041337983566336334
200 0.0007031913264654577


In [54]:
# Evaluation mode turns dropout off for the forward pass.
model.eval()
# Inference only: no_grad avoids building an autograd graph, and argmax gives
# the predicted class index without assigning to `_` (IPython's last-output name).
with torch.no_grad():
    pred = model(data).argmax(dim=1)
# Accuracy = correctly predicted test rows / number of test rows.
correct = pred[data.test_mask].eq(data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(acc)

0.46646646646646645


### 构建GraphSAGE网络

In [55]:
class GraphSAGE(nn.Module):
    """Two-layer SAGEConv network that outputs per-node log-probabilities.

    Args:
        feature: Size of each input node feature vector.
        hidden: Output size of the first SAGEConv layer.
        classes: Output size of the second SAGEConv layer (one score per class).
        dropout: Probability passed to ``F.dropout`` between the two layers.
            Defaults to 0.5, which is the value ``F.dropout`` uses when no
            probability is given, so three-argument callers behave as before.
    """

    def __init__(self, feature, hidden, classes, dropout=0.5):
        super().__init__()
        self.sage1 = SAGEConv(feature, hidden)
        self.sage2 = SAGEConv(hidden, classes)
        self.dropout = dropout

    def forward(self, data):
        """Run both layers and return ``log_softmax`` over dimension 1.

        Args:
            data: Object exposing ``x`` and ``edge_index`` attributes.

        Returns:
            Tensor of log-probabilities with one row per row of ``data.x``.
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.sage1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.sage2(x, edge_index)
        return F.log_softmax(x, dim=1)

In [56]:
# Build the GraphSAGE model with 8 hidden units; input and output sizes come
# from the dataset.
model = GraphSAGE(
    feature=dataset.num_node_features,
    hidden=8,
    classes=dataset.num_classes,
)
# Move the parameters to the selected device.
model = model.to(device)

### 优化器

In [57]:
# Fresh Adam optimizer bound to the current model's parameters, learning rate 0.01.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-2)

### 训练

In [59]:
# Switch the model to training mode before optimizing.
model.train()

# The mask and the labels it selects do not change between epochs.
train_mask = data.train_mask
train_labels = data.y[train_mask]

for epoch in range(1, 201):
    optimizer.zero_grad()
    out = model(data)
    # Negative log-likelihood on the rows selected by the training mask.
    loss = F.nll_loss(out[train_mask], train_labels)
    loss.backward()
    optimizer.step()
    # Report the training loss on every 10th epoch.
    if not epoch % 10:
        print(epoch, loss.item())

10 0.12412635236978531
20 0.0886329710483551
30 0.1426485776901245
40 0.06559161096811295
50 0.07368886470794678
60 0.0467926450073719
70 0.05026616156101227
80 0.02817082591354847
90 0.04680836945772171
100 0.07182223349809647
110 0.042454518377780914
120 0.0801805779337883
130 0.08145394176244736
140 0.03686688095331192
150 0.04174565151333809
160 0.02762432210147381
170 0.044863998889923096
180 0.02730637788772583
190 0.06068912148475647
200 0.04189097881317139


In [60]:
# Evaluation mode turns dropout off for the forward pass.
model.eval()
# Inference only: no_grad avoids building an autograd graph, and argmax gives
# the predicted class index without assigning to `_` (IPython's last-output name).
with torch.no_grad():
    pred = model(data).argmax(dim=1)
# Accuracy = correctly predicted test rows / number of test rows.
correct = pred[data.test_mask].eq(data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(acc)

0.48848848848848847
