In [1]:
from google.colab import drive
drive.mount('/content/drive')

KeyboardInterrupt: ignored

# PyTorch Geometric

## install

[公式リファレンス](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html)

In [None]:
!pip install torch-scatter==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.6.0.html
!pip install torch-sparse==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.6.0.html
!pip install torch-cluster==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.6.0.html
!pip install torch-spline-conv==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.6.0.html
!pip install torch-geometric

Looking in links: https://pytorch-geometric.com/whl/torch-1.4.0.html
Collecting torch-scatter==latest+cu101
[?25l  Downloading https://pytorch-geometric.com/whl/torch-1.4.0/torch_scatter-latest%2Bcu101-cp36-cp36m-linux_x86_64.whl (10.6MB)
[K     |████████████████████████████████| 10.6MB 7.7MB/s 
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.0.4
Looking in links: https://pytorch-geometric.com/whl/torch-1.4.0.html
Collecting torch-sparse==latest+cu101
[?25l  Downloading https://pytorch-geometric.com/whl/torch-1.4.0/torch_sparse-latest%2Bcu101-cp36-cp36m-linux_x86_64.whl (15.2MB)
[K     |████████████████████████████████| 15.2MB 211kB/s 
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.1
Looking in links: https://pytorch-geometric.com/whl/torch-1.4.0.html
Collecting torch-cluster==latest+cu101
[?25l  Downloading https://pytorch-geometric.com/whl/torch-1.4.0/torch_cluster-latest%2Bcu101-cp36-cp36m-linux_x86_

## チュートリアル

[チュートリアル](https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html)

### グラフのデータハンドリング

In [None]:
import torch
from torch_geometric.data import Data

In [None]:
# 入力行列が特殊
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)
data = Data(x=x, edge_index=edge_index)

# 以下でもできる
# edge_index = torch.tensor([[0, 1],
#                            [1, 0],
#                            [1, 2],
#                            [2, 1]], dtype=torch.long)
# x = torch.tensor([[-1], [0], [1]], dtype=torch.float)
# data = Data(x=x, edge_index=edge_index.t().contiguous())  # 転置する必要がある

`contiguous()`とは、メモリ領域でエラーを出さないために必要。`view()`や`transpose()`などで使うことになる

In [None]:
test_x = torch.Tensor([[1,2,3],[4,5,6]])
test_y = torch.transpose(x, 0, 1).contiguous()
test_y_ = torch.transpose(x, 0, 1)
test_x[0, 0] = 42
print(test_y[0,0])
print(test_y_[0,0])

tensor(-1.)
tensor(-1.)


In [None]:
# 頂点ごとの特徴量
print(data.x)
print(data["x"])

tensor([[-1.],
        [ 0.],
        [ 1.]])
tensor([[-1.],
        [ 0.],
        [ 1.]])


In [None]:
# 枝情報
print(data.edge_index)
print(data["edge_index"])

tensor([[0, 1, 1, 2],
        [1, 0, 2, 1]])
tensor([[0, 1, 1, 2],
        [1, 0, 2, 1]])


In [None]:
# 頂点数
print(data.num_nodes)

3


In [None]:
# 枝数(両方から枝が出ているので2*2)
print(data.num_edges)

4


In [None]:
# cpu or gpu用のデータに変換
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)

### データセット

#### グラフ分類用データセット
グラフにクラス割り当てられている

In [1]:
from torch_geometric.datasets import TUDataset

ModuleNotFoundError: ignored

In [None]:
dataset = TUDataset(root='/content/drive/My Drive/fueki/GNN/dataset/ENZYMES', name='ENZYMES')

In [None]:
# グラフの個数
print(len(dataset))

600


In [None]:
# グラフのクラス数
print(dataset.num_classes)

6


In [None]:
# 頂点の特徴量の次元数
print(dataset.num_node_features)

3


In [None]:
# 0番目のグラフ全体の情報
# グラフには37個のノード、それぞれに3つの特徴量
# 168/2 = 84の無向エッジ
# グラフには1つのクラス(クラス1というわけではない)
print(dataset[0])

Data(edge_index=[2, 168], x=[37, 3], y=[1])


In [None]:
# グラフのクラス
print(dataset[0].y)

tensor([5])


In [None]:
# 頂点の特徴量
print(dataset[0].x)

tensor([[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.]])


In [None]:
# 枝情報
print(dataset[0].edge_index)

tensor([[ 0,  0,  0,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,
          3,  4,  4,  4,  4,  5,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
          7,  8,  8,  8,  9,  9,  9,  9,  9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
         12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
         16, 16, 17, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20, 20, 20,
         21, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 25,
         25, 25, 25, 25, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, 28, 28, 28,
         28, 28, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 31, 31, 31, 32,
         32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 35,
         35, 35, 36, 36, 36, 36],
        [ 1,  2,  3,  0,  2,  3, 24, 27,  0,  1,  3, 27, 28,  0,  1,  2,  4,  5,
         28,  3,  5,  6, 29,  3,  4,  6,  7, 29,  4,  5,  7,  8,  5,  6,  8,  9,
         10,  6,  7,  9,  7,  8, 10, 11, 12,  7,  9, 11, 12,  9, 10, 12, 26

In [None]:
# グラフの描画
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
plt.figure(figsize=(15,10))
G = nx.DiGraph()
edges = dataset[0].edge_index.t().to('cpu').detach().numpy().copy()
G.add_edges_from(dataset[0].edge_index.t().to('cpu').detach().numpy().copy())
nx.draw_networkx(G)

### ミニバッチ

In [None]:
from torch_geometric.data import DataLoader

In [None]:
# バッチ数32でデータをシャッフルするDataLoaderの作成
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

for batch in dataloader:
    print(batch)
    print(batch.num_graphs)
    break

Batch(batch=[1008], edge_index=[2, 3914], x=[1008, 3], y=[32])
32


In [None]:
from torch_scatter import scatter_mean

In [None]:
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

for data in dataloader:
    print(data)
    print(data.num_graphs)

    # グラフの全頂点の特徴量を平均している(32グラフ*21特徴量)
    x = scatter_mean(data.x, data.batch, dim=0)
    print(x.size())
    break

Batch(batch=[1037], edge_index=[2, 3900], x=[1037, 3], y=[32])
32
torch.Size([32, 3])


### transforms

In [None]:
from torch_geometric.datasets import ShapeNet

# 以下のtransformsを行う場合は、以下のコードは実行しない
# dataset = ShapeNet(root='/content/drive/My Drive/fueki/GNN/dataset/ShapeNet', categories=['Airplane'])

In [None]:
import torch_geometric.transforms as T
# torch_geometric.transformsを使うことにより、グラフ以外のデータをグラフに変換することができる前処理関数を使うことができる

In [None]:
# T.KNNGraphによって、点群から最近傍グラフを生成する
dataset = ShapeNet(root='/content/drive/My Drive/fueki/GNN/dataset/ShapeNet', categories=['Airplane'],
                    pre_transform=T.KNNGraph(k=6))

In [None]:
print(len(dataset))

2349


In [None]:
print(dataset[0])

Data(category=[1], edge_index=[2, 15108], pos=[2518, 3], x=[2518, 3], y=[2518])


In [None]:
print(dataset[0].x.shape)

torch.Size([2518, 3])


In [None]:
print(dataset[0].x[0])

tensor([-0.0392,  0.3344,  0.9416])


In [None]:
print(dataset[0].y)

tensor([0, 0, 3,  ..., 3, 1, 1])


### グラフの学習方法

#### 頂点分類用データセット
1つのグラフ内の頂点に各クラスが割り当てられている

In [None]:
from torch_geometric.datasets import Planetoid

In [None]:
dataset = Planetoid(root='/content/drive/My Drive/fueki/GNN/dataset/Cora', name='Cora')

In [None]:
# グラフの個数
print(len(dataset))

1


In [None]:
# 頂点のクラス数
print(dataset.num_classes)

7


In [None]:
# 頂点の特徴量の次元数
print(dataset.num_node_features)

1433


In [None]:
# グラフのクラス
print(dataset[0].y)

tensor([3, 4, 4,  ..., 3, 3, 3])


In [None]:
print("train : val : test = {} : {} : {}".format(dataset[0].train_mask.sum(), dataset[0].val_mask.sum(), dataset[0].test_mask.sum()))

train : val : test = 140 : 500 : 1000


In [None]:
# 頂点の特徴量(2708頂点*1433特徴量)
print(dataset[0].x.shape)
print(dataset[0].x)

torch.Size([2708, 1433])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [None]:
# 枝情報
print(dataset[0].edge_index)

tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
        [ 633, 1862, 2582,  ...,  598, 1473, 2706]])


In [None]:
print(dataset[0].train_mask, len(dataset[0].train_mask))

tensor([ True,  True,  True,  ..., False, False, False]) 2708


In [None]:
len(dataset[0])

6

#### モデル

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 入力次元(特徴量の次元), 出力次元(16)
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        # 入力次元(16), 出力次元(クラス数)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        x = F.log_softmax(x, dim=1)
        return x

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

In [None]:
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

In [None]:
model(data).shape
# 2708頂点の7クラスの帰属確率
# [2708, 7]

torch.Size([2708, 7])

In [None]:
model.eval()
pred = model(data).argmax(dim=1)
correct = float (pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / data.test_mask.sum().item()
print('Accuracy: {:.4f}'.format(acc))

Accuracy: 0.8040
