<a href="https://colab.research.google.com/github/musicjae/GNN/blob/main/GAT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Add this in a Google Colab cell to install the correct version of Pytorch Geometric.
import torch

def format_pytorch_version(version):
  """Return the bare release part of a torch version string.

  Everything from the first '+' onward (the local build tag, such as
  '+cu101') is dropped, so '1.7.0+cu101' becomes '1.7.0'. A string
  without '+' is returned unchanged.
  """
  release, _sep, _build_tag = version.partition('+')
  return release

# Installed torch release with any '+<build tag>' suffix removed; it is
# interpolated into the wheel index URL of the pip commands in this cell.
TORCH_version = torch.__version__
TORCH = format_pytorch_version(TORCH_version)

def format_cuda_version(version):
  """Build the compute-platform tag used in the extension wheel names.

  Args:
    version: CUDA version string as reported by ``torch.version.cuda``
      (e.g. ``'10.1'``), or ``None`` when torch was built without CUDA.

  Returns:
    ``'cu'`` followed by the version with the dots removed
    (``'10.1'`` -> ``'cu101'``), or ``'cpu'`` when no version is given.
  """
  # torch.version.cuda is None on CPU-only builds; calling .replace on it
  # raised AttributeError. Fall back to the CPU wheel tag instead.
  if not version:
    return 'cpu'
  return 'cu' + version.replace('.', '')

# CUDA version string reported by the installed torch build, converted to
# the tag format used in the wheel names (the captured output shows 'cu101').
CUDA_version = torch.version.cuda
CUDA = format_cuda_version(CUDA_version)

# Install the PyTorch Geometric extension packages from the wheel index that
# matches the detected torch release, then torch-geometric itself.
# IPython substitutes {CUDA} and {TORCH} with the Python variables above
# before handing each line to the shell.
!pip install torch-scatter==latest+{CUDA}     -f https://pytorch-geometric.com/whl/torch-{TORCH}.html
!pip install torch-sparse==latest+{CUDA}      -f https://pytorch-geometric.com/whl/torch-{TORCH}.html
!pip install torch-cluster==latest+{CUDA}     -f https://pytorch-geometric.com/whl/torch-{TORCH}.html
!pip install torch-spline-conv==latest+{CUDA} -f https://pytorch-geometric.com/whl/torch-{TORCH}.html
!pip install torch-geometric 

Looking in links: https://pytorch-geometric.com/whl/torch-1.7.0.html
Collecting torch-scatter==latest+cu101
[?25l  Downloading https://pytorch-geometric.com/whl/torch-1.7.0/torch_scatter-latest%2Bcu101-cp36-cp36m-linux_x86_64.whl (11.9MB)
[K     |████████████████████████████████| 11.9MB 3.3MB/s 
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.0.5
Looking in links: https://pytorch-geometric.com/whl/torch-1.7.0.html
Collecting torch-sparse==latest+cu101
[?25l  Downloading https://pytorch-geometric.com/whl/torch-1.7.0/torch_sparse-latest%2Bcu101-cp36-cp36m-linux_x86_64.whl (24.3MB)
[K     |████████████████████████████████| 24.3MB 1.2MB/s 
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.8
Looking in links: https://pytorch-geometric.com/whl/torch-1.7.0.html
Collecting torch-cluster==latest+cu101
[?25l  Downloading https://pytorch-geometric.com/whl/torch-1.7.0/torch_cluster-latest%2Bcu101-cp36-cp36m-linux_x86_

In [21]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

torch.manual_seed(2020) # seed for reproducible numbers
from torch_geometric.data import Data
from torch_geometric.nn import GATConv
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T

import matplotlib.pyplot as plt
%matplotlib notebook

import warnings
warnings.filterwarnings("ignore")

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the Cora citation graph through the Planetoid loader, using
# /tmp/Cora as the dataset root directory.
name_data = 'Cora'
dataset = Planetoid(root= '/tmp/' + name_data, name = name_data)
# Feature normalisation is attached as a transform after construction.
# NOTE(review): presumably applied only when a graph is fetched by indexing
# (dataset[0]), not to the raw dataset.data tensors -- confirm in the PyG docs.
dataset.transform = T.NormalizeFeatures()

# Quick sanity check of the problem size.
print(f"Number of Classes in {name_data}:", dataset.num_classes)
print(f"Number of Node Features in {name_data}:", dataset.num_node_features)

Number of Classes in Cora: 7
Number of Node Features in Cora: 1433


# Exploration of the Cora Dataset

https://graphsandnetworks.com/the-cora-dataset/

In [3]:
# Feature vector of the first node in the raw dataset storage.
first_node_features = dataset.data.x[0]
print(len(first_node_features))

# Keep every non-zero entry together with its 1-based position in the vector.
listn = [
    (value, position)
    for position, value in enumerate(first_node_features, start=1)
    if value != 0
]

print(listn)

1433
[(tensor(1.), 20), (tensor(1.), 82), (tensor(1.), 147), (tensor(1.), 316), (tensor(1.), 775), (tensor(1.), 878), (tensor(1.), 1195), (tensor(1.), 1248), (tensor(1.), 1275)]


In [6]:
# Integer target labels stored in `y`; the same field is used as the
# classification target when computing the training loss.
print(dataset.data.y)

tensor([3, 4, 4,  ..., 3, 3, 3])


In [13]:
# Length of the first row of edge_index.
# NOTE(review): presumably one entry per directed edge -- confirm.
len(dataset.data.edge_index[0])

10556

## GAT mechanism  
  
- Current hidden state vec: $h = \{\vec{h_1}, ..., \vec{h_N}\}$  
- 선형 변환: $W\vec{h}$  

  - Detail: $\mathrm{LeakyReLU}\left(\vec{a}^{T}\,\mathrm{concat}(W\vec{h}_{target},\ W\vec{h}_{neighbor})\right)$

      - $\vec{a}^{T}$: Weight vector (of size $2F'$)  
      - $W$: Trainable Weight  

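- New hidden state vec: $\vec{h}_i' = \sigma\left(\sum_{j\in N_i} \alpha_{ij} W \vec{h}_j\right)$, where $i$ is the target node, $j$ ranges over its neighbors $N_i$, and $\alpha_{ij}$ is the attention score above normalized with a softmax over $j \in N_i$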
      

#### Preview of codes  
 - Tensor.repeat

In [16]:
# Tensor.repeat(r, c) tiles the 2x3 tensor r times along dim 0 (rows) and
# c times along dim 1 (columns).
a = torch.Tensor([[1,2,3],[4,5,6]])
# (1, 2): each row is written twice side by side -> 2x6.
print(a.repeat(1,2))
# (2, 1): the whole block of rows is stacked twice -> 4x3.
print(a.repeat(2,1))
# (2, 3): both at once -> 4x9.
print(a.repeat(2,3))

tensor([[1., 2., 3., 1., 2., 3.],
        [4., 5., 6., 4., 5., 6.]])
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [1., 2., 3.],
        [4., 5., 6.]])
tensor([[1., 2., 3., 1., 2., 3., 1., 2., 3.],
        [4., 5., 6., 4., 5., 6., 4., 5., 6.],
        [1., 2., 3., 1., 2., 3., 1., 2., 3.],
        [4., 5., 6., 4., 5., 6., 4., 5., 6.]])


- view

In [21]:
# Combining repeat and view: repeat(1, 2) writes every row twice side by
# side (2x6); view(2*2, -1) then reads the same 12 values as 4 rows of 3,
# so each original row shows up twice in succession.
a = torch.Tensor([[1,2,3],[4,5,6]])
a = a.repeat(1,2)
print(a)
print(a.view(2*2,-1))

tensor([[1., 2., 3., 1., 2., 3.],
        [4., 5., 6., 4., 5., 6.]])
tensor([[1., 2., 3.],
        [1., 2., 3.],
        [4., 5., 6.],
        [4., 5., 6.]])


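- We then compute a pair-wise, un-normalized attention score between two neighboring nodes.  
- To do so, the layer concatenates the two transformed node embeddings, takes the dot product of the result with a learnable weight vector $\vec a^{(l)}$, and applies a LeakyReLU at the end.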

In [55]:
class GAT(torch.nn.Module):
    """Two-layer graph attention network for node classification.

    The first ``GATConv`` layer uses ``in_head`` attention heads of ``hid``
    features each; the second layer therefore takes ``hid * in_head`` input
    features and, with ``concat=False``, emits one score per class.
    ``forward`` returns per-node log-probabilities.

    Args:
        in_channels: Number of input node features. When ``None`` (default)
            it is read from the notebook-level ``dataset.num_features``.
        num_classes: Number of output classes. When ``None`` (default) it is
            read from the notebook-level ``dataset.num_classes``.
        hid: Hidden features per attention head in the first layer.
        in_head: Number of attention heads in the first layer.
        out_head: Number of attention heads in the second layer.
        dropout: Dropout probability, passed to both ``GATConv`` layers and
            applied to the input of each layer in ``forward``.

    Calling ``GAT()`` with no arguments builds exactly the configuration that
    was previously hard-coded.
    """

    def __init__(self, in_channels=None, num_classes=None, hid=8, in_head=8,
                 out_head=1, dropout=0.6):
        super(GAT, self).__init__()
        # Only fall back to the global dataset when the sizes are not given,
        # so the model can also be built for other datasets.
        if in_channels is None:
            in_channels = dataset.num_features
        if num_classes is None:
            num_classes = dataset.num_classes

        self.hid = hid
        self.in_head = in_head
        self.out_head = out_head
        self.dropout = dropout

        self.conv1 = GATConv(in_channels, self.hid, heads=self.in_head,
                             dropout=self.dropout)
        self.conv2 = GATConv(self.hid * self.in_head, num_classes, concat=False,
                             heads=self.out_head, dropout=self.dropout)

    def forward(self, data):
        """Compute log-softmax class scores for every node.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.

        Returns:
            Tensor of log-probabilities, normalised over dim 1.
        """
        x, edge_index = data.x, data.edge_index

        # Dropout before the GAT layer is used to avoid overfitting in small datasets like Cora.
        # One can skip them if the dataset is sufficiently large.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv1(x, edge_index)
        x = F.elu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [56]:
# Same device selection as in the setup cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = GAT().to(device)

# First (index 0) graph of the dataset, moved to the same device as the model.
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

# This call is repeated at the top of every iteration below.
model.train()
for epoch in range(1000):
    # Training mode keeps the dropout calls in GAT.forward active.
    model.train()
    # Clear gradients left over from the previous step.
    optimizer.zero_grad()
    # Forward pass over the whole graph; `out` has one row per node.
    out = model(data)
    # Negative log-likelihood restricted to the nodes selected by train_mask.
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    
    # Report the loss every 200 epochs.
    if epoch%200 == 0:
        print(loss)
    
    loss.backward()
    optimizer.step()

tensor(1.9423, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.6604, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.6134, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.5520, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.5447, device='cuda:0', grad_fn=<NllLossBackward>)


In [57]:
# Evaluation mode disables dropout.
model.eval()
# The original `data=dt` referenced a name that is not defined anywhere in the
# visible notebook cells and raised NameError on a fresh kernel. Evaluate on
# the Cora graph instead, moved to the device the model lives on.
data = dataset[0].to(device)
# Inference only: no autograd graph is needed.
with torch.no_grad():
    # Predicted class = index of the largest log-probability per node.
    pred = model(data).argmax(dim=1)
# Accuracy over the nodes selected by test_mask.
correct = float(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / data.test_mask.sum().item()
print('Accuracy: {:.4f}'.format(acc))

Accuracy: 0.8270
