<a href="https://colab.research.google.com/github/mepino/finGNN/blob/main/GAT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Implementation of GAT

Official resources:
* [Code](https://dsgiitr.com/blogs/gat/)

In [1]:
import os

import numpy as np
import torch.nn as nn
import torch.nn.functional as F

import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

1.12.1+cu113
[K     |████████████████████████████████| 7.9 MB 2.6 MB/s 
[K     |████████████████████████████████| 3.5 MB 2.5 MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [9]:
no_nodes = 3

in_features = 5
out_features = 2

input = torch.rand(no_nodes, in_features)

print(input.shape)
input

torch.Size([3, 5])


tensor([[0.5495, 0.9510, 0.5625, 0.4778, 0.9938],
        [0.9534, 0.3236, 0.2365, 0.1385, 0.2736],
        [0.4294, 0.0915, 0.5135, 0.4783, 0.5929]])

In [10]:
# linear_transform

W_l = nn.Parameter(torch.rand(in_features, out_features))
z_l = torch.mm(input, W_l)
z_l.shape

torch.Size([3, 2])

In [11]:
# weight for key and query

W = nn.Parameter(torch.zeros(out_features * 2, 1))
W.shape

torch.Size([4, 1])

![title](https://github.com/AntonioLonga/PytorchGeometricTutorial/blob/main/Tutorial3/a_input.png?raw=1)

In [12]:
test = torch.rand([2,3])
print('test original : \n', test)

N_ = test.size()[0]
out_features_ = test.size()[1]

print('\n input for attention : ')
torch.cat([test.repeat(1, N_).view(N_ * N_, -1), test.repeat(N_, 1)], dim=1).view(N_, -1, 2 * out_features_)

test original : 
 tensor([[0.1401, 0.6833, 0.0045],
        [0.8178, 0.7764, 0.0417]])

 input for attention : 


tensor([[[0.1401, 0.6833, 0.0045, 0.1401, 0.6833, 0.0045],
         [0.1401, 0.6833, 0.0045, 0.8178, 0.7764, 0.0417]],

        [[0.8178, 0.7764, 0.0417, 0.1401, 0.6833, 0.0045],
         [0.8178, 0.7764, 0.0417, 0.8178, 0.7764, 0.0417]]])

In [13]:
N = z_l.size()[0]

# z_l.repeat(1, N).view(N * N, -1) : source node를 반복해서 만들고, 1번 source N번, 2번 source N번, ... 이 순서대로 다 편거
# z_l.repeat(N, 1) : 이렇게 하면 1번, 2번, 3번.. 을 N개 만들게됨
a_input = torch.cat([z_l.repeat(1, N).view(N * N, -1), z_l.repeat(N, 1)], dim=1).view(N, -1, 2 * out_features)
a_input

tensor([[[1.3489, 2.3875, 1.3489, 2.3875],
         [1.3489, 2.3875, 1.1231, 1.0547],
         [1.3489, 2.3875, 0.9906, 1.2610]],

        [[1.1231, 1.0547, 1.3489, 2.3875],
         [1.1231, 1.0547, 1.1231, 1.0547],
         [1.1231, 1.0547, 0.9906, 1.2610]],

        [[0.9906, 1.2610, 1.3489, 2.3875],
         [0.9906, 1.2610, 1.1231, 1.0547],
         [0.9906, 1.2610, 0.9906, 1.2610]]], grad_fn=<ViewBackward0>)

In [24]:
# 각각의 source node가 각각의 target node와의 유사성을 additive attention으로 계산 (Bahdanau attention)
a_score = torch.bmm(3*torch.rand([3,3,4]), torch.rand([4,1]).repeat([a_input.shape[0], 1, 1])).squeeze()
#a_score = torch.bmm(a_input, W.repeat([a_input.shape[0], 1, 1])).squeeze()

leakyrelu = nn.LeakyReLU(0.2)
a_score = leakyrelu(a_score)

a_value = F.softmax(a_score, dim=-1)
a_value

tensor([[0.2384, 0.1975, 0.5641],
        [0.2522, 0.2176, 0.5302],
        [0.3473, 0.1978, 0.4549]])

In [25]:
z_l_final = torch.mm(a_value, z_l)
z_l_final

tensor([[1.1022, 1.4889],
        [1.1098, 1.5003],
        [1.1412, 1.6114]], grad_fn=<MmBackward0>)

### Masked Attention

In [None]:
# Masked Attention
adj = torch.randint(2, (3, 3))

zero_vec  = -9e15*torch.ones_like(e)
print(zero_vec.shape)

torch.Size([3, 3])


In [None]:
attention = torch.where(adj > 0, e, zero_vec)
print(adj,"\n",e,"\n",zero_vec)
attention

tensor([[1, 0, 0],
        [1, 0, 0],
        [1, 1, 1]]) 
 tensor([[-0.1791, -0.3931, -0.2842],
        [-0.0391, -0.2531, -0.1442],
        [ 0.5222, -0.1095, -0.0007]], grad_fn=<LeakyReluBackward0>) 
 tensor([[-9.0000e+15, -9.0000e+15, -9.0000e+15],
        [-9.0000e+15, -9.0000e+15, -9.0000e+15],
        [-9.0000e+15, -9.0000e+15, -9.0000e+15]])


tensor([[-1.7910e-01, -9.0000e+15, -9.0000e+15],
        [-3.9092e-02, -9.0000e+15, -9.0000e+15],
        [ 5.2224e-01, -1.0953e-01, -6.9216e-04]], grad_fn=<WhereBackward0>)

In [None]:
attention = F.softmax(attention, dim=1)
h_prime   = torch.matmul(attention, h)

In [None]:
attention

tensor([[1.0000, 0.0000, 0.0000],
        [1.0000, 0.0000, 0.0000],
        [0.4707, 0.2503, 0.2790]], grad_fn=<SoftmaxBackward0>)

In [None]:
h_prime

tensor([[-0.7827, -0.7009],
        [-0.7827, -0.7009],
        [-0.4427, -0.5813]], grad_fn=<MmBackward0>)

#### h_prime vs h

In [None]:
print(h_prime,"\n",h)

tensor([[-0.7827, -0.7009],
        [-0.7827, -0.7009],
        [-0.4427, -0.5813]], grad_fn=<MmBackward0>) 
 tensor([[-0.7827, -0.7009],
        [ 0.1574, -1.0071],
        [-0.4073,  0.0026]], grad_fn=<MmBackward0>)


In [None]:
class GATLayer(nn.Module):
    """
    Simple PyTorch Implementation of the Graph Attention layer.
    """
    def __init__(self):
        super(GATLayer, self).__init__()
      
    def forward(self, input, adj):
        print("")

# Build the layer

In [None]:
class GATLayer(nn.Module):
    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(GATLayer, self).__init__()
        
        '''
        TODO
        '''
        
    def forward(self, input, adj):
        # Linear Transformation
        h = torch.mm(input, self.W) # matrix multiplication
        N = h.size()[0]

        # Attention Mechanism
        a_input = torch.cat([h.repeat(1, N).view(N * N, -1), h.repeat(N, 1)], dim=1).view(N, -1, 2 * self.out_features)
        e       = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))

        # Masked Attention
        zero_vec  = -9e15*torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)
        
        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime   = torch.matmul(attention, h)

        if self.concat:
            return F.elu(h_prime)
        else:
            return h_prime

In [None]:
class GATLayer(nn.Module):
    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(GATLayer, self).__init__()
        self.dropout       = dropout        # drop prob = 0.6
        self.in_features   = in_features    # 
        self.out_features  = out_features   # 
        self.alpha         = alpha          # LeakyReLU with negative input slope, alpha = 0.2
        self.concat        = concat         # conacat = True for all layers except the output layer.

        
        # Xavier Initialization of Weights
        # Alternatively use weights_init to apply weights of choice 
        self.W = nn.Parameter(torch.zeros(size=(in_features, out_features)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        
        self.a = nn.Parameter(torch.zeros(size=(2*out_features, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)
        
        # LeakyReLU
        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, input, adj):
        # Linear Transformation
        h = torch.mm(input, self.W) # matrix multiplication
        N = h.size()[0]
        print(N)

        # Attention Mechanism
        a_input = torch.cat([h.repeat(1, N).view(N * N, -1), h.repeat(N, 1)], dim=1).view(N, -1, 2 * self.out_features)
        e       = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))

        # Masked Attention
        zero_vec  = -9e15*torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)
        
        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime   = torch.matmul(attention, h)

        if self.concat:
            return F.elu(h_prime)
        else:
            return h_prime

# Use it

In [None]:
from torch_geometric.data import Data
from torch_geometric.nn import GATConv
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T

import matplotlib.pyplot as plt

name_data = 'Cora'
dataset = Planetoid(root= '/tmp/' + name_data, name = name_data)
dataset.transform = T.NormalizeFeatures()

print(f"Number of Classes in {name_data}:", dataset.num_classes)
print(f"Number of Node Features in {name_data}:", dataset.num_node_features)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index


Number of Classes in Cora: 7
Number of Node Features in Cora: 1433


Processing...
Done!


In [None]:
class GAT(torch.nn.Module):
    def __init__(self):
        super(GAT, self).__init__()
        self.hid = 8
        self.in_head = 8
        self.out_head = 1
        
        
        self.conv1 = GATConv(dataset.num_features, self.hid, heads=self.in_head, dropout=0.6)
        self.conv2 = GATConv(self.hid*self.in_head, dataset.num_classes, concat=False,
                             heads=self.out_head, dropout=0.6)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
                
        x = F.dropout(x, p=0.6, training=self.training)
        x = self.conv1(x, edge_index)
        x = F.elu(x)
        x = F.dropout(x, p=0.6, training=self.training)
        x = self.conv2(x, edge_index)
        
        return F.log_softmax(x, dim=1)
    
    
    
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = "cpu"

model = GAT().to(device)
data = dataset[0].to(device)


optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

model.train()
for epoch in range(1000):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    
    if epoch%200 == 0:
        print(loss)
    
    loss.backward()
    optimizer.step()
    
    

tensor(1.9450, grad_fn=<NllLossBackward0>)
tensor(0.7059, grad_fn=<NllLossBackward0>)
tensor(0.6025, grad_fn=<NllLossBackward0>)
tensor(0.5604, grad_fn=<NllLossBackward0>)
tensor(0.5878, grad_fn=<NllLossBackward0>)


In [None]:
model.eval()
_, pred = model(data).max(dim=1)
correct = float(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / data.test_mask.sum().item()
print('Accuracy: {:.4f}'.format(acc))

Accuracy: 0.8160
