In [1]:
import os
import torch
# Store the installed PyTorch version string in the TORCH environment variable
# of this process, then print it.
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

1.12.1


In [2]:
import torch_geometric
from torch_geometric.datasets import Planetoid

In [6]:
# Opt-in switch for GPU execution: the device-selection cell picks 'cuda' only
# when this flag is True and CUDA is available, otherwise it uses 'cpu'.
use_cuda_if_available = False

# Load the dataset

https://github.com/kimiyoung/planetoid/

In [3]:
# Load the Cora graph from the Planetoid collection; "tutorial1" is the
# directory handed to the dataset as its root folder.
dataset = Planetoid(
    root="tutorial1",
    name="Cora",
)

##### Dataset properties

Cora
- citation graph
- One huge graph

In [4]:
# Print the dataset repr followed by one "label value" line per property.
print(dataset)
summary_rows = (
    ("number of graphs:\t\t", len(dataset)),
    ("number of classes:\t\t", dataset.num_classes),
    ("number of node features:\t", dataset.num_node_features),
    ("number of edge features:\t", dataset.num_edge_features),
)
for label, value in summary_rows:
    print(label, value)

Cora()
number of graphs:		 1
number of classes:		 7
number of node features:	 1433
number of edge features:	 0


##### Dataset shapes

- x : node features (1433 dimensions per node)
- edge_index : [[start_node_index],[end_node_index]]
- y : node class label, 0~6 (7 classes)
- train/val/test_mask : train/validation/test split

In [5]:
# Print the dataset's underlying Data object: the stored tensors and their shapes.
print(dataset.data)

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])


In [6]:
# For each tensor of interest print its shape and (abbreviated) contents,
# with a blank separator between consecutive sections.
graph = dataset.data
sections = [
    ("edge_index:\t\t", graph.edge_index),
    ("train_mask:\t\t", graph.train_mask),  # 1-D tensor
    ("x:\t\t", graph.x),                    # node features
    ("y:\t\t", graph.y),                    # node labels
]
for position, (heading, tensor) in enumerate(sections):
    if position:
        print("\n")
    print(heading, tensor.shape)
    print(tensor)

edge_index:		 torch.Size([2, 10556])
tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
        [ 633, 1862, 2582,  ...,  598, 1473, 2706]])


train_mask:		 torch.Size([2708])
tensor([ True,  True,  True,  ..., False, False, False])


x:		 torch.Size([2708, 1433])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


y:		 torch.Size([2708])
tensor([3, 4, 4,  ..., 3, 3, 3])


In [7]:
import os.path as osp

import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv


In [8]:
# The dataset holds exactly one graph (len(dataset) == 1 above), so index 0
# is the whole citation graph.
data = dataset[0]  # one huge graph

SAGEConv(in_channels, out_channels, aggr='mean', normalize=False, root_weight=True, project=False, bias=True)

Source code : https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/nn/conv/sage_conv.html
        
- in_channels : Input feature size (node feature)
- out_channels : Output feature size
- aggr : aggregation function

In [9]:
class Net(torch.nn.Module):
    """Single-layer GraphSAGE node classifier.

    The network is one ``SAGEConv`` layer that maps node features straight to
    class scores, followed by a log-softmax over the class dimension.

    Args:
        in_channels: Size of each input node feature vector. ``None``
            (default) falls back to ``dataset.num_features`` from the
            notebook namespace.
        out_channels: Number of output classes. ``None`` (default) falls back
            to ``dataset.num_classes`` from the notebook namespace.
        aggr: Neighbourhood aggregation handed to ``SAGEConv``
            (e.g. ``"max"``, ``"mean"``, ``"add"``).
    """

    def __init__(self, in_channels=None, out_channels=None, aggr="max"):
        super().__init__()

        # Resolve the defaults lazily so that ``Net()`` keeps working exactly
        # as before while explicit sizes make the class reusable elsewhere.
        if in_channels is None:
            in_channels = dataset.num_features
        if out_channels is None:
            out_channels = dataset.num_classes

        self.conv = SAGEConv(in_channels, out_channels, aggr=aggr)

    def forward(self, x=None, edge_index=None):
        """Return log-probabilities over the classes for every node.

        Args:
            x: Node feature matrix. ``None`` (default) uses the notebook-level
                ``data.x``.
            edge_index: Edge list of the graph. ``None`` (default) uses the
                notebook-level ``data.edge_index``.

        Returns:
            Tensor of per-node log-probabilities (log-softmax along dim 1).
        """
        # The globals are looked up at call time, so a ``data`` object that
        # was moved to another device after construction is picked up.
        if x is None:
            x = data.x
        if edge_index is None:
            edge_index = data.edge_index

        x = self.conv(x, edge_index)  # node features + edge list as input
        # log_softmax here + nll_loss in training == cross-entropy
        return F.log_softmax(x, dim=1)
    

In [12]:
# Run on the GPU only when CUDA is available and the notebook flag asks for
# it; in every other case stay on the CPU.
gpu_requested_and_present = torch.cuda.is_available() and use_cuda_if_available
device_name = 'cuda' if gpu_requested_and_present else 'cpu'
device = torch.device(device_name)
device

device(type='cpu')

In [13]:
# Build the model and move it and the graph to the selected device.
model = Net().to(device)
data = data.to(device)

# Adam over all model parameters, with L2 regularisation via weight_decay.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

In [27]:
'''
Notes:
- data = dataset[0]
- data(mask) = generator -> iter
- model()[mask] -> mask filtering
- .max(1) -> row-wise max value & index of the filtered node tensors (e.g. 140 nodes for train)
'''
# data('train_mask') yields a single (name, tensor) pair; the name is ignored.
for _, mask in data('train_mask'):
    # Keep only the training nodes' outputs, then take the per-row maximum.
    print(model()[mask].max(1))

torch.return_types.max(
values=tensor([-7.1921e-03, -1.8998e-03, -2.2474e-03, -1.3277e-02, -5.5584e-03,
        -6.0664e-03, -1.8021e-03, -2.5338e-02, -6.6105e-03, -1.2725e-02,
        -7.4663e-03, -6.4635e-03, -2.5638e-02, -5.7374e-03, -4.9899e-03,
        -6.1799e-03, -3.6712e-03, -4.8001e-03, -3.5643e-03, -1.0229e-02,
        -1.3479e-03, -1.2848e-02, -3.1575e-03, -1.8285e-02, -7.2596e-05,
        -4.6279e-03, -8.1708e-04, -4.4687e-03, -1.1988e-02, -1.1594e-02,
        -5.1126e-03, -1.2092e-02, -7.1117e-03, -1.0908e-03, -5.6903e-03,
        -3.5459e-03, -2.1301e-03, -5.4314e-03, -4.6802e-03, -1.0918e-03,
        -4.8828e-03, -3.7892e-03, -3.1568e-03, -2.5734e-04, -4.7364e-03,
        -1.8760e-03, -8.0582e-03, -2.2357e-03, -2.0464e-03, -6.6190e-03,
        -1.3557e-02, -3.7653e-03, -4.5124e-03, -6.4650e-04, -9.4231e-03,
        -3.9037e-03, -2.7030e-03, -1.1304e-02, -1.4164e-02, -3.1431e-04,
        -5.1405e-03, -3.5623e-03, -1.4704e-02, -6.8630e-03, -3.4011e-03,
        -2.8951e-03,

In [21]:
def train():
    """Perform one full-batch optimisation step using the training nodes only."""
    model.train()
    optimizer.zero_grad()

    # The model outputs log-softmax values, so NLL loss on them equals
    # cross-entropy (cross-entropy = log_softmax + nll_loss).
    log_probs = model()

    # Only the nodes selected by train_mask contribute to the loss.
    train_mask = data.train_mask
    loss = F.nll_loss(log_probs[train_mask], data.y[train_mask])

    loss.backward()
    optimizer.step()


@torch.no_grad()
def test():
    """Evaluate the model on the train, validation and test splits.

    Gradient tracking is switched off for the whole call: evaluation never
    calls ``backward()``, so building an autograd graph would only cost
    memory and time.

    Returns:
        list: accuracies in the order [train, val, test], each computed as
        (number of correct predictions) / (number of nodes in the split).
    """
    model.eval()  # switch to evaluation mode
    # One forward pass over the whole graph; each split is selected below
    # by indexing the result with its mask.
    logits, accs = model(), []
    # data = dataset[0]; data(*names) is a generator of (name, value) pairs
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        # tensor.max(1) returns ([0] the max value of each row, [1] its index);
        # max(0) would instead give the max of each column.
        pred = logits[mask].max(1)[1]
        # tensor.eq() compares element-wise whether values at the same
        # position are equal.
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs

In [22]:
# Model selection on the validation split: remember the best validation
# accuracy seen so far together with the test accuracy from that same epoch.
best_val_acc = test_acc = 0
log = 'Epoch: {:03d}, Val: {:.4f}, Test: {:.4f}'

for epoch in range(1, 100):  # epochs 1..99
    train()
    _, val_acc, tmp_test_acc = test()

    improved = val_acc > best_val_acc
    if improved:
        best_val_acc, test_acc = val_acc, tmp_test_acc

    # Report every 10th epoch; the values shown are the best-so-far ones.
    if not epoch % 10:
        print(log.format(epoch, best_val_acc, test_acc))

Epoch: 010, Val: 0.7300, Test: 0.7200
Epoch: 020, Val: 0.7300, Test: 0.7200
Epoch: 030, Val: 0.7300, Test: 0.7200
Epoch: 040, Val: 0.7300, Test: 0.7200
Epoch: 050, Val: 0.7300, Test: 0.7200
Epoch: 060, Val: 0.7300, Test: 0.7200
Epoch: 070, Val: 0.7300, Test: 0.7200
Epoch: 080, Val: 0.7300, Test: 0.7200
Epoch: 090, Val: 0.7300, Test: 0.7200
