# Graph Convolutional Networks
> cora dataset

In [28]:
import torch_geometric

In [41]:
from typing import Callable, List, Optional, Tuple
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch import Tensor
from torch.optim import Optimizer
from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
# from torch_geometric.utils import accuracy
from typing_extensions import Literal, TypedDict

The Cora dataset consists of 2708 scientific publications classified into one of seven classes. The citation network consists of 5429 links. Each publication in the dataset is described by a 0/1-valued word vector indicating the absence/presence of the corresponding word from the dictionary. The dictionary consists of 1433 unique words.

In [42]:
dataset = Planetoid('./cora', name='Cora')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [54]:
num_nodes = dataset.data.num_nodes
num_edges = dataset.data.num_edges // 2
train_len = dataset[0].train_mask.sum()
val_len = dataset[0].val_mask.sum()
test_len = dataset[0].test_mask.sum()
other_len = num_nodes - train_len - val_len - test_len
print(f'Dataset: {dataset.name}')
print(f'Num. nodes: {num_nodes}(train={train_len},val={val_len}, test={test_len}, other={other_len})')
print(f'Num. edges: {num_edges}')
print(f'Num. node features: {dataset.num_node_features}')
print(f'Num. classes: {dataset.num_classes}')
print(f'Dataaset len.: {dataset.len()}')

Dataset: Cora
Num. nodes: 2708(train=140,val=500, test=1000, other=1068)
Num. edges: 5278
Num. node features: 1433
Num. classes: 7
Dataaset len.: 1


In [79]:
class GCN(torch.nn.Module):
    def __init__(
        self,
        num_node_features: int,
        num_classes: int,
        hidden_dim: int = 16,
        dropout_rate: float = 0.5,
    ) -> None:
        super().__init__()
        self.dropout1 = torch.nn.Dropout(dropout_rate)
        self.conv1 = GCNConv(num_node_features, hidden_dim)
        self.relu = torch.nn.ReLU(inplace=True)
        self.dropout2 = torch.nn.Dropout(dropout_rate)
        self.conv2 = GCNConv(hidden_dim, num_classes)
        
    def forward(self, x: Tensor, edge_index: Tensor) -> torch.Tensor:
        x = self.dropout1(x)
        x = self.conv1(x, edge_index)
        x = self.relu(x)
        x = self.dropout2(x)
        x = self.conv2(x, edge_index)
        return x

In [80]:
GCN(dataset.num_node_features, dataset.num_classes, 16, 0.5)

GCN(
  (dropout1): Dropout(p=0.5, inplace=False)
  (conv1): GCNConv(1433, 16)
  (relu): ReLU(inplace=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (conv2): GCNConv(16, 7)
)

In [86]:
dataset = Planetoid('./cora', name='Cora')
print(f'Sum of row values without normalization: {dataset[0].x.sum(dim=-1)}')

dataset = Planetoid('./cora', name='Cora', transform=T.NormalizeFeatures())
print(f'Sum of row values with normalization: {dataset[0].x.sum(dim=-1)}')

Sum of row values without normalization: tensor([ 9., 23., 19.,  ..., 18., 14., 13.])
Sum of row values with normalization: tensor([1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000])


## Training 

In [87]:
LossFn = Callable[[Tensor, Tensor], Tensor]
Stage  = Literal['train','val','test']

def train_step(
    model: torch.nn.Module, date: Data, optimizer: torch.optim.Optimizer, loss_fn:LossFn
) -> Tuple[float, float]:
    model.train()
    optimizer.zero_grad()
    mask = data.train_mask
    logits = model(data.x, data.edge_index)[mask]
    preds = logits.argmax(dim=1)
    y = data.y[mask]
    loss = loss_fn(logits, y)
    # L2 regularization to the first layer only
    
    acc = accuracy(preds, y)
    loss.backward()
    optimizer.step()
    return loss.item(), acc

@torch.no_grad()
def eval_step(model: torch.nn.Module, data: Data, loss_fn:LossFn, stage: Stage) -> Tuple[float,float]:
    model.eval()
    mask = getattr(data, f'{stage}_mask')
    logits = model(data.x, data.edge_index)[mask]
    preds = logits.argmax(dim=1)
    y = data.y[mask]
    loss = loss_fn(logits, y)
    
    acc = accuracy(preds, y)
    return loss.item(), acc

In [88]:
class HistoryDict(TypedDict):
    loss: List[float]
    acc: List[float]
    val_loss: List[float]
    val_acc: List[float]
    
def train(
    model: torch.nn.Module,
    date: Data,
    optimizer: torch.optim.Optimizer,
    loss_fn: LossFn = torch.nn.CrossEntropyLoss(),
    max_epochs: int=200,
    early_stopping: int=10,
    print_interval: int=20,
    verbose: bool=True,
) -> HistoryDict:
    history = {'loss':[], 'val_loss':[], 'acc_loss':[], 'val_acc':[]}
    for epoch in range(max_epochs):
        loss, acc = train_step(model, data, optimizer, loss_fn)
        val_loss, val_acc = eval_step(model, data, loss_fn, 'val')
        history['loss'].append(loss)
        history['acc'].append(acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        
        if epoch > early_stopping and val_loss > np.mean(history['val_loss'][-(early_stopping + 1):-1]):
            if verbose:
                print('\nEarly stopping...')
            break
            
        if verbose and epoch % print_interval == 0:
            print(f'nEpoch: {epoch}\n--------------')
            print(f'Train loss: {loss:.4f} | Train acc: {acc:.4f}')
            print(f' Val loss: {loss:.4f} | Val acc: {val_acc:.4f}')
    
    test_loss, test_acc = eval_step(model, data, loss_fn, 'test')
    if verbose:
        print(f'nEpoch: {epoch}\n--------------')
        print(f'Train loss: {loss:.4f} | Train acc: {acc:.4f}')
        print(f' Val loss: {loss:.4f} | Val acc: {val_acc:.4f}')
    return history
        

In [89]:
SEED = 42
MAX_EPOCHS = 200
LEARNING_RATE = 0.01
WEIGHT_DECAY = 5e-4
EARLY_STOPPING = 10

In [90]:
torch.manual_seed(SEED)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = GCN(dataset.num_node_features, dataset.num_classes).to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
history = train(model, data, optimizer, max_epochs=MAX_EPOCHS, early_stopping=EARLY_STOPPING)

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

- 뭐가 문제지? ㅠㅠㅠ