# SGConv (Simple Graph Convolution) model using PyTorch Geometric with Cora

In [17]:
import torch
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import SGConv
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import torch.nn.functional as F

In [18]:
# Root folder in which Planetoid keeps (and, if needed, downloads) the dataset files.
path = '/Users/kasidej/Documents/study/gnn/'
dataset = Planetoid(root=path, name="Cora")
data = dataset[0]  # first element of the dataset: the Cora graph as tensors

# Pick the GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(device)
print('Cora: ', data)

cpu
Cora:  Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])


In [22]:
# Stand-alone SGConv layer: input width is the number of node features,
# output width is the number of classes; K=1 and cached=True as before.
sgc_model = SGConv(data.num_features, dataset.num_classes, K=1, cached=True)

In [23]:
# Compare the shape of the raw node features with the shape the SGConv layer produces.
print("Shape of the original data: ", data.x.shape)
sgc_output = sgc_model(data.x, data.edge_index)
print("Shape of the embedding data: ", sgc_output.shape)

Shape of the original data:  torch.Size([2708, 1433])
Shape of the embedding data:  torch.Size([2708, 7])


In [32]:
# Classification model: a single SGConv layer followed by a log-softmax.
class sgcnet(torch.nn.Module):
    """Single-layer SGConv classifier returning per-node log-probabilities.

    Args:
        in_channels: Width of the input node features. When omitted, falls
            back to ``data.num_features`` of the notebook-level ``data``.
        out_channels: Number of output channels (one per class). When omitted,
            falls back to ``dataset.num_classes`` of the notebook-level
            ``dataset``.
    """

    def __init__(self, in_channels=None, out_channels=None):
        super().__init__()
        # Fall back to the notebook globals so that `sgcnet()` keeps working.
        if in_channels is None:
            in_channels = data.num_features
        if out_channels is None:
            out_channels = dataset.num_classes
        self.conv1 = SGConv(in_channels=in_channels,
                            out_channels=out_channels,
                            K=1, cached=True)

    def forward(self, x=None, edge_index=None):
        """Return the log-softmax (over dim 1) of the SGConv output.

        Args:
            x: Node feature matrix. Defaults to ``data.x``.
            edge_index: Edge index tensor. Defaults to ``data.edge_index``.

        NOTE(review): ``conv1`` is created with ``cached=True``. According to
        the SGConv documentation the propagated features are cached on the
        first call, so one instance should only be used with a single graph.
        Confirm against the installed PyG version before passing other inputs.
        """
        if x is None:
            x = data.x
        if edge_index is None:
            edge_index = data.edge_index

        scores = self.conv1(x, edge_index)

        # log-probabilities, the form F.nll_loss expects
        return F.log_softmax(scores, dim=1)

In [35]:
# Build the classifier, then move both the model and the graph to the chosen device.
model = sgcnet().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.2, weight_decay=0.005)

# List every learnable parameter together with its shape.
for name, param in model.named_parameters():
    print(f"Parameter: {name}")
    print(f"Shape: {param.shape}")

Parameter: conv1.lin.weight
Shape: torch.Size([7, 1433])
Parameter: conv1.lin.bias
Shape: torch.Size([7])


In [37]:
# Display the summary of `data` (it was reassigned via data.to(device) in an earlier cell).
data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [40]:
# Call the model with no arguments: sgcnet.forward then reads the notebook-level
# `data` and returns the log-softmax of the SGConv output.
model()

tensor([[-1.9569, -1.9501, -1.9777,  ..., -1.9278, -1.9158, -1.9309],
        [-1.9630, -1.8909, -1.9572,  ..., -1.9051, -2.0197, -1.9567],
        [-1.9641, -1.8765, -1.9172,  ..., -1.9164, -2.0194, -1.9718],
        ...,
        [-1.9736, -1.9028, -1.9683,  ..., -1.9622, -1.9162, -2.0057],
        [-1.9639, -1.8991, -1.9432,  ..., -1.9353, -1.9810, -1.9734],
        [-1.9542, -1.8907, -1.9626,  ..., -1.9248, -1.9613, -1.9836]],
       grad_fn=<LogSoftmaxBackward0>)

In [41]:
# Train
def train():
    """Run one optimisation step on the training nodes.

    Uses the notebook-level ``model``, ``optimizer`` and ``data``. The model is
    evaluated on the whole graph and the loss is restricted to the nodes
    selected by ``data.train_mask``.

    Returns:
        float: The negative log-likelihood loss on the training nodes,
        computed before this step's parameter update.
    """
    model.train()
    optimizer.zero_grad()
    log_probs = model()[data.train_mask]
    targets = data.y[data.train_mask]  # ground-truth labels of the training nodes
    loss = F.nll_loss(log_probs, targets)
    loss.backward()
    optimizer.step()  # update parameters
    # Return the scalar loss so callers can monitor training; existing callers
    # that ignore the return value are unaffected.
    return loss.item()

def test():
    """Compute the accuracy on the train, validation and test splits.

    Uses the notebook-level ``model`` and ``data``. The model is run once on
    the whole graph and its predictions are compared with ``data.y`` under
    each of ``data.train_mask``, ``data.val_mask`` and ``data.test_mask``.

    Returns:
        list[float]: ``[train_acc, val_acc, test_acc]``, in that order.
    """
    model.eval()
    # Evaluation does not need gradients; skipping autograd avoids building
    # a graph for the forward pass.
    with torch.no_grad():
        logits = model()

    accs = []
    for mask in (data.train_mask, data.val_mask, data.test_mask):
        pred = logits[mask].argmax(dim=1)  # log-probabilities -> predicted label
        correct = int((pred == data.y[mask]).sum())
        accs.append(correct / int(mask.sum()))
    return accs

In [42]:
# Track the best validation accuracy seen so far, plus the test accuracy
# measured in the epoch that achieved it.
best_val_acc, test_acc = 0, 0
for epoch in range(1, 101):
    train()
    train_acc, val_acc, tmp_test_acc = test()
    if val_acc > best_val_acc:
        # New best validation score: keep it together with this epoch's test score.
        best_val_acc, test_acc = val_acc, tmp_test_acc
    # The "Val" field reports the best validation accuracy so far, not this epoch's.
    print(f'Epoch: {epoch:03d}, Train: {train_acc:.4f}, '
          f'Val: {best_val_acc:.4f}, Test: {test_acc:.4f}')

Epoch: 001, Train: 0.9714, Val: 0.5820, Test: 0.6270
Epoch: 002, Train: 0.9929, Val: 0.7420, Test: 0.7480
Epoch: 003, Train: 0.9929, Val: 0.7420, Test: 0.7480
Epoch: 004, Train: 1.0000, Val: 0.7420, Test: 0.7480
Epoch: 005, Train: 0.9929, Val: 0.7420, Test: 0.7480
Epoch: 006, Train: 0.9929, Val: 0.7420, Test: 0.7480
Epoch: 007, Train: 1.0000, Val: 0.7420, Test: 0.7480
Epoch: 008, Train: 1.0000, Val: 0.7420, Test: 0.7480
Epoch: 009, Train: 1.0000, Val: 0.7420, Test: 0.7480
Epoch: 010, Train: 1.0000, Val: 0.7420, Test: 0.7480
Epoch: 011, Train: 1.0000, Val: 0.7420, Test: 0.7480
Epoch: 012, Train: 1.0000, Val: 0.7560, Test: 0.7760
Epoch: 013, Train: 1.0000, Val: 0.7560, Test: 0.7760
Epoch: 014, Train: 1.0000, Val: 0.7560, Test: 0.7760
Epoch: 015, Train: 1.0000, Val: 0.7560, Test: 0.7760
Epoch: 016, Train: 1.0000, Val: 0.7560, Test: 0.7760
Epoch: 017, Train: 1.0000, Val: 0.7560, Test: 0.7760
Epoch: 018, Train: 1.0000, Val: 0.7560, Test: 0.7760
Epoch: 019, Train: 1.0000, Val: 0.7560, Test: 