<a href="https://colab.research.google.com/github/kbrezinski/CS224W-GraphML/blob/main/notebooks/node2vec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from platform import python_version

# Report the torch build and the Python interpreter version, one per line.
# The torch version decides which PyG wheel index the install cell must use.
print(torch.__version__, python_version(), sep='\n')

1.11.0+cu113
3.7.13


In [None]:
# Install PyTorch Geometric together with its companion packages
# (torch-scatter, torch-sparse, torch-cluster, torch-spline-conv).
# The wheel index passed to -f is specific to torch 1.11.0+cu113, so it has to
# match the torch version printed by the version-report cell above; update the
# URL if that version changes. -q keeps pip's output short.
!pip install torch-scatter torch-sparse \
 torch-cluster torch-spline-conv torch-geometric \
-f https://data.pyg.org/whl/torch-1.11.0+cu113.html -q
# Optional extra dependency, currently disabled:
#!pip install ogb

[K     |████████████████████████████████| 7.9 MB 51.2 MB/s 
[K     |████████████████████████████████| 3.5 MB 62.7 MB/s 
[K     |████████████████████████████████| 2.5 MB 67.8 MB/s 
[K     |████████████████████████████████| 750 kB 28.1 MB/s 
[K     |████████████████████████████████| 407 kB 31.2 MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [None]:
import os
import torch_geometric.transforms as T

from torch_geometric.datasets import Planetoid
from torch_geometric.loader import DataLoader, NeighborLoader

In [None]:
# Switch: set to True to attach the transform pipeline below to the dataset.
transformation = False

# Pipeline: random node split (500 val / 500 test, rest train), then
# TargetIndegree, then NormalizeFeatures.
transform = T.Compose(
    [
        T.RandomNodeSplit('train_rest', num_val=500, num_test=500),
        T.TargetIndegree(),
        T.NormalizeFeatures(),
    ]
)

# Cora is stored under ./data/Cora (downloaded there on first use).
path = os.path.join('data', 'Cora')

# `transform` is passed as the dataset's `transform` argument (not
# `pre_transform`), so it is not part of the download/processing step.
if transformation:
  dataset = Planetoid(path, 'Cora', transform=transform)
else:
  dataset = Planetoid(path, 'Cora', transform=None)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [None]:
# TODO: build separate loaders for training and validation.
# One shared neighbor-sampling loader over the single Cora graph:
# three sampling hops with a fan-out of 80 neighbors at each hop.
loader = NeighborLoader(dataset[0], num_neighbors=[80, 80, 80])

In [None]:
import torch.nn.functional as F
import torch_geometric.nn as nn

class GCN(torch.nn.Module):
  """Two-layer graph convolutional network for node classification.

  Args:
    in_channels: size of each input node feature vector.
    out_channels: number of output classes.
    hidden_channels: width of the hidden layer. Defaults to 16, the value
      that used to be hard-coded.
    dropout: dropout probability applied after the first layer while the
      module is in training mode. Defaults to 0.5, which is the default of
      `F.dropout` that the previous implementation relied on implicitly.
  """

  def __init__(self, in_channels, out_channels, hidden_channels=16, dropout=0.5):
    super().__init__()
    self.dropout = dropout
    self.conv1 = nn.GCNConv(in_channels, hidden_channels)
    self.conv2 = nn.GCNConv(hidden_channels, out_channels)

  def forward(self, data):
    """Return per-node log-probabilities over the classes.

    Args:
      data: object exposing `x` (node features) and `edge_index`.

    Returns:
      The output of the second convolution passed through `log_softmax`
      along dim 1, suitable for `F.nll_loss`.
    """
    x, edge_index = data.x, data.edge_index

    x = self.conv1(x, edge_index)
    x = F.relu(x)
    # Dropout is only active in training mode (model.train()).
    x = F.dropout(x, p=self.dropout, training=self.training)
    x = self.conv2(x, edge_index)

    return F.log_softmax(x, dim=1)

class GCNSeq(torch.nn.Module):
  """GCN classifier assembled with torch_geometric's `Sequential` container.

  The stack is: GCNConv(in_channels, 64) -> ReLU -> GCNConv(64, 64) -> ReLU
  -> Linear(64, out_channels). `forward` applies `log_softmax` along dim 1 to
  the stack's output.
  """

  def __init__(self, in_channels, out_channels):
    super().__init__()

    # Signature string telling Sequential that a layer consumes both the
    # node features and the edge index and produces new node features.
    conv_io = 'x, edge_index -> x'

    layers = [
        (nn.GCNConv(in_channels, 64), conv_io),
        torch.nn.ReLU(inplace=True),
        (nn.GCNConv(64, 64), conv_io),
        torch.nn.ReLU(inplace=True),
        torch.nn.Linear(64, out_channels),
    ]
    self.model = nn.Sequential('x, edge_index', layers)

  def forward(self, data):
    """Run the stack on `data.x` / `data.edge_index`; return log-probabilities."""
    logits = self.model(data.x, data.edge_index)
    return F.log_softmax(logits, dim=1)



In [None]:
# Train the GCN on mini-batches drawn from `loader` and report test accuracy
# on the full graph every 20 epochs.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN(in_channels=dataset.num_node_features,
            out_channels=dataset.num_classes).to(device)

# The Node2Vec module is kept under its own name. Previously it was assigned to
# `model`, which silently replaced the GCN: the loop below then fed graph
# batches and an NLL loss to an embedding model whose outputs are not
# log-probabilities. Node2Vec is trained with its own random-walk loader and
# loss (see `Node2Vec.loader()` / `Node2Vec.loss()`), which this cell does not do.
node2vec = nn.Node2Vec(edge_index=dataset[0].edge_index, embedding_dim=dataset.num_node_features, walk_length=4, context_size=2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

# Full graph on the training device, used for evaluation only.
full_data = dataset[0].to(device)

for epoch in range(100):
  # Re-enter training mode every epoch: the periodic evaluation below switches
  # the model to eval mode, which would otherwise leave dropout disabled.
  model.train()
  for batch in loader:

    data = batch.to(device)
    train_mask = data.train_mask
    # A sampled subgraph may contain no training nodes; the mean NLL over an
    # empty selection is NaN, so skip such batches.
    if not bool(train_mask.any()):
      continue

    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[train_mask], data.y[train_mask])
    loss.backward()
    optimizer.step()

  if not epoch % 20:
    model.eval()
    # Score on the whole graph (not on whichever mini-batch came last), and
    # without building an autograd graph.
    with torch.no_grad():
      pred = model(full_data).argmax(dim=1)
    test_mask = full_data.test_mask
    correct = (pred[test_mask] == full_data.y[test_mask]).sum()
    acc = int(correct) / int(test_mask.sum())
    print(f'Accuracy: {acc:.4f}')

tensor(-0.0056, grad_fn=<NllLossBackward0>)
tensor(-0.1122, grad_fn=<NllLossBackward0>)
tensor(-0.2091, grad_fn=<NllLossBackward0>)
tensor(-0.2916, grad_fn=<NllLossBackward0>)
tensor(-0.3609, grad_fn=<NllLossBackward0>)
