In [1]:
# !pip install torch_geometric

In [2]:
from torch_geometric.datasets import Planetoid

In [3]:
# Load the Cora dataset through Planetoid, using the current directory as the dataset root.
dset = Planetoid(root=".",name="Cora")

In [4]:
# Take the first graph object from the dataset.
data = dset[0]

In [5]:
# Dataset-level summary: graph count, node count, feature width, class count.
# A single print with a newline separator emits one item per line.
print(
    f"Dataset: {dset}",
    '-----------------',
    f'Number of graphs: {len(dset)}',
    f'Number of nodes: {data.x.shape[0]}',
    f'Number of features: {dset.num_features}',
    f'Number of classes: {dset.num_classes}',
    sep="\n",
)

Dataset: Cora()
-----------------
Number of graphs: 1
Number of nodes: 2708
Number of features: 1433
Number of classes: 7


In [6]:
# Structural properties of the graph: directedness, isolated nodes, self-loops.
# (Labels corrected: "Edges are directed" / "Graph has isolated nodes".)
print("Graph:")
print("--------")
print(f"Edges are directed: {data.is_directed()}")
print(f"Graph has isolated nodes: {data.has_isolated_nodes()}")
print(f"Graph has loops: {data.has_self_loops()}")

Graph:
--------
Edge are directed: False
Graph hash isolated nodes: False
Graph has loops: False


In [7]:
from torch_geometric.datasets import FacebookPagePage

In [8]:
# Load the FacebookPagePage dataset (current directory as root); this rebinds `dset`.
dset = FacebookPagePage(root=".")

In [9]:
# First graph of the Facebook dataset; rebinds `data`, replacing the Cora graph.
data = dset[0]

In [10]:
# Dataset-level summary: graph count, node count, feature width, class count.
# A single print with a newline separator emits one item per line.
print(
    f"Dataset: {dset}",
    '-----------------',
    f'Number of graphs: {len(dset)}',
    f'Number of nodes: {data.x.shape[0]}',
    f'Number of features: {dset.num_features}',
    f'Number of classes: {dset.num_classes}',
    sep="\n",
)

Dataset: FacebookPagePage()
-----------------
Number of graphs: 1
Number of nodes: 22470
Number of features: 128
Number of classes: 4


In [11]:
# Structural properties of the graph: directedness, isolated nodes, self-loops.
# (Labels corrected: "Edges are directed" / "Graph has isolated nodes".)
print("Graph:")
print("--------")
print(f"Edges are directed: {data.is_directed()}")
print(f"Graph has isolated nodes: {data.has_isolated_nodes()}")
print(f"Graph has loops: {data.has_self_loops()}")

Graph:
--------
Edge are directed: False
Graph hash isolated nodes: False
Graph has loops: True


In [12]:
# Split the nodes into contiguous train / validation / test index ranges.
# range() stops are exclusive, so each split must start exactly where the
# previous one stops; starting at stop + 1 would leave nodes 18000 and 20000
# out of every split. The upper bound comes from the feature matrix instead of
# a hard-coded node count.
data.train_mask = range(18000)
data.val_mask = range(18000, 20000)
data.test_mask = range(20000, data.x.shape[0])

In [13]:
import torch_geometric.transforms as T

In [14]:
# Switch back to Cora. Rebinding `dset`/`data` means the index ranges assigned to
# the Facebook graph above are no longer reachable through `data`; the cells that
# follow operate on the Cora graph.
dset = Planetoid(root=".",name="Cora")
data = dset[0]

In [15]:
import pandas as pd

In [16]:
# Tabular view of the graph's node data: one row per node, one column per
# feature, plus a `label` column holding the node's class.
df_x = pd.DataFrame(data.x.numpy())
# Assign the labels as a plain array (same conversion as the features above)
# instead of wrapping the tensor in a second DataFrame.
df_x['label'] = data.y.numpy()

In [17]:
# Display the combined feature/label table.
df_x

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1424,1425,1426,1427,1428,1429,1430,1431,1432,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2703,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
2704,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
2705,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
2706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3


In [18]:
import torch 
from torch.nn import Linear
import torch.nn.functional as F

In [19]:
def accuracy(y_pred, y_true):
    """Return the fraction of predictions that equal their targets.

    Args:
        y_pred: Tensor of predicted labels.
        y_true: Tensor of reference labels, compared element-wise with ``y_pred``.

    Returns:
        A 0-dim tensor: the number of matching positions divided by ``len(y_true)``.
    """
    n_correct = (y_pred == y_true).sum()
    return n_correct / len(y_true)

In [20]:
class MLP(torch.nn.Module):
    """Two-layer perceptron that classifies nodes from their features only.

    The network is ``Linear -> ReLU -> Linear -> log_softmax``; no graph
    structure is used.

    Args:
        dim_in: Size of each input feature vector.
        dim_h: Number of hidden units.
        dim_out: Number of output classes.
    """

    def __init__(self, dim_in, dim_h, dim_out):
        super().__init__()
        self.linear1 = Linear(dim_in, dim_h)
        self.linear2 = Linear(dim_h, dim_out)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1) for ``x``."""
        x = self.linear1(x)
        x = torch.relu(x)
        x = self.linear2(x)
        return F.log_softmax(x, dim=1)

    def fit(self, data, epochs, lr=0.01, weight_decay=5e-4, log_every=20):
        """Train with Adam on the nodes selected by ``data.train_mask``.

        Args:
            data: Object exposing ``x``, ``y``, ``train_mask`` and ``val_mask``.
            epochs: Index of the last epoch; the loop runs epochs ``0..epochs``
                inclusive so that the final epoch can be logged.
            lr: Adam learning rate.
            weight_decay: Adam weight decay.
            log_every: Print train/validation metrics every this many epochs;
                a falsy value disables logging.
        """
        # forward() already returns log-probabilities, so the matching criterion
        # is NLLLoss; CrossEntropyLoss would apply log-softmax a second time.
        criterion = torch.nn.NLLLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=weight_decay)
        self.train()
        for epoch in range(epochs + 1):
            optimizer.zero_grad()
            out = self(data.x)
            loss = criterion(out[data.train_mask], data.y[data.train_mask])
            acc = accuracy(out[data.train_mask].argmax(dim=1), data.y[data.train_mask])
            loss.backward()
            optimizer.step()
            if log_every and epoch % log_every == 0:
                # Validation metrics are for reporting only: skip autograd tracking.
                with torch.no_grad():
                    val_loss = criterion(out[data.val_mask], data.y[data.val_mask])
                    val_acc = accuracy(out[data.val_mask].argmax(dim=1), data.y[data.val_mask])
                print(f"Epoch {epoch:>3} | Train Loss: {loss:.3f} | Train Acc: {acc*100:>5.2f}% |Val Loss: {val_loss:.2f} | Val Acc: {val_acc*100:.2f}%")

    def test(self, data):
        """Return the accuracy on the nodes selected by ``data.test_mask``.

        Switches the module to eval mode and runs the forward pass without
        gradient tracking.
        """
        self.eval()
        with torch.no_grad():
            out = self(data.x)
        acc = accuracy(out.argmax(dim=1)[data.test_mask], data.y[data.test_mask])
        return acc

In [21]:
# MLP with 16 hidden units; input and output sizes are taken from the dataset.
mlp = MLP(dset.num_features, 16, dset.num_classes)

In [22]:
# Print the module structure (layer names and sizes).
print(mlp)

MLP(
  (linear1): Linear(in_features=1433, out_features=16, bias=True)
  (linear2): Linear(in_features=16, out_features=7, bias=True)
)


In [23]:
# Train the MLP with epochs=100; fit() prints train/validation metrics every 20 epochs.
mlp.fit(data, epochs=100)

Epock   0 | Train Loss: 1.954 | Train Acc: 14.29% |Val Loss: 1.96 | Val Acc: 7.20%
Epock  20 | Train Loss: 0.102 | Train Acc: 100.00% |Val Loss: 1.42 | Val Acc: 50.60%
Epock  40 | Train Loss: 0.011 | Train Acc: 100.00% |Val Loss: 1.57 | Val Acc: 49.80%
Epock  60 | Train Loss: 0.006 | Train Acc: 100.00% |Val Loss: 1.59 | Val Acc: 48.00%
Epock  80 | Train Loss: 0.008 | Train Acc: 100.00% |Val Loss: 1.46 | Val Acc: 51.20%
Epock 100 | Train Loss: 0.009 | Train Acc: 100.00% |Val Loss: 1.40 | Val Acc: 53.80%


In [24]:
# Accuracy of the trained MLP on the nodes selected by data.test_mask.
acc = mlp.test(data)
print(f'MLP test accuracy: {acc*100:.2f}%')

MLP test accuracy: 53.20%


### Classifying nodes with vanilla graph neural networks

In [25]:
class VanillaGNNLayer(torch.nn.Module):
    """Single vanilla GNN layer: a linear projection followed by aggregation.

    Args:
        dim_in: Size of each input feature vector.
        dim_out: Size of each output feature vector.
    """

    def __init__(self, dim_in, dim_out):
        super().__init__()
        # The projection is created without a bias term.
        self.linear = Linear(dim_in, dim_out, bias=False)

    def forward(self, x, adjacency):
        """Project ``x`` with the linear map, then left-multiply by ``adjacency``."""
        projected = self.linear(x)
        return torch.sparse.mm(adjacency, projected)

In [26]:
from torch_geometric.utils import to_dense_adj

# Dense adjacency matrix built from the edge list; [0] selects the first matrix
# of the tensor returned by to_dense_adj.
adjacency = to_dense_adj(data.edge_index)[0]
# Add the identity so every node is also connected to itself.
adjacency = adjacency + torch.eye(adjacency.shape[0])

In [27]:
# Inspect the adjacency matrix; the identity was added to its diagonal in the previous cell.
adjacency

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 1.],
        [0., 0., 0.,  ..., 0., 1., 1.]])

In [30]:
class VanillaGNN(torch.nn.Module):
    """Two-layer vanilla GNN for node classification.

    The network is ``VanillaGNNLayer -> ReLU -> VanillaGNNLayer -> log_softmax``.

    Args:
        dim_in: Size of each input feature vector.
        dim_h: Number of hidden units.
        dim_out: Number of output classes.
    """

    def __init__(self, dim_in, dim_h, dim_out):
        super().__init__()
        self.gnn1 = VanillaGNNLayer(dim_in, dim_h)
        self.gnn2 = VanillaGNNLayer(dim_h, dim_out)

    def forward(self, x, adjacency):
        """Return per-class log-probabilities (log-softmax over dim 1) for ``x``."""
        h = self.gnn1(x, adjacency)
        h = torch.relu(h)
        h = self.gnn2(h, adjacency)
        return F.log_softmax(h, dim=1)

    @staticmethod
    def _adjacency_for(data, adjacency=None):
        """Return ``adjacency`` if given, otherwise build it from ``data.edge_index``.

        The fallback is the dense adjacency matrix plus the identity (the same
        construction the notebook uses), so the model no longer depends on a
        notebook-level ``adjacency`` variable matching ``data``.
        """
        if adjacency is not None:
            return adjacency
        # Local import: only the fallback path needs torch_geometric.
        from torch_geometric.utils import to_dense_adj

        # max_num_nodes keeps the matrix square over all nodes of the graph.
        dense = to_dense_adj(data.edge_index, max_num_nodes=data.x.shape[0])[0]
        return dense + torch.eye(dense.shape[0], dtype=dense.dtype, device=dense.device)

    def fit(self, data, epochs, adjacency=None, lr=0.01, weight_decay=5e-4, log_every=20):
        """Train with Adam on the nodes selected by ``data.train_mask``.

        Args:
            data: Object exposing ``x``, ``y``, ``train_mask`` and ``val_mask``
                (and ``edge_index`` when ``adjacency`` is not given).
            epochs: Index of the last epoch; the loop runs epochs ``0..epochs``
                inclusive so that the final epoch can be logged.
            adjacency: Optional adjacency matrix to aggregate with. When omitted
                it is built once from ``data.edge_index`` with the identity added.
            lr: Adam learning rate.
            weight_decay: Adam weight decay.
            log_every: Print train/validation metrics every this many epochs;
                a falsy value disables logging.
        """
        adjacency = self._adjacency_for(data, adjacency)
        # forward() already returns log-probabilities, so the matching criterion
        # is NLLLoss; CrossEntropyLoss would apply log-softmax a second time.
        criterion = torch.nn.NLLLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=weight_decay)
        self.train()
        for epoch in range(epochs + 1):
            optimizer.zero_grad()
            out = self(data.x, adjacency)
            loss = criterion(out[data.train_mask], data.y[data.train_mask])
            acc = accuracy(out[data.train_mask].argmax(dim=1), data.y[data.train_mask])
            loss.backward()
            optimizer.step()
            if log_every and epoch % log_every == 0:
                # Validation metrics are for reporting only: skip autograd tracking.
                with torch.no_grad():
                    val_loss = criterion(out[data.val_mask], data.y[data.val_mask])
                    val_acc = accuracy(out[data.val_mask].argmax(dim=1), data.y[data.val_mask])
                print(f'Epoch {epoch:>3} | Train Loss:{loss:.3f} | Train Acc: {acc*100:>5.2f} % | Val Loss: {val_loss:.2f} | Val Acc: {val_acc*100:.2f}%')

    def test(self, data, adjacency=None):
        """Return the accuracy on the nodes selected by ``data.test_mask``.

        Args:
            data: Object exposing ``x``, ``y`` and ``test_mask`` (and
                ``edge_index`` when ``adjacency`` is not given).
            adjacency: Optional adjacency matrix; built from ``data.edge_index``
                with the identity added when omitted.
        """
        adjacency = self._adjacency_for(data, adjacency)
        self.eval()
        with torch.no_grad():
            out = self(data.x, adjacency)
        acc = accuracy(out.argmax(dim=1)[data.test_mask], data.y[data.test_mask])
        return acc

In [31]:
# Build a vanilla GNN with 16 hidden units (input/output sizes from the dataset),
# show its structure, train it with epochs=100, and report accuracy on the
# nodes selected by data.test_mask.
gnn = VanillaGNN(dset.num_features, 16, dset.num_classes)
print(gnn)
gnn.fit(data, epochs=100)
acc = gnn.test(data)
print(f'\nGNN test accuracy: {acc*100:.2f}%')

VanillaGNN(
  (gnn1): VanillaGNNLayer(
    (linear): Linear(in_features=1433, out_features=16, bias=False)
  )
  (gnn2): VanillaGNNLayer(
    (linear): Linear(in_features=16, out_features=7, bias=False)
  )
)
Epoch   0 | Train Loss:2.235 | Train Acc: 12.86 % | Val Loss: 2.20 | Val Acc: 14.20%
Epoch  20 | Train Loss:0.099 | Train Acc: 99.29 % | Val Loss: 1.47 | Val Acc: 76.60%
Epoch  40 | Train Loss:0.016 | Train Acc: 100.00 % | Val Loss: 2.03 | Val Acc: 75.80%
Epoch  60 | Train Loss:0.007 | Train Acc: 100.00 % | Val Loss: 2.18 | Val Acc: 76.00%
Epoch  80 | Train Loss:0.004 | Train Acc: 100.00 % | Val Loss: 2.18 | Val Acc: 75.60%
Epoch 100 | Train Loss:0.002 | Train Acc: 100.00 % | Val Loss: 2.17 | Val Acc: 76.00%

GNN test accuracy: 77.00%
