In [None]:
import torch
import numpy as np
import csv
import pandas as pd

import json

from torch_geometric.data import Data
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import GCNConv

import torch.nn.functional as F
from torch.optim import SGD
from torch.nn import ReLU

from tqdm import tqdm

# Load Data

In [None]:
# Location of the .npz archive; its 'edges', 'features' and 'target' arrays
# are read in the following cell.
FACEBOOK_NPZ_PATH = 'datasets/facebook.npz'
raw_np = np.load(FACEBOOK_NPZ_PATH)

In [None]:
# Convert the raw arrays to tensors with explicit dtypes instead of relying on
# whatever dtype the archive happens to store:
#   * edge_index must hold integer (long) node indices,
#   * node features must be floating point to match the model weights,
#   * CrossEntropyLoss expects class-index targets as long (int32 is rejected).
# The edge array is transposed so that edge_index has shape (2, num_edges);
# assumes the archive stores edges as (num_edges, 2) -- TODO confirm.
edges = torch.tensor(raw_np['edges'].T, dtype=torch.long).contiguous()
node_features = torch.tensor(raw_np['features'], dtype=torch.float)
target = torch.tensor(raw_np['target'], dtype=torch.long)

In [None]:
# Number of rows in the feature matrix (one row per node).
node_features.size(0)

In [None]:
# Random 80/20 split of node indices into train and test sets.
# A seeded generator makes the split identical on every Restart & Run All.
SPLIT_SEED = 0
TRAIN_FRACTION = 0.8

num_nodes = node_features.shape[0]
all_indices = np.arange(num_nodes)
split_rng = np.random.default_rng(SPLIT_SEED)

# NOTE: despite the names, these are arrays of node *indices*, not boolean masks.
train_mask = split_rng.choice(all_indices, int(num_nodes * TRAIN_FRACTION), replace=False)
# Every index not drawn for training, in ascending order.
test_mask = np.setdiff1d(all_indices, train_mask)

assert len(train_mask) + len(test_mask) == num_nodes, "train/test split must cover every node exactly once"

In [None]:
# Store the split on the graph as boolean per-node masks (length == num_nodes).
# An index array of a different length is not recognised as a node-level
# attribute, so a loader would copy it unchanged onto every mini-batch instead
# of slicing it to the sampled nodes.
num_nodes = node_features.shape[0]

train_node_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_node_mask[torch.as_tensor(train_mask, dtype=torch.long)] = True

test_node_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_node_mask[torch.as_tensor(test_mask, dtype=torch.long)] = True

graph = Data(
    x=node_features,
    edge_index=edges,
    y=target,
    train_mask=train_node_mask,
    test_mask=test_node_mask,
).contiguous()

In [None]:
# Mini-batch loader over the training nodes.
#   * num_neighbors has one entry per message-passing layer; the model applies
#     two GCNConv layers, so two hops are sampled (-1 = keep all neighbours).
#   * input_nodes restricts the seed nodes to the training split so that test
#     nodes are never used as training targets.
#   * shuffle=True changes the seed-node order every epoch.
graph_nl = NeighborLoader(
    data=graph,
    num_neighbors=[-1, -1],
    input_nodes=torch.as_tensor(train_mask, dtype=torch.long),
    batch_size=32,
    shuffle=True,
)

# Build Model

In [None]:
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network producing per-node scores.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Size of the hidden representation between the layers.
        out_channels: Number of output scores per node.
        dropout: Dropout probability applied to the hidden representation
            while the module is in training mode.

    The defaults reproduce the previously hard-coded 128 -> 16 -> 4
    architecture with a dropout probability of 0.5, so ``GCN()`` behaves as
    before.
    """

    def __init__(self, in_channels=128, hidden_channels=16, out_channels=4, dropout=0.5):
        super().__init__()
        self.dropout = dropout
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Compute unnormalised scores for every node in ``data``.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.

        Returns:
            The output of the second convolution; no softmax is applied.
        """
        x, edge_index = data.x, data.edge_index

        hidden = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while self.training is True.
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)

        return self.conv2(hidden, edge_index)

In [None]:
# Instantiate the model with its default configuration.
# NOTE(review): the training cell below creates a fresh GCN() again, so this
# instance is replaced before any training happens.
gcn_model = GCN()

# Train Model

In [None]:
# Training configuration.
TORCH_SEED = 0
NUM_EPOCHS = 100
LEARNING_RATE = 0.001
DIVERGENCE_THRESHOLD = 1e5  # stop when the summed epoch loss exceeds this

# Seed torch so weight initialisation and dropout are repeatable.
torch.manual_seed(TORCH_SEED)

gcn_model = GCN()
optimizer = SGD(gcn_model.parameters(), lr=LEARNING_RATE)
CELoss = torch.nn.CrossEntropyLoss()

gcn_model.train()  # make sure dropout is active
epochs = tqdm(range(NUM_EPOCHS))
for epoch in epochs:
    # Sum of the per-batch losses, kept as a plain float for reporting only.
    epoch_loss = 0.0
    for batch in graph_nl:
        # Step once per mini-batch: accumulating the loss tensor over the whole
        # epoch would keep every batch's autograd graph alive until backward().
        optimizer.zero_grad()
        pred = gcn_model(batch)

        # Only the first `batch_size` nodes of a NeighborLoader batch are the
        # seed nodes; the remaining nodes are sampled neighbours that provide
        # context and must not contribute to the loss.
        num_seeds = batch.batch_size
        loss = CELoss(pred[:num_seeds], batch.y[:num_seeds])

        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    epochs.set_description(f"{epoch}/{len(epochs)}, Loss {epoch_loss:.4f}")

    # Abort on divergence; NaN/inf would otherwise slip past a plain `>` test.
    if not np.isfinite(epoch_loss) or epoch_loss > DIVERGENCE_THRESHOLD:
        break

In [None]:
# Shape of the test_mask attribute as it appears on a sampled mini-batch.
# A fresh batch is drawn from the loader so this cell does not depend on the
# `batch` variable left behind by the training loop.
sample_batch = next(iter(graph_nl))
sample_batch.test_mask.shape

In [None]:
# Shape of the module-level test split, for comparison with the per-batch value above.
np.shape(test_mask)