In [None]:
import torch
import numpy as np
import csv
import pandas as pd

import json

from torch_geometric.data import Data
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import GCNConv, Sequential

import torch.nn.functional as F
from torch.optim import SGD
from torch import nn

from tqdm import tqdm

from collections import defaultdict

In [None]:
# Prefer the GPU when one is available, but fall back to the CPU so that the
# notebook still runs top-to-bottom on a machine without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load Data

In [None]:
# Open the Facebook .npz archive; the following cell reads its 'edges',
# 'features' and 'target' arrays.
raw_np = np.load(file='datasets/facebook.npz')

In [None]:
# Turn the three arrays of the archive into tensors living on `device`.
# The edge array is transposed before conversion — presumably it is stored as
# (num_edges, 2) and needs to become (2, num_edges) for `edge_index`; confirm.
edges, node_features, target = (
    torch.tensor(array).to(device)
    for array in (raw_np['edges'].T, raw_np['features'], raw_np['target'])
)

In [None]:
# Random 80/20 split of the node indices.  The draw goes through a dedicated,
# seeded generator so that the split — and every metric computed from it —
# is identical after a kernel restart.
SPLIT_SEED = 42
split_rng = np.random.default_rng(SPLIT_SEED)

all_indices = np.arange(node_features.shape[0])
train_indices = split_rng.choice(all_indices, int(node_features.shape[0]*0.8), replace=False)
# Every index that was not drawn for training is held out for testing.
test_indices = all_indices[~np.isin(all_indices, train_indices)]

In [None]:
# Boolean node masks for the split: True where the node index was drawn for
# training, and the exact complement of that for testing.
in_train = np.isin(all_indices, train_indices)

train_mask = torch.tensor(in_train)
test_mask = torch.tensor(~in_train)

In [None]:
# Bundle node features, connectivity, labels and both split masks into a single
# PyG `Data` object, then make its tensors contiguous and place it on `device`.
graph = Data(x=node_features, edge_index=edges, y=target, train_mask=train_mask, test_mask=test_mask)
graph = graph.contiguous().to(device)

In [None]:
# Mini-batch loader over `graph`, 64 seed nodes per batch.  `num_neighbors` has
# a single entry, i.e. one sampling hop; per the PyG docs a value of -1 requests
# all neighbours of each seed (confirm against the installed version).
graph_nl = NeighborLoader(graph, num_neighbors=[-1], batch_size=64)

# Build Model

In [None]:
class GCN(nn.Module):
    """Stack of `GCNConv` layers with a ReLU between consecutive layers.

    Args:
        in_channels: Size of each input node-feature vector.
        n_classes: Width of the final layer (one output per class).
        hidden_layers: Optional sequence of hidden-layer widths.  ``None`` or
            an empty sequence builds a single `GCNConv(in_channels, n_classes)`.
    """

    def __init__(self, in_channels, n_classes, hidden_layers=None):
        super().__init__()
        # `None` replaces the mutable `[]` default; passing `[]` still works.
        hidden = list(hidden_layers) if hidden_layers is not None else []
        widths = [in_channels, *hidden, n_classes]

        # Always a ModuleList, even for a single layer: `forward` slices and
        # indexes it, which a bare GCNConv does not support.
        self.conv_layers = nn.ModuleList(
            [GCNConv(n_in, n_out) for n_in, n_out in zip(widths[:-1], widths[1:])]
        )

    def forward(self, data):
        """Return the last layer's raw output for every node in `data`.

        `data` must expose `.x` (node features) and `.edge_index`.  No softmax
        is applied here.
        """
        x, edge_index = data.x, data.edge_index
        # ReLU follows every layer except the last one.
        for conv in self.conv_layers[:-1]:
            x = F.relu(conv(x, edge_index))
        return self.conv_layers[-1](x, edge_index)


In [None]:
# One-hidden-layer (64 units) instance with 4 output classes.
# NOTE(review): the training cell below builds and trains its own `gcn_model`;
# this `model` is not referenced there — confirm whether it is still needed.
model = GCN(in_channels=graph.x.shape[1], n_classes=4, hidden_layers=[64]).to(device)

# Train Model

In [None]:
# Train a GCN with hidden widths 64/32/16 using plain SGD.  Each epoch takes a
# single optimizer step on the gradient averaged over all mini-batches, then
# evaluates on the full graph.
gcn_model = GCN(graph.x.shape[1], 4, hidden_layers=[64, 32, 16]).to(device)

optimizer = SGD(gcn_model.parameters(), lr=0.5)
CELoss = torch.nn.CrossEntropyLoss()

results = defaultdict(list)
epochs = tqdm(range(200))
n_batches = len(graph_nl)
for epoch in epochs:
    # ---- training ----
    gcn_model.train()
    optimizer.zero_grad()
    loss = 0.0
    for batch in graph_nl:
        pred = gcn_model(batch)
        batch_loss = CELoss(pred[batch.train_mask], batch.y[batch.train_mask])
        # Back-propagate right away, pre-scaled by 1/n_batches.  The gradients
        # accumulate to the gradient of the mean batch loss, but each batch's
        # autograd graph is freed immediately instead of being kept alive
        # until the end of the epoch.
        (batch_loss / n_batches).backward()
        loss += batch_loss.item() / n_batches
    optimizer.step()

    # Metrics are stored as plain Python floats.
    results['train_loss'].append(loss)

    # ---- evaluation on the full graph ----
    gcn_model.eval()
    with torch.no_grad():  # no autograd bookkeeping needed for metrics
        pred = gcn_model(graph)
        test_pred = pred[graph.test_mask]
        test_y = graph.y[graph.test_mask]

        results['test_loss'].append(CELoss(test_pred, test_y).item())
        results['test accuracy'].append((test_pred.argmax(dim=1) == test_y).float().mean().item())
        results['full accuracy'].append((pred.argmax(dim=1) == graph.y).float().mean().item())

    epochs.set_description(f"{epoch}/{len(epochs)}, Loss {loss:.4f}")

    torch.cuda.empty_cache()

In [None]:
import matplotlib.pyplot as plt

# One panel per metric recorded in `results`, laid out two per row.
# NOTE(review): `epochs` (a tqdm object until here) is rebound to an index
# array as in the original cell; it is not used below — confirm whether any
# later cell relies on it.
epochs = np.arange(len(epochs))
n_plots = len(results)
nrows = max(1, int(np.ceil(n_plots/2)))

# squeeze=False keeps `ax` two-dimensional even when there is a single row,
# so `ax[row, col]` is valid for any number of metrics.
fig, ax = plt.subplots(ncols=2, nrows=nrows, figsize=(15, 8), squeeze=False)

for count, result in enumerate(results):
    row, col = divmod(count, 2)
    ax[row, col].plot(results[result])
    ax[row, col].set_xlabel("epoch")
    ax[row, col].set_ylabel(result)

# With an odd number of metrics the last panel has nothing to show: hide it.
for unused_ax in ax.flat[n_plots:]:
    unused_ax.set_visible(False)


In [None]:
# Display the per-epoch test-set accuracy values appended by the training loop.
results['test accuracy']

In [None]:
# Test accuracy per epoch.  The metric is stored under 'test accuracy' (with a
# space).  A misspelt key would not raise, because `results` is a defaultdict:
# it would just yield an empty list and an empty plot.
test_accuracy = results['test accuracy']
plt.plot(range(len(test_accuracy)), test_accuracy)
plt.xlabel("epoch")
plt.ylabel("test accuracy");