# GCN Model

In [23]:
import torch
from torch_geometric.data import Data
import numpy as np
import scipy.sparse as sp
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
import json
from torch_geometric.utils import dense_to_sparse

# ---------------------------------------------------------------------------
# Load the raw inputs: sparse adjacency matrix, node features, labels and the
# train/test node-index split.
# ---------------------------------------------------------------------------
adj_matrix = sp.load_npz('./adj.npz')
features = np.load('./features.npy')
labels = np.load('./labels.npy')
# Context manager so the JSON file handle is closed as soon as it is parsed.
with open('./splits.json') as splits_file:
    splits = json.load(splits_file)
train_idx, test_idx = splits['idx_train'], splits['idx_test']

features = torch.FloatTensor(features)
labels = torch.LongTensor(labels)

# One label slot per node; -1 marks nodes without a known label.
# NOTE(review): the assignment below presumes labels.npy holds exactly one
# label per entry of idx_train, in the same order -- confirm against the data.
full_labels = torch.full((features.shape[0],), -1, dtype=torch.int64)
full_labels[train_idx] = labels

# Build the 2 x num_edges edge list straight from the sparse matrix instead of
# materialising a dense N x N array first. Duplicates are summed and explicit
# zeros dropped so the result contains exactly the non-zero entries, which is
# what the dense round-trip produced.
adj_coo = adj_matrix.tocoo(copy=True)  # copy: the clean-up below is in-place
adj_coo.sum_duplicates()
adj_coo.eliminate_zeros()
edge_index = torch.from_numpy(np.vstack((adj_coo.row, adj_coo.col))).long()

data = Data(x=features, edge_index=edge_index, y=full_labels)

# Boolean node masks, all False until the index lists are applied below.
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.val_mask = torch.zeros(data.num_nodes, dtype=torch.bool)

# Positional 75/25 split of the labelled nodes into train/validation
# (no shuffling: the first 75% of idx_train are used for training).
num_train = int(len(train_idx) * 0.75)

train_indices = train_idx[:num_train]
val_indices = train_idx[num_train:]

data.train_mask[train_indices] = True
data.val_mask[val_indices] = True
data.test_mask[test_idx] = True

# Class count is derived as the largest label value plus one; the -1
# placeholders never win the max as long as at least one node is labelled.
num_classes = (data.y.max() + 1).item()

class GCN(torch.nn.Module):
    """Three-layer graph convolutional network for node classification.

    Two hidden ``GCNConv`` layers (each followed by ReLU and dropout) feed a
    final ``GCNConv`` layer whose output is turned into per-node
    log-probabilities.

    Args:
        in_channels: Size of each input node feature vector. When ``None``
            (default) the module-level ``data.num_node_features`` is used.
        hidden_channels: Width of both hidden layers.
        out_channels: Number of output classes. When ``None`` (default) the
            module-level ``num_classes`` is used.
        dropout: Dropout probability applied after each hidden layer.
    """

    def __init__(self, in_channels=None, hidden_channels=128,
                 out_channels=None, dropout=0.5):
        super().__init__()
        # Fall back to the notebook globals so that a bare ``GCN()`` call
        # builds exactly the same network as before.
        if in_channels is None:
            in_channels = data.num_node_features
        if out_channels is None:
            out_channels = num_classes
        self.dropout = dropout
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return per-node log-probabilities over the classes.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.

        Returns:
            Tensor of ``log_softmax`` scores taken along dimension 1.
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv3(x, edge_index)
        return F.log_softmax(x, dim=1)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
data = data.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-3)

# Full-batch training: every epoch runs the whole graph through the model and
# computes the loss on the training nodes only.
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    # ignore_index=-1 skips any unlabeled (-1) target that slips through.
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask], ignore_index=-1)
    loss.backward()
    optimizer.step()
    if epoch % 50 == 0:
        print(f'Epoch {epoch}: {loss.item()}')

# Evaluation: switch off dropout and skip autograd bookkeeping, since no
# gradients are needed for prediction.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)
# The reported accuracy is measured on the validation nodes.
correct = (pred[data.val_mask] == data.y[data.val_mask]).sum()
acc = int(correct) / int(data.val_mask.sum())
print(f'Accuracy: {acc:.4f}')



Epoch 0: 1.9533114433288574
Epoch 50: 0.10230010747909546
Epoch 100: 0.09286082535982132
Epoch 150: 0.09389713406562805
Accuracy: 0.8468


In [24]:
# Collect the predictions for the test nodes in the order listed in test_idx.
# (A boolean-mask lookup would return them in ascending node-index order
# instead, and its result was immediately overwritten, so it is dropped.)
test_preds = pred[test_idx]
# Preview the first ten predictions as the cell output.
test_preds[0:10]

tensor([1, 3, 2, 1, 1, 2, 3, 1, 1, 1])

In [25]:
# Write one integer prediction per line to the submission file; the tensor is
# moved to host memory first so it can be converted to a NumPy array.
np.savetxt(fname='submission.txt', X=test_preds.cpu().numpy(), fmt='%d')