In [1]:
!pip install torch_geometric
# !pip install torch_sparse

Defaulting to user installation because normal site-packages is not writeable


In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.utils import from_scipy_sparse_matrix
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader
from sklearn.model_selection import train_test_split
import numpy as np
import json
import scipy.sparse as sp

In [41]:
# Load the graph (sparse adjacency), the node feature matrix, the label
# array and the train/test node index split from disk.
adjacency_matrix = sp.load_npz('adj.npz')
feature_matrix = np.load('features.npy')
labels = np.load('labels.npy')
# Use a context manager so the file handle is closed deterministically.
with open('splits.json') as splits_file:
    splits = json.load(splits_file)
idx_train, idx_test = splits['idx_train'], splits['idx_test']

# NOTE(review): this assumes labels[i] is the class of node idx_train[i]
# (labels.npy holds training labels only) -- confirm against the dataset spec.
assert len(idx_train) == len(labels), (
    "expected one label per training node: "
    f"{len(idx_train)} train indices vs {len(labels)} labels"
)

# Per-node label vector: training nodes get their label, every other node
# stays 0. The size is derived from the feature matrix instead of being
# hard-coded, and the vector is indexed by *node id* (idx_train), not by the
# label values themselves.
labels_train = np.zeros(feature_matrix.shape[0])
labels_train[idx_train] = labels

# Convert the scipy sparse adjacency matrix to a PyG edge index; the
# returned edge weights are not used.
edge_index, _ = from_scipy_sparse_matrix(adjacency_matrix)


In [24]:
# Assemble the PyG graph container from the loaded arrays.
# `x` holds the float feature matrix, `y` holds the contents of labels.npy as
# int64. The cell output shows x with 2480 rows but y with only 496 entries,
# so `y` is not a per-node vector here.
node_features = torch.tensor(feature_matrix, dtype=torch.float)
node_labels = torch.tensor(labels, dtype=torch.long)
data = Data(x=node_features, edge_index=edge_index, y=node_labels)
data

Data(x=[2480, 1390], edge_index=[2, 10100], y=[496])

In [25]:
# Build one boolean mask per split: all entries start False and the
# positions listed in idx_train / idx_test are switched to True.
num_nodes = data.num_nodes
train_mask, test_mask = (
    torch.zeros(num_nodes, dtype=torch.bool) for _ in range(2)
)
train_mask[idx_train] = True
test_mask[idx_test] = True

# Attach both masks to the graph object so later cells can read them from it.
data.train_mask, data.test_mask = train_mask, test_mask

data

Data(x=[2480, 1390], edge_index=[2, 10100], y=[496], train_mask=[2480], test_mask=[2480])

In [22]:
# Define GCN model
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Architecture: GCNConv -> ReLU -> dropout -> GCNConv -> log-softmax.

    Args:
        num_node_features: Size of each input node feature vector.
        num_hidden: Width of the hidden layer.
        num_classes: Number of output classes.
        dropout: Probability of zeroing a hidden activation during training.
            Defaults to 0.5, the default of ``F.dropout``, so existing
            three-argument callers get the same behaviour as before.
    """

    def __init__(self, num_node_features, num_hidden, num_classes, dropout=0.5):
        super().__init__()
        self.conv1 = GCNConv(num_node_features, num_hidden)
        self.conv2 = GCNConv(num_hidden, num_classes)
        self.dropout = dropout

    def forward(self, data):
        """Return per-node log-probabilities over the classes.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.

        Returns:
            Tensor of log-probabilities, normalised along dim 1. Because the
            output is already log-softmaxed, pair it with an NLL-style loss
            rather than a loss that applies log-softmax again.
        """
        x, edge_index = data.x, data.edge_index

        x = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [None]:
# Fix the RNG so weight initialisation and dropout are reproducible on a
# fresh "Restart & Run All".
torch.manual_seed(0)

model = GCN(num_node_features=data.num_node_features,
            num_hidden=512,  # Example: number of hidden units
            num_classes=data.y.max().item() + 1)
# The model already returns log-probabilities (log_softmax), so the matching
# loss is NLLLoss; CrossEntropyLoss would apply log_softmax a second time.
criterion = torch.nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Select training rows by index rather than by boolean mask. A mask returns
# rows in ascending node order, which only lines up with data.y if idx_train
# happens to be sorted; indexing keeps row i paired with label i.
# NOTE(review): assumes data.y[i] is the label of node idx_train[i] -- confirm.
train_idx = torch.as_tensor(idx_train, dtype=torch.long)

for epoch in range(1000):  # Example: number of epochs
    model.train()
    optimizer.zero_grad()
    out = model(data)  # log-probabilities for every node
    loss = criterion(out[train_idx], data.y)
    loss.backward()
    optimizer.step()

    # Print the loss every 100 epochs
    if epoch % 100 == 0:
        print(f'Epoch {epoch}: Loss {loss.item()}')


Epoch 0: Loss 1.9578694105148315
Epoch 100: Loss 0.005529898684471846
Epoch 200: Loss 0.003023304045200348
Epoch 300: Loss 0.004428666550666094
Epoch 400: Loss 0.00033935607643797994


In [None]:
model.eval()  # Set model to evaluation mode (disables dropout)

# Index tensors for the labelled (training) nodes and the held-out test nodes.
# NOTE(review): assumes data.y[i] is the label of node idx_train[i] -- confirm.
train_idx = torch.as_tensor(idx_train, dtype=torch.long)
test_idx = torch.as_tensor(idx_test, dtype=torch.long)

# Per-node label vector: -1 marks nodes without a known label. Filled by
# index (not boolean mask) so each label lands on its own node even when
# idx_train is not sorted.
y = torch.full((data.num_nodes,), -1, dtype=torch.long)
y[train_idx] = data.y

# Single forward pass without gradient tracking. The model output is already
# log-softmaxed, so no further normalisation is needed before argmax.
with torch.no_grad():
    pred = model(data)
predicted_classes = pred.argmax(dim=1)  # Get class with highest probability

# Accuracy over the labelled nodes only. Test labels are not available here,
# so this is *training* accuracy; dividing by the total node count would
# count every unlabelled node as an error.
labelled = y >= 0
correct = (predicted_classes[labelled] == y[labelled]).sum().item()
accuracy = correct / max(int(labelled.sum().item()), 1)

# Write one predicted class id per test node, in idx_test order.
preds = predicted_classes[test_idx]
np.savetxt('submission.txt', preds.cpu().numpy(), fmt='%d')
print(f'accuracy : {accuracy}')




In [32]:
# Save one predicted class id per test node, in idx_test order.
# `pred` holds per-class log-probabilities (2-D float), so it must be reduced
# with argmax first; writing it directly with fmt='%d' would replace the
# submission with truncated log-probabilities instead of class labels.
preds = pred.detach().argmax(dim=1)[idx_test]
np.savetxt('submission.txt', preds.cpu().numpy(), fmt='%d')