In [None]:
# Show the installed torch version. The PyG extension wheels installed in the
# next cell come from a version-specific index (torch-2.5.1+cu124), so this is
# the value to compare against that URL.
import torch
print(torch.__version__)

In [None]:
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.5.1+cu124.html

In [None]:
!pip install torch_geometric

In [None]:
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from torch_geometric.nn import Node2Vec
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from node_embeddings import train
from torch_geometric.datasets import Actor
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from torch_geometric.nn import GCNConv, Linear, SAGEConv
from torch.nn import Dropout

In [None]:
# Load the Actor dataset, using '../data' as its root directory.
# Note the naming: `data` is the dataset container; the single graph it holds
# is extracted into `dataset` in the next cell.
# NOTE(review): presumably downloads the raw files on first use — confirm.
data = Actor('../data')
data

In [None]:
# Take the first (index 0) graph out of the dataset container. Everything
# below (features, edges, labels, masks) is read from this object.
dataset = data[0]
dataset

In [None]:
# Node labels as a NumPy array, displayed here for a quick look.
labels = dataset.y.detach().cpu().numpy()
labels

In [None]:
class GCN(torch.nn.Module):
    """Node classifier: three GraphSAGE layers followed by a 3-layer MLP head.

    Despite the class name, the message-passing layers are ``SAGEConv``
    (the name is kept so existing callers keep working).

    Layer widths: input -> 128 -> 256 -> 512 (graph layers), then
    512 -> 256 -> 128 -> ``num_classes`` (linear head). Dropout with p=0.2 is
    applied after every hidden layer.

    Args:
        num_classes: Size of the output (logit) dimension.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Only the first layer needs a lazily-inferred input size (-1): the
        # node-feature dimension depends on the dataset. The widths feeding
        # conv2/conv3 are fixed by the preceding layer, so they are spelled
        # out to avoid unnecessary uninitialized parameters.
        self.conv1 = SAGEConv(-1, 128)
        self.conv2 = SAGEConv(128, 256)
        self.conv3 = SAGEConv(256, 512)

        self.linear1 = Linear(512, 256)
        self.linear2 = Linear(256, 128)
        self.linear3 = Linear(128, num_classes)

        self.dropout = Dropout(0.2)

    def forward(self, x, edge_index):
        """Return per-node class logits.

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity passed to every ``SAGEConv`` layer.

        Returns:
            Tensor of unnormalized class scores, one row per node.
        """
        # Graph feature extractor: conv -> ReLU -> dropout, three times.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = conv(x, edge_index).relu()
            x = self.dropout(x)

        # MLP head. A non-linearity is required between the linear layers;
        # without it the three layers collapse into a single affine map and
        # the extra depth adds no capacity.
        x = self.linear1(x).relu()
        x = self.dropout(x)

        x = self.linear2(x).relu()
        x = self.dropout(x)

        # Final layer emits raw logits (no activation).
        return self.linear3(x)

In [None]:
# Take the number of output classes from the dataset instead of hard-coding
# it, so the cell stays correct if the dataset is swapped.
model = GCN(num_classes=data.num_classes)

In [None]:
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = Adam(model.parameters(), lr=0.001)

In [None]:
# Cross-entropy loss; it is fed the model's raw outputs in `train` below.
criterion = CrossEntropyLoss()

In [None]:
def train(model, dataset, optimizer, criterion, epochs=5, split=0):
    """Full-batch training loop for a node-classification model.

    Note: this definition shadows the ``train`` name imported from
    ``node_embeddings`` at the top of the notebook.

    Args:
        model: Module called as ``model(dataset.x, dataset.edge_index)``.
        dataset: Graph object exposing ``x``, ``edge_index``, ``y`` and a
            boolean ``train_mask``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss called as ``criterion(predictions, targets)``.
        epochs: Number of optimization steps (one step per epoch).
        split: Column of ``train_mask`` to use when the mask is 2-D
            (one column per predefined split). Ignored for 1-D masks.

    Returns:
        None. The loss of every epoch is printed.
    """
    train_mask = dataset.train_mask
    if train_mask.dim() > 1:
        # Several predefined splits are stored column-wise; pick one.
        train_mask = train_mask[:, split]

    # Nothing inside the loop leaves training mode, so set it once.
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        out = model(dataset.x, dataset.edge_index)
        # The loss is computed on training nodes only.
        loss = criterion(out[train_mask], dataset.y[train_mask])
        loss.backward()
        optimizer.step()

        print(f'Epoch: {epoch:03d}, Loss: {loss.item():.4f}')

In [None]:
# Run 300 full-graph training epochs with the `train` function defined in
# this notebook; one loss line is printed per epoch.
train(model=model, dataset=dataset, optimizer=optimizer, criterion=criterion, epochs=300)

In [None]:
# Node embeddings = raw output of the first graph layer (before the ReLU and
# dropout that `forward` applies). No gradients are needed here, so the pass
# is run under no_grad to avoid building an autograd graph.
with torch.no_grad():
    node_embeddings = model.conv1(dataset.x, dataset.edge_index).cpu().numpy()

In [None]:
# Node labels as a NumPy array (same expression as the earlier `labels` cell);
# used to colour the points in the plot below.
labels = dataset.y.detach().cpu().numpy()

In [None]:
# Project the embeddings to 2-D for visual inspection. t-SNE is stochastic,
# so the seed is fixed to make the figure reproducible across runs.
tsne = TSNE(n_components=2, random_state=42)
node_embeddings_2d = tsne.fit_transform(node_embeddings)

fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(node_embeddings_2d[:, 0], node_embeddings_2d[:, 1],
                    c=labels, cmap='jet', alpha=0.7)
ax.set(title='t-SNE of first-layer node embeddings',
       xlabel='t-SNE 1', ylabel='t-SNE 2')
# Trailing semicolon suppresses the colorbar object's repr in the cell output.
fig.colorbar(points, ax=ax, label='class label');

In [None]:
# `train` leaves the model in training mode, where dropout randomly zeroes
# activations. Switch to eval mode so predictions are deterministic, and skip
# gradient tracking since this is inference only.
model.eval()
with torch.no_grad():
    out = model(dataset.x, dataset.edge_index)
preds = out.argmax(dim=1)

In [None]:
# The masks may be 2-D (one column per predefined split). Use column 0, the
# same split `train` uses by default; calling nonzero() on the full 2-D mask
# would instead return nodes from every split (with duplicates), which can
# include nodes the model was trained on.
test_mask = dataset.test_mask
if test_mask.dim() > 1:
    test_mask = test_mask[:, 0]

test_idx = test_mask.nonzero(as_tuple=True)[0]
test_preds = preds[test_idx].cpu().numpy()
# Labels come from the graph object (`dataset`), matching where `preds` and
# the mask come from, rather than from the dataset container (`data`).
test_labels = dataset.y[test_idx].cpu().numpy()

In [None]:
# Fraction of test nodes whose predicted class equals the true label.
accuracy_score(test_labels, test_preds)

In [None]:
# classification_report returns a formatted string, so print() is used to
# render its line breaks.
print(classification_report(test_labels, test_preds))