In [2]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.datasets import load_iris
from sklearn.metrics.pairwise import cosine_similarity
import ipysigma

import networkx as nx
import torch
import torch.nn as nn
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from torch_geometric.data import Data




In [3]:
device = torch.device('cpu')

In [4]:
G = nx.Graph()
G_plot = nx.Graph()

In [5]:
iris = load_iris()
data = np.hstack((iris['data'],[[f'Gerard {i + 1}'] for i in range(len(iris['data']))]))
y_true = iris['target']
y_true

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [6]:
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [7]:
similarity_matrix = cosine_similarity(iris['data'],iris['data'])
similarity_matrix

array([[1.        , 0.99857916, 0.99998735, ..., 0.89178641, 0.8839093 ,
        0.88670276],
       [0.99857916, 1.        , 0.99879145, ..., 0.90415045, 0.89380008,
        0.89787445],
       [0.99998735, 0.99879145, 1.        , ..., 0.89362867, 0.88566903,
        0.88845942],
       ...,
       [0.89178641, 0.90415045, 0.89362867, ..., 1.        , 0.99780473,
        0.99912584],
       [0.8839093 , 0.89380008, 0.88566903, ..., 0.99780473, 1.        ,
        0.99886925],
       [0.88670276, 0.89787445, 0.88845942, ..., 0.99912584, 0.99886925,
        1.        ]])

In [8]:
minimum_similarity = 0.90

In [9]:
for i in tqdm(range(len(similarity_matrix))):
    if "Gerard " + str(i + 1) not in G_plot.nodes:
        G_plot.add_node("Gerard " + str(i + 1), sepal_length=iris['data'][i][0], sepal_width=iris['data'][i][1],
                   petal_length=iris['data'][i][2], petal_width=iris['data'][i][3])
        G.add_node(i, sepal_length=iris['data'][i][0], sepal_width=iris['data'][i][1],
                   petal_length=iris['data'][i][2], petal_width=iris['data'][i][3])
    for j in range(i):
        if "Gerard " + str(j + 1) not in G.nodes:
            G_plot.add_node("Gerard " + str(j + 1), sepal_length=iris['data'][j][0], sepal_width=iris['data'][j][1],
                       petal_length=iris['data'][j][2], petal_width=iris['data'][j][3])
            G.add_node(j, sepal_length=iris['data'][j][0], sepal_width=iris['data'][j][1],
                       petal_length=iris['data'][j][2], petal_width=iris['data'][j][3])
        if similarity_matrix[i][j] >= minimum_similarity:
            G_plot.add_edge("Gerard " + str(i + 1), "Gerard " + str(j + 1), weight=similarity_matrix[i][j])
            G.add_edge(i, j, weight=similarity_matrix[i][j])

print(nx.density(G))
print(nx.density(G_plot))

100%|██████████| 150/150 [00:00<00:00, 2382.02it/s]

0.8083221476510067
0.8083221476510067





In [10]:
x = torch.tensor(iris['data'], dtype=torch.float)
edges = list(G.edges)

edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
data = Data(x=x, edge_index=edge_index)
data.validate(raise_on_error=True)


True

In [11]:
data.edge_index.T

tensor([[  0,   1],
        [  0,   2],
        [  0,   3],
        ...,
        [147, 148],
        [147, 149],
        [148, 149]])

In [12]:
num_nodes = len(data.x)
train_ratio, val_ratio, test_ratio = 0.7, 0.1, 0.2

num_train = int(train_ratio * num_nodes)
num_val = int(val_ratio * num_nodes)
num_test = num_nodes - num_train - num_val

# Create masks for train, validation, and test nodes
perm = torch.randperm(num_nodes)
train_mask = perm[:num_train]
val_mask = perm[num_train:num_train + num_val]
test_mask = perm[num_train + num_val:]

# Apply masks to the data
data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)

data.train_mask[train_mask] = 1
data.val_mask[val_mask] = 1
data.test_mask[test_mask] = 1

# Access the resulting splits
train_nodes = data.train_mask.nonzero().view(-1).tolist()
val_nodes = data.val_mask.nonzero().view(-1).tolist()
test_nodes = data.test_mask.nonzero().view(-1).tolist()

print("Train nodes:", train_nodes)
print("Validation nodes:", val_nodes)
print("Test nodes:", test_nodes)

Train nodes: [1, 3, 4, 6, 7, 8, 9, 10, 13, 14, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 36, 37, 38, 39, 44, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 66, 67, 68, 70, 71, 72, 77, 78, 79, 80, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 93, 94, 95, 96, 98, 100, 101, 103, 104, 105, 106, 110, 111, 113, 114, 115, 117, 118, 119, 120, 122, 123, 124, 126, 128, 129, 130, 133, 134, 136, 139, 141, 142, 143, 144, 145, 148, 149]
Validation nodes: [2, 5, 40, 41, 45, 46, 65, 69, 97, 108, 109, 125, 132, 135, 138]
Test nodes: [0, 11, 12, 15, 20, 25, 30, 35, 42, 43, 58, 64, 73, 74, 75, 76, 81, 92, 99, 102, 107, 112, 116, 121, 127, 131, 137, 140, 146, 147]


In [13]:
data.y = torch.tensor(y_true, dtype=torch.long)

In [14]:
data

Data(x=[150, 4], edge_index=[2, 9033], train_mask=[150], val_mask=[150], test_mask=[150], y=[150])

In [15]:
def train_node_classifier(model, graph, optimizer, criterion, n_epochs=200):

    for epoch in range(1, n_epochs + 1):
        model.train()
        optimizer.zero_grad()
        out = model(graph)
        loss = criterion(out[graph.train_mask], graph.y[graph.train_mask])
        loss.backward()
        optimizer.step()

        pred = out.argmax(dim=1)
        acc = eval_node_classifier(model, graph, graph.val_mask)

        if epoch % 10 == 0:
            print(f'Epoch: {epoch:03d}, Train Loss: {loss:.3f}, Val Acc: {acc:.3f}')

    return model


def eval_node_classifier(model, graph, mask):

    model.eval()
    pred = model(graph).argmax(dim=1)
    correct = (pred[mask] == graph.y[mask]).sum()
    acc = int(correct) / int(mask.sum())

    return acc

In [16]:
class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(len(iris['feature_names']), 16)
        self.conv2 = GCNConv(16, len(iris['target_names']))

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        output = self.conv2(x, edge_index)

        return output

In [17]:
gcn = GCN().to(device)
optimizer_gcn = torch.optim.Adam(gcn.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()
gcn = train_node_classifier(gcn, data, optimizer_gcn, criterion)

test_acc = eval_node_classifier(gcn, data, data.test_mask)
print(f'Test Acc: {test_acc:.3f}')

Epoch: 010, Train Loss: 1.189, Val Acc: 0.200
Epoch: 020, Train Loss: 1.042, Val Acc: 0.400
Epoch: 030, Train Loss: 0.942, Val Acc: 0.600
Epoch: 040, Train Loss: 0.836, Val Acc: 0.600
Epoch: 050, Train Loss: 0.747, Val Acc: 0.600
Epoch: 060, Train Loss: 0.661, Val Acc: 0.600
Epoch: 070, Train Loss: 0.570, Val Acc: 0.600
Epoch: 080, Train Loss: 0.488, Val Acc: 0.733
Epoch: 090, Train Loss: 0.418, Val Acc: 0.867
Epoch: 100, Train Loss: 0.360, Val Acc: 1.000
Epoch: 110, Train Loss: 0.313, Val Acc: 1.000
Epoch: 120, Train Loss: 0.276, Val Acc: 1.000
Epoch: 130, Train Loss: 0.248, Val Acc: 1.000
Epoch: 140, Train Loss: 0.225, Val Acc: 1.000
Epoch: 150, Train Loss: 0.208, Val Acc: 1.000
Epoch: 160, Train Loss: 0.193, Val Acc: 1.000
Epoch: 170, Train Loss: 0.181, Val Acc: 1.000
Epoch: 180, Train Loss: 0.172, Val Acc: 1.000
Epoch: 190, Train Loss: 0.162, Val Acc: 1.000
Epoch: 200, Train Loss: 0.155, Val Acc: 1.000
Test Acc: 1.000


In [18]:
y_pred = torch.argmax(torch.nn.functional.softmax(gcn.forward(data)), axis=1).detach().numpy()

  y_pred = torch.argmax(torch.nn.functional.softmax(gcn.forward(data)), axis=1).detach().numpy()


In [None]:
ipysigma.Sigma(G, node_color=y_pred)