In [None]:
#!pip install torch torchvision torchaudio
#!pip install torch-geometric

### Importing  libraries

In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import tensorflow as tf
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data
from torch_geometric.nn import RGCNConv

from neo4j import GraphDatabase
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, optimizers, losses, metrics, Model

from stellargraph import StellarGraph
from stellargraph.layer import GraphSAGE
from stellargraph.data import UnsupervisedSampler

  from .autonotebook import tqdm as notebook_tqdm


### Connect to the database and fetch graph data from Neo4j

In [2]:
# Connect to the Neo4j database
uri = "bolt://localhost:7687"
username = "neo4j"
password = "OLIV00%%"

driver = GraphDatabase.driver(uri, auth=(username, password))

# Function to fetch graph data from Neo4j
def fetch_graph_data():
    with driver.session() as session:
        result = session.run(
            "MATCH (n1)-[r]->(n2) RETURN id(n1) AS start, id(n2) AS end, type(r) AS relationship"
        )
        data = [(record["start"], record["end"], record["relationship"]) for record in result]
    return data

# Load graph data
graph_data = fetch_graph_data()

In [3]:
# Split data into training and test sets
train_data, test_data = train_test_split(graph_data, test_size=0.5, random_state=42)

graph_data

[(23, 15, 'Component'),
 (17, 29, 'Caused_by'),
 (17, 0, 'Caused_by'),
 (17, 1, 'Caused_by'),
 (17, 2, 'Caused_by'),
 (18, 3, 'Caused_by'),
 (19, 4, 'Caused_by'),
 (19, 5, 'Caused_by'),
 (19, 6, 'Caused_by'),
 (20, 7, 'Caused_by'),
 (20, 8, 'Caused_by'),
 (21, 9, 'Caused_by'),
 (22, 10, 'Caused_by'),
 (22, 11, 'Caused_by'),
 (22, 12, 'Caused_by'),
 (23, 13, 'Component'),
 (23, 14, 'Component')]

# Graph Convolution Network

### Convert the graph data into adjacency matrix

In [6]:
# Create a directed graph using NetworkX
G = nx.DiGraph()

# Add nodes and edges to the graph
for start, end, relationship in graph_data:
    G.add_node(start)
    G.add_node(end)
    G.add_edge(start, end, relationship=relationship)

# Create the adjacency matrix
adj_matrix = nx.to_numpy_matrix(G, dtype=int)

# Get the node order in the adjacency matrix
node_order = sorted(G.nodes())

# Create a dictionary to map node IDs to indices in the adjacency matrix
node_index_map = {node_id: index for index, node_id in enumerate(node_order)}

# Rearrange the adjacency matrix based on the node order
adj_matrix_reordered = np.array([[adj_matrix[node_index_map[start], node_index_map[end]] for end in node_order] for start in node_order])

# Print the adjacency matrix
print(adj_matrix_reordered)

[[0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

In [7]:
# Print the adjacency matrix shape
print(f"Adjacency matrix shape : {adj_matrix_reordered.shape}")

Adjacency matrix shape : (24, 24)


### Nodes feature  

In [29]:
# Convert the adjacency matrix to a tensor
adj_matrix_tensor = torch.tensor(adj_matrix_reordered, dtype=torch.float)

# Create an identity matrix to represent node features (assuming no node features, only structural information)
num_nodes = adj_matrix_tensor.shape[0]
identity_matrix = torch.eye(num_nodes)

# Concatenate the adjacency matrix and identity matrix as node features
node_features = torch.cat((adj_matrix_tensor, identity_matrix), dim=1)

# Create the edge index tensor for PyTorch Geometric
edge_index = torch.tensor(np.array(G.edges()).T, dtype=torch.long)

### Model building
### DIM = 32

In [30]:
# Define the Graph Convolutional Network (GCN) model
class GCN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCN, self).__init__()
        self.conv1 = nn.Linear(input_dim, hidden_dim)
        self.conv2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x, edge_index):
        x = torch.relu(self.conv1(x))
        x = self.conv2(x)
        return x

# Initialize the GCN model
input_dim = node_features.shape[1]
hidden_dim = 64
output_dim = 32
model = GCN(input_dim, hidden_dim, output_dim)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Convert the data to PyTorch Geometric Data object
data = Data(x=node_features, edge_index=edge_index)

# Training loop
num_epochs = 150
for epoch in range(num_epochs + 1):
    optimizer.zero_grad()
    output = model(data.x, data.edge_index)
    loss = criterion(output[data.edge_index[0]], data.edge_index[1])
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, Loss: {loss.item()}")

# You can now use the trained model to make predictions on new data or perform link prediction tasks.

Epoch: 0, Loss: 3.4803035259246826
Epoch: 10, Loss: 2.547050714492798
Epoch: 20, Loss: 1.4371734857559204
Epoch: 30, Loss: 1.0413086414337158
Epoch: 40, Loss: 0.9984414577484131
Epoch: 50, Loss: 0.9928731322288513
Epoch: 60, Loss: 0.9915652275085449
Epoch: 70, Loss: 0.99104905128479
Epoch: 80, Loss: 0.9908137321472168
Epoch: 90, Loss: 0.9906622767448425
Epoch: 100, Loss: 0.990551769733429
Epoch: 110, Loss: 0.9904589056968689
Epoch: 120, Loss: 0.9903779029846191
Epoch: 130, Loss: 0.990305483341217
Epoch: 140, Loss: 0.9902400970458984
Epoch: 150, Loss: 0.9901806116104126


### New links prediction

In [34]:
# Function to predict new links using the trained model
def predict_new_links(model, data, new_edges):
    model.eval()
    with torch.no_grad():
        output = model(data.x, data.edge_index)
        predicted_labels = output[new_edges[0]]
        predicted_labels = (predicted_labels >= 0.5).int()  # Threshold predictions
    return predicted_labels

# Predict new links for a set of new edge indices
new_edges = torch.tensor([[13, 17], [14, 17], [15, 17]], dtype=torch.long).t()  # Format: (start_nodes, end_nodes)
predictions = predict_new_links(model, data, new_edges)

# The predictions will be a tensor containing 0s and 1s, where 1 indicates - link and 0 indicates - no link.
print("Predicted link presence:")
print(predictions)

Predicted link presence:
tensor([[1],
        [1],
        [1]], dtype=torch.int32)


### DIM = 1

In [32]:
# Define the Graph Convolutional Network (GCN) model
class GCN(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(GCN, self).__init__()
        self.conv1 = nn.Linear(input_dim, hidden_dim)
        self.conv2 = nn.Linear(hidden_dim, 1)  # Output dimension changed to 1 for binary classification
    
    def forward(self, x, edge_index):
        x = torch.relu(self.conv1(x))
        x = self.conv2(x)
        return x

# Initialize the GCN model
input_dim = node_features.shape[1]
hidden_dim = 64
model = GCN(input_dim, hidden_dim)

# Define the loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Convert the data to PyTorch Geometric Data object
data = Data(x=node_features, edge_index=edge_index)

# Training loop
num_epochs = 150
for epoch in range(num_epochs + 1):
    optimizer.zero_grad()
    output = model(data.x, data.edge_index).squeeze()  # Squeeze the output to remove the singleton dimension
    loss = criterion(output[data.edge_index[0]], data.edge_index[1].float())
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, Loss: {loss.item()}")


Epoch: 0, Loss: 0.8947755694389343
Epoch: 10, Loss: -8.375316619873047
Epoch: 20, Loss: -29.453100204467773
Epoch: 30, Loss: -68.98267364501953
Epoch: 40, Loss: -131.4411163330078
Epoch: 50, Loss: -220.2678985595703
Epoch: 60, Loss: -338.01434326171875
Epoch: 70, Loss: -486.4073486328125
Epoch: 80, Loss: -667.0595703125
Epoch: 90, Loss: -881.8046264648438
Epoch: 100, Loss: -1131.091796875
Epoch: 110, Loss: -1415.167724609375
Epoch: 120, Loss: -1734.1153564453125
Epoch: 130, Loss: -2087.921630859375
Epoch: 140, Loss: -2476.520751953125
Epoch: 150, Loss: -2899.814208984375


### New links prediction

In [35]:
# Function to predict new links using the trained model
def predict_new_links(model, data, new_edges):
    model.eval()
    with torch.no_grad():
        output = model(data.x, data.edge_index)
        predicted_labels = output[new_edges[0]]
        predicted_labels = (predicted_labels >= 0.5).int()  # Threshold predictions
    return predicted_labels

# Predict new links for a set of new edge indices
new_edges = torch.tensor([[13, 17], [14, 17], [15, 17]], dtype=torch.long).t()  # Format: (start_nodes, end_nodes)
predictions = predict_new_links(model, data, new_edges)

# The predictions will be a tensor containing 0s and 1s, where 1 indicates - link and 0 indicates - no link.
print("Predicted link presence:")
print(predictions)

Predicted link presence:
tensor([[1],
        [1],
        [1]], dtype=torch.int32)


# Relational Graph Convolutional Network

In [15]:
class RGCN(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_relations=1, num_bases=30):
        super(RGCN, self).__init__()
        self.conv1 = RGCNConv(input_dim, hidden_dim, num_relations, num_bases=num_bases)
        self.conv2 = RGCNConv(hidden_dim, hidden_dim, num_relations, num_bases=num_bases)
        self.out = nn.Linear(hidden_dim, 1)
    
    def forward(self, x, edge_index, edge_type):
        x = torch.relu(self.conv1(x, edge_index, edge_type))
        x = torch.relu(self.conv2(x, edge_index, edge_type))
        x = self.out(x)
        return x

input_dim = node_features.shape[1]
hidden_dim = 64
model = RGCN(input_dim, hidden_dim)

In [16]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

data = Data(x=node_features, edge_index=edge_index)

num_epochs = 150
for epoch in range(num_epochs + 1):
    optimizer.zero_grad()
    output = model(data.x, data.edge_index, data.edge_type)  # Pass the edge_type to the model
    loss = criterion(output[data.edge_index[0]], data.edge_index[1].float())
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, Loss: {loss.item()}")

AttributeError: 'GlobalStorage' object has no attribute 'edge_type'