In [None]:
#!pip install torch torchvision torchaudio
#!pip install torch-geometric

### Importing  libraries

In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import tensorflow as tf
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data
from torch_geometric.nn import RGCNConv

from neo4j import GraphDatabase
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, optimizers, losses, metrics, Model

from stellargraph import StellarGraph
from stellargraph.layer import GraphSAGE
from stellargraph.data import UnsupervisedSampler

### Connect to the database and fetch graph data from Neo4j

In [127]:
# Connect to the Neo4j database
uri = "bolt://localhost:7687"
username = "neo4j"
password = "OLIV00%%"

driver = GraphDatabase.driver(uri, auth=(username, password))

# Function to fetch graph data from Neo4j
def fetch_graph_data():
    with driver.session() as session:
        result = session.run(
            "MATCH (n1)-[r]->(n2) RETURN id(n1) AS start, id(n2) AS end, type(r) AS relationship"
        )
        data = [(record["start"], record["end"], record["relationship"]) for record in result]
    return data

# Load graph data
graph_data = fetch_graph_data()

In [128]:
# Split data into training and test sets
train_data, test_data = train_test_split(graph_data, test_size=0.5, random_state=42)

graph_data

[(32, 7, 'interval'),
 (32, 8, 'interval'),
 (32, 9, 'interval'),
 (32, 10, 'interval'),
 (32, 11, 'interval'),
 (32, 12, 'interval'),
 (32, 13, 'interval'),
 (0, 45, 'min'),
 (1, 47, 'min'),
 (2, 49, 'min'),
 (3, 51, 'min'),
 (4, 53, 'min'),
 (5, 55, 'min'),
 (6, 57, 'min'),
 (51, 35, 'Probability of : 55% '),
 (45, 34, 'Probability of : 30% '),
 (46, 34, 'Probability of : 30% '),
 (0, 46, 'max'),
 (1, 48, 'max'),
 (2, 50, 'max'),
 (3, 52, 'max'),
 (4, 54, 'max'),
 (5, 56, 'max'),
 (6, 58, 'max'),
 (7, 45, 'min'),
 (8, 47, 'min'),
 (9, 49, 'min'),
 (10, 51, 'min'),
 (11, 53, 'min'),
 (12, 55, 'min'),
 (13, 57, 'min'),
 (47, 35, 'Probability of : 100% '),
 (48, 35, 'Probability of : 100% '),
 (49, 35, 'Probability of : 100% '),
 (40, 31, 'Measuring'),
 (40, 32, 'Measuring'),
 (41, 31, 'Measuring'),
 (41, 32, 'Measuring'),
 (42, 31, 'Measuring'),
 (42, 32, 'Measuring'),
 (43, 31, 'Measuring'),
 (43, 32, 'Measuring'),
 (57, 37, 'Probability of  90%'),
 (52, 35, 'Probability of : 90% '),


# Adamic Adar

In [129]:
# Perform Adamic-Adar link prediction
def adamic_adar_link_prediction(node1_id, node2_id):
    with driver.session() as session:
        query = (
            f"MATCH (n1)-[:COMMON]->(common_node)"
            f"<-[:COMMON]-(n2) WHERE id(n1)={node1_id} AND id(n2)={node2_id} "
            "RETURN count(common_node) AS adamic_adar_score"
        )
        result = session.run(query)
        return result.single()["adamic_adar_score"]

### Adamic Adar link prediction model evaluation

In [130]:
# Evaluate the link prediction model
def evaluate_link_prediction():
    y_true = []
    y_pred = []
    for node1_id, node2_id, _ in test_data:
        adamic_adar_score = adamic_adar_link_prediction(node1_id, node2_id)
        y_true.append(1)  # Link exists
        y_pred.append(1 if adamic_adar_score > 0 else 0)  # Predict link existence based on score

    accuracy = accuracy_score(y_true, y_pred)
    print(f"Accuracy: {accuracy:.2f}")
    
if __name__ == "__main__":
    evaluate_link_prediction()

Accuracy: 0.00


# Graph Convolution Network

### Convert the graph data into adjacency matrix

In [131]:
# Create a directed graph using NetworkX
G = nx.DiGraph()

# Add nodes and edges to the graph
for start, end, relationship in graph_data:
    G.add_node(start)
    G.add_node(end)
    G.add_edge(start, end, relationship=relationship)

# Create the adjacency matrix
adj_matrix = nx.to_numpy_matrix(G, dtype=int)

# Get the node order in the adjacency matrix
node_order = sorted(G.nodes())

# Create a dictionary to map node IDs to indices in the adjacency matrix
node_index_map = {node_id: index for index, node_id in enumerate(node_order)}

# Rearrange the adjacency matrix based on the node order
adj_matrix_reordered = np.array([[adj_matrix[node_index_map[start], node_index_map[end]] for end in node_order] for start in node_order])

# Print the adjacency matrix
print(adj_matrix_reordered)

[[0 1 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [132]:
# Print the adjacency matrix shape
print(f"Adjacency matrix shape : {adj_matrix_reordered.shape}")

Adjacency matrix shape : (59, 59)


### Nodes feature  

In [133]:
# Convert the adjacency matrix to a tensor
adj_matrix_tensor = torch.tensor(adj_matrix_reordered, dtype=torch.float)

# Create an identity matrix to represent node features (assuming no node features, only structural information)
num_nodes = adj_matrix_tensor.shape[0]
identity_matrix = torch.eye(num_nodes)

# Concatenate the adjacency matrix and identity matrix as node features
node_features = torch.cat((adj_matrix_tensor, identity_matrix), dim=1)

# Create the edge index tensor for PyTorch Geometric
edge_index = torch.tensor(np.array(G.edges()).T, dtype=torch.long)

### Model building
### DIM = 32

In [134]:
# Define the Graph Convolutional Network (GCN) model
class GCN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCN, self).__init__()
        self.conv1 = nn.Linear(input_dim, hidden_dim)
        self.conv2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x, edge_index):
        x = torch.relu(self.conv1(x))
        x = self.conv2(x)
        return x

# Initialize the GCN model
input_dim = node_features.shape[1]
hidden_dim = 64
output_dim = 32
model = GCN(input_dim, hidden_dim, output_dim)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Convert the data to PyTorch Geometric Data object
data = Data(x=node_features, edge_index=edge_index)

# Training loop
num_epochs = 150
for epoch in range(num_epochs + 1):
    optimizer.zero_grad()
    output = model(data.x, data.edge_index)
    loss = criterion(output[data.edge_index[0]], data.edge_index[1])
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, Loss: {loss.item()}")

# You can now use the trained model to make predictions on new data or perform link prediction tasks.

IndexError: Target 45 is out of bounds.

### DIM = 1

In [135]:
# Define the Graph Convolutional Network (GCN) model
class GCN(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(GCN, self).__init__()
        self.conv1 = nn.Linear(input_dim, hidden_dim)
        self.conv2 = nn.Linear(hidden_dim, 1)  # Output dimension changed to 1 for binary classification
    
    def forward(self, x, edge_index):
        x = torch.relu(self.conv1(x))
        x = self.conv2(x)
        return x

# Initialize the GCN model
input_dim = node_features.shape[1]
hidden_dim = 64
model = GCN(input_dim, hidden_dim)

# Define the loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Convert the data to PyTorch Geometric Data object
data = Data(x=node_features, edge_index=edge_index)

# Training loop
num_epochs = 150
for epoch in range(num_epochs + 1):
    optimizer.zero_grad()
    output = model(data.x, data.edge_index).squeeze()  # Squeeze the output to remove the singleton dimension
    loss = criterion(output[data.edge_index[0]], data.edge_index[1].float())
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, Loss: {loss.item()}")


Epoch: 0, Loss: -3.7078287601470947
Epoch: 10, Loss: -62.52656555175781
Epoch: 20, Loss: -226.18203735351562
Epoch: 30, Loss: -552.5494384765625
Epoch: 40, Loss: -1087.0587158203125
Epoch: 50, Loss: -1861.30078125
Epoch: 60, Loss: -2900.88037109375
Epoch: 70, Loss: -4221.28466796875
Epoch: 80, Loss: -5833.76953125
Epoch: 90, Loss: -7744.30712890625
Epoch: 100, Loss: -9956.375
Epoch: 110, Loss: -12472.0390625
Epoch: 120, Loss: -15292.298828125
Epoch: 130, Loss: -18417.43359375
Epoch: 140, Loss: -21847.212890625
Epoch: 150, Loss: -25581.078125


### New links prediction

In [136]:
# Function to predict new links using the trained model
def predict_new_links(model, data, new_edges):
    model.eval()
    with torch.no_grad():
        output = model(data.x, data.edge_index)
        predicted_labels = output[new_edges[0]]
        predicted_labels = (predicted_labels >= 0.5).int()  # Threshold predictions
    return predicted_labels

# Predict new links for a set of new edge indices
new_edges = torch.tensor([[13, 17], [14, 17], [15, 17]], dtype=torch.long).t()  # Format: (start_nodes, end_nodes)
predictions = predict_new_links(model, data, new_edges)

# The predictions will be a tensor containing 0s and 1s, where 1 indicates - link and 0 indicates - no link.
print("Predicted link presence:")
print(predictions)

Predicted link presence:
tensor([[1],
        [1],
        [1]], dtype=torch.int32)


In [138]:
# Predict new links for a set of new edge indices
for i in range(33, 40):
    new_edges = torch.tensor([[28, i], [29, i], [30, i]], dtype=torch.long).t()  # Format: (start_nodes, end_nodes)
    predictions = predict_new_links(model, data, new_edges)

    # The predictions will be a tensor containing 0s and 1s, where 1 indicates - link and 0 indicates - no link.
    print("Predicted link presence:")
    print(predictions)
    print()

Predicted link presence:
tensor([[1],
        [1],
        [1]], dtype=torch.int32)

Predicted link presence:
tensor([[1],
        [1],
        [1]], dtype=torch.int32)

Predicted link presence:
tensor([[1],
        [1],
        [1]], dtype=torch.int32)

Predicted link presence:
tensor([[1],
        [1],
        [1]], dtype=torch.int32)

Predicted link presence:
tensor([[1],
        [1],
        [1]], dtype=torch.int32)

Predicted link presence:
tensor([[1],
        [1],
        [1]], dtype=torch.int32)

Predicted link presence:
tensor([[1],
        [1],
        [1]], dtype=torch.int32)



### Model evaluation

In [None]:
# Function to perform evaluation on the test set
def evaluate_model(model, data, edge_index, labels):
    model.eval()
    with torch.no_grad():
        output = model(data.x, edge_index)
        predicted_labels = output.argmax(dim=1)  # Use argmax to get the predicted class labels

    # Calculate accuracy
    correct = (predicted_labels == labels).sum().item()
    total = len(labels)
    accuracy = correct / total

    return accuracy

# Convert test data to PyTorch tensors
test_edges = torch.tensor(test_edges, dtype=torch.long).t()
test_labels = torch.tensor(test_labels, dtype=torch.long)

# Evaluate the model on the test set
test_accuracy = evaluate_model(model, data, test_edges, test_labels)

print("Test Accuracy:", test_accuracy)

# Relational Graph Convolutional Network

In [123]:
# Define the Relational Graph Convolutional Network (R-GCN) model
class RGCN(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_relations=1, num_bases=30):
        super(RGCN, self).__init__()
        self.conv1 = RGCNConv(input_dim, hidden_dim, num_relations, num_bases=num_bases)
        self.conv2 = RGCNConv(hidden_dim, hidden_dim, num_relations, num_bases=num_bases)
        self.out = nn.Linear(hidden_dim, 1)
    
    def forward(self, x, edge_index, edge_type):
        x = torch.relu(self.conv1(x, edge_index, edge_type))
        x = torch.relu(self.conv2(x, edge_index, edge_type))
        x = self.out(x)
        return x

# Initialize the R-GCN model
input_dim = node_features.shape[1]
hidden_dim = 64
model = RGCN(input_dim, hidden_dim)

In [124]:
# Define the loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Convert the data to PyTorch Geometric Data object
data = Data(x=node_features, edge_index=edge_index)

# Training loop
num_epochs = 150
for epoch in range(num_epochs + 1):
    optimizer.zero_grad()
    output = model(data.x, data.edge_index, data.edge_type)  # Pass the edge_type to the model
    loss = criterion(output[data.edge_index[0]], data.edge_index[1].float())
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, Loss: {loss.item()}")


AttributeError: 'GlobalStorage' object has no attribute 'edge_type'