### Importing libraries

In [11]:
import getpass
import os

import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

from neo4j import GraphDatabase
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras import layers, optimizers, losses, metrics, Model

from stellargraph import StellarGraph
from stellargraph.layer import GraphSAGE
from stellargraph.data import UnsupervisedSampler

### Connect to the database and fetch graph data from Neo4j

In [4]:
# Connect to the Neo4j database.
# Connection settings are read from the environment (NEO4J_URI, NEO4J_USERNAME,
# NEO4J_PASSWORD) so that no credential is stored in the notebook itself.
# The URI and user name fall back to the local defaults; if no password is set
# in the environment it is requested interactively instead of being hardcoded.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")

driver = GraphDatabase.driver(uri, auth=(username, password))

# Function to fetch graph data from Neo4j
def fetch_graph_data():
    """Read every directed relationship from Neo4j as an edge list.

    Opens a session on the module-level ``driver`` and runs a single Cypher
    query matching all ``(n1)-[r]->(n2)`` patterns.

    Returns:
        list[tuple]: One ``(start, end, relationship)`` tuple per matched
        relationship, where ``start`` and ``end`` are the ``id()`` values of
        the source and target nodes and ``relationship`` is ``type(r)``.
    """
    cypher = "MATCH (n1)-[r]->(n2) RETURN id(n1) AS start, id(n2) AS end, type(r) AS relationship"
    edges = []
    with driver.session() as session:
        # Consume the result while the session is still open.
        for row in session.run(cypher):
            edges.append((row["start"], row["end"], row["relationship"]))
    return edges

# Load graph data: one (start id, end id, relationship type) tuple per
# relationship returned by fetch_graph_data()
graph_data = fetch_graph_data()

In [5]:
# Hold out half of the fetched edges as the test set; the fixed random_state
# keeps the split identical from run to run.
train_data, test_data = train_test_split(
    graph_data,
    random_state=42,
    test_size=0.5,
)

# Show the complete (unsplit) edge list as the cell output.
graph_data

[(23, 15, 'Component'),
 (17, 29, 'Caused_by'),
 (17, 0, 'Caused_by'),
 (17, 1, 'Caused_by'),
 (17, 2, 'Caused_by'),
 (18, 3, 'Caused_by'),
 (19, 4, 'Caused_by'),
 (19, 5, 'Caused_by'),
 (19, 6, 'Caused_by'),
 (20, 7, 'Caused_by'),
 (20, 8, 'Caused_by'),
 (21, 9, 'Caused_by'),
 (22, 10, 'Caused_by'),
 (22, 11, 'Caused_by'),
 (22, 12, 'Caused_by'),
 (23, 13, 'Component'),
 (23, 14, 'Component')]

# Adamic-Adar

In [12]:
# Perform Adamic-Adar link prediction
def adamic_adar_link_prediction(node1_id, node2_id):
    """Compute the Adamic-Adar index for a pair of nodes.

    The index is the sum, over every node adjacent to both endpoints, of
    ``1 / log(degree)`` where ``degree`` is the number of distinct neighbours
    of that common node. Relationships of any type and direction are followed,
    so the score is computed on the graph that is actually stored (the earlier
    query only followed ``:COMMON`` relationships and returned a plain count).

    Args:
        node1_id: Neo4j internal ``id()`` of the first node.
        node2_id: Neo4j internal ``id()`` of the second node.

    Returns:
        The Adamic-Adar score as returned by the database; ``0`` when the two
        nodes have no common neighbour.
    """
    query = (
        "MATCH (n1)--(common_node)--(n2) "
        "WHERE id(n1) = $node1_id AND id(n2) = $node2_id "
        # DISTINCT: a common node reached through parallel relationships
        # must contribute to the sum only once.
        "WITH DISTINCT common_node "
        "MATCH (common_node)--(neighbour) "
        "WITH common_node, count(DISTINCT neighbour) AS degree "
        # log(1) == 0 would divide by zero; such nodes carry no information.
        "WHERE degree > 1 "
        "RETURN coalesce(sum(1.0 / log(degree)), 0.0) AS adamic_adar_score"
    )
    # Ids are sent as query parameters rather than formatted into the Cypher
    # text; int() keeps non-native integer types (e.g. numpy ints) acceptable.
    parameters = {"node1_id": int(node1_id), "node2_id": int(node2_id)}
    with driver.session() as session:
        record = session.run(query, parameters).single()
        return record["adamic_adar_score"] if record is not None else 0.0

### Link prediction model evaluation

In [13]:
# Evaluate the link prediction model
def evaluate_link_prediction():
    """Score every edge in ``test_data`` and print the resulting accuracy.

    Each ``(node1_id, node2_id, relationship)`` entry of the module-level
    ``test_data`` is passed to ``adamic_adar_link_prediction``; a score above
    zero counts as a predicted link. Only existing edges are evaluated, so
    every true label is 1 and the printed accuracy is the fraction of those
    edges that were predicted. Nothing is returned.
    """
    predictions = [
        1 if adamic_adar_link_prediction(source_id, target_id) > 0 else 0
        for source_id, target_id, _ in test_data
    ]
    # All evaluated pairs are known links, hence an all-ones label vector.
    labels = [1] * len(predictions)

    accuracy = accuracy_score(labels, predictions)
    print(f"Accuracy: {accuracy:.2f}")
    
# Run the evaluation only when this code executes as the `__main__` module.
if __name__ == "__main__":
    evaluate_link_prediction()

Accuracy: 0.00


# GraphSAGE

In [11]:
# Parameter definitions
# NOTE(review): presumably the training batch size and number of epochs for the
# GraphSAGE model — confirm against the code that consumes them.
batch_size = 20
epochs = 50

# NOTE(review): looks like one neighbour-sample size per GraphSAGE layer — TODO confirm.
num_samples = [20, 10]