This is to have a track of which cypher queries that have been run more easily and test the output

In [50]:
from neo4j import GraphDatabase
from langchain_neo4j import Neo4jGraph


In [51]:
url = "bolt://localhost:7687"
username = "neo4j"
password= "neo4j"
graph = Neo4jGraph(url=url, username=username, password=password)
driver = GraphDatabase.driver(url, auth=(username, password))
session = driver.session()

In [52]:
def queryNeo4j(driver, query):
    """Runs a single Cypher query."""
    
    with driver.session() as session:
        try:
            session.run(query)
            
        except Exception as e:
            print(f"Error executing query: {e}")

In [4]:
#strips out the http://w3id.org/anett-o/

queryNeo4j(driver, """
MATCH (n)
WHERE n.uri STARTS WITH 'http://w3id.org/annett-o/'
SET n.uri = SPLIT(n.uri, '/')[SIZE(SPLIT(n.uri, '/')) - 1]
""")

In [10]:
graph.get_structured_schema

{'node_props': {'Resource': [{'property': 'uri', 'type': 'STRING'},
   {'property': 'ns0__layer_num_units', 'type': 'LIST'},
   {'property': 'ns0__eval_score', 'type': 'LIST'},
   {'property': 'ns0__learning_rate', 'type': 'LIST'},
   {'property': 'owl__qualifiedCardinality', 'type': 'LIST'},
   {'property': 'owl__minQualifiedCardinality', 'type': 'LIST'},
   {'property': 'ns0__dropout_rate', 'type': 'LIST'},
   {'property': 'rdfs__comment', 'type': 'LIST'},
   {'property': 'ns0__labels_count', 'type': 'LIST'},
   {'property': 'ns0__labels_dtype', 'type': 'LIST'},
   {'property': 'ns0__normal_sigma', 'type': 'LIST'},
   {'property': 'ns0__normal_mu', 'type': 'LIST'},
   {'property': 'ns0__data_sample_features', 'type': 'LIST'},
   {'property': 'ns0__data_sample_dimensionality', 'type': 'LIST'},
   {'property': 'ns0__momentum', 'type': 'LIST'},
   {'property': 'ns0__number_of_epochs', 'type': 'LIST'},
   {'property': 'ns0__learning_rate_decay_epochs', 'type': 'LIST'},
   {'property': 'n

In [53]:
print(queryNeo4j(driver, """
    MATCH (a:Taxonomy)-[r]-(b)
    RETURN a, r, b
"""))

None


In [4]:
from yfiles_jupyter_graphs import GraphWidget
def showGraph():
    session = driver.session()
    query = """
    MATCH (b)
    WHERE b.uri in ["LossFunction", "TaskCharacterization"]
    MATCH (a)-[r*1..1]->(b)
    RETURN a, r, b
    """
    widget = GraphWidget(graph = session.run(query).graph())
    widget.node_label_mapping = 'uri'
    widget.radial_layout()
    widget.set_sidebar(start_with='Data')
    return widget
    

showGraph()

GraphWidget(layout=Layout(height='650px', width='100%'))

In [47]:
import csv
from neo4j import GraphDatabase
import networkx as nx

# Step 1: Connect to Neo4j Database
uri = "bolt://localhost:7687"  # Neo4j instance URI
username = "neo4j"  # Your Neo4j username
password = "your_password"  # Your Neo4j password

driver = GraphDatabase.driver(uri, auth=(username, password))

# Step 2: Run the Cypher query to get the data
def run_query(driver):
    query = """
    MATCH (a:Taxonomy)-[r]-(b)
    RETURN a, r, b
    """
    
    with driver.session() as session:
        result = session.run(query)
        # Fetch all results before processing them
        records = list(result)  # This ensures we get all records at once
        return records

# Step 3: Convert Cypher results to NetworkX Graph
def cypher_to_networkx(records):
    G = nx.Graph()
    
    for record in records:
        # Extracting node 'a', relationship 'r', and node 'b'
        a = record["a"]
        b = record["b"]
        r = record["r"]
        
        # Use the 'name' property for node labels
        a_name = a["name"]
        b_name = b["name"]
        
        # Handle multiple layer types if present (split by commas and strip whitespace)
        a_layer_types = a.get("LayerTypes", "").split(",") if "LayerTypes" in a else []
        b_layer_types = b.get("LayerTypes", "").split(",") if "LayerTypes" in b else []
        
        # Clean up and remove extra whitespace
        a_layer_types = [layer_type.strip() for layer_type in a_layer_types]
        b_layer_types = [layer_type.strip() for layer_type in b_layer_types]
        
        # Add nodes with properties (using 'name' for node label)
        G.add_node(a_name, name=a["name"], layer_types=a_layer_types)
        G.add_node(b_name, name=b["name"], layer_types=b_layer_types)
        
        # Handle multiple attributes on edges (e.g., layer types, task types)
        edge_layer_types = r.get("LayerTypes", "").split(",") if "LayerTypes" in r else []
        edge_task_types = r.get("TaskTypes", "").split(",") if "TaskTypes" in r else []
        
        # Clean up and remove extra whitespace for edge attributes
        edge_layer_types = [layer_type.strip() for layer_type in edge_layer_types]
        edge_task_types = [task_type.strip() for task_type in edge_task_types]
        
        # Add edge with properties; aggregate data for edges with the same nodes
        if G.has_edge(a_name, b_name):
            # If an edge already exists, update the edge's properties by appending new data
            G[a_name][b_name]["layer_types"].extend(edge_layer_types)
            G[a_name][b_name]["task_types"].extend(edge_task_types)
            # Also, concatenate relationship types
            G[a_name][b_name]["relationships"].add(r.type)
        else:
            # If edge doesn't exist, create a new edge with the properties
            G.add_edge(a_name, b_name, type=r.type, weight=r.get("weight", 1),
                       layer_types=edge_layer_types, task_types=edge_task_types,
                       relationships={r.type})  # Store relationship type in a set for uniqueness
        
    # After adding all edges, we can ensure no duplicates by converting lists to sets for both task_types and layer_types
    for u, v, data in G.edges(data=True):
        data["layer_types"] = list(set(data["layer_types"]))  # Remove duplicates by converting to a set
        data["task_types"] = list(set(data["task_types"]))  # Remove duplicates by converting to a set
        data["relationships"] = list(data["relationships"])  # Ensure relationships are a list for CSV export
        
    return G

# Step 4: Export Node and Edge Data to a Single CSV
def export_graph_to_csv(G, filename):
    with open(filename, mode="w", newline="") as file:
        writer = csv.writer(file)
        
        # Write Nodes Table
        writer.writerow(["# Nodes Table"])  # Optional table header separator
        writer.writerow(["Node Name", "LayerTypes"])  # Node CSV headers (removed ItemState and Label)
        for node, data in G.nodes(data=True):
            # Join the layer types into a single string if multiple types are present
            layer_types = ", ".join(data["layer_types"])
            writer.writerow([node, layer_types])  # Write node data
        
        # Add a separator between nodes and edges (a blank line here)
        writer.writerow([])
        
        # Write Edges Table
        writer.writerow(["# Edges Table"])  # Optional table header separator
        writer.writerow(["Source Node", "Target Node", "Relationship Type"])  # Edge CSV headers (removed LayerTypes, TaskTypes, Weight)
        for u, v, data in G.edges(data=True):
            # Join all relationship types into a comma-separated string
            relationship_types = ", ".join(data["relationships"])
            writer.writerow([u, v, relationship_types])  # Write edge data

# Run the Cypher query and convert to NetworkX graph
records = run_query(driver)
G = cypher_to_networkx(records)

# Export nodes and edges to a single CSV file
export_graph_to_csv(G, "graph_data.csv")

# Close the connection
driver.close()



