In [1]:
from rdflib import Graph, Namespace
from rdflib.plugins.sparql import prepareQuery

## Parse graph from TTL

In [2]:
# Create an empty RDF graph
graph = Graph()

In [3]:
graph.parse("kennedys_rdf_gpt_output.ttl", format='turtle')

<Graph identifier=N5a83896febc746758c349939c0d7cf2a (<class 'rdflib.graph.Graph'>)>

In [4]:
len(graph.all_nodes())

43

In [5]:
# Print the number of triples in the graph
print(f"Number of triples: {len(graph)}")

Number of triples: 115


## Query the graph

In [6]:
# Define the SPARQL query "What is Rose Kennedy's profession?"
query = prepareQuery(
    """
    PREFIX kennedys: <http://topbraid.org/examples/kennedys#>
    SELECT ?profession
    WHERE {
        kennedys:Rose_Kennedy kennedys:profession ?profession .
    }
    """,
    initNs={"kennedys": "http://topbraid.org/examples/kennedys#"}
)

In [7]:
# Execute the SPARQL query on the graph
results = graph.query(query)

In [8]:
for row in results:
    profession = str(row.profession)
    print(profession)

http://topbraid.org/examples/kennedys#philanthropist
http://topbraid.org/examples/kennedys#socialite


## Pretty Print results
Without string manipulation

In [9]:
# # Define the custom namespaces
kennedys = Namespace("http://topbraid.org/examples/kennedys#")
# rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
# rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")

In [10]:
from rdflib.namespace import NamespaceManager

In [11]:
namespace_manager = NamespaceManager(graph)
namespace_manager.bind("kennedys", kennedys)

In [12]:
# iterate through query result again
for row in results:
    profession = str(row.profession)
    print(namespace_manager.qname(profession))

kennedys:philanthropist
kennedys:socialite


### Inspect graph directly to pretty print triples

In [30]:
# Using counter to only print first five triples that occur in the graph
counter = 5
for subject, predicate, obj in graph:
    # Resolve and print the subject, predicate, and object values
    subject_value = subject.n3(graph.namespace_manager)
    predicate_value = predicate.n3(graph.namespace_manager)
    object_value = obj.n3(graph.namespace_manager)
    print(f"Subject: {subject_value}")
    print(f"Predicate: {predicate_value}")
    print(f"Object: {object_value}")
    print()
    counter -=1
    if counter == 0:
        break

Subject: kennedys:Rosemary_Kennedy
Predicate: kennedys:sibling
Object: kennedys:Kathleen_Kennedy

Subject: kennedys:John_Kennedy
Predicate: kennedys:sibling
Object: kennedys:Patricia_Kennedy

Subject: kennedys:Edward_Kennedy
Predicate: rdf:type
Object: kennedys:Person

Subject: kennedys:Rose_Kennedy
Predicate: kennedys:child
Object: kennedys:Robert_Kennedy

Subject: kennedys:Rosemary_Kennedy
Predicate: kennedys:firstName
Object: "Rosemary"



## Parse the graph directly from the Internet

In [14]:
graph_1 = Graph()

In [15]:
from rdflib.plugin import register, Parser

In [16]:
# Register the RDF/XML parser plugin
register('rdfxml', Parser, 'rdflib.plugins.parsers.rdfxml', 'RDFXMLParser')

In [17]:
url = "https://www.topbraid.org/examples/kennedys"

In [18]:
graph_1.parse(url, format="rdfxml", expand=True)

<Graph identifier=Nb3c3fa33ba66424193e08dc2fe5130ae (<class 'rdflib.graph.Graph'>)>

In [19]:
len(graph_1.all_nodes())

527

In [20]:
# Print the number of triples in the graph
print(f"Number of triples: {len(graph_1)}")

Number of triples: 1155


## Put graph into Neo4j

In [21]:
from neo4j import GraphDatabase

In [22]:
driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "password"))

In [23]:
# Open a session
with driver.session() as session:
    # Iterate over all the triples in the RDF graph
    for subject, predicate, obj in graph:
        # Convert the RDF triples into Neo4j Cypher queries
        cypher_query = f"""
        MERGE (s:Node {{label: "{subject}"}})
        MERGE (o:Node {{label: "{obj}"}})
        MERGE (s)-[:`{predicate}`]->(o)
        """
        # Execute the Cypher query
        session.run(cypher_query)

### Query to see if data inserted properly

In [24]:
# Open a session
with driver.session() as session:
    # Execute a Cypher query to retrieve the data
    cypher_query = """
    MATCH (s)-[r]->(o)
    RETURN s.label AS subject, type(r) AS predicate, o.label AS object
    """
    result = session.run(cypher_query)
    # parse result
    for record in result:
        subject = record["subject"]
        predicate = record["predicate"]
        obj = record["object"]
        print(f"Subject: {subject}, Predicate: {predicate}, Object: {obj}")

Subject: http://topbraid.org/examples/kennedys#Rosemary_Kennedy, Predicate: http://topbraid.org/examples/kennedys#sibling, Object: http://topbraid.org/examples/kennedys#Jean_Kennedy
Subject: http://topbraid.org/examples/kennedys#Patricia_Kennedy, Predicate: http://topbraid.org/examples/kennedys#sibling, Object: http://topbraid.org/examples/kennedys#Kathleen_Kennedy
Subject: http://topbraid.org/examples/kennedys#Eunice_Kennedy, Predicate: http://topbraid.org/examples/kennedys#lastName, Object: Kennedy Shriver
Subject: http://topbraid.org/examples/kennedys#Rose_Kennedy, Predicate: http://topbraid.org/examples/kennedys#child, Object: http://topbraid.org/examples/kennedys#Jean_Kennedy
Subject: http://topbraid.org/examples/kennedys#John_Kennedy, Predicate: http://topbraid.org/examples/kennedys#firstName, Object: John Fitzgerald
Subject: http://topbraid.org/examples/kennedys#Kathleen_Kennedy, Predicate: http://topbraid.org/examples/kennedys#sibling, Object: http://topbraid.org/examples/kenne

It just inserts the URIs; there is probably a better way to do this.
Could try to recursively parse the graph using the URIs, or maybe need to ask what I am actually doing here.

### Cleanup

In [28]:
with driver.session() as session:
    # Execute a Cypher query to delete all nodes and relationships
    cypher_query = """
    MATCH (n)
    DETACH DELETE n
    """
    session.run(cypher_query)

# Trying to recursively parse the graph

In [26]:
import urllib.request
import xml.etree.ElementTree as ET
from rdflib import Graph, URIRef

In [27]:
# Load the RDF/XML data from the URL
response = ET.parse("https://www.topbraid.org/examples/kennedys").getroot()

# Recursive function to process XML elements and add triples to the graph
def process_element(element, parent_subject=None):
    subject = parent_subject or URIRef(element.attrib.get("rdf:about"))

    for child in element:
        predicate = URIRef(child.tag)
        if "resource" in child.attrib:
            obj = URIRef(child.attrib["resource"])
            graph.add((subject, predicate, obj))
            process_element(child, parent_subject=obj)
        else:
            obj = child.text
            graph.add((subject, predicate, obj))

# Process the root element
process_element(response)

# Print the number of triples in the graph
print(f"Number of triples: {len(graph)}")

FileNotFoundError: [Errno 2] No such file or directory: 'https://www.topbraid.org/examples/kennedys'