Imports

In [3]:
from langgraph.graph import START, END, StateGraph
from langgraph.prebuilt import ToolNode
from IPython.display import Image, display
from langgraph.graph import MessagesState
from langchain_core.messages import SystemMessage
from owlready2 import get_ontology
from langchain_core.messages import AIMessage
from langgraph.prebuilt import tools_condition
from pathlib import Path
from typing_extensions import TypedDict

from pydantic import BaseModel
from typing import List, Optional

from owlready2 import *

from openai import OpenAI

import json

setup

In [4]:
class MockLLM:
    def invoke(self, messages):
        return "mocked assistant response"

llm_with_tools = MockLLM()
sys_msg = SystemMessage(content="You are a helpful assistant.")
onto = get_ontology("IA_ontology.owl").load()

Define Pydantic classes for input, inbetween and output states

In [5]:
class InputState(TypedDict):
    story: str

class InbetweenState(TypedDict):
    story: str
    chunks: list[str]
    ontology_summary: str
    extract: list[str]
    conflicts: list[str]
    rewritten_chunks: list[str]
    re_verified_chunks: list[str]
    evaluated_chunks: list[str]


class OutputState(TypedDict):
    revised_story: str

Functions to make a summary of the ontology

In [14]:
# use IA_ontology.owl
onto = get_ontology("IA_ontology.owl").load()

In [16]:
from owlready2 import *

def safe_name(x):
    """Return a readable name for any ontology element."""
    if hasattr(x, "name"):
        return x.name
    elif hasattr(x, "__name__"):
        return x.__name__
    elif isinstance(x, Restriction):
        return f"Restriction({x.property.name})"
    else:
        return str(x)

def get_ontology_summary():

    summary = []
    summary.append(f"=== SUMMARY of {onto.base_iri} ===\n")

    # --- CLASSES ---
    summary.append("CLASSES:")
    for c in sorted(onto.classes(), key=lambda x: x.name):
        summary.append(f"  {c.name}")

    # --- OBJECT PROPERTIES ---
    summary.append("\nOBJECT PROPERTIES:")
    for p in sorted(onto.object_properties(), key=lambda x: x.name):
        domain = [safe_name(d) for d in p.domain] or ["(none)"]
        range_ = [safe_name(r) for r in p.range] or ["(none)"]

        characteristics = []
        if FunctionalProperty in p.is_a: characteristics.append("Functional")
        if InverseFunctionalProperty in p.is_a: characteristics.append("InverseFunctional")
        if SymmetricProperty in p.is_a: characteristics.append("Symmetric")
        if TransitiveProperty in p.is_a: characteristics.append("Transitive")
        info = ", ".join(characteristics) if characteristics else "Simple"

        summary.append(f"  {p.name} | domain: {domain} | range: {range_} | type: {info}")

    # --- DATA PROPERTIES ---
    summary.append("\nDATA PROPERTIES:")
    for p in sorted(onto.data_properties(), key=lambda x: x.name):
        domain = [safe_name(d) for d in p.domain] or ["(none)"]
        range_ = [safe_name(r) for r in p.range] or ["(none)"]

        characteristics = []
        if FunctionalProperty in p.is_a: characteristics.append("Functional")
        info = ", ".join(characteristics) if characteristics else "Simple"

        summary.append(f"  {p.name} | domain: {domain} | range: {range_} | type: {info}")

    # --- INDIVIDUALS ---
    summary.append("\nINDIVIDUALS:")
    for i in sorted(onto.individuals(), key=lambda x: x.name):
        types = [safe_name(cls) for cls in i.is_a]
        summary.append(f"  {i.name} : {types}")

    return "\n".join(summary)


# Example usage
print(get_ontology_summary())


=== SUMMARY of http://IA.org/onto.owl# ===

CLASSES:
  Activity
  Adult
  Adventurous
  Allergy
  AvoidsSocialInteraction
  AvoidsTasks
  Baguette
  Baker
  Baking
  Behavior
  Bicycle
  BicycleLane
  BigCity
  Boiling
  Bus
  BusStation
  BusyCity
  Cake
  Cancer
  Car
  Carbohydrate
  Cashier
  CatchingFish
  CharacterTrait
  Chef
  City
  Classroom
  Condition
  CookedFood
  CookingMethod
  Croissant
  CuttingBoard
  Dairy
  DairyAllergy
  Deficiency
  Dessert
  Diabetes
  Disease
  Doctor
  Education
  EducationalResource
  Electricity
  ElectricityUtility
  EnvironmentCondition
  Fat
  Fatigue
  Fever
  Fish
  FishingNet
  FlatTerrain
  Flour
  Food
  FoodAcquiring
  FoodCategory
  Fruit
  Frying
  Gluten
  GlutenAllergy
  GoodHearted
  Grilling
  Headache
  Health
  HelpsOthers
  HigherEducation
  HigherLearner
  Infrastructure
  Ingredient
  IronDeficiency
  Knife
  Landmark
  Lawyer
  Lazy
  Macaron
  MarriedPerson
  MedicalStudies
  Microwave
  MuscleAche
  Nurse
  Nut
  NutAl

In [6]:
def chunk(state: InputState) -> InbetweenState:
    """Split the input into manageable chunks."""
    # split story in three parts
    chunks = []

    for sentence in state['story'].split("."):
        chunks.append(sentence.strip())

    return {"chunks": chunks}

In [None]:
state = {"story": "Amelia walked in the garden. The flowers were bright. He smiled."}

# when
result = chunk(state)

result

{'chunks': ['Naya walked in the garden',
  'The flowers were bright',
  'She smiled',
  '']}

# Extract

In [60]:
def extract(state: InbetweenState)-> InbetweenState:
    """Extract relevant information from the story."""
    ontology_summary = get_ontology_summary()
    story = state['story']

    client = OpenAI()

    class Triple(BaseModel):
        subj: str
        pred: str
        obj: str

    class Output(BaseModel):
        output: list[Triple]

    response = client.responses.parse(
        model="gpt-5-nano",
        reasoning={"effort": "low"},
        instructions="""Look in the context that provides the classes, object types, data types and instances of an ontology. Then scan the story.
        Which classes, object types, data types and instances you encounter in the story that are also present in the ontology_context?
        ONLY use as predicates the ones that are present in the ontology_context.""",
        input=f"ontology_context: {ontology_summary}, story: {story}",
        text_format=Output
    )

    data = json.loads(response.output_text)

    # Make a Python dict so it can be easily converted to JSON elsewhere
    triples = data["output"]

    return {"extract": triples}

In [61]:
state = {"story": "Amelia is lawyer and lives in Paris"}
result = extract(state)
print("\n--- Extract result ---")
print(json.dumps(result, indent=2))


--- Extract result ---
{
  "extract": [
    {
      "subj": "Amelia",
      "pred": "hasOccupation",
      "obj": "Lawyer"
    }
  ]
}


# Check for conflicts

## helper functions

In [23]:
onto.base_iri

'http://IA.org/onto.owl#'

In [38]:
def dict_to_sparql(triple_dict, ontology_base_iri):
    """
    Convert a dictionary like {'subj': 'Jorryt', 'pred': 'rdf:type', 'obj': 'Person'}
    into a SPARQL query where the subject is replaced by a variable (?subject).
    """
    prefix = f"PREFIX : <{ontology_base_iri}>\n" \
             "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"

    pred = triple_dict["pred"]
    obj = triple_dict["obj"]

    # If the predicate starts with "rdf:", keep it like that
    # Otherwise prefix it with ":"
    pred_prefix = pred if pred.startswith("rdf:") else f":{pred}"
    obj_prefix = obj if obj.startswith("rdf:") else f":{obj}"

    query = prefix + f"""
SELECT DISTINCT ?subject WHERE {{
  ?subject {pred_prefix} {obj_prefix} .
}}
"""
    return query



In [39]:
from textwrap import dedent


def test_dict_to_sparql_basic():
    triple = {"subj": "Jorryt", "pred": "rdf:type", "obj": "Person"}
    base_iri = "IA_ontology.owl"

    result = dict_to_sparql(triple, base_iri)
    print(result)



'''expected:
    PREFIX : <IA_ontology.owl>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

    SELECT DISTINCT ?subject WHERE {
      ?subject rdf:type :Person .
    }
""")'''

test_dict_to_sparql_basic()

PREFIX : <IA_ontology.owl>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT DISTINCT ?subject WHERE {
  ?subject rdf:type :Person .
}



In [93]:
def check_triples_against_ontology(triples, onto):

    for triple in triples:
        query = dict_to_sparql(triple, onto.base_iri)
        # print(f"Query:\n{query}")
        try:
            rows = list(default_world.sparql(query))
            sparql_output_list = []
            if rows:
                # print("Output:")
                for row in rows:
                    for x in row:
                        sparql_output_list.append(x.name)
            triple["sparql_output"] = sparql_output_list
        except Exception as e:
            pass
        #        print("-" * 40)
    return triples

triples = [
    {
      "subj": "Amelia",
      "pred": "rdf:type",
      "obj": "Person"
    }
]

results = check_triples_against_ontology(triples, onto)
results

[{'subj': 'Amelia',
  'pred': 'rdf:type',
  'obj': 'Person',
  'sparql_output': ['Adam',
   'Daan',
   'Jan',
   'Amelia',
   'Petra',
   'Elizabeth',
   'Jasmin',
   'Jennifer']}]

In [94]:
def scan_unvalid_triples(triples):
    invalid_triples = []
    for triple in triples:
        if triple['sparql_output']:
            if triple["subj"] in triple["sparql_output"]:
                triple["valid"] = True
            else:
                triple["valid"] = False
                invalid_triples.append(triple)
    return invalid_triples


triples = [{'subj': 'Amelia',
  'pred': 'rdf:type',
  'obj': 'Person',
  'sparql_output': ['Adam',
   'Daan',
   'Jan',
   'Petra',
   'Elizabeth',
   'Jasmin',
   'Jennifer']}]
scan_unvalid_triples(triples)

[{'subj': 'Amelia',
  'pred': 'rdf:type',
  'obj': 'Person',
  'sparql_output': ['Adam',
   'Daan',
   'Jan',
   'Petra',
   'Elizabeth',
   'Jasmin',
   'Jennifer'],
  'valid': False}]

In [None]:
client = OpenAI()

class Triple(BaseModel):
    subj: str
    pred: str
    obj: str

class Output(BaseModel):
    output: list[Triple]

response = client.responses.parse(
    model="gpt-5-nano",
    reasoning={"effort": "low"},
    instructions="""

    -

    """,
    input=f"""    We have seen for this text chunk the following inconsistencies with the ontology.

    chunk: {chunk}

    inconsistencies: {conflicts}

    Please look at the ontology report and confirm or reject the inconsistencies.
    ontology_context: {ontology_summary}""",
    text_format=Output
)

data = json.loads(response.output_text)

In [None]:
def check_conflicts(state: InbetweenState) -> InbetweenState:
    """Check for inconsistencies in the story."""
    onto = get_ontology("IA_ontology.owl").load()
    triples = [
    {
      "subj": "sinterklaas",
      "pred": "rdf:type",
      "obj": "Person"
    }
    ]

    results = check_triples_against_ontology(triples, onto)
    unvalid_triples = scan_unvalid_triples(results)
    invalid_triples_str = ""
    for triple in unvalid_triples:
      invalid_triples_str += f"sub: {triple['subj']}, pred: {triple['pred']}, obj: {triple['obj']}\n"

    
    return {"conflicts": unvalid_triples}



In [100]:
state = {
    "extract": [
        {"subj": "Amelia", "pred": "rdf:type", "obj": "Person"},
        {"subj": "Daan", "pred": "rdf:type", "obj": "Person"},
        {"subj": "Paris", "pred": "rdf:type", "obj": "City"},
        {"subj": "Eiffel_Tower", "pred": "builtInYear", "obj": "int"},
        {"subj": "Paris", "pred": "hasPopulation", "obj": "int"}
            ]
}

check_conflicts(state)


Unvalid triples: [{'subj': 'sinterklaas', 'pred': 'rdf:type', 'obj': 'Person', 'sparql_output': ['Adam', 'Daan', 'Jan', 'Amelia', 'Petra', 'Elizabeth', 'Jasmin', 'Jennifer'], 'valid': False}]


{'conflicts': [{'subj': 'sinterklaas',
   'pred': 'rdf:type',
   'obj': 'Person',
   'sparql_output': ['Adam',
    'Daan',
    'Jan',
    'Amelia',
    'Petra',
    'Elizabeth',
    'Jasmin',
    'Jennifer'],
   'valid': False}]}

In [105]:
# print the sub, pred, obj of the triples that are invalid as a big string with flags
conflicts = {'conflicts': [{'subj': 'sinterklaas',
   'pred': 'rdf:type',
   'obj': 'Person',
   'sparql_output': ['Adam',
    'Daan',
    'Jan',
    'Amelia',
    'Petra',
    'Elizabeth',
    'Jasmin',
    'Jennifer'],
   'valid': False}]}

invalid_triples_str = ""
for triple in conflicts['conflicts']:
    invalid_triples_str += f"sub: {triple['subj']}, pred: {triple['pred']}, obj: {triple['obj']}\n"

print(invalid_triples_str)


sub: sinterklaas, pred: rdf:type, obj: Person

