Imports

In [2]:
from langgraph.graph import START, END, StateGraph
from langgraph.prebuilt import ToolNode
from IPython.display import Image, display
from langgraph.graph import MessagesState
from langchain_core.messages import SystemMessage
from owlready2 import get_ontology
from langchain_core.messages import AIMessage
from langgraph.prebuilt import tools_condition
from pathlib import Path
from typing_extensions import TypedDict

from pydantic import BaseModel
from typing import List, Optional

from owlready2 import *

from openai import OpenAI

import json

setup

In [3]:
class MockLLM:
    def invoke(self, messages):
        return "mocked assistant response"

llm_with_tools = MockLLM()
sys_msg = SystemMessage(content="You are a helpful assistant.")
onto = get_ontology("IA_ontology.owl").load()

Define Pydantic classes for input, inbetween and output states

In [4]:
class InputState(TypedDict):
    story: str

class InbetweenState(TypedDict):
    story: str
    chunks: list[str]
    ontology_summary: str
    extract: list[str]
    conflicts: list[str]
    rewritten_chunks: list[str]
    re_verified_chunks: list[str]
    evaluated_chunks: list[str]


class OutputState(TypedDict):
    revised_story: str

Functions to make a summary of the ontology

In [5]:
# use IA_ontology.owl
onto = get_ontology("IA_ontology.owl").load()

# Getting Ontology

In [6]:
from owlready2 import *

def safe_name(x):
    """Return a readable name for any ontology element."""
    if hasattr(x, "name"):
        return x.name
    elif hasattr(x, "__name__"):
        return x.__name__
    elif isinstance(x, Restriction):
        return f"Restriction({x.property.name})"
    else:
        return str(x)

def get_ontology_summary():

    summary = []
    summary.append(f"=== SUMMARY of {onto.base_iri} ===\n")

    # --- CLASSES ---
    summary.append("CLASSES:")
    for c in sorted(onto.classes(), key=lambda x: x.name):
        summary.append(f"  {c.name}")

    # --- OBJECT PROPERTIES ---
    summary.append("\nOBJECT PROPERTIES:")
    for p in sorted(onto.object_properties(), key=lambda x: x.name):
        domain = [safe_name(d) for d in p.domain] or ["(none)"]
        range_ = [safe_name(r) for r in p.range] or ["(none)"]

        characteristics = []
        if FunctionalProperty in p.is_a: characteristics.append("Functional")
        if InverseFunctionalProperty in p.is_a: characteristics.append("InverseFunctional")
        if SymmetricProperty in p.is_a: characteristics.append("Symmetric")
        if TransitiveProperty in p.is_a: characteristics.append("Transitive")
        info = ", ".join(characteristics) if characteristics else "Simple"

        summary.append(f"  {p.name} | domain: {domain} | range: {range_} | type: {info}")

    # --- DATA PROPERTIES ---
    summary.append("\nDATA PROPERTIES:")
    for p in sorted(onto.data_properties(), key=lambda x: x.name):
        domain = [safe_name(d) for d in p.domain] or ["(none)"]
        range_ = [safe_name(r) for r in p.range] or ["(none)"]

        characteristics = []
        if FunctionalProperty in p.is_a: characteristics.append("Functional")
        info = ", ".join(characteristics) if characteristics else "Simple"

        summary.append(f"  {p.name} | domain: {domain} | range: {range_} | type: {info}")

    # --- INDIVIDUALS ---
    summary.append("\nINDIVIDUALS:")
    for i in sorted(onto.individuals(), key=lambda x: x.name):
        types = [safe_name(cls) for cls in i.is_a]
        summary.append(f"  {i.name} : {types}")

    return "\n".join(summary)


# Example usage
print(get_ontology_summary())


=== SUMMARY of http://IA.org/onto.owl# ===

CLASSES:
  Activity
  Adult
  Adventurous
  Allergy
  AvoidsSocialInteraction
  AvoidsTasks
  Baguette
  Baker
  Baking
  Behavior
  Bicycle
  BicycleLane
  BigCity
  Boiling
  Bus
  BusStation
  BusyCity
  Cake
  Cancer
  Car
  Carbohydrate
  Cashier
  CatchingFish
  CharacterTrait
  Chef
  City
  Classroom
  Condition
  CookedFood
  CookingMethod
  Croissant
  CuttingBoard
  Dairy
  DairyAllergy
  Deficiency
  Dessert
  Diabetes
  Disease
  Doctor
  Education
  EducationalResource
  Electricity
  ElectricityUtility
  EnvironmentCondition
  Fat
  Fatigue
  Fever
  Fish
  FishingNet
  FlatTerrain
  Flour
  Food
  FoodAcquiring
  FoodCategory
  Fruit
  Frying
  Gluten
  GlutenAllergy
  GoodHearted
  Grilling
  Headache
  Health
  HelpsOthers
  HigherEducation
  HigherLearner
  Infrastructure
  Ingredient
  IronDeficiency
  Knife
  Landmark
  Lawyer
  Lazy
  Macaron
  MarriedPerson
  MedicalStudies
  Microwave
  MuscleAche
  Nurse
  Nut
  NutAl

# Chunk Node

In [7]:
def chunk(state: InputState) -> InbetweenState:
    """Split the input into manageable chunks."""
    # split story in three parts
    chunks = []

    for sentence in state['story'].split("."):
        chunks.append(sentence.strip())

    return {"chunks": chunks}

In [30]:
state = {"story": """

Adam, a baker living in Paris, spent the early morning baking fresh croissants and baguettes, carefully using his favorite oven and knife to prepare dough that was rich in flour and butter.
His friend Jan, who had a mild gluten allergy but a good heart, still visited the bakery every weekend, walking through the quiet parts of the city to enjoy the smell of freshly baked bread.
Later that evening, Adam flew to Mars to sell his pastries to astronauts in a floating space café above Olympus Mons."


"""}

# when
result = chunk(state)

result

{'chunks': ['Adam, a baker living in Paris, spent the early morning baking fresh croissants and baguettes, carefully using his favorite oven and knife to prepare dough that was rich in flour and butter',
  'His friend Jan, who had a mild gluten allergy but a good heart, still visited the bakery every weekend, walking through the quiet parts of the city to enjoy the smell of freshly baked bread',
  'Later that evening, Adam flew to Mars to sell his pastries to astronauts in a floating space café above Olympus Mons',
  '"']}

In [38]:
    
def get_triples(ontology_summary, chunk):    
    client = OpenAI()

    class Triple(BaseModel):
        subj: str
        pred: str
        obj: str

    class Output(BaseModel):
        output: list[Triple]

    response = client.responses.parse(
        model="gpt-5-mini",
        reasoning={"effort": "medium"},
        instructions="""Look in the context that provides the classes, object types, data types and instances of an ontology. Then scan the story.
        Which classes, object types, data types and instances you encounter in the story that are also present in the ontology_context?
        ONLY use as predicates the ones that are present in the ontology_context.""",
        input=f"ontology_context: {ontology_summary}, story: {chunk}",
        text_format=Output
    )

    data = json.loads(response.output_text)

    # Make a Python dict so it can be easily converted to JSON elsewhere
    triples = data["output"]

    return triples

# Extract Node

This node is extracting triples from each separate chunk.

In [39]:
def extract(state: InbetweenState)-> InbetweenState:
    """Extract relevant information from the story."""
    ontology_summary = get_ontology_summary()

    extracts_dict = {}
    for i,chunk in enumerate(state['chunks']):
        print(f"Extracting triples from chunk {i}")
        print(f"Chunk: {chunk}")
        triples = get_triples(ontology_summary, chunk)
        print(f"Triples: {triples}")
        extracts_dict[f'chunk_{i}'] = triples

    return {"extract": extracts_dict}

In [40]:
state = {'chunks': ['Adam, a baker living in Paris, spent the early morning baking fresh croissants and baguettes, carefully using his favorite oven and knife to prepare dough that was rich in flour and butter',
  'His friend Jan, who had a mild gluten allergy but a good heart, still visited the bakery every weekend, walking through the quiet parts of the city to enjoy the smell of freshly baked bread',
  'Later that evening, Adam flew to Mars to sell his pastries to astronauts in a floating space café above Olympus Mons',
  '"']}
result = extract(state)
print("\n--- Extract result ---")
print(json.dumps(result, indent=2))

Extracting triples from chunk 0
Chunk: Adam, a baker living in Paris, spent the early morning baking fresh croissants and baguettes, carefully using his favorite oven and knife to prepare dough that was rich in flour and butter
Triples: [{'subj': 'Adam', 'pred': 'hasOccupation', 'obj': 'Baker'}, {'subj': 'Baker', 'pred': 'requiresTool', 'obj': 'Oven'}, {'subj': 'Baker', 'pred': 'requiresTool', 'obj': 'Knife'}, {'subj': 'Croissant', 'pred': 'isCookedAs', 'obj': 'Baking'}, {'subj': 'Baguette', 'pred': 'isCookedAs', 'obj': 'Baking'}, {'subj': 'Paris', 'pred': 'famousForFood', 'obj': 'Croissant'}, {'subj': 'Paris', 'pred': 'famousForFood', 'obj': 'Baguette'}, {'subj': 'Croissant', 'pred': 'containsIngredient', 'obj': 'Flour'}, {'subj': 'Baguette', 'pred': 'containsIngredient', 'obj': 'Flour'}, {'subj': 'Adam', 'pred': 'hasName', 'obj': 'Adam'}]
Extracting triples from chunk 1
Chunk: His friend Jan, who had a mild gluten allergy but a good heart, still visited the bakery every weekend, walk

# Check for conflicts

## helper functions

This helper function makes a simple Sparql query that will check whether the subject of the triple is really related to the object by the predicate.

In [48]:
def dict_to_sparql(triple_dict, ontology_base_iri):
    """
    Convert a dictionary like {'subj': 'Jorryt', 'pred': 'rdf:type', 'obj': 'Person'}
    into a SPARQL query where the subject is replaced by a variable (?subject).
    """
    prefix = f"PREFIX : <{ontology_base_iri}>\n" \
             "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"

    pred = triple_dict["pred"]
    obj = triple_dict["obj"]

    # If the predicate starts with "rdf:", keep it like that
    # Otherwise prefix it with ":"
    pred_prefix = pred if pred.startswith("rdf:") else f":{pred}"
    obj_prefix = obj if obj.startswith("rdf:") else f":{obj}"

    query = prefix + f"""
SELECT DISTINCT ?subject WHERE {{
  ?subject {pred_prefix} {obj_prefix} .
}}
"""
    return query



In [49]:
from textwrap import dedent


def test_dict_to_sparql_basic():
    triple = {"subj": "Jorryt", "pred": "rdf:type", "obj": "Person"}
    base_iri = "IA_ontology.owl"

    result = dict_to_sparql(triple, base_iri)
    print(result)



'''expected:
    PREFIX : <IA_ontology.owl>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

    SELECT DISTINCT ?subject WHERE {
      ?subject rdf:type :Person .
    }
""")'''

test_dict_to_sparql_basic()

PREFIX : <IA_ontology.owl>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT DISTINCT ?subject WHERE {
  ?subject rdf:type :Person .
}



This function prints the outcomes of the SPARQL query for the triple in a new dictionary key-item pair.

In [50]:
def check_triples_against_ontology(triples, onto):

    for triple in triples:
        query = dict_to_sparql(triple, onto.base_iri)
        # print(f"Query:\n{query}")
        try:
            rows = list(default_world.sparql(query))
            sparql_output_list = []
            if rows:
                # print("Output:")
                for row in rows:
                    for x in row:
                        sparql_output_list.append(x.name)
            triple["sparql_output"] = sparql_output_list
        except Exception as e:
            pass
        #        print("-" * 40)
    return triples

triples = [
    {
      "subj": "Amelia",
      "pred": "rdf:type",
      "obj": "Person"
    }
]

results = check_triples_against_ontology(triples, onto)
results

[{'subj': 'Amelia',
  'pred': 'rdf:type',
  'obj': 'Person',
  'sparql_output': ['Adam', 'Daan', 'Jan']}]

This helper function will check whether the subject is in the output of the SPARQL query.

In [51]:
def scan_unvalid_triples(triples):
    invalid_triples = []
    for triple in triples:
        if triple['sparql_output']:
            if triple["subj"] in triple["sparql_output"]:
                triple["valid"] = True
            else:
                triple["valid"] = False
                invalid_triples.append(triple)
    return invalid_triples


triples = [{'subj': 'Amelia',
  'pred': 'rdf:type',
  'obj': 'Person',
  'sparql_output': ['Adam',
   'Daan',
   'Jan',
   'Petra',
   'Elizabeth',
   'Jasmin',
   'Jennifer']}]
scan_unvalid_triples(triples)

[{'subj': 'Amelia',
  'pred': 'rdf:type',
  'obj': 'Person',
  'sparql_output': ['Adam',
   'Daan',
   'Jan',
   'Petra',
   'Elizabeth',
   'Jasmin',
   'Jennifer'],
  'valid': False}]

This LLM-driven function checks the inconsistency. And, with help of the ontology in the context, makes the conflicts clearer and gives explanation and suggestion for resolution.

In [52]:
from pydantic import BaseModel, Field

def get_conflicts(ontology_summary, invalid_triples_str, valid_triples_str):

    client = OpenAI()

    class Conflict(BaseModel):
        conflict: str = Field(description="The conflict that is found in the text chunk.")
        explanation: str = Field(description="The explanation for the conflict.")
        resolution: str = Field(description="The resolution for the conflict.")

    class Output(BaseModel):
        output: list[Conflict]

    response = client.responses.parse(
        model="gpt-5-nano",
        reasoning={"effort": "low"},
        instructions="""

        -

        """,
        input=f"""    We have seen for this text chunk the following inconsistencies with the ontology.

        chunk: {chunk}

        inconsistencies: {invalid_triples_str}

        consistent triples: {valid_triples_str}

        Please look at the ontology report and confirm or reject the inconsistencies.
        ontology_context: {ontology_summary}""",
        text_format=Output
    )

    data = json.loads(response.output_text)
    return data

# ontology_summary = get_ontology_summary()
# unvalid_triples = scan_unvalid_triples(results)
# invalid_triples_str = ""
# for triple in unvalid_triples:
#   invalid_triples_str += f"sub: {triple['subj']}, pred: {triple['pred']}, obj: {triple['obj']}\n"



# # take the valid triples
# valid_triples = [triple for triple in results if triple['valid']]
# conflicts = get_conflicts(ontology_summary, invalid_triples_str)
# conflicts


In [57]:
def check_conflicts(state: InbetweenState) -> InbetweenState:
    """Check for inconsistencies in the story."""
    onto = get_ontology("IA_ontology.owl").load()

    conflicts = {}

    for chunk_name, chunk_triples in result['extract'].items():
    
      results = check_triples_against_ontology(chunk_triples, onto)
      unvalid_triples = scan_unvalid_triples(results)
      invalid_triples_str = ""
      # make a string of the invalid triples
      for triple in unvalid_triples:
        invalid_triples_str += f"sub: {triple['subj']}, pred: {triple['pred']}, obj: {triple['obj']}\n"

      for triple in results:
        print(f"triple: {triple}")


      # make a string of the valid triples
      '''valid_triples = [triple for triple in results if triple['valid']]
      valid_triples_str = ""
      for triple in valid_triples:
        valid_triples_str += f"sub: {triple['subj']}, pred: {triple['pred']}, obj: {triple['obj']}\n"'''


      ontology_summary = get_ontology_summary()
      conflics_for_chunk = get_conflicts(ontology_summary, invalid_triples_str, valid_triples_str)
      conflicts[chunk_name] = conflics_for_chunk

    
    return {"conflicts": conflicts}



In [58]:
state = {
  "extract": {
    "chunk_0": [
      {
        "subj": "Adam",
        "pred": "hasOccupation",
        "obj": "Baker"
      },
      {
        "subj": "Baker",
        "pred": "requiresTool",
        "obj": "Oven"
      },
      {
        "subj": "Baker",
        "pred": "requiresTool",
        "obj": "Knife"
      },
      {
        "subj": "Croissant",
        "pred": "isCookedAs",
        "obj": "Baking"
      },
      {
        "subj": "Baguette",
        "pred": "isCookedAs",
        "obj": "Baking"
      },
      {
        "subj": "Paris",
        "pred": "famousForFood",
        "obj": "Croissant"
      },
      {
        "subj": "Paris",
        "pred": "famousForFood",
        "obj": "Baguette"
      },
      {
        "subj": "Croissant",
        "pred": "containsIngredient",
        "obj": "Flour"
      },
      {
        "subj": "Baguette",
        "pred": "containsIngredient",
        "obj": "Flour"
      },
      {
        "subj": "Adam",
        "pred": "hasName",
        "obj": "Adam"
      }
    ],
    "chunk_1": [
      {
        "subj": "Jan",
        "pred": "hasAllergy",
        "obj": "GlutenAllergy"
      },
      {
        "subj": "Jan",
        "pred": "hasTrait",
        "obj": "GoodHearted"
      },
      {
        "subj": "Jan",
        "pred": "isFriendOf",
        "obj": "Petra"
      },
      {
        "subj": "Walking",
        "pred": "usedBy",
        "obj": "Jan"
      },
      {
        "subj": "Jan",
        "pred": "attends",
        "obj": "FoodAcquiring"
      },
      {
        "subj": "QuietCity",
        "pred": "isQuiet",
        "obj": "Paris"
      }
    ],
    "chunk_2": [
      {
        "subj": "Adam",
        "pred": "hasName",
        "obj": "Adam"
      },
      {
        "subj": "Petra",
        "pred": "hasName",
        "obj": "Petra"
      }
    ],
    "chunk_3": []
  }
}

check_conflicts(state)


triple: {'subj': 'Adam', 'pred': 'hasOccupation', 'obj': 'Baker', 'sparql_output': []}
triple: {'subj': 'Baker', 'pred': 'requiresTool', 'obj': 'Oven', 'sparql_output': []}
triple: {'subj': 'Baker', 'pred': 'requiresTool', 'obj': 'Knife', 'sparql_output': []}
triple: {'subj': 'Croissant', 'pred': 'isCookedAs', 'obj': 'Baking', 'sparql_output': []}
triple: {'subj': 'Baguette', 'pred': 'isCookedAs', 'obj': 'Baking', 'sparql_output': []}
triple: {'subj': 'Paris', 'pred': 'famousForFood', 'obj': 'Croissant', 'sparql_output': []}
triple: {'subj': 'Paris', 'pred': 'famousForFood', 'obj': 'Baguette', 'sparql_output': []}
triple: {'subj': 'Croissant', 'pred': 'containsIngredient', 'obj': 'Flour', 'sparql_output': []}
triple: {'subj': 'Baguette', 'pred': 'containsIngredient', 'obj': 'Flour', 'sparql_output': []}
triple: {'subj': 'Adam', 'pred': 'hasName', 'obj': 'Adam', 'sparql_output': []}


NameError: name 'valid_triples_str' is not defined

# Rewrite

