In [None]:
!pip install owlready2

# Graphrag Ontology Integration Demo

This demonstration shows how graphrag works both with and without ontology integration, highlighting the differences, benefits, and practical applications of using ontological knowledge in a knowledge graph system.

## Overview

In this demo, we'll:
1. Process data without ontology integration
2. Process the same data with ontology integration
3. Compare search results between the two approaches
4. Visualize the differences in knowledge graphs

## 1. Setup and Environment



First, let's set up our environment with the necessary imports:

In [16]:
import os
import asyncio
import pathlib
from typing import List

# Import Cognee utilities
from utils import (
setup_logging, 
visualize_graph,
get_datasets,
get_dataset_data,
prune_data,
prune_system,
add,
search,
SearchType,
get_default_user,
KnowledgeGraph,
add_data_points
)

# Import the ontology handling functions
from ontology_demo import (
owl_testing_pipeline,
owl_ontology_merging_layer
)

import logging
setup_logging(logging.INFO)
from cognee.tasks.graph import extract_graph_from_data




## 2. Data Preparation

We'll use the same test data for both approaches:

In [17]:
async def prepare_data():
    # Clean previous data
    await prune_data()
    await prune_system(metadata=True)
    
    # Add test data - the path should point to your data files
    current_dir = os.getcwd()
    parent_dir = os.path.dirname(current_dir)
    file_path = os.path.join(parent_dir, "ontology_test_input")
    # file_path = os.path.join(
    #     os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)),
    #     "ontology_test_input"
    # )
    await add(file_path)
    
    print("Data prepared successfully")


## 3. Standard Knowledge Graph Processing (Without Ontology)



Let's process our data using the standard graphrag pipeline without ontology integration:

In [18]:
async def process_without_ontology():
    # Get the dataset to process
    user = await get_default_user()
    datasets = await get_datasets(user.id)
    
    if not datasets:
        print("No datasets found!")
        return
    
    # Use the standard pipeline
    from utils import (
        run_tasks,
        Task,
        classify_documents,
        check_permissions_on_documents,
        extract_chunks_from_documents,
        extract_content_graph,
        get_max_chunk_tokens
    )
    
    for dataset in datasets:
        data_documents = await get_dataset_data(dataset_id=dataset.id)

        
        tasks = [
            Task(classify_documents),
            Task(check_permissions_on_documents, user=user, permissions=["write"]),
            Task(extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()),
            Task(
                extract_graph_from_data, graph_model=KnowledgeGraph, task_config={"batch_size": 10}
            ),  # Generate knowledge graphs from the document chunks.
            Task(add_data_points, task_config={"batch_size": 10}),
        ]
        
        
        pipeline_run = run_tasks(tasks, dataset.id, data_documents, "standard_pipeline")
        
        async for run_status in pipeline_run:
            print(run_status)
    
    # Save graph visualization
    notebook_dir = pathlib.Path.cwd()
    output_dir = notebook_dir / ".artifacts"
    os.makedirs(output_dir, exist_ok=True)
    
    standard_graph_path = (output_dir / "standard_graph_visualization.html").resolve()
    await visualize_graph(str(standard_graph_path))
    
    print(f"Standard graph saved to: {standard_graph_path}")
    return standard_graph_path

In [19]:
import pathlib
import os
from utils  import visualize_graph

# Use the current working directory instead of __file__:
notebook_dir = pathlib.Path.cwd()

graph_file_path = (notebook_dir / ".artifacts" / "graph_visualization.html").resolve()

# Make sure to convert to string if visualize_graph expects a string
b = await visualize_graph(str(graph_file_path))

2025-02-25 22:13:51,836 - INFO - Graph visualization saved as /Users/vasilije/cognee/cognee/ontology_testing_SANDBOX/Ontology_demo/.artifacts/graph_visualization.html
2025-02-25 22:13:51,836 - INFO - The HTML file has been stored at path: /Users/vasilije/cognee/cognee/ontology_testing_SANDBOX/Ontology_demo/.artifacts/graph_visualization.html


## 4. Ontology-Enhanced Processing


Now, let's process the same data with ontology integration:

In [20]:

async def process_with_ontology():
    # This uses the owl_testing_pipeline from ontology_demo.py
    await owl_testing_pipeline()
    
    # Save graph visualization
    notebook_dir = pathlib.Path.cwd()
    output_dir = notebook_dir / ".artifacts"
    os.makedirs(output_dir, exist_ok=True)
    
    ontology_graph_path = (output_dir / "ontology_graph_visualization.html").resolve()
    await visualize_graph(str(ontology_graph_path))
    
    print(f"Ontology-enhanced graph saved to: {ontology_graph_path}")
    return ontology_graph_path

## 5. Comparing Search Results


Let's execute some queries to compare the results:

In [21]:

async def compare_search_results():
    # Sample queries for testing
    queries = [
        "What are the exact cars produced by Audi and what are their types?",
        "What features do luxury cars have?",
        "Tell me about vehicle manufacturers and their relationships"
    ]
    
    print("==== STANDARD KNOWLEDGE GRAPH SEARCH RESULTS ====")
    # First, search using the standard graph
    await prune_data(keep_dataset=True)  # Keep dataset but remove processing results
    await process_without_ontology()
    
    for query in queries:
        print(f"\nQuery: {query}")
        results = await search(query_type=SearchType.GRAPH_COMPLETION, query_text=query)
        print("Results:")
        for i, result in enumerate(results[:3]):
            print(f"{i+1}. {result}")
    
    print("\n==== ONTOLOGY-ENHANCED KNOWLEDGE GRAPH SEARCH RESULTS ====")
    # Now, search using the ontology-enhanced graph
    await prune_data(keep_dataset=True)  # Keep dataset but remove processing results
    await process_with_ontology()
    
    for query in queries:
        print(f"\nQuery: {query}")
        results = await search(query_type=SearchType.GRAPH_COMPLETION, query_text=query)
        print("Results:")
        for i, result in enumerate(results[:3]):
            print(f"{i+1}. {result}")



## 6. Key Differences and Benefits

### Without Ontology:
- **Knowledge is limited to extracted information**: Only relationships and entities explicitly mentioned in the text are captured
- **No hierarchical understanding**: Lacks class/subclass relationships unless explicitly stated
- **Missing implicit connections**: Cannot infer relationships that weren't explicitly stated
- **Domain knowledge is limited**: No external domain knowledge beyond the processed content

### With Ontology:
- **Enhanced semantic understanding**: Integration with domain ontologies provides richer semantic context
- **Hierarchical relationships**: Class/subclass relationships from the ontology enrich the graph
- **Inference capabilities**: Can infer relationships based on ontological axioms
- **Domain knowledge enrichment**: External knowledge from the ontology supplements extracted information
- **Standardized terminology**: Entities are mapped to standardized ontology concepts
- **Better query answering**: More comprehensive answers due to extended knowledge

## 7. Visualizations and Metrics

Here are some key metrics to observe in the visualizations:

1. **Node count**: The ontology-enhanced graph typically has more nodes
2. **Edge density**: More connections between nodes in the ontology version
3. **Clustering coefficient**: Often higher in the ontology version due to richer relationships
4. **Average path length**: May be shorter in the ontology version due to additional connections
5. **Connected components**: The ontology version usually has fewer isolated subgraphs

## 8. Running the Demo

Execute the following to run the complete demo:



In [22]:


print("Starting Ontology Comparison Demo")
await prepare_data()

print("\nProcessing without ontology...")
standard_graph = await process_without_ontology()

print("\nProcessing with ontology...")
ontology_graph = await process_with_ontology()

print("\nComparing search results...")
await compare_search_results()

print("\nDemo completed!")
print(f"Standard graph visualization: {standard_graph}")
print(f"Ontology graph visualization: {ontology_graph}")




Starting Ontology Comparison Demo
2025-02-25 22:14:07,451 - INFO - Graph deleted successfully.
2025-02-25 22:14:07,456 - INFO - Database deleted successfully.
User d4017493-773a-447e-b66f-9b186b6e4301 has registered.
<cognee.modules.pipelines.models.PipelineRun.PipelineRun object at 0x121a69390>
2025-02-25 22:14:07,532 - INFO - Pipeline run started: `4b84e400-23fc-5976-bbb4-f8ee303eed81`
2025-02-25 22:14:07,533 - INFO - Coroutine task started: `resolve_data_directories`
2025-02-25 22:14:07,533 - INFO - Coroutine task started: `ingest_data`
2025-02-25 22:14:07,761 - INFO - Coroutine task completed: `ingest_data`
2025-02-25 22:14:07,761 - INFO - Coroutine task completed: `resolve_data_directories`
2025-02-25 22:14:07,761 - INFO - Pipeline run completed: `4b84e400-23fc-5976-bbb4-f8ee303eed81`
<cognee.modules.pipelines.models.PipelineRun.PipelineRun object at 0x121780650>
Data prepared successfully

Processing without ontology...
<cognee.modules.pipelines.models.PipelineRun.PipelineRun obj

[92m22:14:19 - LiteLLM:INFO[0m: utils.py:2784 - 
LiteLLM completion() model= gemini-1.5-flash; provider = gemini

2025-02-25 22:14:19,678 - INFO - 
LiteLLM completion() model= gemini-1.5-flash; provider = gemini


[92m22:14:19 - LiteLLM:INFO[0m: utils.py:2784 - 
LiteLLM completion() model= gemini-1.5-flash; provider = gemini

2025-02-25 22:14:19,681 - INFO - 
LiteLLM completion() model= gemini-1.5-flash; provider = gemini
2025-02-25 22:14:24,787 - INFO - Coroutine task started: `add_data_points`
2025-02-25 22:14:26,201 - INFO - Coroutine task completed: `add_data_points`
2025-02-25 22:14:26,201 - INFO - Coroutine task completed: `extract_graph_from_data`
2025-02-25 22:14:26,202 - INFO - Async generator task completed: `extract_chunks_from_documents`
2025-02-25 22:14:26,202 - INFO - Coroutine task completed: `check_permissions_on_documents`
2025-02-25 22:14:26,202 - INFO - Coroutine task completed: `classify_documents`
2025-02-25 22:14:26,202 - INFO - Pipeline run completed: `ea9bd1d8-7bd9-5908-a88b-1192a13ed265`
<cognee.modules.pipelines.models.PipelineRun.PipelineRun object at 0x121f21c90>
2025-02-25 22:14:26,211 - INFO - Graph visualization saved as /Users/vasilije/cognee/cognee/ontology_testing_SANDBOX/Ontology_demo/.artifacts/standard_graph_visualization.html
2025-02-25 22:14:26,211 - INFO - The HTML fi

[92m22:14:37 - LiteLLM:INFO[0m: utils.py:2784 - 
LiteLLM completion() model= gemini-1.5-flash; provider = gemini

2025-02-25 22:14:37,864 - INFO - 
LiteLLM completion() model= gemini-1.5-flash; provider = gemini


[92m22:14:37 - LiteLLM:INFO[0m: utils.py:2784 - 
LiteLLM completion() model= gemini-1.5-flash; provider = gemini

2025-02-25 22:14:37,866 - INFO - 
LiteLLM completion() model= gemini-1.5-flash; provider = gemini
2025-02-25 22:14:49,526 - INFO - Coroutine task completed: `owl_ontology_merging_layer`
2025-02-25 22:14:49,526 - INFO - Async generator task completed: `extract_chunks_from_documents`
2025-02-25 22:14:49,526 - INFO - Coroutine task completed: `check_permissions_on_documents`
2025-02-25 22:14:49,526 - INFO - Coroutine task completed: `classify_documents`
2025-02-25 22:14:49,527 - INFO - Pipeline run completed: `af81ab41-8243-522f-a10a-b7b5febcc577`
<cognee.modules.pipelines.models.PipelineRun.PipelineRun object at 0x1228a0d50>
The query is What are the exact cars produced by Audi and what are their types?:


[92m22:14:50 - LiteLLM:INFO[0m: utils.py:2784 - 
LiteLLM completion() model= gemini-1.5-flash; provider = gemini

2025-02-25 22:14:50,329 - INFO - 
LiteLLM completion() model= gemini-1.5-flash; provider = gemini
{"Audi_cars": ["Audi_etron", "Audi_r8", "Audi_a8"]}
2025-02-25 22:14:50,914 - INFO - Graph visualization saved as /Users/vasilije/cognee/cognee/ontology_testing_SANDBOX/Ontology_demo/.artifacts/ontology_graph_visualization.html
2025-02-25 22:14:50,915 - INFO - The HTML file has been stored at path: /Users/vasilije/cognee/cognee/ontology_testing_SANDBOX/Ontology_demo/.artifacts/ontology_graph_visualization.html
Ontology-enhanced graph saved to: /Users/vasilije/cognee/cognee/ontology_testing_SANDBOX/Ontology_demo/.artifacts/ontology_graph_visualization.html

Comparing search results...
==== STANDARD KNOWLEDGE GRAPH SEARCH RESULTS ====


TypeError: prune_data() got an unexpected keyword argument 'keep_dataset'

* 'fields' has been removed
  from .autonotebook import tqdm as notebook_tqdm
INFO:cognee.modules.visualization.cognee_network_visualization:Graph visualization saved as /Users/vasilije/cognee/cognee/ontology_testing_SANDBOX/Ontology_demo/.artifacts/graph_visualization.htmlINFO:root:The HTML file has been stored at path: /Users/vasilije/cognee/cognee/ontology_testing_SANDBOX/Ontology_demo/.artifacts/graph_visualization.html

/Users/vasilije/cognee/cognee/ontology_testing_SANDBOX/Ontology_demo
