## Cognee GraphRAG with Multimedia files

## Load Data

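We will use a few sample multimedia files (an MP3 audio clip and a PNG image) that are kept in the project's GitHub repository under `examples/data/multimedia/` for easy access. The next cell builds their paths relative to the notebook's working directory.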

In [1]:
import os
import pathlib

# Sample inputs: cognee builds its knowledge graph from the text and
# description extracted from these two files.
# Both paths share the same leading parts: the current working directory
# (os.path.abspath("")) followed by one step up to the parent directory.
_sample_root = (os.path.abspath(""), "../")

mp3_file_path = os.path.join(*_sample_root, "examples/data/multimedia/text_to_speech.mp3")
png_file_path = os.path.join(*_sample_root, "examples/data/multimedia/example.png")

## Set environment variables

In [2]:
import os

# Keep an LLM_API_KEY that is already exported; otherwise register an empty
# placeholder (replace it with your own key).
os.environ.setdefault("LLM_API_KEY", "")

os.environ.update(
    {
        # Graph database provider. This notebook runs on "kuzu"; the notebook
        # also names "neo4j" and "networkx" as options.
        "GRAPH_DATABASE_PROVIDER": "kuzu",
        # Vector database provider: "pgvector", "qdrant", "weaviate" or "lancedb"
        "VECTOR_DB_PROVIDER": "lancedb",
        # Relational database provider: "sqlite" or "postgres"
        "DB_PROVIDER": "sqlite",
        # Relational database name
        "DB_NAME": "cognee_db",
    }
)

# Optional connection settings -- uncomment the ones your providers require.

# Graph database connection (noted as not needed for "networkx";
# NOTE(review): presumably also not needed for "kuzu" -- confirm)
# os.environ["GRAPH_DATABASE_URL"]=""
# os.environ["GRAPH_DATABASE_USERNAME"]=""
# os.environ["GRAPH_DATABASE_PASSWORD"]=""

# Vector database connection (not needed if using "lancedb" or "pgvector")
# os.environ["VECTOR_DB_URL"]=""
# os.environ["VECTOR_DB_KEY"]=""

# Postgres specific parameters (only if Postgres or PGVector is used)
# os.environ["DB_HOST"]="127.0.0.1"
# os.environ["DB_PORT"]="5432"
# os.environ["DB_USERNAME"]="cognee"
# os.environ["DB_PASSWORD"]="cognee"

In [3]:
import cognee

# Report which cognee build this kernel imported
cognee_version = cognee.__version__
print(cognee_version)


[2m2025-10-22T17:58:21.914432[0m [[32m[1minfo     [0m] [1mDeleted old log file: /Users/daulet/Desktop/dev/cognee-claude/logs/2025-10-22_18-20-40.log[0m [[0m[1m[34mcognee.shared.logging_utils[0m][0m

[2m2025-10-22T17:58:22.759223[0m [[32m[1minfo     [0m] [1mLogging initialized           [0m [[0m[1m[34mcognee.shared.logging_utils[0m][0m [36mcognee_version[0m=[35m0.3.6-local[0m [36mdatabase_path[0m=[35m/Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases[0m [36mgraph_database_name[0m=[35m[0m [36mos_info[0m=[35m'Darwin 24.5.0 (Darwin Kernel Version 24.5.0: Tue Apr 22 19:54:43 PDT 2025; root:xnu-11417.121.6~2/RELEASE_ARM64_T8132)'[0m [36mpython_version[0m=[35m3.10.11[0m [36mrelational_config[0m=[35mcognee_db[0m [36mstructlog_version[0m=[35m25.4.0[0m [36mvector_config[0m=[35mlancedb[0m

[2m2025-10-22T17:58:22.759643[0m [[32m[1minfo     [0m] [1mDatabase storage: /Users/daulet/Desktop/dev/cognee-claude/cognee/.co

0.3.6-local


## Run Cognee with multimedia files

In [4]:
import cognee

# Create a clean slate for cognee -- reset data and system state
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)

# Add multimedia files and make them available for cognify
await cognee.add([mp3_file_path, png_file_path])

# Create knowledge graph with cognee
await cognee.cognify()


[2m2025-10-22T17:58:24.045051[0m [[32m[1minfo     [0m] [1mLoaded JSON extension         [0m [[0m[1m[34mcognee.shared.logging_utils[0m][0m

[2m2025-10-22T17:58:24.081025[0m [[32m[1minfo     [0m] [1mDeleted Kuzu database files at /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases/cognee_graph_kuzu[0m [[0m[1m[34mcognee.shared.logging_utils[0m][0m

[2m2025-10-22T17:58:26.937024[0m [[32m[1minfo     [0m] [1mDatabase deleted successfully.[0m [[0m[1m[34mcognee.shared.logging_utils[0m][0m

[1mStorage manager absolute path: /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_cache[0m

[1mDeleting cache...             [0m

[1m✓ Cache deleted successfully! [0m


User 5c6da0e1-4bda-4b32-a6e3-ca70b884fb9a has registered.



[2m2025-10-22T17:58:28.397580[0m [[32m[1minfo     [0m] [1mPipeline run started: `981301fd-9699-5cd2-9746-577c0076b844`[0m [[0m[1m[34mrun_tasks_with_telemetry()[0m][0m

[2m2025-10-22T17:58:28.398001[0m [[32m[1minfo     [0m] [1mCoroutine task started: `resolve_data_directories`[0m [[0m[1m[34mrun_tasks_base[0m][0m

[2m2025-10-22T17:58:28.398362[0m [[32m[1minfo     [0m] [1mCoroutine task started: `ingest_data`[0m [[0m[1m[34mrun_tasks_base[0m][0m

[2m2025-10-22T17:58:28.399412[0m [[32m[1minfo     [0m] [1mPipeline run started: `981301fd-9699-5cd2-9746-577c0076b844`[0m [[0m[1m[34mrun_tasks_with_telemetry()[0m][0m

[2m2025-10-22T17:58:28.399724[0m [[32m[1minfo     [0m] [1mCoroutine task started: `resolve_data_directories`[0m [[0m[1m[34mrun_tasks_base[0m][0m

[2m2025-10-22T17:58:28.400149[0m [[32m[1minfo     [0m] [1mCoroutine task started: `ingest_data`[0m [[0m[1m[34mrun_tasks_base[0m][0m

[2m2025-10-22T17:58:28.414674[0

{UUID('849137b0-173d-5a0f-9462-403398a3b1e2'): PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('8f4e8447-24c9-5d2a-afb2-f86256ca4f34'), dataset_id=UUID('849137b0-173d-5a0f-9462-403398a3b1e2'), dataset_name='main_dataset', payload=None, data_ingestion_info=[{'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('8f4e8447-24c9-5d2a-afb2-f86256ca4f34'), dataset_id=UUID('849137b0-173d-5a0f-9462-403398a3b1e2'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('cc1ec4a6-2621-5143-ad19-ae7703db040b')}, {'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('8f4e8447-24c9-5d2a-afb2-f86256ca4f34'), dataset_id=UUID('849137b0-173d-5a0f-9462-403398a3b1e2'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('f3d53fbe-2a29-57e4-9e55-d87a49890ecc')}])}

## Query Cognee for summaries related to multimedia files

In [5]:
from cognee.api.v1.search import SearchType

# Query cognee for summaries of the data in the multimedia files
search_results = await cognee.search(
    query_type=SearchType.SUMMARIES,
    query_text="What is in the multimedia files?",
)

# Display search results
for result_text in search_results:
    print(result_text)


[2m2025-10-22T17:58:43.213961[0m [[32m[1minfo     [0m] [1mStarting summary retrieval for query: 'What is in the multimedia files?'[0m [[0m[1m[34mSummariesRetriever[0m][0m

[2m2025-10-22T17:58:43.495466[0m [[32m[1minfo     [0m] [1mFound 2 summaries from vector search[0m [[0m[1m[34mSummariesRetriever[0m][0m

[2m2025-10-22T17:58:43.496119[0m [[32m[1minfo     [0m] [1mReturning 2 summary payloads  [0m [[0m[1m[34mSummariesRetriever[0m][0m

[2m2025-10-22T17:58:43.496456[0m [[32m[1minfo     [0m] [1mStarting completion generation for query: 'What is in the multimedia files?'[0m [[0m[1m[34mSummariesRetriever[0m][0m

[2m2025-10-22T17:58:43.496815[0m [[32m[1minfo     [0m] [1mReturning context with 2 item(s)[0m [[0m[1m[34mSummariesRetriever[0m][0m


{'id': 'b4da8f65-1ab7-5816-b6ca-c3b7e16d7ea9', 'created_at': 1761155918667, 'updated_at': 1761155918667, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': 'Changing a light bulb is a hardware issue for programmers.'}
{'id': '875f97da-6b05-52af-973d-54939a229a21', 'created_at': 1761155922404, 'updated_at': 1761155922404, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': 'How many coders are needed to replace a light bulb? Zero. That’s an issue for hardware.'}


In [None]:
# Terminate the kernel process to clean up resources, but only in interactive
# use: when the GITHUB_ACTIONS environment variable is set to a non-empty
# value the exit is skipped.
import os

running_in_github_actions = bool(os.environ.get("GITHUB_ACTIONS"))

if running_in_github_actions:
    print("Skipping kernel exit - running in GitHub Actions")
else:
    print("Exiting kernel to clean up resources...")
    # os._exit ends the process immediately with status 0
    os._exit(0)

: 