## Cognee GraphRAG with Multimedia files

## Load Data

We will use a few sample multimedia files (an MP3 audio clip and a PNG image) that are kept on GitHub for easy access.

In [1]:
import os
import pathlib

# cognee builds its knowledge graph from the text and description
# extracted from these sample files.
# os.path.abspath("") resolves to the current working directory; the sample
# data lives one level above it.
notebook_dir = os.path.abspath("")
multimedia_dir = "examples/data/multimedia"

mp3_file_path = os.path.join(notebook_dir, "../", f"{multimedia_dir}/text_to_speech.mp3")
png_file_path = os.path.join(notebook_dir, "../", f"{multimedia_dir}/example.png")

## Set environment variables

In [2]:
import os

# Make sure LLM_API_KEY exists in the environment: a value that is already
# set is kept, otherwise it is initialised to an empty string.
os.environ.setdefault("LLM_API_KEY", "")

os.environ.update(
    {
        # Graph store backend: "kuzu" (used here), "neo4j" or "networkx"
        "GRAPH_DATABASE_PROVIDER": "kuzu",
        # Vector store backend: "pgvector", "qdrant", "weaviate" or "lancedb"
        "VECTOR_DB_PROVIDER": "lancedb",
        # Relational database backend: "sqlite" or "postgres"
        "DB_PROVIDER": "sqlite",
        # Name of the relational database
        "DB_NAME": "cognee_db",
    }
)

# Graph database connection settings.
# Not needed if using networkx; also left unset for the kuzu store used here.
# os.environ["GRAPH_DATABASE_URL"]=""
# os.environ["GRAPH_DATABASE_USERNAME"]=""
# os.environ["GRAPH_DATABASE_PASSWORD"]=""

# Vector database connection settings.
# Not needed if using "lancedb" or "pgvector"
# os.environ["VECTOR_DB_URL"]=""
# os.environ["VECTOR_DB_KEY"]=""

# Postgres specific parameters (only if Postgres or PGVector is used)
# os.environ["DB_HOST"]="127.0.0.1"
# os.environ["DB_PORT"]="5432"
# os.environ["DB_USERNAME"]="cognee"
# os.environ["DB_PASSWORD"]="cognee"

## Run Cognee with multimedia files

In [4]:
import cognee

# Create a clean slate for cognee -- reset data and system state
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)

# Add multimedia files and make them available for cognify
await cognee.add([mp3_file_path, png_file_path])

# Create knowledge graph with cognee
await cognee.cognify()


[2m2025-08-27T13:21:47.304571[0m [[32m[1minfo     [0m] [1mDeleted Kuzu database files at /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases/cognee_graph_kuzu[0m [[0m[1m[34mcognee.shared.logging_utils[0m][0m

[2m2025-08-27T13:21:47.739751[0m [[32m[1minfo     [0m] [1mDatabase deleted successfully.[0m [[0m[1m[34mcognee.shared.logging_utils[0m][0m

[1mLangfuse client is disabled since no public_key was provided as a parameter or environment variable 'LANGFUSE_PUBLIC_KEY'. See our docs: https://langfuse.com/docs/sdk/python/low-level-sdk#initialize-client[0m


User f5c66ce8-859b-44d4-941a-df6eee1f1d2a has registered.


[92m14:21:48 - LiteLLM:INFO[0m: utils.py:3341 - 
LiteLLM completion() model= gpt-5-mini; provider = openai

[1m
LiteLLM completion() model= gpt-5-mini; provider = openai[0m

[1mEmbeddingRateLimiter initialized: enabled=False, requests_limit=60, interval_seconds=60[0m

[2m2025-08-27T13:21:54.231053[0m [[32m[1minfo     [0m] [1mPipeline run started: `bb1e12db-8d3f-5e80-8615-2444eda4b32a`[0m [[0m[1m[34mrun_tasks_with_telemetry()[0m][0m

[2m2025-08-27T13:21:54.377156[0m [[32m[1minfo     [0m] [1mCoroutine task started: `resolve_data_directories`[0m [[0m[1m[34mrun_tasks_base[0m][0m

[2m2025-08-27T13:21:54.527056[0m [[32m[1minfo     [0m] [1mCoroutine task started: `ingest_data`[0m [[0m[1m[34mrun_tasks_base[0m][0m

[2m2025-08-27T13:21:54.687437[0m [[32m[1minfo     [0m] [1mRegistered loader: pypdf_loader[0m [[0m[1m[34mcognee.infrastructure.loaders.LoaderEngine[0m][0m

[2m2025-08-27T13:21:54.687879[0m [[32m[1minfo     [0m] [1mRegistered 

{UUID('241b64d6-f023-5b87-9a8f-87056f0a442c'): PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('1cde937f-ae7a-5151-a20c-dc3567bee0a9'), dataset_id=UUID('241b64d6-f023-5b87-9a8f-87056f0a442c'), dataset_name='main_dataset', payload=None, data_ingestion_info=[{'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('1cde937f-ae7a-5151-a20c-dc3567bee0a9'), dataset_id=UUID('241b64d6-f023-5b87-9a8f-87056f0a442c'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('692741cd-46e5-5988-85e9-f3901d104b7e')}, {'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('1cde937f-ae7a-5151-a20c-dc3567bee0a9'), dataset_id=UUID('241b64d6-f023-5b87-9a8f-87056f0a442c'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('899de74a-1bef-5afd-a478-1ea944503514')}])}

## Query Cognee for summaries related to multimedia files

In [5]:
from cognee.api.v1.search import SearchType

# Query cognee for summaries of the data in the multimedia files
search_results = await cognee.search(
    query_type=SearchType.SUMMARIES,
    query_text="What is in the multimedia files?",
)

# Display search results
for result_text in search_results:
    print(result_text)


[2m2025-08-27T13:23:56.768437[0m [[32m[1minfo     [0m] [1mStarting completion generation for query: 'What is in the multimedia files?'[0m [[0m[1m[34mSummariesRetriever[0m][0m

[2m2025-08-27T13:23:56.769790[0m [[32m[1minfo     [0m] [1mStarting summary retrieval for query: 'What is in the multimedia files?'[0m [[0m[1m[34mSummariesRetriever[0m][0m

[2m2025-08-27T13:23:57.168012[0m [[32m[1minfo     [0m] [1mFound 2 summaries from vector search[0m [[0m[1m[34mSummariesRetriever[0m][0m

[2m2025-08-27T13:23:57.168772[0m [[32m[1minfo     [0m] [1mReturning 2 summary payloads  [0m [[0m[1m[34mSummariesRetriever[0m][0m

[2m2025-08-27T13:23:57.169214[0m [[32m[1minfo     [0m] [1mReturning context with 2 item(s)[0m [[0m[1m[34mSummariesRetriever[0m][0m


{'id': '686b9e03-4505-56ec-9295-94c53d4db004', 'created_at': 1756301013713, 'updated_at': 1756301013713, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': "Programmer light-bulb joke: none — it's a hardware problem."}
{'id': '82343440-bd02-5149-ad3f-80289121146c', 'created_at': 1756300957894, 'updated_at': 1756300957894, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': "Programmers don't change light bulbs — that's a hardware problem."}


In [None]:
# Hard-stop the kernel process at the end of the notebook.
# os._exit() terminates the process immediately with the given status and
# skips the interpreter's normal cleanup (atexit handlers, buffer flushing),
# so this also ends an interactive session's kernel.
# NOTE(review): presumably added so automated runs of this notebook do not
# hang on leftover background resources -- confirm before removing.
import os
os._exit(0)

: 