# LightRAG Quick Start Jupyter Notebook

This notebook demonstrates how to use LightRAG with your own data.

In [8]:
# !pip install "lightrag-hku"
%pip install -U faiss-cpu python-dotenv "lightrag-hku" neo4j sentence-transformers python-dotenv pytest faiss-cpu accelerate bitsandbytes

Collecting lightrag-hku
  Downloading lightrag_hku-1.4.9.2-py3-none-any.whl.metadata (80 kB)
Downloading lightrag_hku-1.4.9.2-py3-none-any.whl (3.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.7/3.7 MB[0m [31m44.9 kB/s[0m eta [36m0:00:00[0ma [36m0:00:04[0m
[?25hInstalling collected packages: lightrag-hku
  Attempting uninstall: lightrag-hku
    Found existing installation: lightrag-hku 1.4.9.1
    Uninstalling lightrag-hku-1.4.9.1:
      Successfully uninstalled lightrag-hku-1.4.9.1
Successfully installed lightrag-hku-1.4.9.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [9]:
%pip install -U bitsandbytes torch httpx


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [18]:
from ingestion import initialize_rag, index_file
from retrieve import run_async_query
from dotenv import load_dotenv
import asyncio
load_dotenv()



True

In [19]:
async def print_stream(stream):
    async for chunk in stream:
        print(chunk, end="", flush=True)


In [20]:
from lightrag import  QueryParam
import inspect
async def main(question: str, mode, data_path: str = "data/data.txt") -> None:
    """
    1. Initialize RAG
    2. Index file (open file, read file, chunking, stream each chunk to both vector store and knowldege graph) all being done by rag.ainsert().
    3. Run async queries
    """
    rag = await initialize_rag()
    
    # Test embedding function
    test_text = ["This is a test string for embedding."]
    embedding = await rag.embedding_func(test_text)
    embedding_dim = embedding.shape[1]
    print("\n=======================")
    print("Test embedding function")
    print("========================")
    print(f"Test dict: {test_text}")
    print(f"Detected embedding dimension: {embedding_dim}\n\n")    

    await index_file(rag, data_path) # this function wait here until all files be

    # Perform local search
    print("\n=====================")
    print("Query mode: local")
    print("=====================")
    resp = await rag.aquery(
            "What are the top themes in this data?",
            param=QueryParam(mode="local", stream=True),
        )
    if inspect.isasyncgen(resp):
            await print_stream(resp)
    else:
            print(resp)
    # run query
    resp_async = await run_async_query(rag, question, mode)
    print("\n===== Query Result =====\n")
    print(resp_async)

  

In [21]:
question = "Extract the Escrow Agent name and phone number?"
mode="hybrid"
asyncio.run(main(question=question, mode=mode))

INFO: [_] Created new empty graph fiel: ./rag_storage/graph_chunk_entity_relation.graphml
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './rag_storage/vdb_entities.json'} 0 data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './rag_storage/vdb_relationships.json'} 0 data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './rag_storage/vdb_chunks.json'} 0 data
INFO: Embedding func: 8 new workers initialized (Timeouts: Func: 30s, Worker: 60s, Health Check: 75s)
INFO: Reset 2 documents from PROCESSING/FAILED to PENDING status
INFO: Processing 2 document(s)
INFO: Extracting stage 1/2: data/data_2.txt
INFO: Processing d-id: doc-e922ef7ae031528373ad6f0a33b25d1d
INFO: Extracting stage 2/2: data/data_2.txt
INFO: Processing d-id: doc-9c4432771f62bbc9695b81325e5f4435



Test embedding function
Test dict: ['This is a test string for embedding.']
Detected embedding dimension: 768




ERROR: Traceback (most recent call last):
  File "/usr/local/lib/python3.11/site-packages/lightrag/utils.py", line 835, in wait_func
    return await future
           ^^^^^^^^^^^^
  File "/usr/local/Cellar/python@3.11/3.11.10/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/futures.py", line 287, in __await__
    yield self  # This tells Task to wait for completion.
    ^^^^^^^^^^
  File "/usr/local/Cellar/python@3.11/3.11.10/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/tasks.py", line 349, in __wakeup
    future.result()
  File "/usr/local/Cellar/python@3.11/3.11.10/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/futures.py", line 203, in result
    raise self._exception.with_traceback(self._exception_tb)
lightrag.utils.WorkerTimeoutError: Worker execution timeout after 60s

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/site-packages/lightra


Query mode: local


INFO: Query nodes: Analysis methods, Key findings, Trends, Patterns (top_k:40, cosine:0.2)
ERROR: Query failed: shapes (0,1024) and (768,) not aligned: 1024 (dim 1) != 768 (dim 0)
INFO: Query nodes: Name, Phone number (top_k:5, cosine:0.2)


None


ERROR: Query failed: shapes (0,1024) and (768,) not aligned: 1024 (dim 1) != 768 (dim 0)



===== Query Result =====

None
