<a href="https://colab.research.google.com/github/nvv1d/Auto-GPT/blob/master/LightRAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

# ============================================================
# Cell 1: Install Dependencies
# ============================================================
!pip install lightrag-hku google-generativeai pypdf sentence-transformers

print("✅ Installation Complete.")



Collecting lightrag-hku
  Downloading lightrag_hku-1.4.9.8-py3-none-any.whl.metadata (83 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/83.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.8/83.8 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
Collecting pypdf
  Downloading pypdf-6.4.0-py3-none-any.whl.metadata (7.1 kB)
Collecting configparser (from lightrag-hku)
  Downloading configparser-7.2.0-py3-none-any.whl.metadata (5.5 kB)
Collecting json_repair (from lightrag-hku)
  Downloading json_repair-0.54.1-py3-none-any.whl.metadata (12 kB)
Collecting nano-vectordb (from lightrag-hku)
  Downloading nano_vectordb-0.0.4.3-py3-none-any.whl.metadata (3.7 kB)
Collecting pipmaster (from lightrag-hku)
  Downloading pipmaster-1.0.10-py3-none-any.whl.metadata (13 kB)
Collecting pypinyin (from lightrag-hku)
  Downloading pypinyin-0.55.0-py2.py3-none-any.whl.metadata (12 kB)
Collecting xlsxwriter>=3.1.0 (from lightrag

In [2]:
# ============================================================
# Cell 2: Mount Drive & Configure Storage
# ============================================================
from google.colab import drive
import os

# Mount Google Drive
drive.mount('/content/drive')

# Setup Directories
WORKING_DIR = "/content/drive/MyDrive/LightRAG_Storage"
INPUT_DOCS_DIR = "/content/drive/MyDrive/Research_Papers"

if not os.path.exists(WORKING_DIR):
    os.makedirs(WORKING_DIR)
    print(f"✅ Created storage directory: {WORKING_DIR}")

if not os.path.exists(INPUT_DOCS_DIR):
    os.makedirs(INPUT_DOCS_DIR)
    print(f"⚠️  Created input directory: {INPUT_DOCS_DIR}")
    print(f"📁 Please upload your PDFs/TXT files to '{INPUT_DOCS_DIR}' in Google Drive.")
else:
    print(f"✅ Storage connected. Reading papers from: {INPUT_DOCS_DIR}")


Mounted at /content/drive
✅ Storage connected. Reading papers from: /content/drive/MyDrive/Research_Papers


In [6]:
# ============================================================
# Cell 3: Configure LOCAL Models (No API needed!)
# ============================================================
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
from lightrag import LightRAG, QueryParam
from lightrag.utils import EmbeddingFunc
from lightrag.llm.hf import hf_model_complete
import numpy as np

print("🧹 Clearing GPU memory...")
torch.cuda.empty_cache()

# Model Selection - Choose based on your needs:

# OPTION 1: Qwen2.5-3B (RECOMMENDED - Best balance)
LLM_MODEL = "Qwen/Qwen2.5-3B-Instruct"  # ~4GB GPU, good quality

# OPTION 2: Qwen2.5-1.5B (Fastest, lighter)
# LLM_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"  # ~2GB GPU, acceptable quality

# OPTION 3: Qwen2.5-7B (Best quality, needs more memory)
# LLM_MODEL = "Qwen/Qwen2.5-7B-Instruct"  # ~8-9GB GPU, best quality

EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

print(f"⏳ Loading LLM: {LLM_MODEL}")
print("   (This takes 2-3 minutes on first run...)")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)

# Load embedding model
print(f"⏳ Loading embedding model: {EMBEDDING_MODEL}")
embed_model = SentenceTransformer(EMBEDDING_MODEL)

print("✅ Models loaded!")

# Custom embedding function for SentenceTransformer
async def local_embed(texts):
    """Async wrapper for SentenceTransformer embedding"""
    import asyncio
    loop = asyncio.get_event_loop()
    embeddings = await loop.run_in_executor(
        None,
        lambda: embed_model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
    )
    return embeddings

# Initialize LightRAG with local models
print("⏳ Initializing LightRAG...")
rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=hf_model_complete,
    llm_model_name=LLM_MODEL,
    llm_model_max_async=1,  # Single worker for stability
    llm_model_kwargs={
        "device_map": "auto",
        "torch_dtype": torch.float16,  # Use float16 without quantization
        "max_new_tokens": 512,
        "do_sample": False,  # Deterministic output
    },
    embedding_func=EmbeddingFunc(
        embedding_dim=384,
        max_token_size=512,
        func=local_embed  # Use our custom async wrapper
    ),
    # Optimized settings for smaller model
    chunk_token_size=800,
    chunk_overlap_token_size=100,
    entity_extract_max_gleaning=0,  # Skip refinement for speed
)

# Initialize storage
from lightrag.kg.shared_storage import initialize_pipeline_status

await rag.initialize_storages()
await initialize_pipeline_status()

print("\n" + "="*60)
print("✅ LightRAG System Online!")
print("="*60)
print(f"🖥️  LLM: {LLM_MODEL} (float16)")
print(f"🖥️  Embeddings: {EMBEDDING_MODEL} (local)")
print(f"💾 GPU Memory: ~{torch.cuda.memory_allocated()/1024**3:.1f}GB / 15GB")
print(f"⚡ All processing runs locally - no API limits!")
print("="*60)

🧹 Clearing GPU memory...
⏳ Loading LLM: Qwen/Qwen2.5-3B-Instruct
   (This takes 2-3 minutes on first run...)
⏳ Loading embedding model: sentence-transformers/all-MiniLM-L6-v2


INFO: [_] Loaded graph from /content/drive/MyDrive/LightRAG_Storage/graph_chunk_entity_relation.graphml with 0 nodes, 0 edges


✅ Models loaded!
⏳ Initializing LightRAG...

✅ LightRAG System Online!
🖥️  LLM: Qwen/Qwen2.5-3B-Instruct (float16)
🖥️  Embeddings: sentence-transformers/all-MiniLM-L6-v2 (local)
💾 GPU Memory: ~0.2GB / 15GB
⚡ All processing runs locally - no API limits!


In [7]:
# ============================================================
# Cell 4: Index Documents
# ============================================================
import glob
from pypdf import PdfReader

async def index_papers():
    """Index all documents in the INPUT_DOCS_DIR"""
    files = glob.glob(f"{INPUT_DOCS_DIR}/*.txt") + \
            glob.glob(f"{INPUT_DOCS_DIR}/*.pdf")

    if not files:
        print("❌ No files found! Please upload PDFs or TXT files to:")
        print(f"   {INPUT_DOCS_DIR}")
        return

    print(f"📚 Found {len(files)} documents")
    indexed_count = 0

    for idx, file_path in enumerate(files, 1):
        file_name = os.path.basename(file_path)
        print(f"[{idx}/{len(files)}] Processing: {file_name}...")

        try:
            content = ""
            if file_path.endswith(".pdf"):
                reader = PdfReader(file_path)
                for page in reader.pages:
                    text = page.extract_text()
                    if text:
                        content += text + "\n"
            else:
                with open(file_path, "r", encoding="utf-8", errors='ignore') as f:
                    content = f.read()

            if len(content) > 100:
                await rag.ainsert(content)
                indexed_count += 1
                print(f"  ✅ Indexed successfully")
            else:
                print(f"  ⚠️  Skipped (content too short)")

        except Exception as e:
            print(f"  ❌ Error: {e}")

    print(f"\n🎉 Indexing Complete! Processed {indexed_count}/{len(files)} documents")
    print(f"💾 Knowledge graph saved to: {WORKING_DIR}")

# Run indexing
# NOTE: Only run this cell when you add new documents!
await index_papers()

INFO: Reset 1 documents from PROCESSING/FAILED to PENDING status
INFO: Processing 1 document(s)
INFO: Extracting stage 1/1: unknown_source
INFO: Processing d-id: doc-14988456e477bf51ec3488b3f381162d
INFO: Embedding func: 8 new workers initialized (Timeouts: Func: 30s, Worker: 60s, Health Check: 75s)


📚 Found 1 documents
[1/1] Processing: Zotero Report.txt...


INFO: LLM func: 1 new workers initialized (Timeouts: Func: 180s, Worker: 360s, Health Check: 375s)


config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
INFO:  == LLM cache == saving: default:extract:b94159f4994399ba99c570e6540cd095
INFO: Chunk 1 of 112 extracted 12 Ent + 0 Rel chunk-00850b1c3f948a71fe7f1406ac0db5d6
INFO:  == LLM cache == saving: default:extract:0aac8c60f3c1f2b33813f5044e24b7bb
INFO: Chunk 2 of 112 extracted 13 Ent + 0 Rel chunk-af514b5d5341cdd0373165f94c8d3a4f
INFO:  == LLM cache == saving: default:extract:e6ab3c4333bd423f00370235afb8ac0e
INFO: Chunk 3 of 112 extracted 15 Ent + 0 Rel chunk-986b7fc234e23ccfce54b963e5970244
INFO:  == LLM cache == saving: default:extract:831b4dd4253daa62bd0ad0ffbfeb8a97
INFO: Chunk 4 of 112 extracted 11 Ent + 0 Rel chunk-4782431d811de9324468ef1fed60cb77
INFO:  == LLM cache == saving: default:extract:4202cd1677d7b0d348e1aacba52a0d43
INFO: Chunk 5 of 112 extracted 8 Ent + 4 Rel chunk-e66c943b1316f01e4e911c380dc25fd0
INFO:  == LLM cache == saving: default:

  ✅ Indexed successfully

🎉 Indexing Complete! Processed 1/1 documents
💾 Knowledge graph saved to: /content/drive/MyDrive/LightRAG_Storage


In [11]:
# ============================================================
# Cell 5: Query Interface
# ============================================================

# @title 🔎 Research Query Interface
question = "What do you know about Ideal L2 writing self?" # @param {type:"string"}
search_mode = "local" # @param ["global", "local", "hybrid", "naive"]

# Search mode guide:
# - global: Best for themes, summaries, big picture
# - local: Best for specific facts, detailed info
# - hybrid: Combines both (recommended)
# - naive: Simple vector search

param = QueryParam(mode=search_mode)

print(f"🧠 Querying: '{question}'")
print(f"🔍 Mode: {search_mode}")
print("⏳ Thinking...\n")

result = await rag.aquery(question, param=param)

print("="*80)
print("📝 RESEARCH REPORT:")
print("="*80)
print(result)
print("="*80)


🧠 Querying: 'What do you know about Ideal L2 writing self?'
🔍 Mode: local
⏳ Thinking...



INFO:  == LLM cache == saving: local:keywords:57d699c230a993dd4ef0af2fc2e962be
INFO: Query nodes: Ideal L2, writing, self (top_k:40, cosine:0.2)
INFO: Local query: 40 entites, 4 relations
INFO: Raw search results: 40 entities, 4 relations, 0 vector chunks
INFO: After truncation: 40 entities, 4 relations
INFO: Selecting 42 from 42 entity-related chunks by vector similarity
INFO: Find no additional relations-related chunks from 4 relations
INFO: Round-robin merged chunks: 42 -> 42 (deduplicated 0)
INFO: Final context: 40 entities, 4 relations, 20 chunks
INFO: Final chunks S+F/O: E6/1 E3/2 E1/3 E1/4 E3/5 E2/6 E2/7 E2/8 E2/9 E2/10 E4/11 E2/12 E2/13 E3/14 E1/15 E2/16 E1/17 E1/18 E1/19 E2/20
ERROR: LLM func: Error in decorated function for task 139329075923264_9336.609689835: CUDA out of memory. Tried to allocate 1.75 GiB. GPU 0 has a total capacity of 14.74 GiB of which 856.12 MiB is free. Process 15755 has 13.90 GiB memory in use. Of the allocated memory 12.90 GiB is allocated by PyTorch, 

📝 RESEARCH REPORT:
None
