In [14]:
#!/usr/bin/env python3
"""
RAPTOR Retrieve - Works in both terminal and Jupyter
"""
import asyncio
from raptor import RetrievalAugmentation, RetrievalAugmentationConfig
import requests
from raptor import BaseEmbeddingModel

class VLLMQueryEmbedding(BaseEmbeddingModel):
    """Query-side embedding model that calls a vLLM server over HTTP.

    Each text is sent to ``{base_url}/v1/embeddings`` with a ``query: ``
    prefix, so this class is meant for embedding retrieval queries rather
    than the passages stored in the tree.

    Args:
        base_url: Root URL of the embedding server. A trailing slash is
            tolerated.
        model: Model name sent in the request payload.
        timeout: Per-request timeout in seconds, passed to ``requests.post``.
    """

    def __init__(
        self,
        base_url: str = "http://localhost:8008",
        model: str = "intfloat/multilingual-e5-large",
        timeout: float = 30,
    ):
        self.base_url = base_url
        self.model = model
        self.timeout = timeout

    def create_embedding(self, text: str):
        """Return the embedding vector for ``text`` as a retrieval query.

        Args:
            text: Raw query text; the ``query: `` prefix is added here, so
                callers must not add it themselves.

        Returns:
            The ``embedding`` field of the first item in the response's
            ``data`` list.

        Raises:
            RuntimeError: If the server answers with a non-200 status code.
        """
        # Always use the query prefix for retrieval.
        payload = {
            "input": [f"query: {text}"],
            "model": self.model,
        }

        # Normalise here (not in __init__) so self.base_url keeps the exact
        # value the caller passed while "http://host/" still works.
        url = f"{self.base_url.rstrip('/')}/v1/embeddings"
        response = requests.post(url, json=payload, timeout=self.timeout)
        if response.status_code != 200:
            # Include a bounded excerpt of the body: the status code alone
            # rarely says why the server rejected the request.
            raise RuntimeError(
                f"VLLM error: {response.status_code}: {response.text[:500]}"
            )
        return response.json()['data'][0]['embedding']

# =============================================================================
# JUPYTER USAGE: copy/paste the line below
# await retrieve_test()
# =============================================================================

async def retrieve_test(tree_path: str = "raptor_tree.pkl", queries=None):
    """Run a batch of retrieval queries against a saved RAPTOR tree.

    In Jupyter, call this directly with ``await retrieve_test()``.

    Args:
        tree_path: Path of the saved tree handed to ``RetrievalAugmentation``.
        queries: Iterable of query strings to run. When ``None``, a built-in
            list of three sample queries is used.

    Prints the tree size, then a preview of at most 250 characters for each
    query's result. Returns ``None``.
    """
    if queries is None:
        # Built inside the function so no mutable default is shared
        # between calls.
        queries = [
            "Bu dokümanın ana konusu nedir?",
            "En önemli bilgiler nelerdir?",
            "What is the main topic?"
        ]

    # Query embedding model (adds the "query: " prefix itself).
    query_embedding_model = VLLMQueryEmbedding()

    # Retrieval config. "VLLM" is presumably the key under which the tree's
    # node embeddings were stored at build time — verify against the builder.
    config = RetrievalAugmentationConfig(
        tr_embedding_model=query_embedding_model,
        tr_context_embedding_model="VLLM",
        tr_top_k=5,
        tr_selection_mode="top_k"
    )

    # NOTE(review): the default path is a .pkl file, so it is presumably
    # unpickled by RetrievalAugmentation — only load files you trust.
    RA = RetrievalAugmentation(config=config, tree=tree_path)

    print(f"✅ Tree loaded: {len(RA.tree.all_nodes)} nodes, {RA.tree.num_layers} layers")

    print("\n" + "="*50)
    print("🔍 RAPTOR RETRIEVAL TEST")
    print("="*50)

    for query in queries:
        print(f"\n📝 Query: {query}")
        print("-" * 40)

        # The raw query is passed; the embedding model adds the prefix.
        # NOTE(review): top_k / max_tokens / collapse_tree semantics come
        # from the RAPTOR library and are not visible here — presumably
        # node count, context token budget, and "search all layers at once".
        result = await RA.retrieve_async(
            query,
            top_k=3,
            max_tokens=300,
            collapse_tree=True
        )

        print(f"📄 Result ({len(result)} chars):")
        # Append the ellipsis only when the text was actually truncated.
        preview = result[:250] + "..." if len(result) > 250 else result
        print(preview)

    print("\n✅ Retrieval test completed!")

def main():
    """Terminal entry point: run the full retrieval test to completion.

    Uses ``asyncio.run``, which starts its own event loop; from a notebook
    cell, ``await retrieve_test()`` is the intended route instead.
    """
    print("🚀 Running RAPTOR retrieve test...")
    test_coroutine = retrieve_test()
    asyncio.run(test_coroutine)

# For direct use in Jupyter
async def jupyter_test():
    """Run one sample query against the saved tree and print a short preview.

    Intended for quick checks from a notebook: ``await jupyter_test()``.
    Loads ``raptor_tree.pkl``, retrieves context for a single fixed query,
    and prints at most the first 200 characters of the result.
    """
    tree_path = "raptor_tree.pkl"
    query_embedding_model = VLLMQueryEmbedding()

    # "VLLM" is presumably the embedding key stored in the tree's nodes —
    # verify against the code that built the tree.
    config = RetrievalAugmentationConfig(
        tr_embedding_model=query_embedding_model,
        tr_context_embedding_model="VLLM"
    )

    RA = RetrievalAugmentation(config=config, tree=tree_path)
    print(f"✅ Tree loaded: {len(RA.tree.all_nodes)} nodes")

    # Single test query
    query = "Bu dokümanın ana konusu nedir?"
    result = await RA.retrieve_async(query, top_k=3, max_tokens=300)

    # Only mark the preview as truncated when it really is, matching how
    # retrieve_test() prints its previews.
    preview = result[:200] + "..." if len(result) > 200 else result

    print(f"\n📝 Query: {query}")
    print(f"📄 Result: {preview}")

if __name__ != "__main__":
    # Imported rather than executed directly: show the awaitable entry
    # points instead of running anything.
    print(
        "🔍 Jupyter kullanımı:",
        "   await retrieve_test()     # Full test",
        "   await jupyter_test()      # Quick test",
        sep="\n",
    )
else:
    # Executed directly: run the full test.
    main()

2025-07-13 16:37:45,683 - Successfully initialized TreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'o200k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Summarization Length: 100
            Summarization Model: <raptor.SummarizationModels.GPT41SummarizationModel object at 0x7c26a9f13eb0>
            Embedding Models: {'OpenAI': <raptor.EmbeddingModels.OpenAIEmbeddingModel object at 0x7c26a9f134f0>}
            Cluster Embedding Model: OpenAI
        
        Reduction Dimension: 10
        Clustering Algorithm: RAPTOR_Clustering
        Clustering Parameters: {}
        
2025-07-13 16:37:45,684 - Successfully initialized ClusterTreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'o200k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selec

🚀 Running RAPTOR retrieve test...
✅ Tree loaded: 65 nodes, 1 layers

🔍 RAPTOR RETRIEVAL TEST

📝 Query: Bu dokümanın ana konusu nedir?
----------------------------------------
📄 Result (1072 chars):
Bu metin, "ZULFICORE SYSTEM" adlı bir projenin tanıtımını ve yazarın bu konudaki düşüncelerini anlatıyor. Yazar, bu devrimi başlatmaya karar verdiğini ve hedefinin dünyada tek bir çocuğun bile aç kalmaması olduğunu ifade ediyor. 18 yaşında Atatürk’ü ...

📝 Query: En önemli bilgiler nelerdir?
----------------------------------------
📄 Result (884 chars):
Özet:  Bu metin, çeşitli peygamberlerin kıssalarını ve Zulficore yorumunu ele alarak, mucizelerin ve sessiz yankıların kaynağını ifade ediyor.   1. **Hz. İsa – Mucizelerin Kaynağı:** Maide Suresi 110. ayette, Allah, Hz. İsa'ya ve annesine verilen nim...

📝 Query: What is the main topic?
----------------------------------------
📄 Result (889 chars):
I write to you not merely as a technology entrepreneur, but with deep respect for someone who h