## 1. Setting Up this Notebook
We will start with a few imports.

In [1]:
# RAG Integration Discovery and Fix
import os
import json
import uuid
import time
from llama_stack_client import LlamaStackClient, RAGDocument

def discover_vector_db_providers(client):
    """Discover what vector DB providers are actually available."""
    print("üîç Discovering Vector DB Providers...")
    print("=" * 40)
    
    # Try to get existing vector databases to see what's configured
    try:
        response = client._client.get(f"{client._base_url}/v1/vector-dbs")
        if response.status_code == 200:
            data = response.json()
            print(f"üìã Vector DBs response: {json.dumps(data, indent=2)}")
            
            # Look for provider information
            if 'providers' in data:
                providers = data['providers']
                print(f"‚úÖ Available providers: {providers}")
                return providers
            elif 'data' in data:
                print(f"üìã Existing vector DBs: {data['data']}")
    except Exception as e:
        print(f"‚ùå Could not get vector DB info: {e}")
    
    # Try common providers
    common_providers = [
        "chroma", "qdrant", "weaviate", "pinecone", 
        "elasticsearch", "opensearch", "milvus", "pgvector"
    ]
    
    working_providers = []
    for provider in common_providers:
        try:
            print(f"üîÑ Testing provider: {provider}")
            test_db_id = f"test_{provider}_{int(time.time())}"
            
            # Try to create a test vector DB with this provider
            client.vector_dbs.register(
                vector_db_id=test_db_id,
                embedding_model="test-model",  # This will likely fail, but we want to see the error
                embedding_dimension=384,
                provider_id=provider
            )
            working_providers.append(provider)
            print(f"‚úÖ Provider {provider} works!")
        except Exception as e:
            error_msg = str(e)
            if "not found" in error_msg.lower() and "provider" in error_msg.lower():
                print(f"‚ùå Provider {provider} not available")
            elif "model" in error_msg.lower() and "not found" in error_msg.lower():
                print(f"‚úÖ Provider {provider} available (but embedding model not found)")
                working_providers.append(provider)
            else:
                print(f"‚ùå Provider {provider} error: {error_msg[:100]}")
    
    return working_providers


def discover_embedding_models(client):
    """Try to discover what embedding models are available."""
    print("\nüîç Discovering Embedding Models...")
    print("=" * 40)
    
    # Check if there's an embeddings endpoint
    endpoints_to_check = [
        "/v1/embeddings",
        "/v1/embedding-models", 
        "/v1/models",
        "/embeddings"
    ]
    
    for endpoint in endpoints_to_check:
        try:
            response = client._client.get(f"{client._base_url}{endpoint}")
            if response.status_code == 200:
                data = response.json()
                print(f"‚úÖ {endpoint} works:")
                
                # Look for embedding models
                if 'data' in data:
                    models = data['data']
                    embedding_models = [
                        model for model in models 
                        if 'embedding' in str(model).lower() or 
                        model.get('model_type') == 'embedding'
                    ]
                    if embedding_models:
                        print(f"üìã Embedding models found: {embedding_models}")
                        return embedding_models
                
                print(f"üìã Response: {json.dumps(data, indent=2)[:300]}...")
        except Exception as e:
            print(f"‚ùå {endpoint}: {str(e)[:50]}")
    
    # Try common embedding model names with the models we know exist
    known_models = ["llama32-3b"]
    print(f"\nüîÑ Testing if known models support embeddings...")
    
    for model in known_models:
        print(f"  Testing: {model}")
        # We'll test this when we try to create the vector DB
    
    return []


def test_vector_db_with_discovered_info(client, providers, embedding_models):
    """Test vector DB creation with discovered providers and models."""
    print(f"\nüß™ Testing Vector DB Creation...")
    print("=" * 40)
    
    vector_db_id = f"working_vector_db_{int(time.time())}"
    
    # If we found working providers and models, use them
    if providers and embedding_models:
        for provider in providers[:1]:  # Test first working provider
            for model in embedding_models[:1]:  # Test first embedding model
                try:
                    print(f"üîÑ Testing: provider={provider}, model={model}")
                    
                    model_name = model if isinstance(model, str) else model.get('identifier', str(model))
                    
                    client.vector_dbs.register(
                        vector_db_id=f"{vector_db_id}_{provider}_{model_name}",
                        embedding_model=os.getenv("VDB_EMBEDDING"),
                        embedding_dimension=384,  # Start with common dimension
                        provider_id=os.getenv("VDB_PROVIDER")
                    )
                    
                    final_db_id = f"{vector_db_id}_{provider}_{model_name}"
                    print(f"‚úÖ Vector DB created successfully: {final_db_id}")
                    return final_db_id
                    
                except Exception as e:
                    print(f"‚ùå Failed with {provider}/{model_name}: {e}")
    
    # If no embedding models found, try using the main LLM models
    if providers:
        known_models = ["llama32-3b"]
        for provider in providers:
            for model in known_models:
                try:
                    print(f"üîÑ Testing LLM as embedding: provider={provider}, model={model}")
                    
                    client.vector_dbs.register(
                        vector_db_id=f"{vector_db_id}_{provider}_{model}",
                        embedding_model=model,
                        embedding_dimension=4096,  # Larger dimension for LLM
                        provider_id=provider
                    )
                    
                    final_db_id = f"{vector_db_id}_{provider}_{model}"
                    print(f"‚úÖ Vector DB created with LLM: {final_db_id}")
                    return final_db_id
                    
                except Exception as e:
                    print(f"‚ùå LLM embedding failed {provider}/{model}: {e}")
    
    return None


def create_rag_agent_with_working_config(client, vector_db_id):
    """Create an agent with RAG using the working vector DB."""
    print(f"\nü§ñ Creating RAG Agent with Vector DB: {vector_db_id}")
    print("=" * 40)
    
    if not vector_db_id:
        print("‚ùå No working vector DB available")
        return None
    
    try:
        # First, try to add some documents to the vector DB
        print("üìÑ Adding documents to vector DB...")
        
        documents = [
            RAGDocument(
                document_id="openshift-guide",
                content="https://www.openshift.guide/openshift-guide-screen.pdf",
                mime_type="application/pdf",
                metadata={"source": "openshift-guide"},
            )
        ]
        
        # Try to insert documents
        # Note: We might need to adjust this based on the actual API
        try:
            if hasattr(client, 'tool_runtime') and hasattr(client.tool_runtime, 'rag_tool'):
                client.tool_runtime.rag_tool.insert(
                    documents=documents,
                    vector_db_id=vector_db_id,
                    chunk_size_in_tokens=512
                )
                print("‚úÖ Documents inserted via tool_runtime")
            else:
                # Try alternative approach
                print("‚ùå tool_runtime.rag_tool not available, trying direct insertion...")
                # We might need to implement this differently
        except Exception as e:
            print(f"‚ùå Document insertion failed: {e}")
            print("üîÑ Continuing with agent creation anyway...")
        
        # Create agent with RAG tool
        agent_config = {
            "model": "llama32-3b",
            "instructions": """You are a helpful assistant with access to OpenShift documentation through RAG.
            Use the RAG tool to search for relevant information when answering questions about OpenShift, Kubernetes, or container technologies.
            Always search the documentation first before providing answers.""",
            "tools": [
                {
                    "name": "builtin::rag",
                    "args": {
                        "vector_db_ids": [vector_db_id]
                    }
                }
            ],
            "sampling_params": {
                "strategy": {"type": "greedy"},
                "max_tokens": 512,
                "temperature": 0.1
            }
        }
        
        agent_response = client.agents.create(agent_config=agent_config)
        agent_id = agent_response.agent_id
        print(f"‚úÖ RAG Agent created successfully: {agent_id}")
        return agent_id
        
    except Exception as e:
        print(f"‚ùå RAG Agent creation failed: {e}")
        import traceback
        traceback.print_exc()
        return None


def extract_response_content(turn_response):
    """Extract content from the streaming response properly."""
    print("üîÑ Processing streaming response...")
    
    response_content = ""
    chunk_count = 0
    
    for chunk in turn_response:
        chunk_count += 1
        
        # Look for text in the event payload
        if hasattr(chunk, 'event') and chunk.event:
            event = chunk.event
            if hasattr(event, 'payload'):
                payload = event.payload
                
                # Check for text delta (this is where the actual content is)
                if hasattr(payload, 'delta') and hasattr(payload.delta, 'text'):
                    text = payload.delta.text
                    response_content += text
                    print(f"  üìù Added: '{text}'")
                
                # Check for complete step content
                elif hasattr(payload, 'step_details') and hasattr(payload.step_details, 'api_model_response'):
                    content = payload.step_details.api_model_response.content
                    if content and content not in response_content:
                        response_content = content  # Use the complete content
                        print(f"  üìã Complete content: '{content}'")
    
    print(f"‚úÖ Extracted {len(response_content)} characters from {chunk_count} chunks")
    return response_content


def test_rag_agent(client, agent_id):
    """Test the RAG agent with OpenShift questions."""
    print(f"\nüß™ Testing RAG Agent: {agent_id}")
    print("=" * 40)
    
    if not agent_id:
        print("‚ùå No RAG agent to test")
        return
    
    try:
        # Create session
        session_response = client.agents.session.create(
            agent_id=agent_id,
            session_name=f"rag-test-{int(time.time())}"
        )
        session_id = session_response.session_id
        print(f"‚úÖ Session created: {session_id}")
        
        # Test questions
        test_questions = [
            "What is OpenShift?",
            "How do I install OpenShift?",
            "What are the main differences between OpenShift and Kubernetes?"
        ]
        
        for i, question in enumerate(test_questions, 1):
            print(f"\nüìã Question {i}: {question}")
            
            turn_response = client.agents.turn.create(
                agent_id=agent_id,
                session_id=session_id,
                messages=[{"role": "user", "content": question}],
                stream=True
            )
            
            response_content = extract_response_content(turn_response)
            print(f"ü§ñ Response: {response_content}")
            
            if not response_content:
                print("‚ùå No response content received")
            else:
                print(f"‚úÖ Response received ({len(response_content)} chars)")
        
        return True
        
    except Exception as e:
        print(f"‚ùå RAG agent test failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def comprehensive_rag_setup():
    """Complete RAG setup with discovery."""
    print("=" * 60)
    print("üîß COMPREHENSIVE RAG SETUP WITH DISCOVERY")
    print("=" * 60)
    
    # Create client
    client = LlamaStackClient(base_url=os.getenv("REMOTE_BASE_URL", "http://llamastack-server:8321"))
    
    # Step 1: Discover providers
    providers = discover_vector_db_providers(client)
    print(f"\n‚úÖ Working providers: {providers}")
    
    # Step 2: Discover embedding models
    embedding_models = discover_embedding_models(client)
    print(f"‚úÖ Embedding models: {embedding_models}")
    
    # Step 3: Create working vector DB
    vector_db_id = test_vector_db_with_discovered_info(client, providers, embedding_models)
    print(f"‚úÖ Vector DB: {vector_db_id}")
    
    # Step 4: Create RAG agent
    rag_agent_id = create_rag_agent_with_working_config(client, vector_db_id)
    print(f"‚úÖ RAG Agent: {rag_agent_id}")
    
    # Step 5: Test RAG agent
    if rag_agent_id:
        test_success = test_rag_agent(client, rag_agent_id)
        print(f"‚úÖ RAG Test: {'Passed' if test_success else 'Failed'}")
    
    return client, rag_agent_id, vector_db_id


# Run the comprehensive setup
if __name__ == "__main__":
    client, rag_agent_id, vector_db_id = comprehensive_rag_setup()

INFO:httpx:HTTP Request: GET http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"


üîß COMPREHENSIVE RAG SETUP WITH DISCOVERY
üîç Discovering Vector DB Providers...
üìã Vector DBs response: {
  "data": [
    {
      "identifier": "test_vector_db_6a2ba4eb-cece-4e0b-ad1e-d0cb07bfc9f1",
      "provider_resource_id": "test_vector_db_6a2ba4eb-cece-4e0b-ad1e-d0cb07bfc9f1",
      "provider_id": "milvus",
      "type": "vector_db",
      "embedding_model": "all-MiniLM-L6-v2",
      "embedding_dimension": 384
    },
    {
      "identifier": "test_vector_db_e95d55b1-f8a4-4b0c-97ef-8b3dee20f796",
      "provider_resource_id": "test_vector_db_e95d55b1-f8a4-4b0c-97ef-8b3dee20f796",
      "provider_id": "milvus",
      "type": "vector_db",
      "embedding_model": "all-MiniLM-L6-v2",
      "embedding_dimension": 384
    },
    {
      "identifier": "test_vector_db_d5554528-59de-4b69-bb98-cd7b90efbaec",
      "provider_resource_id": "test_vector_db_d5554528-59de-4b69-bb98-cd7b90efbaec",
      "provider_id": "milvus",
      "type": "vector_db",
      "embedding_model": "all-Mini

INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"
INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"


‚úÖ Provider qdrant available (but embedding model not found)
üîÑ Testing provider: weaviate
‚úÖ Provider weaviate available (but embedding model not found)
üîÑ Testing provider: pinecone


INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"
INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"


‚úÖ Provider pinecone available (but embedding model not found)
üîÑ Testing provider: elasticsearch
‚úÖ Provider elasticsearch available (but embedding model not found)
üîÑ Testing provider: opensearch


INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"
INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"


‚úÖ Provider opensearch available (but embedding model not found)
üîÑ Testing provider: milvus
‚úÖ Provider milvus available (but embedding model not found)
üîÑ Testing provider: pgvector


INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"
INFO:httpx:HTTP Request: GET http://llamastack-server:8321/v1/embeddings "HTTP/1.1 404 Not Found"
INFO:httpx:HTTP Request: GET http://llamastack-server:8321/v1/embedding-models "HTTP/1.1 404 Not Found"
INFO:httpx:HTTP Request: GET http://llamastack-server:8321/v1/models "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"
INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"


‚úÖ Provider pgvector available (but embedding model not found)

‚úÖ Working providers: ['chroma', 'qdrant', 'weaviate', 'pinecone', 'elasticsearch', 'opensearch', 'milvus', 'pgvector']

üîç Discovering Embedding Models...
‚úÖ /v1/models works:
üìã Embedding models found: [{'identifier': 'all-MiniLM-L6-v2', 'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}]
‚úÖ Embedding models: [{'identifier': 'all-MiniLM-L6-v2', 'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}]

üß™ Testing Vector DB Creation...
üîÑ Testing: provider=chroma, model={'identifier': 'all-MiniLM-L6-v2', 'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
‚ùå Failed 

INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"
INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"


‚ùå LLM embedding failed qdrant/llama32-3b: Error code: 400 - {'detail': 'Invalid value: Model llama32-3b is not an embedding model'}
üîÑ Testing LLM as embedding: provider=weaviate, model=llama32-3b
‚ùå LLM embedding failed weaviate/llama32-3b: Error code: 400 - {'detail': 'Invalid value: Model llama32-3b is not an embedding model'}
üîÑ Testing LLM as embedding: provider=pinecone, model=llama32-3b


INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"
INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"


‚ùå LLM embedding failed pinecone/llama32-3b: Error code: 400 - {'detail': 'Invalid value: Model llama32-3b is not an embedding model'}
üîÑ Testing LLM as embedding: provider=elasticsearch, model=llama32-3b
‚ùå LLM embedding failed elasticsearch/llama32-3b: Error code: 400 - {'detail': 'Invalid value: Model llama32-3b is not an embedding model'}
üîÑ Testing LLM as embedding: provider=opensearch, model=llama32-3b


INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"
INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"


‚ùå LLM embedding failed opensearch/llama32-3b: Error code: 400 - {'detail': 'Invalid value: Model llama32-3b is not an embedding model'}
üîÑ Testing LLM as embedding: provider=milvus, model=llama32-3b
‚ùå LLM embedding failed milvus/llama32-3b: Error code: 400 - {'detail': 'Invalid value: Model llama32-3b is not an embedding model'}
üîÑ Testing LLM as embedding: provider=pgvector, model=llama32-3b


INFO:httpx:HTTP Request: POST http://llamastack-server:8321/v1/vector-dbs "HTTP/1.1 400 Bad Request"


‚ùå LLM embedding failed pgvector/llama32-3b: Error code: 400 - {'detail': 'Invalid value: Model llama32-3b is not an embedding model'}
‚úÖ Vector DB: None

ü§ñ Creating RAG Agent with Vector DB: None
‚ùå No working vector DB available
‚úÖ RAG Agent: None
