# Philatelic Gradio App with Weaviate
Interactive web interface for CR Philately

## 1. Setup & Imports
Load all the modules and functions

In [None]:
import os
import json
import glob
import time
from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime
import weaviate
import gradio as gr

# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Third-party imports
import pandas as pd


from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_weaviate import WeaviateVectorStore
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage
from langchain.retrievers import MultiQueryRetriever, EnsembleRetriever, ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor


from philatelic_weaviate import *

from philatelic_chunk_schema import *

print("✅ Basic imports completed")

In [None]:
# Read runtime configuration from environment variables, falling back to
# local-development defaults where one is defined.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
WEAVIATE_URL = os.getenv('WEAVIATE_URL', 'http://localhost:8083')
PHILATELIC_JSONS_DIR = os.getenv('PHILATELIC_JSONS_DIR', './results/final_jsons')
COLLECTION_NAME = os.getenv('WEAVIATE_COLLECTION_NAME', 'Oxcart')

print("🔧 Configuration:")
print(f"   • Weaviate URL: {WEAVIATE_URL}")
print(f"   • JSONs Directory: {PHILATELIC_JSONS_DIR}")
print(f"   • Collection Name: {COLLECTION_NAME}")
print(f"   • OpenAI API Key: {'✅ Configured' if OPENAI_API_KEY else '❌ Missing configuration'}")

# The key is only reported as missing here; nothing is raised.
if not OPENAI_API_KEY:
    # "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
    print("\n⚠️  IMPORTANT: Configure your OPENAI_API_KEY in the .env file")
    print("   Copy .env.example to .env and add your API key")

# Verify that the JSONs directory exists and summarise its '*_final.json' files.
if not os.path.exists(PHILATELIC_JSONS_DIR):
    print(f"\n⚠️  Directory {PHILATELIC_JSONS_DIR} not found")
    print("   Make sure you have processed documents with the Dolphin parser")
else:
    json_files = glob.glob(os.path.join(PHILATELIC_JSONS_DIR, '*_final.json'))
    print(f"\n📁 Found {len(json_files)} philatelic JSON files")
    if json_files:
        print("   Examples:")
        # Show at most three file names to keep the output short.
        for file in json_files[:3]:
            print(f"   • {os.path.basename(file)}")
        if len(json_files) > 3:
            print(f"   • ... and {len(json_files) - 3} more")

In [None]:
# Chat model shared by the RAG cells below.
llm = ChatOpenAI(
    model="gpt-5-nano",
    api_key=OPENAI_API_KEY,
    temperature=1,  # required for gpt-5-nano (per the original author's note)
    timeout=120.0,
    model_kwargs={"verbosity": "medium", "reasoning_effort": "low"},
)

# Embedding model handed to the vector store for query embedding.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
    api_key=OPENAI_API_KEY,
)

In [None]:
# ========================================================================================
# 📝 RAG PROMPT TEMPLATE - Professional Philatelic Consultation
# ========================================================================================

# Prompt text for the answer-generation step. It exposes two placeholders,
# {context} and {question}, which are declared as the input variables of
# `rag_prompt` below.
# NOTE(review): the prompt text contains typos ("numeber", "michell"). They are
# left untouched here because the string is sent to the model at runtime.
philatelic_rag_template = """You are a senior philatelic researcher and catalog specialist with expertise in Costa Rican stamps and postal history. Provide comprehensive, well-structured analysis based strictly on the source materials provided.

SOURCE MATERIALS:
{context}

RESEARCH QUERY: {question}

RESPONSE REQUIREMENTS:

FORMATTING & STRUCTURE:
• Use clear hierarchical organization with descriptive headers using markdown
• Group related information under logical categories using ## and **bold subheadings**
• Use bullet points (•) for individual facts and varieties
• Include relevant emojis for major sections (🔍 📮 📚 🎯) to enhance readability
• Bold key terms, catalog numbers, and important details
• KEEP SECTIONS CONCISE - avoid excessive repetition or overly detailed explanations
• Your output is in markdown format

CITATION FORMAT:
• Every factual statement must include the name of the document (doc_id) and its page numeber like this example: (CRF 100, page 15)
• Multiple sources: (doc_id, page #; doc_id, page #) Example: (OXCART 123, page 24 ; OXCART 25, page 15)
• Always cite catalog numbers (scott, yvert, michell, etc), varieties, dates, quantities, and technical specifications
• When quoting directly, use quotation marks around quoted text

CONTENT ORGANIZATION:
• Lead with the most direct answer to the query
• Organize by catalog numbers, chronological order, or logical categories as appropriate
• Include technical specifications: dates, quantities, colors, perforations, varieties
• Provide brief historical context and collecting significance
• Note relationships between issues, varieties, or catalog entries
• Address valuation or rarity when relevant to the query

RESPONSE LENGTH:
• Aim for clear, informative responses that are thorough but not excessive
• Eliminate redundant information and repetitive explanations
• Focus on the most relevant information that directly answers the query
• If information is extensive, prioritize the most important catalog entries and varieties

TECHNICAL STANDARDS:
• Use precise philatelic terminology (definitive, commemorative, variety, error, overprint, etc.)
• Specify exact catalog numbers with proper formatting (Scott C216, not just C216)
• Include denomination and color details when available
• Note printing quantities, dates, and technical varieties
• Distinguish between verified catalog facts and expert opinions
• Flag incomplete or uncertain information clearly

RESEARCH COMPLETENESS:
• If source materials are insufficient, state: "The provided documents do not contain sufficient information about..."
• Suggest what additional sources or information would be needed
• Note any gaps in catalog coverage or missing details

PROFESSIONAL TONE:
• Maintain authoritative but accessible language
• Present information objectively without unnecessary qualifiers
• Use active voice and clear, direct statements
• Avoid speculation beyond what sources support

RESPONSE:"""

# Wrap the raw text in a PromptTemplate so it can be piped into a chain; the
# declared input variables match the two placeholders used in the text.
rag_prompt = PromptTemplate(
    template=philatelic_rag_template,
    input_variables=["context", "question"]
)

In [None]:
# ========================================================================================
# 📄 OPTIMIZED DOCUMENT FORMATTING - For Academic Citation Style
# ========================================================================================

def format_docs_for_rag(docs_results: List[Any]) -> str:
    """Render retrieved documents as a single context string for the RAG prompt.

    Each document becomes one block headed by its position, ``doc_id`` and page
    number, followed by its text, so the model can cite sources in the
    ``(doc_id, page N)`` style requested by the prompt. Blocks are separated by
    a blank line and keep the order of ``docs_results``.

    Previously the non-empty branch returned a list of dicts despite the
    ``-> str`` annotation, so the prompt received a Python list repr.

    Args:
        docs_results: Objects exposing a ``metadata`` mapping and a
            ``page_content`` attribute.

    Returns:
        The formatted context, or a fixed placeholder sentence when
        ``docs_results`` is empty.
    """
    if not docs_results:
        return "\nNo source documents available."

    blocks = []
    for position, doc in enumerate(docs_results, 1):
        # Missing metadata falls back to the same defaults the original used.
        doc_id = doc.metadata.get('doc_id', 'Unknown')
        page = doc.metadata.get('page_number', 'N/A')
        blocks.append(
            f"[Document {position}] doc_id: {doc_id} | page: {page}\n"
            f"{doc.page_content}"
        )

    return "\n\n".join(blocks)

def create_rag_response(retriever_results: List[Dict], query: str) -> Dict:
    """Generate an answer for ``query`` from the retrieved documents.

    Builds the context with ``format_docs_for_rag`` and runs it through the
    module-level ``rag_prompt`` and ``llm``, parsing the output to a string.

    Args:
        retriever_results: Retrieved documents to use as context.
        query: The user's question; passed through as the prompt's ``question``.

    Returns:
        A dict with the keys ``response``, ``generation_time`` (seconds,
        rounded to 2 decimals), ``context_docs_count`` and ``context_length``.
        The same four keys are returned when there are no documents, so
        callers can index them unconditionally.
    """
    if not retriever_results:
        # Callers read "context_docs_count" from this dict; omitting it here
        # used to raise KeyError in the empty-result path.
        return {
            "response": "No documents found for this query.",
            "generation_time": 0,
            "context_docs_count": 0,
            "context_length": 0,
        }

    context = format_docs_for_rag(retriever_results)

    # The context is fixed for this call, so it is injected through a constant
    # lambda while the query itself flows through unchanged.
    rag_chain = (
        {"context": lambda _: context, "question": RunnablePassthrough()}
        | rag_prompt | llm | StrOutputParser()
    )

    start_time = time.time()
    response = rag_chain.invoke(query)
    generation_time = round(time.time() - start_time, 2)

    return {
        "response": response,
        "generation_time": generation_time,
        "context_docs_count": len(retriever_results),
        "context_length": len(context),
    }

## 2. Weaviate Client

In [None]:
# Connect to Weaviate and report whether the target collection already exists.
print("🔌 Connecting to Weaviate...")

try:
    client = create_weaviate_client(WEAVIATE_URL, OPENAI_API_KEY)
    print("✅ Connection successful")

    # Verify that Weaviate is working
    meta = client.get_meta()
    print(f"📊 Weaviate version: {meta.get('version', 'unknown')}")

    # Verify if collection exists. A failure here is reported but does not
    # invalidate the connection itself.
    try:
        collections = client.collections.list_all()
        # The v4 Python client returns a dict keyed by collection name, so
        # iterating it yields plain strings (which have no `.name`). An
        # iterable of config objects exposing `.name` is still accepted.
        if isinstance(collections, dict):
            collection_names = list(collections)
        else:
            collection_names = [col.name for col in collections]

        if COLLECTION_NAME in collection_names:
            collection = client.collections.get(COLLECTION_NAME)
            total_objects = collection.aggregate.over_all(total_count=True).total_count
            print(f"📊 Collection '{COLLECTION_NAME}' exists with {total_objects} documents")
        else:
            print(f"📝 Collection '{COLLECTION_NAME}' does not exist (will be created during indexing)")
    except Exception as e:
        print(f"⚠️ Could not verify collections: {e}")

except Exception as e:
    print(f"❌ Error connecting to Weaviate: {e}")
    print("💡 Make sure Weaviate is running:")
    print("   docker-compose up -d")
    # Later cells test `client` for truthiness to detect a missing connection.
    client = None

## 3. Weaviate Search Tests

Test the function search_chunks_semantic


In [None]:
# Smoke test: run one hybrid query and dump every hit with its metadata.
results = search_chunks_semantic(
    client,
    "Costa Rica 1907 2 colones stamp with original gum. Scott 68 issue of 1907",
    "Oxcart",
    limit=50,
    filters=[],
    mode="hybrid",
    alpha=0.35,
)

print(f"   📊 Resultados: {len(results)}")

# Optional metadata fields as (result key, printed label); a field is printed
# only when its value is present and truthy.
_optional_fields = (
    ('catalog_systems', '📖 Catálogos'),
    ('scott_numbers', '🔢 Scott'),
    ('years', '📅 Años'),
    ('colors', '🎨 Colores'),
    ('variety_classes', '🔀 Variedades'),
)

for j, result in enumerate(results, 1):
    print(f"\n      🏷️ #{j} (Score: {result['score']:.3f})")
    print(f"         📄 Documento: {result['doc_id']}")
    print(f"         📋 Tipo: {result['chunk_type']}")
    print(f"         📄 Página: {result['page_number']}")

    # Show the relevant metadata
    for _key, _label in _optional_fields:
        _value = result.get(_key)
        if _value:
            print(f"         {_label}: {_value}")

    # The chunk text is printed in full (no truncation).
    text = result.get('text', '')
    print(f"         📝 Texto: {text}")
    print("**********************************************************************************************************")

## 4. Advanced Retriever Implementation

In [None]:
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables.config import RunnableConfig

def compress_documents_simple(documents: List[Document], query: str, llm) -> List[Document]:
    """Reduce each document to its query-relevant content with one LLM call per document.

    All prompts are sent through ``llm.batch`` with ``max_concurrency=10``.
    Documents whose reply is empty or exactly ``NO_RELEVANT_CONTENT`` are
    dropped; the rest are returned as new ``Document`` objects that carry the
    compressed text and the source document's metadata.

    Args:
        documents: Documents to compress.
        query: The user query interpolated into every compression prompt.
        llm: Model exposing ``batch(prompts, config=...)``.

    Returns:
        The compressed documents, ``[]`` for empty input, or the original
        ``documents`` unchanged if the batch call raises.
    """
    if not documents:
        return []

    # Simple compression prompt for individual documents
    compress_prompt_template = """You are a philatelic expert. Extract and compress ONLY the information relevant to this query from the document below.

QUERY: {query}

DOCUMENT:
{document}

INSTRUCTIONS:
- Extract only information directly relevant to the query
- Preserve exact catalog numbers, dates, denominations, and technical specifications
- Keep Scott numbers, Michel numbers, and other catalog references intact
- Maintain variety descriptions and error information
- Remove irrelevant content but preserve context for understanding

If the document contains no relevant information, respond with exactly: NO_RELEVANT_CONTENT

COMPRESSED CONTENT:"""

    # One single-message chat prompt per document, in input order.
    prompts = [
        [("user", compress_prompt_template.format(query=query, document=source.page_content))]
        for source in documents
    ]

    # Cap the number of in-flight requests at 10.
    config = RunnableConfig(max_concurrency=10)

    try:
        responses = llm.batch(prompts, config=config)

        compressed_docs = []
        for source, reply in zip(documents, responses):
            if hasattr(reply, 'content'):
                content = reply.content.strip()
            else:
                content = str(reply).strip()

            # Skip empty replies and the explicit "nothing relevant" sentinel.
            if not content or content == "NO_RELEVANT_CONTENT":
                continue

            compressed_docs.append(
                Document(page_content=content, metadata=source.metadata)
            )

        return compressed_docs

    except Exception as e:
        print(f"Error during batch compression: {e}")
        # Best effort: fall back to the uncompressed documents.
        return documents

def search_stamps_with_compression(
    query,
    client,
    embeddings,
    llm,
    limit=100,
    alpha=0.30,
    diversity_lambda=0.75,
):
    """Retrieve documents for a philatelic query, compress them, and sort by quality.

    Pipeline: a similarity retriever and an MMR retriever over the
    ``COLLECTION_NAME`` index are combined in an ensemble (weights 0.7 / 0.3),
    wrapped in a multi-query retriever that asks ``llm`` for three query
    variations, and the retrieved documents are compressed with
    ``compress_documents_simple`` using a separately constructed
    ``ChatOpenAI`` instance.

    Args:
        query (str): The stamp query.
        client: Weaviate client handed to the vector store.
        embeddings: Embedding model handed to the vector store.
        llm: Language model used to generate the query variations.
        limit (int): Each of the two retrievers is asked for ``limit // 2`` documents.
        alpha (float): Added to the similarity retriever's search kwargs as
            ``alpha`` unless it is ``None``.
        diversity_lambda (float): Passed to the MMR retriever as ``lambda_mult``.

    Returns:
        list: Compressed documents sorted by ``metadata['quality_score']``
        in descending order (documents without a score count as 0.0).
    """
    store = WeaviateVectorStore(
        client=client,
        index_name=COLLECTION_NAME,
        text_key="text",
        embedding=embeddings,
    )

    per_retriever_k = limit // 2

    # `alpha` is only forwarded when the caller supplied one.
    similarity_kwargs = {
        "k": per_retriever_k,
        **({} if alpha is None else {"alpha": alpha}),
    }

    # Ensemble of two strategies: similarity search for precision (70%) and
    # MMR for diversity (30%).
    base_retriever = EnsembleRetriever(
        retrievers=[
            store.as_retriever(
                search_type="similarity",
                search_kwargs=similarity_kwargs,
            ),
            store.as_retriever(
                search_type="mmr",
                search_kwargs={"k": per_retriever_k, "lambda_mult": diversity_lambda},
            ),
        ],
        weights=[0.7, 0.3],
    )

    # Specialized prompt for philatelic multi-query generation
    multi_query_template = """You are a specialized philatelic researcher expert in stamp catalogues and varieties.
Generate 3 strategically different versions of the question to capture comprehensive stamp information:

ORIGINAL: {question}

Create variations that target:
1. CATALOG PRECISION: Focus on exact catalog numbers, dates, and technical specifications
2. CONTEXTUAL SEARCH: Include related series, printings, varieties, and historical context  
3. TERMINOLOGY ALTERNATIVES: Use alternative philatelic terms and synonyms

Consider these philatelic elements:
- Catalog systems: Scott, Michel, Yvert, SG, local catalogs
- Technical terms: definitive/commemorative, variety/error, overprint/surcharge
- Time references: issue dates, printing dates, first day covers
- Denominations: face values, colors, perforations

Alternative searches:
1.
2. 
3."""
    query_prompt = PromptTemplate(
        input_variables=["question"],
        template=multi_query_template,
    )

    multi_retriever = MultiQueryRetriever.from_llm(
        retriever=base_retriever,
        llm=llm,
        prompt=query_prompt,
        parser_key="lines",
    )

    # Run retrieval before the compression model is created.
    retrieved = multi_retriever.invoke(query)

    compression_llm = ChatOpenAI(
        model="gpt-5-nano",
        api_key=OPENAI_API_KEY,
        temperature=1,  # required for gpt-5-nano (per the original author's note)
        timeout=120.0,
        model_kwargs={"verbosity": "medium", "reasoning_effort": "low"},
    )

    compressed = compress_documents_simple(retrieved, query, compression_llm)

    # Highest quality_score first; documents without one sort as 0.0.
    return sorted(
        compressed,
        key=lambda doc: getattr(doc, 'metadata', {}).get('quality_score', 0.0),
        reverse=True,
    )

### Test the method search_stamps_with_compression

In [None]:
# # Test the optimized search_stamps_with_compression with batch processing
# print("🧪 Testing optimized batch compression...")

# # Test query focused on specific stamps
# test_query = "Costa Rica 1907 2 colones stamp with original gum Scott 68"

# print(f"🔍 Query: {test_query}")
# print("⏱️ Starting optimized search with batch compression...")

# import time
# start_time = time.time()

# try:
#     compressed_docs = search_stamps_with_compression(
#         query=test_query,
#         client=client, 
#         embeddings=embeddings, 
#         limit=30,
#         llm=llm,
#         alpha=0.30,  # 30% vectorial, 70% keywords for exact numbers
#         diversity_lambda=0.75  # 75% relevance, 25% diversity
#     )
    
#     end_time = time.time()
#     execution_time = end_time - start_time
    
#     print(f"✅ Batch compression completed in {execution_time:.2f} seconds")
#     print(f"📊 Retrieved and compressed {len(compressed_docs)} documents")
    
#     # Show sample results
#     for i, doc in enumerate(compressed_docs[:3], 1):
#         print(f"\\n📄 Document {i}:")
#         print(f"   Metadata: {getattr(doc, 'metadata', {})}")
#         content = getattr(doc, 'page_content', str(doc))
#         preview = content[:200] + "..." if len(content) > 200 else content
#         print(f"   Content: {preview}")
        
# except Exception as e:
#     print(f"❌ Error during batch compression test: {e}")
#     import traceback
#     traceback.print_exc()

## 5. Gradio Interface

In [None]:
def get_collection_info() -> str:
    """Build a markdown summary of the "Oxcart" collection for the interface.

    Reads the module-level ``client`` and queries ``get_collection_stats``.

    Returns:
        A markdown string with total chunks, total documents and, when
        available, a per-document chunk count. A short "❌ ..." message is
        returned instead when there is no client, no statistics, or an error.
    """
    if not client:
        return "❌ No Weaviate connection"

    try:
        stats = get_collection_stats(client, "Oxcart")
        if not stats:
            return "❌ Could not retrieve statistics"

        # Real newlines ("\n", not "\\n") so the markdown renders line breaks
        # instead of showing a literal backslash-n.
        info = "📊 **Oxcart Collection Statistics:**\n\n"
        info += f"📦 **Total chunks:** {stats['total_chunks']:,}\n"
        info += f"📄 **Documents:** {stats['total_documents']}\n\n"

        if stats.get('documents'):
            info += "**Indexed documents:**\n"
            for doc_id, count in stats['documents'].items():
                info += f"• {doc_id}: {count:,} chunks\n"

        return info
    except Exception as e:
        # Surface the failure as text so the caller always gets a string.
        return f"❌ Error: {e}"

print("✅ RAG functions defined")

In [None]:
# Fetch the collection statistics once; the `rag_system` dictionary built in a
# later cell reads 'total_documents' and 'total_chunks' from this value.
stats = get_collection_stats(client, "Oxcart")
# Bare expressions: each lookup fails fast if the key is missing, and in a
# notebook only the last one is echoed as the cell output.
stats['total_documents']
stats['total_chunks']

In [None]:
# Shared state consumed by the search/answer functions.
rag_system = dict(
    success=True,
    client=client,                            # lets the search functions query Weaviate
    collection_name=COLLECTION_NAME,          # name of the collection
    weaviate_url=WEAVIATE_URL,                # informational, for the UI
    total_documents=stats['total_documents'], # shown as status
    total_chunks=stats['total_chunks'],       # optional in the UI
    embeddings=embeddings,
    llm=llm,
    # further fields needed by the search functions can be added here
)

In [None]:
import time
from typing import Dict, Any, List

def search_and_answer_basic(
    query: str,
    rag_system: Dict[str, Any],
    use_filters: bool = False,
    catalog_system: str = "",
    chunk_type: str = "",
    has_varieties: bool = False,
    max_results: int = 10,
) -> Dict[str, Any]:
    """Answer ``query`` using the basic hybrid search only.

    Runs ``search_chunks_semantic`` in hybrid mode (``alpha=0.35``), wraps the
    hits as lightweight documents and passes them to ``create_rag_response``.

    Args:
        query: The user's question.
        rag_system: Shared state; must contain a truthy ``"client"`` and may
            contain ``"collection_name"`` (defaults to ``"Oxcart"``).
        use_filters: When true, a filter dict is built from the three
            arguments below (only the ones that are set).
        catalog_system: Value for the ``catalog_system`` filter.
        chunk_type: Value for the ``chunk_type`` filter.
        has_varieties: When true, adds ``has_varieties=True`` to the filters.
        max_results: Maximum number of hits requested from the search.

    Returns:
        A dict with ``answer`` (text), ``results`` (raw search hits) and
        ``metadata``. Failures are reported in ``answer`` and
        ``metadata["error"]`` instead of being raised.
    """
    from types import SimpleNamespace

    # Validation
    if not rag_system or not rag_system.get("client"):
        return {
            "answer": "❌ Error: No Weaviate connection",
            "results": [],
            "metadata": {"error": "No Weaviate connection"}
        }

    client_wv = rag_system["client"]
    collection_name = rag_system.get("collection_name", "Oxcart")

    # Build filters: None means "filtering disabled"; an empty dict means
    # filtering was requested but no criterion was set.
    filters = None
    if use_filters:
        filters = {}
        if catalog_system:
            filters["catalog_system"] = catalog_system
        if chunk_type:
            filters["chunk_type"] = chunk_type
        if has_varieties:
            filters["has_varieties"] = True

    try:
        start_time = time.time()

        results = search_chunks_semantic(
            client=client_wv,
            query=query,
            collection_name=collection_name,
            limit=int(max_results),
            filters=filters,
            mode="hybrid",
            alpha=0.35
        )

        # Expose each hit through the `page_content` / `metadata` attributes
        # that the RAG helpers read.
        docs_for_rag = [
            SimpleNamespace(
                page_content=r.get('text', ''),
                metadata={
                    'doc_id': r.get('doc_id', 'N/A'),
                    'page_number': r.get('page_number', 'N/A'),
                    'chunk_type': r.get('chunk_type', 'N/A'),
                    'score': r.get('score', 0.0),
                },
            )
            for r in results
        ]

        rag_response = create_rag_response(docs_for_rag, query)

        # Covers search plus answer generation.
        execution_time = time.time() - start_time

        metadata = {
            "approach": "Basic Hybrid Search",
            "query": query,
            "total_results": len(results),
            "max_results": int(max_results),
            "filters_used": filters or {},
            "generation_time": execution_time,
            # .get(): the no-documents response may not carry this key, and a
            # KeyError here would replace the "no documents" answer with an
            # error message.
            "context_docs_count": rag_response.get("context_docs_count", 0)
        }

        return {
            "answer": rag_response["response"],
            "results": results,
            "metadata": metadata
        }

    except Exception as e:
        # Boundary for the UI: report the failure as the answer text.
        return {
            "answer": f"❌ Basic search error: {e}",
            "results": [],
            "metadata": {"error": str(e), "generation_time": 0}
        }


def search_and_answer_advanced(
    query: str,
    rag_system: Dict[str, Any],
    use_filters: bool = False,
    catalog_system: str = "",
    chunk_type: str = "",
    has_varieties: bool = False,
    max_results: int = 10,
) -> Dict[str, Any]:
    """Answer ``query`` using the advanced (multi-query + compression) search only.

    Retrieves and compresses documents with ``search_stamps_with_compression``
    and passes them to ``create_rag_response``.

    Args:
        query: The user's question.
        rag_system: Shared state; must contain a truthy ``"client"`` and
            should contain ``"embeddings"`` and ``"llm"``.
        use_filters: When true, a filter dict is built from the three
            arguments below (only the ones that are set).
        catalog_system: Value for the ``catalog_system`` filter.
        chunk_type: Value for the ``chunk_type`` filter.
        has_varieties: When true, adds ``has_varieties=True`` to the filters.
        max_results: Accepted for signature parity with the basic search.

    Returns:
        A dict with ``answer`` (text), ``results`` (one display dict per
        compressed document) and ``metadata``. Failures are reported in
        ``answer`` and ``metadata["error"]`` instead of being raised.
    """
    # Validation
    if not rag_system or not rag_system.get("client"):
        return {
            "answer": "❌ Error: No Weaviate connection",
            "results": [],
            "metadata": {"error": "No Weaviate connection"}
        }

    client_wv = rag_system["client"]
    embedding_model = rag_system.get("embeddings")
    chat_model = rag_system.get("llm")

    # Build filters.
    # NOTE(review): these filters are only echoed in the metadata; they are
    # not passed to search_stamps_with_compression, so they do not restrict
    # the retrieval.
    filters = None
    if use_filters:
        filters = {}
        if catalog_system:
            filters["catalog_system"] = catalog_system
        if chunk_type:
            filters["chunk_type"] = chunk_type
        if has_varieties:
            filters["has_varieties"] = True

    try:
        start_time = time.time()

        # NOTE(review): the retrieval limit is fixed at 50; `max_results` is
        # not used here.
        compressed_docs = search_stamps_with_compression(
            query=query,
            client=client_wv,
            embeddings=embedding_model,
            llm=chat_model,
            limit=50,
            alpha=0.30,
            diversity_lambda=0.75
        )

        rag_response = create_rag_response(compressed_docs, query)

        # Flatten the compressed documents into the dict shape used for display.
        results = [
            {
                'doc_id': doc.metadata.get('doc_id', 'N/A'),
                'page_number': doc.metadata.get('page_number', 'N/A'),
                'chunk_type': doc.metadata.get('chunk_type', 'N/A'),
                'text': doc.page_content,
                'score': doc.metadata.get('quality_score', 0.0),
                'catalog_systems': doc.metadata.get('catalog_systems', []),
                'scott_numbers': doc.metadata.get('scott_numbers', []),
                'years': doc.metadata.get('years', []),
                'colors': doc.metadata.get('colors', []),
                'variety_classes': doc.metadata.get('variety_classes', []),
            }
            for doc in compressed_docs
        ]

        # Covers retrieval, compression and answer generation.
        execution_time = time.time() - start_time

        metadata = {
            "approach": "Advanced Compression Search",
            "query": query,
            "total_results": len(results),
            "compressed_docs": len(compressed_docs),
            "filters_used": filters or {},
            "generation_time": execution_time,
            # .get(): the no-documents response may not carry this key, and a
            # KeyError here would replace the "no documents" answer with an
            # error message.
            "context_docs_count": rag_response.get("context_docs_count", 0)
        }

        return {
            "answer": rag_response["response"],
            "results": results,
            "metadata": metadata
        }

    except Exception as e:
        # Boundary for the UI: report the failure as the answer text.
        return {
            "answer": f"❌ Advanced search error: {e}",
            "results": [],
            "metadata": {"error": str(e), "generation_time": 0}
        }



In [None]:

# def create_gradio_interface(rag_system: Dict[str, Any]) -> gr.Blocks:
#     """
#     Creates the Gradio interface for dual RAG queries with sequential execution.
#     Basic approach runs first and displays results, then Advanced approach runs.
#     """

#     def gradio_sequential_search(query, use_filters, catalog_system, chunk_type, has_varieties, max_results):
#         """
#         Sequential search: executes Basic first, yields results, then executes Advanced.
#         Uses yield to provide progressive updates to the UI.
#         """
#         if not rag_system:
#             error_msg = "❌ RAG system not configured"
#             yield error_msg, "", "", "⏳ Waiting...", "", "", "No timing data available"
#             return
            
#         if not query or not query.strip():
#             error_msg = "❌ Please enter a query"
#             yield error_msg, "", "", "⏳ Waiting...", "", "", "No timing data available"
#             return

#         try:
#             start_total_time = time.time()
            
#             # ============= STEP 1: Execute Basic Search =============
#             yield "🔄 Running Basic Hybrid Search...", "", "", "⏳ Waiting for Basic search to complete...", "", "", "⏱️ Basic search in progress..."
            
#             # Call basic search function
#             basic_results_data = search_and_answer_basic(
#                 query=query,
#                 rag_system=rag_system,
#                 use_filters=use_filters,
#                 catalog_system=catalog_system,
#                 chunk_type=chunk_type,
#                 has_varieties=has_varieties,
#                 max_results=int(max_results),
#             )
            
#             # Format Basic Results
#             basic_answer = basic_results_data["answer"]
#             basic_results = basic_results_data["results"]
#             basic_metadata = basic_results_data["metadata"]
#             basic_execution_time = basic_metadata.get("generation_time", 0)
            
#             basic_search_output = format_search_results(basic_results, "Basic Hybrid Search")
#             basic_metadata_output = format_metadata(basic_metadata, basic_execution_time)
            
#             # Yield Basic results with Advanced pending
#             timing_partial = f"""**⏱️ EXECUTION TIMING (Partial)**

# 🔍 **Basic Hybrid Search** ✅
# • Processing Time: {basic_execution_time:.2f} seconds
# • Documents Found: {len(basic_results)}
# • Status: COMPLETED

# 🚀 **Advanced Compression Search** ⏳
# • Status: STARTING...

# 📊 **Progress**
# • Basic search completed successfully
# • Advanced search initiating..."""
            
#             yield (
#                 basic_answer,
#                 basic_search_output,
#                 basic_metadata_output,
#                 "🔄 Starting Advanced Compression Search...",
#                 "",
#                 "",
#                 timing_partial
#             )
            
#             # ============= STEP 2: Execute Advanced Search =============
#             advanced_results_data = search_and_answer_advanced(
#                 query=query,
#                 rag_system=rag_system,
#                 use_filters=use_filters,
#                 catalog_system=catalog_system,
#                 chunk_type=chunk_type,
#                 has_varieties=has_varieties,
#                 max_results=int(max_results),
#             )
            
#             # Format Advanced Results
#             advanced_answer = advanced_results_data["answer"]
#             advanced_results = advanced_results_data["results"]
#             advanced_metadata = advanced_results_data["metadata"]
#             advanced_execution_time = advanced_metadata.get("generation_time", 0)
            
#             advanced_search_output = format_search_results(advanced_results, "Advanced Compression Search")
#             advanced_metadata_output = format_metadata(advanced_metadata, advanced_execution_time)
            
#             # Calculate total execution time
#             total_execution_time = time.time() - start_total_time
            
#             # Final timing information
#             timing_final = format_timing_display(
#                 basic_execution_time,
#                 advanced_execution_time,
#                 total_execution_time,
#                 len(basic_results),
#                 len(advanced_results)
#             )
            
#             # Yield final complete results
#             yield (
#                 basic_answer,
#                 basic_search_output,
#                 basic_metadata_output,
#                 advanced_answer,
#                 advanced_search_output,
#                 advanced_metadata_output,
#                 timing_final
#             )
            
#         except Exception as e:
#             error_msg = f"❌ Error during search: {str(e)}"
#             yield error_msg, "", "", error_msg, "", "", f"❌ Error occurred - no timing data"

#     def format_search_results(results, approach_name):
#         """Format search results for display"""
#         if not results:
#             return f"No results found with {approach_name}"

#         lines = []
#         lines.append(f"**{approach_name} Results ({len(results)} documents found)**\n")
        
#         for i, r in enumerate(results):
#             doc_id = r.get("doc_id", "N/A")
#             chunk_type_val = r.get("chunk_type", "N/A")
#             page_number = r.get("page_number", "N/A")
#             score = r.get("score", 0.0)
#             catalogs = r.get("catalog_systems", [])
#             scotts = r.get("scott_numbers", [])
#             years = r.get("years", [])

#             # Preview text
#             text = r.get("text", "")
#             preview = (text[:300] + "...") if len(text) > 300 else text

#             block = []
#             block.append(f"**Result {i+1}** (Score: {score:.3f})")
#             block.append(f"• Document: {doc_id}")
#             block.append(f"• Type: {chunk_type_val} | Page: {page_number}")
#             if catalogs:
#                 block.append(f"• Catalogs: {', '.join(catalogs)}")
#             if scotts:
#                 block.append(f"• Scott Numbers: {', '.join(scotts)}")
#             if years:
#                 block.append(f"• Years: {', '.join(str(y) for y in years)}")
#             block.append(f"• Preview: {preview}")
#             block.append("-" * 60)
#             lines.append("\n".join(block))
        
#         return "\n".join(lines)

#     def format_metadata(metadata, execution_time):
#         """Format metadata for display"""
#         if not metadata:
#             return "No metadata available"
        
#         output_lines = [
#             "**Search Metadata:**",
#             f"• Approach: {metadata.get('approach', 'Unknown')}",
#             f"• Query: {metadata.get('query', 'N/A')}",
#             f"• Results found: {metadata.get('total_results', 0)}",
#             f"• Generation time: {metadata.get('generation_time', 'N/A')} seconds",
#             f"• Context documents: {metadata.get('context_docs_count', 'N/A')}",
#             f"• Total execution time: {execution_time} seconds",
#         ]
        
#         if metadata.get('filters_used'):
#             output_lines.append(f"• Filters used: {metadata['filters_used']}")
        
#         if metadata.get('compressed_docs'):
#             output_lines.append(f"• Compressed documents: {metadata['compressed_docs']}")
            
#         if metadata.get('error'):
#             output_lines.append(f"• Error: {metadata['error']}")
        
#         return "\n".join(output_lines)

#     def format_timing_display(basic_time, advanced_time, total_time, basic_results, advanced_results):
#         """Format timing information for prominent display"""
#         try:
#             basic_time = float(basic_time) if basic_time else 0
#             advanced_time = float(advanced_time) if advanced_time else 0
#             total_time = float(total_time) if total_time else 0
            
#             # Determine which approach was faster
#             if basic_time > 0 and advanced_time > 0:
#                 if basic_time < advanced_time:
#                     faster = f"🏆 Basic search was {advanced_time/basic_time:.1f}x faster"
#                 elif advanced_time < basic_time:
#                     faster = f"🏆 Advanced search was {basic_time/advanced_time:.1f}x faster"
#                 else:
#                     faster = "⚡ Both approaches took similar time"
#             else:
#                 faster = "⏱️ Timing comparison not available"
            
#             # Calculate speeds safely
#             basic_speed = f"{basic_results/basic_time:.1f}" if basic_time > 0 else "N/A"
#             advanced_speed = f"{advanced_results/advanced_time:.1f}" if advanced_time > 0 else "N/A"
            
#             timing_display = f"""**⏱️ EXECUTION TIMING COMPARISON**

# 🔍 **Basic Hybrid Search**
# • Processing Time: {basic_time:.2f} seconds
# • Documents Found: {basic_results}
# • Speed: {basic_speed} docs/sec

# 🚀 **Advanced Compression Search**  
# • Processing Time: {advanced_time:.2f} seconds
# • Documents Found: {advanced_results}
# • Speed: {advanced_speed} docs/sec

# 📊 **Overall Performance**
# • Total Execution: {total_time:.2f} seconds
# • Sequential Execution (Basic → Advanced)
# • {faster}

# 💡 **Performance Notes:**
# • Basic search completed first for quick results
# • Advanced search provides enhanced quality
# • Sequential execution allows progressive viewing"""

#             return timing_display
            
#         except Exception as e:
#             return f"❌ Error formatting timing data: {e}"

#     # Set example query functions
#     def set_example_1():
#         return "What Costa Rica stamps from 1907 have Scott catalog number 68?"
    
#     def set_example_2():
#         return "Show me Costa Rica overprinted stamps with varieties or errors"
    
#     def set_example_3():
#         return "Costa Rica airmail stamps from the 1930s with catalog values"
    
#     def set_example_4():
#         return "What Costa Rica definitive stamps feature the coat of arms?"
    
#     def set_example_5():
#         return "Costa Rica stamps with perforation errors or printing varieties"
    
#     def set_example_6():
#         return "Show me Costa Rica commemorative stamps issued between 1940-1950"

#     # System information
#     collection_name = rag_system.get("collection_name", "Oxcart")
#     total_docs = rag_system.get("total_documents", 0)
#     total_chunks = rag_system.get("total_chunks", 0)
#     weaviate_url = rag_system.get("weaviate_url") or os.getenv("WEAVIATE_URL", "http://localhost:8080")

#     # --- UI with default theme ---
#     with gr.Blocks(title="OXCART RAG - Costa Rica Philatelic System") as interface:
#         gr.Markdown(
#             "# 🇨🇷 OXCART RAG - Costa Rica Philatelic System\n\n"
#             "Advanced search for Costa Rican stamps and postal history with sequential dual AI approaches."
#         )

#         with gr.Row():
#             with gr.Column(scale=3):
#                 # Main input
#                 query_input = gr.Textbox(
#                     label="Your Costa Rica philatelic query",
#                     placeholder="e.g., What Costa Rica stamps from 1907 have Scott number 68?",
#                     lines=2,
#                 )

#                 # Search button
#                 search_btn = gr.Button("🔍 Search with Both Approaches (Sequential)", variant="primary")

#                 # Example queries with individual functions
#                 gr.Markdown("**Example Queries:**")
                
#                 with gr.Row():
#                     btn1 = gr.Button("📮 1907 Scott 68 stamps", variant="secondary")
#                     btn2 = gr.Button("📮 Overprinted varieties", variant="secondary")
#                     btn3 = gr.Button("📮 1930s airmail stamps", variant="secondary")
                
#                 with gr.Row():
#                     btn4 = gr.Button("📮 Coat of arms definitives", variant="secondary")
#                     btn5 = gr.Button("📮 Perforation errors", variant="secondary")
#                     btn6 = gr.Button("📮 1940-1950 commemoratives", variant="secondary")

#             with gr.Column(scale=1):
#                 # Advanced filters
#                 gr.Markdown("**Advanced Search Filters**")

#                 use_filters = gr.Checkbox(label="Enable specific filters", value=False)

#                 catalog_system = gr.Dropdown(
#                     choices=["", "Scott", "Michel", "Yvert", "Stanley Gibbons", "Edifil"],
#                     label="Catalog system",
#                     value="",
#                 )

#                 chunk_type = gr.Dropdown(
#                     choices=["", "text", "table", "figure", "title", "header"],
#                     label="Content type",
#                     value="",
#                 )

#                 has_varieties = gr.Checkbox(label="Only documents with varieties", value=False)

#                 max_results = gr.Slider(
#                     minimum=5,
#                     maximum=50,
#                     value=15,
#                     step=5,
#                     label="Maximum results per approach",
#                 )

#         # Prominent timing display section
#         with gr.Row():
#             with gr.Column():
#                 timing_display = gr.Textbox(
#                     label="⏱️ Performance Timing Comparison",
#                     lines=12,
#                     interactive=False,
#                     value="Run a search to see detailed timing comparison between approaches"
#                 )

#         # Tabbed output interface
#         with gr.Tabs():
#             with gr.TabItem("🔍 Basic Hybrid Search"):
#                 gr.Markdown("**Combines vector similarity with keyword matching (35% vector + 65% keyword)**")
                
#                 with gr.Row():
#                     with gr.Column():
#                         gr.Markdown("## AI Response - Basic Approach")
#                         basic_answer_output = gr.Textbox(
#                             label="Generated response", 
#                             lines=10, 
#                             interactive=False
#                         )

#                 with gr.Row():
#                     with gr.Column():
#                         gr.Markdown("## Documents Found - Basic Search")
#                         basic_search_output = gr.Textbox(
#                             label="Search results", 
#                             lines=15, 
#                             interactive=False
#                         )

#                     with gr.Column():
#                         gr.Markdown("## Metadata - Basic Search")
#                         basic_metadata_output = gr.Textbox(
#                             label="Query information", 
#                             lines=12, 
#                             interactive=False
#                         )

#             with gr.TabItem("🚀 Advanced Compression Search"):
#                 gr.Markdown("**Multi-query ensemble retrieval with AI-powered document compression**")
                
#                 with gr.Row():
#                     with gr.Column():
#                         gr.Markdown("## AI Response - Advanced Approach")
#                         advanced_answer_output = gr.Textbox(
#                             label="Generated response", 
#                             lines=10, 
#                             interactive=False
#                         )

#                 with gr.Row():
#                     with gr.Column():
#                         gr.Markdown("## Documents Found - Advanced Search")
#                         advanced_search_output = gr.Textbox(
#                             label="Search results with compression", 
#                             lines=15, 
#                             interactive=False
#                         )

#                     with gr.Column():
#                         gr.Markdown("## Metadata - Advanced Search")
#                         advanced_metadata_output = gr.Textbox(
#                             label="Query information", 
#                             lines=12, 
#                             interactive=False
#                         )

#         # Wire up all events
#         # Main search button with sequential execution
#         search_btn.click(
#             fn=gradio_sequential_search,
#             inputs=[query_input, use_filters, catalog_system, chunk_type, has_varieties, max_results],
#             outputs=[
#                 basic_answer_output, basic_search_output, basic_metadata_output,
#                 advanced_answer_output, advanced_search_output, advanced_metadata_output,
#                 timing_display
#             ],
#         )

#         # Enter key in search box
#         query_input.submit(
#             fn=gradio_sequential_search,
#             inputs=[query_input, use_filters, catalog_system, chunk_type, has_varieties, max_results],
#             outputs=[
#                 basic_answer_output, basic_search_output, basic_metadata_output,
#                 advanced_answer_output, advanced_search_output, advanced_metadata_output,
#                 timing_display
#             ],
#         )

#         # Example buttons
#         btn1.click(fn=set_example_1, outputs=[query_input])
#         btn2.click(fn=set_example_2, outputs=[query_input])
#         btn3.click(fn=set_example_3, outputs=[query_input])
#         btn4.click(fn=set_example_4, outputs=[query_input])
#         btn5.click(fn=set_example_5, outputs=[query_input])
#         btn6.click(fn=set_example_6, outputs=[query_input])

#         # System information
#         gr.Markdown(
#             "---\n"
#             f"**System Status:**\n"
#             f"• Collection: {collection_name}\n"
#             f"• Documents indexed: {total_docs:,}\n"
#             f"• Total chunks: {total_chunks:,}\n"
#             f"• Status: ✅ Operational\n\n"
#             f"**Execution Mode:**\n"
#             f"• Sequential execution: Basic search completes first, then Advanced\n"
#             f"• Results display progressively as each search completes\n\n"
#             f"**Search Approaches:**\n"
#             f"• **Basic Search**: Hybrid semantic search optimized for exact catalog numbers\n"
#             f"• **Advanced Search**: Multi-query ensemble with AI compression for complex queries"
#         )

#     return interface

In [None]:
import time
import markdown
from typing import Dict, Any, List

def create_gradio_interface(rag_system: Dict[str, Any]) -> gr.Blocks:
    """
    Build the Gradio Blocks UI for the OXCART Costa Rica philatelic RAG system.

    The interface runs two searches one after the other (basic hybrid, then
    advanced compression) and streams the results to the page as each one
    finishes. Answers, result lists and metadata are rendered through
    ``gr.HTML`` components (Markdown converted to HTML); the timing summary
    is shown in a plain ``gr.Textbox``.

    Args:
        rag_system: Dictionary describing the initialized RAG system. This
            function reads the optional keys ``collection_name``,
            ``total_documents`` and ``total_chunks`` for the status footer and
            passes the whole dictionary through to ``search_and_answer_basic``
            and ``search_and_answer_advanced``. A falsy value (``None`` or an
            empty dict) still builds the UI; every search then reports that
            the RAG system is not configured.

    Returns:
        The assembled ``gr.Blocks`` interface (not launched).
    """

    def markdown_to_html(text):
        """Convert Markdown text to HTML; empty input yields a placeholder paragraph."""
        if not text:
            return "<p><em>No content</em></p>"
        return markdown.markdown(text, extensions=['tables', 'fenced_code', 'codehilite'])

    def format_seconds(value):
        """Render a duration with two decimals, or ``"N/A"`` when it is not numeric.

        Metadata values are optional, so a missing or malformed timing must
        not raise while the results are being formatted.
        """
        try:
            return f"{float(value):.2f}"
        except (TypeError, ValueError):
            return "N/A"

    def gradio_sequential_search(query, use_filters, catalog_system, chunk_type, has_varieties, max_results):
        """
        Run the basic search, then the advanced search, yielding UI updates.

        This is a generator: each yielded 7-tuple maps, in order, to
        (basic answer, basic results, basic metadata, advanced answer,
        advanced results, advanced metadata, timing text). The first six
        items are HTML strings, the last one is plain text.
        """
        if not rag_system:
            error_msg = "❌ RAG system not configured"
            error_html = markdown_to_html(error_msg)
            yield error_html, "", "", error_html, "", "", "No timing data available"
            return

        if not query or not query.strip():
            error_msg = "❌ Please enter a query"
            error_html = markdown_to_html(error_msg)
            yield error_html, "", "", error_html, "", "", "No timing data available"
            return

        try:
            start_total_time = time.time()

            # ============= STEP 1: Execute Basic Search =============
            # Initial status messages shown while the basic search runs
            loading_basic = markdown_to_html("*🔄 Running Basic Hybrid Search...*")
            loading_advanced = markdown_to_html("*⏳ Waiting for Basic search to complete...*")

            yield (
                loading_basic,
                "",
                "",
                loading_advanced,
                "",
                "",
                "⏱️ Basic search in progress..."
            )

            # Call basic search function
            basic_results_data = search_and_answer_basic(
                query=query,
                rag_system=rag_system,
                use_filters=use_filters,
                catalog_system=catalog_system,
                chunk_type=chunk_type,
                has_varieties=has_varieties,
                max_results=int(max_results),
            )

            # Format Basic Results
            basic_answer = basic_results_data["answer"]  # Converted from Markdown below
            basic_answer_html = markdown_to_html(basic_answer)

            basic_results = basic_results_data["results"]
            basic_metadata = basic_results_data["metadata"]
            basic_execution_time = basic_metadata.get("generation_time", 0)

            basic_search_output = format_search_results(basic_results, "Basic Hybrid Search")
            basic_search_html = markdown_to_html(basic_search_output)

            basic_metadata_output = format_metadata(basic_metadata, basic_execution_time)
            basic_metadata_html = markdown_to_html(basic_metadata_output)

            # Partial timing, shown while the advanced search is still running
            timing_partial = f"""⏱️ EXECUTION TIMING (Partial)
            
Basic Hybrid Search: ✅ COMPLETED
• Time: {format_seconds(basic_execution_time)}s
• Results: {len(basic_results)}

Advanced Search: ⏳ STARTING...
"""

            loading_advanced_2 = markdown_to_html("*🔄 Starting Advanced Compression Search...*")

            yield (
                basic_answer_html,
                basic_search_html,
                basic_metadata_html,
                loading_advanced_2,
                "",
                "",
                timing_partial
            )

            # ============= STEP 2: Execute Advanced Search =============
            advanced_results_data = search_and_answer_advanced(
                query=query,
                rag_system=rag_system,
                use_filters=use_filters,
                catalog_system=catalog_system,
                chunk_type=chunk_type,
                has_varieties=has_varieties,
                max_results=int(max_results),
            )

            # Format Advanced Results
            advanced_answer = advanced_results_data["answer"]  # Converted from Markdown below
            advanced_answer_html = markdown_to_html(advanced_answer)

            advanced_results = advanced_results_data["results"]
            advanced_metadata = advanced_results_data["metadata"]
            advanced_execution_time = advanced_metadata.get("generation_time", 0)

            advanced_search_output = format_search_results(advanced_results, "Advanced Compression Search")
            advanced_search_html = markdown_to_html(advanced_search_output)

            advanced_metadata_output = format_metadata(advanced_metadata, advanced_execution_time)
            advanced_metadata_html = markdown_to_html(advanced_metadata_output)

            # Calculate total execution time
            total_execution_time = time.time() - start_total_time

            # Final timing information
            timing_final = format_timing_display(
                basic_execution_time,
                advanced_execution_time,
                total_execution_time,
                len(basic_results),
                len(advanced_results)
            )

            # Yield final complete results
            yield (
                basic_answer_html,
                basic_search_html,
                basic_metadata_html,
                advanced_answer_html,
                advanced_search_html,
                advanced_metadata_html,
                timing_final
            )

        except Exception as e:
            # UI boundary: surface any failure in the page instead of raising
            # into Gradio's event loop.
            error_msg = f"❌ Error during search: {str(e)}"
            error_html = markdown_to_html(error_msg)
            yield error_html, "", "", error_html, "", "", "❌ Error occurred - no timing data"

    def format_search_results(results, approach_name):
        """Format a list of result dicts as a Markdown report.

        Each result is read with ``.get`` for the keys ``doc_id``,
        ``chunk_type``, ``page_number``, ``score``, ``catalog_systems``,
        ``scott_numbers``, ``years`` and ``text``; the text preview is cut
        at 300 characters.
        """
        if not results:
            return f"*No results found with {approach_name}*"

        lines = []
        lines.append(f"### {approach_name} Results")
        lines.append(f"**Found {len(results)} documents**\n")
        lines.append("---")

        for i, r in enumerate(results):
            doc_id = r.get("doc_id", "N/A")
            chunk_type_val = r.get("chunk_type", "N/A")
            page_number = r.get("page_number", "N/A")
            score = r.get("score", 0.0)
            catalogs = r.get("catalog_systems", [])
            scotts = r.get("scott_numbers", [])
            years = r.get("years", [])

            # Preview text
            text = r.get("text", "")
            preview = (text[:300] + "...") if len(text) > 300 else text

            lines.append(f"\n#### 📄 Result {i+1}")
            lines.append(f"**Score:** `{score:.3f}`\n")

            # Create a table for metadata
            lines.append("| Field | Value |")
            lines.append("|-------|-------|")
            lines.append(f"| Document | `{doc_id}` |")
            lines.append(f"| Type | {chunk_type_val} |")
            lines.append(f"| Page | {page_number} |")

            if catalogs:
                lines.append(f"| Catalogs | {', '.join(catalogs)} |")
            if scotts:
                lines.append(f"| Scott Numbers | **{', '.join(scotts)}** |")
            if years:
                lines.append(f"| Years | {', '.join(str(y) for y in years)} |")

            lines.append("\n**Preview:**")
            lines.append(f"> {preview}")
            lines.append("\n---")

        return "\n".join(lines)

    def format_metadata(metadata, execution_time):
        """Format the search metadata dict as a Markdown table.

        Timings go through ``format_seconds`` so that a missing or
        non-numeric ``generation_time`` is shown as ``N/A`` instead of
        raising while the table is built.
        """
        if not metadata:
            return "*No metadata available*"

        output_lines = []
        output_lines.append("### Search Metadata\n")

        # Create a table for metadata
        output_lines.append("| Property | Value |")
        output_lines.append("|----------|-------|")
        output_lines.append(f"| **Approach** | {metadata.get('approach', 'Unknown')} |")
        output_lines.append(f"| **Query** | `{metadata.get('query', 'N/A')}` |")
        output_lines.append(f"| **Results found** | {metadata.get('total_results', 0)} |")
        output_lines.append(f"| **Generation time** | {format_seconds(metadata.get('generation_time'))} seconds |")
        output_lines.append(f"| **Context docs** | {metadata.get('context_docs_count', 'N/A')} |")
        output_lines.append(f"| **Total execution** | {format_seconds(execution_time)} seconds |")

        if metadata.get('filters_used'):
            # Braces are stripped so the dict repr reads as a plain key/value list
            filters_str = str(metadata['filters_used']).replace('{', '').replace('}', '')
            output_lines.append(f"| **Filters** | `{filters_str}` |")

        if metadata.get('compressed_docs'):
            output_lines.append(f"| **Compressed docs** | {metadata['compressed_docs']} |")

        if metadata.get('error'):
            output_lines.append(f"\n⚠️ **Error:** `{metadata['error']}`")

        return "\n".join(output_lines)

    def format_timing_display(basic_time, advanced_time, total_time, basic_results, advanced_results):
        """Build the plain-text timing comparison shown in the timing Textbox.

        ``basic_results`` and ``advanced_results`` are document counts, not
        result lists. Any formatting failure is returned as an error string
        rather than raised.
        """
        try:
            basic_time = float(basic_time) if basic_time else 0
            advanced_time = float(advanced_time) if advanced_time else 0
            total_time = float(total_time) if total_time else 0

            # Determine which approach was faster
            if basic_time > 0 and advanced_time > 0:
                if basic_time < advanced_time:
                    faster = f"🏆 Basic search was {advanced_time/basic_time:.1f}x faster"
                elif advanced_time < basic_time:
                    faster = f"🏆 Advanced search was {basic_time/advanced_time:.1f}x faster"
                else:
                    faster = "⚡ Both approaches took similar time"
            else:
                faster = "⏱️ Timing comparison not available"

            # Calculate speeds safely (avoid dividing by a zero duration)
            basic_speed = f"{basic_results/basic_time:.1f}" if basic_time > 0 else "N/A"
            advanced_speed = f"{advanced_results/advanced_time:.1f}" if advanced_time > 0 else "N/A"

            timing_display = f"""⏱️ EXECUTION TIMING COMPARISON
================================================

🔍 BASIC HYBRID SEARCH
• Processing Time: {basic_time:.2f} seconds
• Documents Found: {basic_results}
• Speed: {basic_speed} docs/sec
• Status: ✅ Complete

🚀 ADVANCED COMPRESSION SEARCH
• Processing Time: {advanced_time:.2f} seconds
• Documents Found: {advanced_results}
• Speed: {advanced_speed} docs/sec
• Status: ✅ Complete

📊 OVERALL PERFORMANCE
• Total Execution: {total_time:.2f} seconds
• Execution Mode: Sequential (Basic → Advanced)
• {faster}

💡 PERFORMANCE NOTES:
• Basic search completed first for quick results
• Advanced search provides enhanced quality
• Sequential execution allows progressive viewing"""

            return timing_display

        except Exception as e:
            return f"❌ Error formatting timing data: {e}"

    # Example query callbacks: each returns the text placed into the query box
    def set_example_1():
        return "What Costa Rica stamps from 1907 have Scott catalog number 68?"

    def set_example_2():
        return "Show me Costa Rica overprinted stamps with varieties or errors"

    def set_example_3():
        return "Costa Rica airmail stamps from the 1930s with catalog values"

    def set_example_4():
        return "What Costa Rica definitive stamps feature the coat of arms?"

    def set_example_5():
        return "Costa Rica stamps with perforation errors or printing varieties"

    def set_example_6():
        return "Show me Costa Rica commemorative stamps issued between 1940-1950"

    # System information; tolerate a missing rag_system so the UI can still
    # be built and report "not configured" when a search is attempted.
    system_info = rag_system or {}
    collection_name = system_info.get("collection_name", "Oxcart")
    total_docs = system_info.get("total_documents", 0)
    total_chunks = system_info.get("total_chunks", 0)

    # --- UI with mixed components ---
    with gr.Blocks(
        title="OXCART RAG - Costa Rica Philatelic System",
        css="""
        .markdown-text {
            font-family: 'Inter', sans-serif;
        }
        table {
            border-collapse: collapse;
            width: 100%;
        }
        th, td {
            border: 1px solid #ddd;
            padding: 8px;
            text-align: left;
        }
        """
    ) as interface:
        gr.Markdown(
            "# 🇨🇷 OXCART RAG - Costa Rica Philatelic System\n\n"
            "Advanced search for Costa Rican stamps and postal history with sequential dual AI approaches."
        )

        with gr.Row():
            with gr.Column(scale=3):
                query_input = gr.Textbox(
                    label="Your Costa Rica philatelic query",
                    placeholder="e.g., What Costa Rica stamps from 1907 have Scott number 68?",
                    lines=2,
                )

                search_btn = gr.Button("🔍 Search with Both Approaches (Sequential)", variant="primary")

                gr.Markdown("**Example Queries:**")

                with gr.Row():
                    btn1 = gr.Button("📮 1907 Scott 68 stamps", variant="secondary")
                    btn2 = gr.Button("📮 Overprinted varieties", variant="secondary")
                    btn3 = gr.Button("📮 1930s airmail stamps", variant="secondary")

                with gr.Row():
                    btn4 = gr.Button("📮 Coat of arms definitives", variant="secondary")
                    btn5 = gr.Button("📮 Perforation errors", variant="secondary")
                    btn6 = gr.Button("📮 1940-1950 commemoratives", variant="secondary")

            with gr.Column(scale=1):
                gr.Markdown("**Advanced Search Filters**")

                use_filters = gr.Checkbox(label="Enable specific filters", value=False)
                catalog_system = gr.Dropdown(
                    choices=["", "Scott", "Michel", "Yvert", "Stanley Gibbons", "Edifil"],
                    label="Catalog system",
                    value="",
                )
                chunk_type = gr.Dropdown(
                    choices=["", "text", "table", "figure", "title", "header"],
                    label="Content type",
                    value="",
                )
                has_varieties = gr.Checkbox(label="Only documents with varieties", value=False)
                max_results = gr.Slider(
                    minimum=5,
                    maximum=50,
                    value=15,
                    step=5,
                    label="Maximum results per approach",
                )

        # Timing display as Textbox for better updates
        with gr.Row():
            with gr.Column():
                timing_display = gr.Textbox(
                    label="⏱️ Performance Timing Comparison",
                    lines=18,
                    interactive=False,
                    value="Run a search to see detailed timing comparison between approaches",
                    elem_id="timing-display"
                )

        # Tabbed output with HTML components
        with gr.Tabs():
            with gr.TabItem("🔍 Basic Hybrid Search"):
                gr.Markdown("**Combines vector similarity with keyword matching (35% vector + 65% keyword)**")

                with gr.Row():
                    with gr.Column():
                        gr.Markdown("## AI Response - Basic Approach")
                        basic_answer_output = gr.HTML(
                            value="<p><em>Waiting for search...</em></p>",
                            elem_id="basic_answer"
                        )

                with gr.Row():
                    with gr.Column():
                        gr.Markdown("## Documents Found - Basic Search")
                        basic_search_output = gr.HTML(
                            value="<p><em>No results yet</em></p>",
                            elem_id="basic_search"
                        )

                    with gr.Column():
                        gr.Markdown("## Metadata - Basic Search")
                        basic_metadata_output = gr.HTML(
                            value="<p><em>No metadata yet</em></p>",
                            elem_id="basic_metadata"
                        )

            with gr.TabItem("🚀 Advanced Compression Search"):
                gr.Markdown("**Multi-query ensemble retrieval with AI-powered document compression**")

                with gr.Row():
                    with gr.Column():
                        gr.Markdown("## AI Response - Advanced Approach")
                        advanced_answer_output = gr.HTML(
                            value="<p><em>Waiting for search...</em></p>",
                            elem_id="advanced_answer"
                        )

                with gr.Row():
                    with gr.Column():
                        gr.Markdown("## Documents Found - Advanced Search")
                        advanced_search_output = gr.HTML(
                            value="<p><em>No results yet</em></p>",
                            elem_id="advanced_search"
                        )

                    with gr.Column():
                        gr.Markdown("## Metadata - Advanced Search")
                        advanced_metadata_output = gr.HTML(
                            value="<p><em>No metadata yet</em></p>",
                            elem_id="advanced_metadata"
                        )

        # Wire up events. The button click and the Enter key share the same
        # inputs/outputs; the output order must match the tuples yielded by
        # gradio_sequential_search.
        search_inputs = [query_input, use_filters, catalog_system, chunk_type, has_varieties, max_results]
        search_outputs = [
            basic_answer_output, basic_search_output, basic_metadata_output,
            advanced_answer_output, advanced_search_output, advanced_metadata_output,
            timing_display
        ]

        search_btn.click(
            fn=gradio_sequential_search,
            inputs=search_inputs,
            outputs=search_outputs,
        )

        query_input.submit(
            fn=gradio_sequential_search,
            inputs=search_inputs,
            outputs=search_outputs,
        )

        # Example buttons
        btn1.click(fn=set_example_1, outputs=[query_input])
        btn2.click(fn=set_example_2, outputs=[query_input])
        btn3.click(fn=set_example_3, outputs=[query_input])
        btn4.click(fn=set_example_4, outputs=[query_input])
        btn5.click(fn=set_example_5, outputs=[query_input])
        btn6.click(fn=set_example_6, outputs=[query_input])

        # System information
        gr.Markdown(
            "---\n"
            "**System Status:**\n"
            f"• Collection: {collection_name}\n"
            f"• Documents indexed: {total_docs:,}\n"
            f"• Total chunks: {total_chunks:,}\n"
            "• Status: ✅ Operational\n\n"
            "**Execution Mode:**\n"
            "• Sequential execution: Basic search completes first, then Advanced\n"
            "• Results display progressively as each search completes\n\n"
            "**Search Approaches:**\n"
            "• **Basic Search**: Hybrid semantic search optimized for exact catalog numbers\n"
            "• **Advanced Search**: Multi-query ensemble with AI compression for complex queries"
        )

    return interface

In [None]:
# ---- Enhanced launcher ----
# Builds the Gradio interface from `rag_system` and starts the server.
# Environment variables read here:
#   GRADIO_PORT  - port to serve on (defaults to DEFAULT_GRADIO_PORT)
#   GRADIO_SHARE - "true" (case-insensitive) requests a public share tunnel
DEFAULT_GRADIO_PORT = 7860


def _launch_gradio(app, port, share):
    """Launch `app` on `port` and return whatever `app.launch()` returns.

    The browser tab is opened automatically only for local-only runs; when a
    public tunnel is requested the URLs are printed instead.
    """
    return app.launch(
        server_port=port,
        share=share,
        inbrowser=not share,
        show_error=True,
        prevent_thread_lock=False,
        quiet=False,
    )


def _extract_share_url(launch_result):
    """Return the public share URL from a `launch()` result, or None.

    Gradio's `Blocks.launch()` returns an `(app, local_url, share_url)` tuple,
    so the URL is the third item rather than a `.share_url` attribute on the
    result. The attribute lookup is kept only as a fallback for any result
    object that is not such a tuple.
    """
    if isinstance(launch_result, tuple) and len(launch_result) >= 3:
        return launch_result[2]
    return getattr(launch_result, "share_url", None)


if rag_system and rag_system.get("success", False):
    print("\n" + "=" * 60)
    print("🚀 LAUNCHING COSTA RICA PHILATELIC RAG INTERFACE")
    print("=" * 60)

    gradio_app = create_gradio_interface(rag_system)

    # An empty or non-numeric GRADIO_PORT falls back to the default instead of
    # aborting the cell with an uncaught ValueError.
    try:
        GRADIO_PORT = int(os.getenv("GRADIO_PORT") or DEFAULT_GRADIO_PORT)
    except ValueError:
        print(
            f"⚠️ Invalid GRADIO_PORT={os.getenv('GRADIO_PORT')!r}; "
            f"using {DEFAULT_GRADIO_PORT}"
        )
        GRADIO_PORT = DEFAULT_GRADIO_PORT
    GRADIO_SHARE = os.getenv("GRADIO_SHARE", "false").strip().lower() == "true"

    print(f"⚙️ Port: {GRADIO_PORT}")
    print(f"🌍 Public URL: {'⚠️ Attempting...' if GRADIO_SHARE else '❌ Disabled (more secure)'}")

    try:
        print("🔄 Starting Gradio server...")

        if GRADIO_SHARE:
            print("⏳ Attempting to create public tunnel...")
            try:
                demo = _launch_gradio(gradio_app, GRADIO_PORT, share=True)
            except Exception as share_error:
                print(f"⚠️ Error creating public tunnel: {share_error}")
                print("🔄 Switching to local mode only...")

                # A failure here propagates to the outer handler below.
                demo = _launch_gradio(gradio_app, GRADIO_PORT, share=False)

                print("\n✅ LOCAL SERVER OPERATIONAL:")
                print(f"   📱 Local URL: http://localhost:{GRADIO_PORT}")
                print("   ⚠️ Public URL: Not available (tunnel error)")
            else:
                share_url = _extract_share_url(demo)

                # Only claim success when a public URL actually came back.
                if share_url:
                    print("\n🎉 SUCCESS! Public tunnel created")
                else:
                    print("\n⚠️ Server started, but no public URL was returned")
                print("🌐 AVAILABLE URLS:")
                print(f"   📱 Local: http://localhost:{GRADIO_PORT}")

                if share_url:
                    print(f"   🌍 Public: {share_url}")
                    print(f"\n🔗 **PUBLIC URL:** {share_url}")
                else:
                    print("   🌍 Public: Check Gradio output above ☝️")

        else:
            demo = _launch_gradio(gradio_app, GRADIO_PORT, share=False)

            print("\n✅ LOCAL SERVER OPERATIONAL:")
            print(f"   📱 Local URL: http://localhost:{GRADIO_PORT}")
            print("   💡 For public URL, set GRADIO_SHARE=true in .env")

        print("\n📋 COSTA RICA PHILATELIC FEATURES:")
        print("   • Specialized Costa Rica stamp queries")
        print("   • Scott catalog number search")
        print("   • Variety and error detection")
        print("   • Dual search approaches for comprehensive results")
        print("   • Performance timing comparison")
        print("   • To stop: gr.close_all()")

        print(f"\n{'=' * 60}")
        print("🇨🇷 COSTA RICA PHILATELIC RAG INTERFACE READY!")
        print("=" * 60)

    except Exception as e:
        # Top-level boundary of the notebook cell: report the failure and give
        # recovery hints rather than dumping a traceback.
        print(f"❌ Critical error launching Gradio: {e}")
        print("\n🔧 SUGGESTED SOLUTIONS:")
        print("   1. Run: gr.close_all()")
        print("   2. Change port: GRADIO_PORT=7861 in .env")
        print("   3. Verify no other services on the port")
        print("   4. Restart the notebook")

else:
    # No usable RAG system: explain why the interface cannot be built.
    print("\n⚠️  Cannot create Gradio interface:")
    if not rag_system:
        print("   • RAG system not configured")
    else:
        print(f"   • RAG error: {rag_system.get('error', 'Unknown error')}")
    print("\n🔧 To resolve:")
    print("   1. Verify Weaviate is running")
    print("   2. Configure OPENAI_API_KEY in .env")
    print("   3. Run document indexing")
    print("   4. Restart this notebook")

In [None]:
# Uncomment and run the line below to stop the running Gradio server(s):
# gr.close_all()