# Philatelic Gradio App with Weaviate
Interactive web interface for CR Philately

## 1. Setup & Imports
Load all the modules and functions

In [None]:
import os
import json
import glob
import time
from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple
from langchain.callbacks import get_openai_callback
from datetime import datetime
import weaviate
import gradio as gr
import re

import time

# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Third-party imports
import pandas as pd


from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_weaviate import WeaviateVectorStore
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage
from langchain.retrievers import MultiQueryRetriever, EnsembleRetriever, ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor


from philatelic_weaviate import *

from philatelic_chunk_schema import *

print("✅ Basic imports completed")

In [None]:
# Read runtime configuration from environment variables, falling back to
# local-development defaults where one is defined.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
WEAVIATE_URL = os.getenv('WEAVIATE_URL', 'http://localhost:8083')
PHILATELIC_JSONS_DIR = os.getenv('PHILATELIC_JSONS_DIR', './results/final_jsons')
COLLECTION_NAME = os.getenv('WEAVIATE_COLLECTION_NAME', 'Oxcart')

print("🔧 Configuration:")
print(f"   • Weaviate URL: {WEAVIATE_URL}")
print(f"   • JSONs Directory: {PHILATELIC_JSONS_DIR}")
print(f"   • Collection Name: {COLLECTION_NAME}")
print(f"   • OpenAI API Key: {'✅ Configured' if OPENAI_API_KEY else '❌ Missing configuration'}")

if not OPENAI_API_KEY:
    # "\n" (not "\\n") so a real blank line separates the warning from the summary.
    print("\n⚠️  IMPORTANT: Configure your OPENAI_API_KEY in the .env file")
    print("   Copy .env.example to .env and add your API key")

# Always define json_files so later cells can rely on it even when the
# directory is missing.
json_files = []

# Verify that the JSONs directory exists and list a sample of its contents.
if not os.path.exists(PHILATELIC_JSONS_DIR):
    print(f"\n⚠️  Directory {PHILATELIC_JSONS_DIR} not found")
    print("   Make sure you have processed documents with the Dolphin parser")
else:
    json_files = glob.glob(os.path.join(PHILATELIC_JSONS_DIR, '*_final.json'))
    print(f"\n📁 Found {len(json_files)} philatelic JSON files")
    if json_files:
        print("   Examples:")
        # Show at most three file names to keep the cell output short.
        for json_path in json_files[:3]:
            print(f"   • {os.path.basename(json_path)}")
        if len(json_files) > 3:
            print(f"   • ... and {len(json_files) - 3} more")

In [None]:
# Settings for the chat model used to generate answers.
_chat_model_options = {
    "model": "gpt-5-nano",
    "api_key": OPENAI_API_KEY,
    "temperature": 1,  # mandatory for gpt-5-nano
    "timeout": 120.0,
    # "max_completion_tokens": 2500,
    "model_kwargs": {
        "verbosity": "medium",
        "reasoning_effort": "low",
    },
}
llm = ChatOpenAI(**_chat_model_options)

# Embedding model handed to the vector store for query embedding.
embeddings = OpenAIEmbeddings(
    api_key=OPENAI_API_KEY,
    model="text-embedding-3-large",
)

In [None]:
# ========================================================================================
# 📝 RAG PROMPT TEMPLATE - Professional Philatelic Consultation
# ========================================================================================

# Prompt sent to the LLM for answer generation. It has exactly two placeholders:
#   {context}  - the retrieved source material
#   {question} - the user's research query
# Any other literal braces added to this text would be read as template variables.
philatelic_rag_template = """You are a senior philatelic researcher and catalog specialist with expertise in Costa Rican stamps and postal history. Provide comprehensive, well-structured analysis based strictly on the source materials provided.

SOURCE MATERIALS:
{context}

RESEARCH QUERY: {question}

RESPONSE REQUIREMENTS:

FORMATTING & STRUCTURE:
• Use clear hierarchical organization with descriptive headers using markdown
• Group related information under logical categories using ## and **bold subheadings**
• Use bullet points (•) for individual facts and varieties
• Include relevant emojis for major sections (🔍 📮 📚 🎯) to enhance readability
• Bold key terms, catalog numbers, and important details
• KEEP SECTIONS CONCISE - avoid excessive repetition or overly detailed explanations
• Your output is in markdown format

CITATION FORMAT:
• Every factual statement must include the name of the document (doc_id) and its page number like this example: (CRF 100, page 15)
• Multiple sources: (doc_id, page number; doc_id, page number) Example: (OXCART 123, page 24 ; OXCART 25, page 15)
• Always cite catalog numbers (Scott, Yvert, Michel, etc.), varieties, dates, quantities, and technical specifications
• When quoting directly, use quotation marks around quoted text

CONTENT ORGANIZATION:
• Lead with the most direct answer to the query
• Organize by catalog numbers, chronological order, or logical categories as appropriate
• Include technical specifications: dates, quantities, colors, perforations, varieties
• Provide brief historical context and collecting significance
• Note relationships between issues, varieties, or catalog entries
• Address valuation or rarity when relevant to the query

RESPONSE LENGTH:
• Aim for clear, informative responses that are thorough but not excessive
• Eliminate redundant information and repetitive explanations
• Focus on the most relevant information that directly answers the query
• If information is extensive, prioritize the most important catalog entries and varieties

TECHNICAL STANDARDS:
• Use precise philatelic terminology (definitive, commemorative, variety, error, overprint, etc.)
• Specify exact catalog numbers with proper formatting (Scott C216, not just C216)
• Include denomination and color details when available
• Note printing quantities, dates, and technical varieties
• Distinguish between verified catalog facts and expert opinions
• Flag incomplete or uncertain information clearly

RESEARCH COMPLETENESS:
• If source materials are insufficient, state: "The provided documents do not contain sufficient information about..."
• Suggest what additional sources or information would be needed
• Note any gaps in catalog coverage or missing details

PROFESSIONAL TONE:
• Maintain authoritative but accessible language
• Present information objectively without unnecessary qualifiers
• Use active voice and clear, direct statements
• Avoid speculation beyond what sources support

RESPONSE:"""

# Wrap the raw text in a LangChain prompt template with its two declared inputs.
rag_prompt = PromptTemplate(
    template=philatelic_rag_template,
    input_variables=["context", "question"]
)

In [None]:
# ========================================================================================
# 📄 OPTIMIZED DOCUMENT FORMATTING - For Academic Citation Style
# ========================================================================================

def format_docs_for_rag(docs_results: List[Dict]) -> str:
    """Render retrieved documents as a single citation-ready context string.

    Each document becomes one section: a header line carrying its position,
    ``doc_id`` and page number (the two fields the RAG prompt asks the model
    to cite), followed by the document text. Sections are separated by a
    blank line.

    Args:
        docs_results: Document-like objects exposing ``metadata`` (a mapping
            that may hold ``doc_id`` and ``page_number``) and ``page_content``.

    Returns:
        The formatted context text, or a short placeholder message when
        ``docs_results`` is empty.
    """
    if not docs_results:
        return "\nNo source documents available."

    sections = []
    for doc_num, doc in enumerate(docs_results, 1):
        metadata = doc.metadata or {}
        doc_id = metadata.get('doc_id', 'Unknown')
        page = metadata.get('page_number', 'N/A')
        # The header mirrors the "(doc_id, page N)" citation style requested in the prompt.
        sections.append(f"[{doc_num}] ({doc_id}, page {page})\n{doc.page_content}")

    # Return text (not a list of dicts) so the prompt receives readable context
    # and len() of the result measures characters.
    return "\n\n".join(sections)

def create_rag_response(retriever_results: List[Dict], query: str) -> Dict:
    """Run the RAG chain over the retrieved documents and report usage.

    Args:
        retriever_results: Retrieved documents to use as context.
        query: The user's question, passed through to the prompt.

    Returns:
        A dict with the generated ``response``, the ``generation_time`` in
        seconds, ``context_docs_count``, ``context_length``, a ``token_usage``
        dict (input/output/total tokens) and a ``cost_info`` dict with the
        estimated USD cost. When no documents are supplied, a fixed
        "no documents" payload with zeroed metrics is returned and the LLM is
        not called.
    """
    # Nothing retrieved: short-circuit before building any chain.
    if not retriever_results:
        return {
            "response": "No documents found for this query.",
            "generation_time": 0,
            "context_docs_count": 0,
            "context_length": 0,
            "token_usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
            "cost_info": {"estimated_cost_usd": 0, "input_cost": 0, "output_cost": 0},
        }

    # GPT-5-nano pricing: $0.05 per 1M input tokens, $0.40 per 1M output tokens.
    usd_per_input_token = 0.05 / 1_000_000
    usd_per_output_token = 0.40 / 1_000_000

    context = format_docs_for_rag(retriever_results)

    # The context is fixed for this call; only the question flows through the chain.
    prompt_inputs = {"context": lambda x: context, "question": RunnablePassthrough()}
    rag_chain = prompt_inputs | rag_prompt | llm | StrOutputParser()

    started_at = time.time()

    # The OpenAI callback collects token counts for everything invoked inside it.
    with get_openai_callback() as usage:
        response = rag_chain.invoke(query)
        input_tokens = usage.prompt_tokens
        output_tokens = usage.completion_tokens
        total_tokens = usage.total_tokens

    generation_time = round(time.time() - started_at, 2)

    # Costs are computed locally from the per-token prices above.
    input_cost = input_tokens * usd_per_input_token
    output_cost = output_tokens * usd_per_output_token
    estimated_cost = input_cost + output_cost

    token_usage = {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "total_tokens": total_tokens,
    }
    cost_info = {
        "estimated_cost_usd": round(estimated_cost, 6),
        "input_cost": round(input_cost, 6),
        "output_cost": round(output_cost, 6),
    }

    return {
        "response": response,
        "generation_time": generation_time,
        "context_docs_count": len(retriever_results),
        "context_length": len(context),
        "token_usage": token_usage,
        "cost_info": cost_info,
    }

## 2. Weaviate Client

In [None]:
# Connect to Weaviate and report the server version and collection status.
# On any connection failure `client` is set to None so later cells can detect it.
print("🔌 Connecting to Weaviate...")

try:
    client = create_weaviate_client(WEAVIATE_URL, OPENAI_API_KEY)
    print("✅ Connection successful")

    # Verify that Weaviate is working
    meta = client.get_meta()
    print(f"📊 Weaviate version: {meta.get('version', 'unknown')}")

    # Verify if collection exists
    try:
        collections = client.collections.list_all()
        # list_all() may yield collection names (dict keys) or config objects
        # depending on the client version; accept both instead of assuming
        # every item has a `.name` attribute.
        collection_names = [getattr(col, "name", col) for col in collections]

        if COLLECTION_NAME in collection_names:
            collection = client.collections.get(COLLECTION_NAME)
            total_objects = collection.aggregate.over_all(total_count=True).total_count
            print(f"📊 Collection '{COLLECTION_NAME}' exists with {total_objects} documents")
        else:
            print(f"📝 Collection '{COLLECTION_NAME}' does not exist (will be created during indexing)")
    except Exception as e:
        # Collection inspection is best-effort; the connection itself is still usable.
        print(f"⚠️ Could not verify collections: {e}")

except Exception as e:
    print(f"❌ Error connecting to Weaviate: {e}")
    print("💡 Make sure Weaviate is running:")
    print("   docker-compose up -d")
    client = None

## 3. Weaviate Search Tests

Test the function search_chunks_semantic


In [None]:
# Smoke-test the hybrid search helper and dump every hit.
results = search_chunks_semantic(
    client,
    "Costa Rica 1907 2 colones stamp with original gum. Scott 68 issue of 1907",
    "Oxcart",
    limit=50,
    filters=[],
    mode="hybrid",
    alpha=0.35,
)

print(f"   📊 Resultados: {len(results)}")

# (label, result key) pairs for metadata that is printed only when non-empty.
optional_fields = (
    ("📖 Catálogos", 'catalog_systems'),
    ("🔢 Scott", 'scott_numbers'),
    ("📅 Años", 'years'),
    ("🎨 Colores", 'colors'),
    ("🔀 Variedades", 'variety_classes'),
)

for rank, hit in enumerate(results, 1):
    print(f"\n      🏷️ #{rank} (Score: {hit['score']:.3f})")
    print(f"         📄 Documento: {hit['doc_id']}")
    print(f"         📋 Tipo: {hit['chunk_type']}")
    print(f"         📄 Página: {hit['page_number']}")

    # Show relevant metadata
    for label, key in optional_fields:
        if hit.get(key):
            print(f"         {label}: {hit[key]}")

    # The full chunk text is printed; no truncation is applied.
    print(f"         📝 Texto: {hit.get('text', '')}")
    print("**********************************************************************************************************")

## 4. Advanced Retriever Implementation

In [None]:
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables.config import RunnableConfig

def compress_documents_simple(documents: List[Document], query: str, llm) -> List[Document]:
    """Condense each document to the parts relevant to ``query``.

    Every document is sent to ``llm`` in its own prompt via ``llm.batch`` with
    a concurrency cap of 10. Documents the model answers with the sentinel
    ``NO_RELEVANT_CONTENT`` (or with an empty reply) are dropped; the rest are
    returned as new ``Document`` objects that keep the original metadata.

    Args:
        documents: Documents to compress.
        query: The user query the extraction should focus on.
        llm: Chat model exposing ``batch``.

    Returns:
        The compressed documents, ``[]`` for empty input, or the original
        ``documents`` unchanged if the batch call or post-processing raises.
    """
    if not documents:
        return []

    # Simple compression prompt for individual documents
    compress_prompt_template = """You are a philatelic expert. Extract and compress ONLY the information relevant to this query from the document below.

QUERY: {query}

DOCUMENT:
{document}

INSTRUCTIONS:
- Extract only information directly relevant to the query
- Preserve exact catalog numbers, dates, denominations, and technical specifications
- Keep Scott numbers, Michel numbers, and other catalog references intact
- Maintain variety descriptions and error information
- Remove irrelevant content but preserve context for understanding

If the document contains no relevant information, respond with exactly: NO_RELEVANT_CONTENT

COMPRESSED CONTENT:"""

    # One single-message chat prompt per document, kept in input order.
    prompts = [
        [("user", compress_prompt_template.format(query=query, document=doc.page_content))]
        for doc in documents
    ]

    # Cap the number of in-flight LLM calls.
    config = RunnableConfig(max_concurrency=10)

    try:
        responses = llm.batch(prompts, config=config)

        compressed_docs = []
        for source_doc, response in zip(documents, responses):
            if hasattr(response, 'content'):
                content = response.content.strip()
            else:
                content = str(response).strip()

            # Skip empty replies and the explicit "nothing relevant" sentinel.
            if not content or content == "NO_RELEVANT_CONTENT":
                continue

            compressed_docs.append(
                Document(page_content=content, metadata=source_doc.metadata)
            )

        return compressed_docs

    except Exception as e:
        print(f"Error during batch compression: {e}")
        # Fallback: hand back the uncompressed documents.
        return documents

def search_stamps_with_compression(query, client, embeddings, llm, limit=100, 
                                 alpha=0.30, diversity_lambda=0.75):
    """
    Philatelic search: ensemble retrieval, multi-query expansion, then batch compression.
    
    Two retrievers over the same Weaviate vector store (a similarity retriever and an
    MMR retriever, each asked for half of ``limit``) are combined in an
    EnsembleRetriever, wrapped in a MultiQueryRetriever that asks ``llm`` for
    alternative phrasings of the query. The retrieved documents are compressed with
    ``compress_documents_simple`` and sorted by their ``quality_score`` metadata.
    
    Args:
        query (str): The stamp query
        client: Weaviate client
        embeddings: Embedding model
        llm: Language model used to generate the alternative queries
        limit (int): Maximum documents to retrieve; each retriever gets ``limit // 2``
            (at least 1)
        alpha (float): Hybrid search factor (0.30 = 30% vector, 70% keywords);
            omitted from the search kwargs when None
        diversity_lambda (float): Passed as ``lambda_mult`` to the MMR retriever
    
    Returns:
        list: Compressed documents, highest ``quality_score`` first
    """  
    
    # Create vector store
    vector_store = WeaviateVectorStore(
        client=client,
        index_name=COLLECTION_NAME,
        text_key="text",
        embedding=embeddings
    )
    
    # Split the budget between the two retrievers. Cast first because UI widgets
    # may deliver the limit as a float, and never request k=0 (limit=1 would
    # otherwise ask each retriever for zero documents).
    per_retriever_k = max(1, int(limit) // 2)
    
    hybrid_kwargs = {"k": per_retriever_k}
    if alpha is not None:
        # NOTE(review): alpha is forwarded as a search kwarg; presumably consumed by
        # langchain_weaviate's hybrid query - confirm against the installed version.
        hybrid_kwargs["alpha"] = alpha
    
    # 1. Precision hybrid retriever (captures exact numbers + context)
    precision_retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs=hybrid_kwargs
    )
    
    # 2. Diversity MMR retriever (avoids duplicate stamps)
    diversity_retriever = vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": per_retriever_k, "lambda_mult": diversity_lambda}
    )
    
    # 3. Ensemble with dual strategy
    base_retriever = EnsembleRetriever(
        retrievers=[precision_retriever, diversity_retriever],
        weights=[0.7, 0.3]  # 70% precision + 30% diversity
    )
    
    # Specialized prompt for philatelic multi-query generation
    query_prompt = PromptTemplate(
        input_variables=["question"],
        template="""You are a specialized philatelic researcher expert in stamp catalogues and varieties.
Generate 3 strategically different versions of the question to capture comprehensive stamp information:

ORIGINAL: {question}

Create variations that target:
1. CATALOG PRECISION: Focus on exact catalog numbers, dates, and technical specifications
2. CONTEXTUAL SEARCH: Include related series, printings, varieties, and historical context  
3. TERMINOLOGY ALTERNATIVES: Use alternative philatelic terms and synonyms

Consider these philatelic elements:
- Catalog systems: Scott, Michel, Yvert, SG, local catalogs
- Technical terms: definitive/commemorative, variety/error, overprint/surcharge
- Time references: issue dates, printing dates, first day covers
- Denominations: face values, colors, perforations

Alternative searches:
1.
2. 
3."""
    )
    
    # MultiQueryRetriever with specialized prompt
    multi_retriever = MultiQueryRetriever.from_llm(
        retriever=base_retriever,
        llm=llm,
        prompt=query_prompt,
        parser_key="lines"
    )
    
    # Execute initial retrieval
    initial_results = multi_retriever.invoke(query)
       
    # A dedicated model instance is used for the compression step.
    compression_llm = ChatOpenAI(
            model="gpt-5-nano", 
            api_key=OPENAI_API_KEY, 
            temperature=1,  # mandatory for gpt-5-nano
            timeout=120.0,
            model_kwargs={
                "verbosity": "medium",
                "reasoning_effort" : "low"
            })
    
    # Simple batch compression using LangChain's native batch processing
    compressed_results = compress_documents_simple(initial_results, query, compression_llm)
    
    # Reorder by quality_score if it exists
    def get_quality_score(doc):
        # A missing, None or non-numeric score sorts as 0.0; without this a stored
        # None would make sorted() raise TypeError when comparing keys.
        score = (getattr(doc, 'metadata', None) or {}).get('quality_score')
        return score if isinstance(score, (int, float)) else 0.0
    
    sorted_results = sorted(compressed_results, key=get_quality_score, reverse=True)
    return sorted_results

### Test the `search_stamps_with_compression` function

In [None]:
# # Test the optimized search_stamps_with_compression with batch processing
# print("🧪 Testing optimized batch compression...")

# # Test query focused on specific stamps
# test_query = "Costa Rica 1907 2 colones stamp with original gum Scott 68"

# print(f"🔍 Query: {test_query}")
# print("⏱️ Starting optimized search with batch compression...")

# import time
# start_time = time.time()

# try:
#     compressed_docs = search_stamps_with_compression(
#         query=test_query,
#         client=client, 
#         embeddings=embeddings, 
#         limit=30,
#         llm=llm,
#         alpha=0.30,  # 30% vectorial, 70% keywords for exact numbers
#         diversity_lambda=0.75  # 75% relevance, 25% diversity
#     )
    
#     end_time = time.time()
#     execution_time = end_time - start_time
    
#     print(f"✅ Batch compression completed in {execution_time:.2f} seconds")
#     print(f"📊 Retrieved and compressed {len(compressed_docs)} documents")
    
#     # Show sample results
#     for i, doc in enumerate(compressed_docs[:3], 1):
#         print(f"\\n📄 Document {i}:")
#         print(f"   Metadata: {getattr(doc, 'metadata', {})}")
#         content = getattr(doc, 'page_content', str(doc))
#         preview = content[:200] + "..." if len(content) > 200 else content
#         print(f"   Content: {preview}")
        
# except Exception as e:
#     print(f"❌ Error during batch compression test: {e}")
#     import traceback
#     traceback.print_exc()

## 5. Gradio Interface

In [None]:
def get_collection_info() -> str:
    """
    Build a Markdown summary of the "Oxcart" collection for the interface.

    Reads the module-level ``client`` and queries ``get_collection_stats``.

    Returns:
        Markdown text with the total chunk and document counts plus, when the
        stats include a ``documents`` mapping, one bullet per indexed document.
        A short "❌ ..." message is returned instead when there is no client,
        no statistics are available, or the lookup raises.
    """
    if not client:
        return "❌ No Weaviate connection"

    try:
        # NOTE: the collection name is hard-coded here rather than taken from
        # COLLECTION_NAME.
        stats = get_collection_stats(client, "Oxcart")
        if not stats:
            return "❌ Could not retrieve statistics"

        # Real newlines ("\n", not "\\n") so the Markdown gets actual line
        # breaks instead of literal backslash-n text.
        parts = [
            "📊 **Oxcart Collection Statistics:**\n\n",
            f"📦 **Total chunks:** {stats['total_chunks']:,}\n",
            f"📄 **Documents:** {stats['total_documents']}\n\n",
        ]

        if stats.get('documents'):
            parts.append("**Indexed documents:**\n")
            parts.extend(
                f"• {doc_id}: {count:,} chunks\n"
                for doc_id, count in stats['documents'].items()
            )

        return "".join(parts)
    except Exception as e:
        # Surface the failure as text so the UI can display it.
        return f"❌ Error: {e}"


print("✅ RAG functions defined")

In [None]:
# Fetch the statistics for the "Oxcart" collection; `stats` is reused by the
# cell that builds `rag_system`.
stats = get_collection_stats(client, "Oxcart")
# Bare expressions: they raise if `stats` is None or a key is missing, and in a
# notebook cell only the last one (total_chunks) is displayed as output.
stats['total_documents']
stats['total_chunks']

In [None]:
# Shared state consumed by the search/answer functions.
# More fields can be added here if a search function needs them.
rag_system = dict(
    success=True,
    client=client,                              # lets the search functions query Weaviate
    collection_name=COLLECTION_NAME,            # name of the collection
    weaviate_url=WEAVIATE_URL,                  # informational, for the UI
    total_documents=stats['total_documents'],   # used to display status
    total_chunks=stats['total_chunks'],         # optional in the UI
    embeddings=embeddings,
    llm=llm,
)

### Search Approaches

In [None]:
def search_and_answer_basic(
    query: str,
    rag_system: Dict[str, Any],
    year_start: Optional[int] = None,
    year_end: Optional[int] = None,
    scott_numbers: Optional[List[str]] = None,
    max_results: int = 10,
) -> Dict[str, Any]:
    """
    Hybrid search plus RAG answer, with optional philatelic filters.

    Args:
        query: The user's question.
        rag_system: Shared state; must contain a truthy ``client`` and may
            contain ``collection_name`` (defaults to "Oxcart").
        year_start, year_end: Year range; applied only when both are given and
            convertible to int. A reversed range is swapped.
        scott_numbers: Scott catalog numbers; applied only when non-empty.
        max_results: Maximum number of chunks to retrieve.

    Returns:
        A dict with ``answer``, ``results`` and ``metadata``. Errors are
        reported inside this dict rather than raised.
    """
    # Guard: nothing can be searched without a Weaviate client.
    if not (rag_system and rag_system.get("client")):
        return {
            "answer": "❌ Error: No Weaviate connection",
            "results": [],
            "metadata": {"error": "No Weaviate connection"}
        }

    weaviate_client = rag_system["client"]
    target_collection = rag_system.get("collection_name", "Oxcart")

    # Filters are collected only for the values the caller actually supplied.
    filters = {}

    # Year range: requires both bounds.
    if year_start is not None and year_end is not None:
        try:
            first_year = int(year_start)
            last_year = int(year_end)
            # Normalise a reversed range.
            if first_year > last_year:
                first_year, last_year = last_year, first_year
            filters["year_range"] = (first_year, last_year)
            print(f"[DEBUG] Year filter applied: {first_year}-{last_year}")
        except (ValueError, TypeError) as e:
            print(f"[WARNING] Invalid year values, skipping year filter: {e}")

    # Scott numbers: requires a non-empty list.
    if scott_numbers:
        print("Scott Numbers: ", scott_numbers)
        filters["catalog_system"] = "Scott"
        filters["scott_numbers"] = scott_numbers

    # Log final filter status
    if filters:
        print(f"[DEBUG] Filters being used: {filters}")
    else:
        print("[DEBUG] No filters applied - searching all documents")

    try:
        started_at = time.time()

        # The search helper receives None, not an empty dict, when unfiltered.
        results = search_chunks_semantic(
            client=weaviate_client,
            query=query,
            collection_name=target_collection,
            limit=int(max_results),
            filters=filters if filters else None,
            mode="hybrid",
            alpha=0.35
        )

        # Wrap each hit in a minimal object exposing page_content / metadata,
        # the two attributes the RAG formatting step reads.
        docs_for_rag = [
            type('Document', (), {
                'page_content': hit.get('text', ''),
                'metadata': {
                    'doc_id': hit.get('doc_id', 'N/A'),
                    'page_number': hit.get('page_number', 'N/A'),
                    'chunk_type': hit.get('chunk_type', 'N/A'),
                    'score': hit.get('score', 0.0),
                    'scott_numbers': hit.get('scott_numbers', []),
                    'years': hit.get('years', []),
                    'catalog_systems': hit.get('catalog_systems', [])
                }
            })()
            for hit in results
        ]

        # Generate RAG response using LangChain
        rag_response = create_rag_response(docs_for_rag, query)
        # Covers both retrieval and generation.
        execution_time = time.time() - started_at

        return {
            "answer": rag_response.get("response", "No response generated"),
            "results": results,
            "metadata": {
                "approach": "Basic Hybrid Search",
                "query": query,
                "total_results": len(results),
                "max_results": int(max_results),
                "filters_used": filters if filters else "None (searching all documents)",
                "generation_time": execution_time,
                "context_docs_count": rag_response.get("context_docs_count", len(docs_for_rag)),
                "context_length": sum(len(d.page_content) for d in docs_for_rag),
                "token_usage": rag_response.get("token_usage", {}),
                "cost_info": rag_response.get("cost_info", {}),
            }
        }

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"[ERROR] Basic search error: {str(e)}")
        print(f"[ERROR] Full traceback: {error_details}")
        print(f"[ERROR] Filters attempted: {filters}")

        # Report the failure in the normal return shape.
        return {
            "answer": f"❌ Basic search error: {str(e)}",
            "results": [],
            "metadata": {
                "error": str(e),
                "generation_time": 0,
                "filters_attempted": filters if filters else "None"
            }
        }

In [None]:
# Markdown image syntax ``![alt](path)``; group 1 captures the image path.
_FIGURE_MD_RE = re.compile(r'!\[[^\]]*\]\(([^)]+)\)')


def _restore_missing_figures(doc: Any) -> None:
    """Re-append figure references that are absent from ``doc.page_content``.

    Figures are looked up in ``doc.metadata['text_original']`` (falling back to
    the document's own content), de-duplicated by image path, and any figure
    whose markdown is not already present in ``doc.page_content`` is appended
    at the end. ``doc.metadata['figures']`` and ``doc.metadata['has_figures']``
    are always set. The document is modified in place.
    """
    original_content = doc.metadata.get('text_original')
    if not isinstance(original_content, str):
        # Key missing or stored as null: scan the current content instead of
        # letting the regex fail on a non-string.
        original_content = doc.page_content

    # De-duplicate by image path (alt text ignored), keeping first-seen order.
    figures_by_path: Dict[str, str] = {}
    for match in _FIGURE_MD_RE.finditer(original_content):
        figures_by_path.setdefault(match.group(1), match.group(0))
    unique_figures = list(figures_by_path.values())

    # Only figures whose exact markdown is missing get appended.
    missing_figures = [fig for fig in unique_figures if fig not in doc.page_content]
    if missing_figures:
        doc.page_content = doc.page_content + "\n\n" + "\n".join(missing_figures)

    # Cache the figure list in metadata for quick access by the UI layer.
    doc.metadata['figures'] = unique_figures
    doc.metadata['has_figures'] = bool(unique_figures)


def search_and_answer_advanced(
    query: str,
    rag_system: Dict[str, Any],
    max_results: int = 10,
) -> Dict[str, Any]:
    """
    Advanced compression search approach - filters NOT applied (as requested).

    Args:
        query: Free-text user question.
        rag_system: Dict holding at least a truthy ``"client"`` entry, plus
            optional ``"embeddings"`` and ``"llm"`` entries that are forwarded
            to ``search_stamps_with_compression``.
        max_results: Forwarded as ``limit`` to the compression search.

    Returns:
        Dict with ``"answer"`` (str), ``"results"`` (list of per-document
        dicts) and ``"metadata"`` (dict). On any failure the same shape is
        returned with an error message, an empty result list and an
        ``"error"`` entry in the metadata; exceptions are never propagated.
    """
    # Validation
    if not rag_system or not rag_system.get("client"):
        return {
            "answer": "❌ Error: No Weaviate connection",
            "results": [],
            "metadata": {"error": "No Weaviate connection"}
        }

    client_wv = rag_system["client"]
    embeddings = rag_system.get("embeddings")
    llm = rag_system.get("llm")

    # NOTE: Advanced search does not apply filters as requested by user.
    # This approach uses ensemble retrieval and compression instead.

    try:
        start_time = time.time()

        # Advanced search with compression (no filters applied)
        compressed_docs = search_stamps_with_compression(
            query=query,
            client=client_wv,
            embeddings=embeddings,
            llm=llm,
            limit=max_results,
            alpha=0.30,
            diversity_lambda=0.75
        )

        # Generate RAG response using LangChain
        rag_response = create_rag_response(compressed_docs, query)

        # NOTE(review): figures are restored AFTER the answer is generated, so
        # they only affect what is displayed - confirm this ordering is intended.
        for doc in compressed_docs:
            _restore_missing_figures(doc)

        # Convert compressed docs to results format for display
        results = [
            {
                'doc_id': doc.metadata.get('doc_id', 'N/A'),
                'page_number': doc.metadata.get('page_number', 'N/A'),
                'chunk_type': doc.metadata.get('chunk_type', 'N/A'),
                'text': doc.page_content,
                'score': doc.metadata.get('quality_score', 0.0),
                'catalog_systems': doc.metadata.get('catalog_systems', []),
                'scott_numbers': doc.metadata.get('scott_numbers', []),
                'years': doc.metadata.get('years', []),
                'colors': doc.metadata.get('colors', []),
                'variety_classes': doc.metadata.get('variety_classes', []),
                'has_figures': doc.metadata.get('has_figures', False),
                'figures': doc.metadata.get('figures', [])
            }
            for doc in compressed_docs
        ]

        execution_time = time.time() - start_time

        metadata = {
            "approach": "Advanced Compression Search",
            "query": query,
            "total_results": len(results),
            "compressed_docs": len(compressed_docs),
            "filters_used": "No filters (advanced approach)",
            "generation_time": execution_time,
            # .get with a fallback mirrors the basic approach and avoids a
            # KeyError when the RAG helper omits the count.
            "context_docs_count": rag_response.get("context_docs_count", len(compressed_docs)),
            "docs_with_figures": sum(1 for r in results if r.get('has_figures', False)),
            "token_usage": rag_response.get("token_usage", {}),
            "cost_info": rag_response.get("cost_info", {}),
        }

        return {
            "answer": rag_response.get("response", "No response generated"),
            "results": results,
            "metadata": metadata
        }

    except Exception as e:
        # Top-level boundary for the UI: log the full traceback (as the basic
        # approach does) and hand back a displayable error payload.
        import traceback
        print(f"[ERROR] Advanced search error: {str(e)}")
        print(f"[ERROR] Full traceback: {traceback.format_exc()}")

        return {
            "answer": f"❌ Advanced search error: {str(e)}",
            "results": [],
            "metadata": {
                "error": str(e),
                "generation_time": 0,
                "filters_attempted": "None"
            }
        }


In [None]:
# query = "Costa Rica 1907 2 colones stamp with original gum. Scott 68 issue of 1907"

# results = search_and_answer_basic(
#     query,
#     rag_system,
#     None,
#     None,
#     ["1","2"],
#     10,
# )

### Test Basic Approach

In [None]:
# # Enhanced test of search_chunks_semantic function
# def display_search_results(results, query, filters_used=None):
#     """
#     Enhanced display function for search results
#     """
#     print(f"🔍 SEARCH RESULTS")
#     print(f"=" * 60)
#     print(f"📝 Query: '{query}'")
#     if filters_used:
#         print(f"🔧 Filters applied: {filters_used}")
#     print(f"📊 Total results: {len(results)}")
#     print(f"=" * 60)
    
#     if not results:
#         print("❌ No results found")
#         return
    
#     for j, result in enumerate(results[:5], 1):  # Show top 5 results
#         print(f"\n🏷️ RESULT #{j} (Score: {result['score']:.4f})")
#         print(f"   📄 Document: {result['doc_id']}")
#         print(f"   📋 Chunk Type: {result['chunk_type']}")
#         print(f"   📄 Page: {result['page_number']}")
        
#         # Show metadata if available
#         metadata_items = [
#             ('📖 Catalog Systems', result.get('catalog_systems', [])),
#             ('🔢 Scott Numbers', result.get('scott_numbers', [])),
#             ('📅 Years', result.get('years', [])),
#             ('🎨 Colors', result.get('colors', [])),
#             ('🔀 Variety Classes', result.get('variety_classes', [])),
#         ]
        
#         for label, data in metadata_items:
#             if data:
#                 display_data = ', '.join(str(item) for item in data) if isinstance(data, list) else str(data)
#                 print(f"   {label}: {display_data}")
        
#         # Boolean flags
#         if result.get('has_varieties'):
#             print(f"   ✅ Has varieties")
#         if result.get('is_guanacaste'):
#             print(f"   🌎 Guanacaste province")
#         if result.get('has_technical_specs'):
#             print(f"   🔧 Has technical specs")
            
#         # Text preview
#         text = result.get('text', '')
#         preview = text[:300] + "..." if len(text) > 300 else text
#         print(f"   📝 Text preview: {preview}")
#         print(f"   {'─' * 50}")

# # Test 1: Basic search without filters (original test enhanced)
# print("🧪 TEST 1: Basic Hybrid Search (No Filters)")
# query = "Costa Rica 1907 2 colones stamp with original gum. Scott 68 issue of 1907"

# results = search_chunks_semantic(
#     client=client, 
#     query=query, 
#     collection_name="Oxcart", 
#     limit=20,
#     filters=[],  # No filters
#     mode="hybrid",
#     alpha=0.35
# )

# display_search_results(results, query)

# print(f"\n💡 This test shows unfiltered results. Now let's test with specific filters...")

In [None]:
# # Test: Combined Filters (Advanced Testing) - UPDATED with Multiple Scott Numbers
# print("🧪 TEST 6: Combined Filters (Advanced Testing)")
# print("=" * 80)

# # Test complex filter combinations for precise searches
# combined_tests = [
#     {
#         "name": "1907 stamps with varieties",
#         "query": "1907 Costa Rica stamps with varieties or errors",
#         "filters": {
#             "year_range": (1907, 1907)
#         }
#     },
#     {
#         "name": "1934 Costa Rica stamps",
#         "query": "List all 1934 Costa Rica Stamps",
#         "filters": {
#             "year_range": (1934, 1934)
#         }
#     },
#     {
#         "name": "Costa Rica First Issue Scott 1-5 (MULTIPLE SCOTT NUMBERS TEST)",
#         "query": "Costa Rica First Issue Scott 1 2 3 4 5",
#         "filters": {
#             "catalog_system": "Scott",
#             "scott_numbers": ["1", "2", "3", "4", "5"]  # TEST: Multiple Scott numbers as list
#         }
#     }
# ]

# for i, test in enumerate(combined_tests, 1):
#     print(f"\n🔬 COMBINED TEST {i}: {test['name']}")
#     print(f"{'─' * 60}")
#     print(f"🎯 Filters: {test['filters']}")
    
#     # Special logging for multiple Scott numbers test
#     if "scott_numbers" in test['filters'] and isinstance(test['filters']['scott_numbers'], list):
#         print(f"🔢 TESTING MULTIPLE SCOTT NUMBERS: {test['filters']['scott_numbers']}")
#         print(f"📝 Expected: Should find documents with ANY of these Scott numbers (OR logic)")
    
#     # Execute search with combined filters
#     results = search_chunks_semantic(
#         client=client,
#         query=test['query'],
#         collection_name="Oxcart",
#         limit=15,  # Increased limit for multiple Scott test
#         filters=test['filters'],
#         mode="hybrid",
#         alpha=0.35
#     )
    
#     display_search_results(results, test['query'], filters_used=test['filters'])
    
#     # Detailed validation of filter application
#     if results:
#         print(f"\n   🔍 FILTER VALIDATION:")
#         for filter_key, filter_value in test['filters'].items():
#             validation_count = 0
            
#             # Special handling for multiple Scott numbers
#             if filter_key == "scott_numbers" and isinstance(filter_value, list):
#                 print(f"      🔢 Checking for ANY Scott number from: {filter_value}")
#                 for result in results:
#                     result_scotts = result.get('scott_numbers', [])
#                     # Check if ANY of the requested Scott numbers is in the result
#                     if any(scott_num in result_scotts for scott_num in filter_value):
#                         validation_count += 1
#                 print(f"      ✅ Documents with ANY requested Scott number: {validation_count}/{len(results)}")
                
#                 # Show which specific Scott numbers were found
#                 found_scotts = set()
#                 for result in results:
#                     found_scotts.update(result.get('scott_numbers', []))
#                 matching_scotts = [s for s in filter_value if s in found_scotts]
#                 print(f"      📋 Requested Scott numbers found: {matching_scotts}")
#                 print(f"      📋 All Scott numbers in results: {sorted(found_scotts)}")
                
#             elif filter_key == "year_range":
#                 result_years = result.get('years', [])
#                 if any(filter_value[0] <= year <= filter_value[1] for year in result_years):
#                     validation_count += 1
#             elif filter_key == "catalog_system":
#                 if filter_value in result.get('catalog_systems', []):
#                     validation_count += 1
#             elif filter_key == "chunk_type":
#                 if result.get('chunk_type') == filter_value:
#                     validation_count += 1
#             elif filter_key in ["has_varieties", "is_guanacaste", "has_technical_specs"]:
#                 if result.get(filter_key) == filter_value:
#                     validation_count += 1
            
#             # Show validation for non-Scott filters
#             if filter_key != "scott_numbers":
#                 print(f"      ✅ {filter_key}: {validation_count}/{len(results)} results match")
    
#     print(f"\n{'═' * 80}")

### Gradio Interface

In [None]:
import time
import markdown
from typing import Dict, Any, List, Optional

def create_gradio_interface(rag_system: Dict[str, Any]) -> gr.Blocks:
    """
    Creates the Gradio interface with improved philatelic filters.
    Uses HTML for markdown content and Textbox for timing display.
    Includes token usage and cost metrics display.
    """
    
    # Función para convertir Markdown a HTML con corrección de rutas de imágenes
    
    def markdown_to_html(text, figures_dir=None):
        """Convert Markdown to HTML, inlining local figure images as base64 data URIs.

        Args:
            text: Markdown source. Falsy input yields a "No content" placeholder.
            figures_dir: Optional directory holding the figure files. When
                omitted, the ``PHILATELIC_FIGURES_DIR`` environment variable is
                used if set, otherwise the original hard-coded location.

        Returns:
            HTML string. Every ``<img>`` whose ``src`` ends in a png/jpg/jpeg/gif
            file name (any letter case) is replaced by an inline base64 image
            when the file exists in the figures directory, or by an
            "[Image not found: ...]" paragraph otherwise.
        """
        if not text:
            return "<p><em>No content</em></p>"

        import re
        import os
        import base64

        # Explicit argument > environment override > historical default, so
        # existing deployments keep working without any configuration.
        base_path = (
            figures_dir
            or os.getenv("PHILATELIC_FIGURES_DIR")
            or r"C:\Users\VM-SERVER\Desktop\Oxcart RAG\results\markdown\figures"
        )

        def image_to_base64_lazy(match):
            alt_text = match.group(1)
            # Keep only the file name; figures are resolved inside base_path
            # regardless of the directory used in the markdown link.
            filename = match.group(2).split('/')[-1].split('\\')[-1]
            full_path = os.path.join(base_path, filename)

            if not os.path.exists(full_path):
                return f'<p>[Image not found: {filename}]</p>'

            try:
                with open(full_path, "rb") as img_file:
                    b64_string = base64.b64encode(img_file.read()).decode()
            except OSError as e:
                # Best effort: an unreadable image must not break the page.
                print(f"Error loading image {filename}: {e}")
                return f'<p>[Image not found: {filename}]</p>'

            ext = filename.split('.')[-1].lower()
            mime_type = f"image/{ext}" if ext != 'jpg' else "image/jpeg"
            return f'<img style="max-width: 100%; height: auto; display: block; margin: 10px auto; border: 1px solid #ddd; border-radius: 4px;" alt="{alt_text}" src="data:{mime_type};base64,{b64_string}" />'

        # First convert the markdown to HTML
        html = markdown.markdown(text, extensions=['tables', 'fenced_code'])

        # Then swap the generated <img> tags for inline base64 images.
        # IGNORECASE so that upper-case extensions (.PNG, .JPG) are handled too.
        html = re.sub(
            r'<img[^>]*alt="([^"]*)"[^>]*src="[^"]*?([^/\\">]+\.(?:png|jpg|jpeg|gif))"[^>]*>',
            image_to_base64_lazy,
            html,
            flags=re.IGNORECASE,
        )

        return html

    def gradio_sequential_search(query, year_start, year_end, scott_numbers, max_results):
        """
        Run the basic and then the advanced search, streaming progress to the UI.

        This is a generator. Every yielded value is a 7-tuple:
        (basic answer HTML, basic results HTML, basic metadata HTML,
        advanced answer HTML, advanced results HTML, advanced metadata HTML,
        timing text). It yields once when the basic search starts, once when it
        finishes, and once with the final results of both approaches.

        All filters are OPTIONAL and only used when provided:
        - year_start / year_end: both must be given; accepts ints, floats such
          as 1907.0 and numeric strings. A reversed range is swapped. Values
          that cannot be parsed or fall outside the valid range are ignored
          with a warning and the search proceeds without a year filter.
        - scott_numbers: comma-separated string of Scott numbers.
        Filters are only forwarded to the basic search.
        """
        if not rag_system:
            error_html = markdown_to_html("❌ RAG system not configured")
            yield error_html, "", "", error_html, "", "", "No timing data available"
            return

        if not query or not query.strip():
            error_html = markdown_to_html("❌ Please enter a query")
            yield error_html, "", "", error_html, "", "", "No timing data available"
            return

        def parse_year(raw):
            """Return raw as an int year, None if blank; raise ValueError otherwise."""
            text = str(raw).strip()
            if not text:
                return None
            # Go through float so that inputs like 1907.0 (typical for numeric
            # widgets) are accepted instead of silently dropping the filter.
            value = float(text)
            if not value.is_integer():
                raise ValueError(f"not a whole year: {text!r}")
            return int(value)

        try:
            start_total_time = time.time()

            # ---- Optional filters: anything unusable becomes None ----
            processed_year_start = None
            processed_year_end = None

            if year_start and year_end:
                try:
                    year_start_int = parse_year(year_start)
                    year_end_int = parse_year(year_end)
                except (ValueError, TypeError) as e:
                    # Years are invalid, will proceed without year filter
                    print(f"[WARNING] Could not parse years: {year_start}, {year_end} - {e}")
                else:
                    if year_start_int is not None and year_end_int is not None:
                        # Never lower than the historical bound of 2025, but
                        # keeps accepting the current year as time passes.
                        max_year = max(2025, time.localtime().tm_year)
                        if 1800 <= year_start_int <= max_year and 1800 <= year_end_int <= max_year:
                            if year_start_int > year_end_int:
                                print(f"[WARNING] Year range reversed ({year_start_int}-{year_end_int}); swapping")
                                year_start_int, year_end_int = year_end_int, year_start_int
                            processed_year_start = year_start_int
                            processed_year_end = year_end_int
                        else:
                            print(f"[WARNING] Years out of valid range (1800-{max_year}): {year_start_int}-{year_end_int}")

            # Scott numbers: only use if provided and not empty after cleaning
            processed_scott_numbers = None
            if scott_numbers:
                cleaned_scott = [s.strip() for s in str(scott_numbers).split(',') if s.strip()]
                processed_scott_numbers = cleaned_scott or None

            # Log what filters are being used
            filters_status = []
            if processed_year_start and processed_year_end:
                filters_status.append(f"Years: {processed_year_start}-{processed_year_end}")
            if processed_scott_numbers:
                filters_status.append(f"Scott: {', '.join(processed_scott_numbers)}")

            filter_msg = "Filters applied: " + (", ".join(filters_status) if filters_status else "None (searching all documents)")
            print(f"[DEBUG] {filter_msg}")

            # ============= STEP 1: Execute Basic Search =============
            # Initial status messages
            loading_basic = markdown_to_html(f"*🔄 Running Basic Hybrid Search...*\n\n{filter_msg}")
            loading_advanced = markdown_to_html("*⏳ Waiting for Basic search to complete...*")

            yield (
                loading_basic,
                "",
                "",
                loading_advanced,
                "",
                "",
                f"⏱️ Basic search in progress...\n{filter_msg}"
            )

            # Call basic search function with optional filters (None = not provided)
            basic_results_data = search_and_answer_basic(
                query=query,
                rag_system=rag_system,
                year_start=processed_year_start,
                year_end=processed_year_end,
                scott_numbers=processed_scott_numbers,
                max_results=int(max_results),
            )

            # Format Basic Results (the answer already comes as Markdown)
            basic_answer_html = markdown_to_html(basic_results_data["answer"])

            basic_results = basic_results_data["results"]
            basic_metadata = basic_results_data["metadata"]
            basic_execution_time = basic_metadata.get("generation_time", 0)

            basic_search_html = markdown_to_html(
                format_search_results(basic_results, "Basic Hybrid Search")
            )
            basic_metadata_html = markdown_to_html(
                format_metadata(basic_metadata, basic_execution_time)
            )

            # Partial timing, shown while the advanced search is still running
            timing_partial = (
                "⏱️ EXECUTION TIMING (Partial)\n"
                "            \n"
                "Basic Hybrid Search: ✅ COMPLETED\n"
                f"• Time: {basic_execution_time:.2f}s\n"
                f"• Results: {len(basic_results)}\n"
                f"• {filter_msg}\n"
                "\n"
                "Advanced Search: ⏳ STARTING...\n"
            )

            loading_advanced_2 = markdown_to_html(f"*🔄 Starting Advanced Compression Search...*\n\n{filter_msg}")

            yield (
                basic_answer_html,
                basic_search_html,
                basic_metadata_html,
                loading_advanced_2,
                "",
                "",
                timing_partial
            )

            # ============= STEP 2: Execute Advanced Search =============
            advanced_results_data = search_and_answer_advanced(
                query=query,
                rag_system=rag_system,
                max_results=int(max_results),
            )

            # Format Advanced Results (the answer already comes as Markdown)
            advanced_answer_html = markdown_to_html(advanced_results_data["answer"])

            advanced_results = advanced_results_data["results"]
            advanced_metadata = advanced_results_data["metadata"]
            advanced_execution_time = advanced_metadata.get("generation_time", 0)

            advanced_search_html = markdown_to_html(
                format_search_results(advanced_results, "Advanced Compression Search")
            )
            advanced_metadata_html = markdown_to_html(
                format_metadata(advanced_metadata, advanced_execution_time)
            )

            # Calculate total execution time
            total_execution_time = time.time() - start_total_time

            # Final timing information; full metadata is passed for cost figures
            timing_final = format_timing_display(
                basic_execution_time,
                advanced_execution_time,
                total_execution_time,
                len(basic_results),
                len(advanced_results),
                filter_msg,
                basic_metadata,
                advanced_metadata
            )

            # Yield final complete results
            yield (
                basic_answer_html,
                basic_search_html,
                basic_metadata_html,
                advanced_answer_html,
                advanced_search_html,
                advanced_metadata_html,
                timing_final
            )

        except Exception as e:
            # UI boundary: log the failure and surface it in both answer panes.
            print(f"[ERROR] Sequential search error: {str(e)}")
            error_msg = f"❌ Error during search: {str(e)}"
            error_html = markdown_to_html(error_msg)
            yield error_html, "", "", error_html, "", "", "❌ Error occurred - no timing data"

    def format_search_results(results, approach_name):
        """Format search results as Markdown, listing each result's figures.

        Args:
            results: List of result dicts. Keys read: ``doc_id``,
                ``chunk_type``, ``page_number``, ``score``, ``catalog_systems``,
                ``scott_numbers``, ``years`` and ``text``; all are optional.
            approach_name: Label used in the heading and the empty-result text.

        Returns:
            Markdown string with one section per result: score, a metadata
            table, a preview of at most 300 characters of the text with figure
            markdown removed, and the figure markdown itself when present.
        """
        if not results:
            return f"*No results found with {approach_name}*"

        import re

        # Markdown image syntax: ![alt](path)
        figure_re = re.compile(r'!\[[^\]]*\]\([^)]+\)')

        def join_values(values):
            """Render a metadata value; items are stringified so ints do not break join."""
            if isinstance(values, (list, tuple, set)):
                return ', '.join(str(v) for v in values)
            return str(values)

        lines = [
            f"### {approach_name} Results",
            f"**Found {len(results)} documents**\n",
            "---",
        ]

        for i, r in enumerate(results):
            doc_id = r.get("doc_id", "N/A")
            chunk_type_val = r.get("chunk_type", "N/A")
            page_number = r.get("page_number", "N/A")
            catalogs = r.get("catalog_systems", [])
            scotts = r.get("scott_numbers", [])
            years = r.get("years", [])

            # A missing, null or non-numeric score is shown as 0.000 rather
            # than aborting the whole listing.
            try:
                score = float(r.get("score") or 0.0)
            except (TypeError, ValueError):
                score = 0.0

            # Full text (including figures); tolerate null / non-string values
            text = r.get("text") or ""
            if not isinstance(text, str):
                text = str(text)

            # Derive the flag from the actual matches so the notice is only
            # shown when at least one figure is listed below.
            figures = figure_re.findall(text)
            has_figures = bool(figures)

            # Text preview without the figure markdown
            text_without_figures = figure_re.sub('', text).strip()
            preview = (text_without_figures[:300] + "...") if len(text_without_figures) > 300 else text_without_figures

            lines.append(f"\n#### 📄 Result {i+1}")
            lines.append(f"**Score:** `{score:.3f}`")

            if has_figures:
                lines.append("🖼️ **This result contains figures**\n")

            # Metadata table
            lines.append("| Field | Value |")
            lines.append("|-------|-------|")
            lines.append(f"| Document | `{doc_id}` |")
            lines.append(f"| Type | {chunk_type_val} |")
            lines.append(f"| Page | {page_number} |")

            if catalogs:
                lines.append(f"| Catalogs | {join_values(catalogs)} |")
            if scotts:
                lines.append(f"| Scott Numbers | **{join_values(scotts)}** |")
            if years:
                lines.append(f"| Years | {join_values(years)} |")

            # Always show preview
            lines.append("\n**Preview:**")
            lines.append(f"> {preview}")

            # Always show figures if they exist
            if has_figures:
                lines.append("\n**Figures in this result:**\n")
                lines.extend(figures)

            lines.append("\n---")

        return "\n".join(lines)

    def format_metadata(metadata, execution_time):
        """Format search metadata as Markdown tables, with tolerant number casting.

        Args:
            metadata: Dict produced by the search functions. Keys read:
                ``approach``, ``query``, ``total_results``,
                ``context_docs_count``, ``context_length``, ``filters_used``,
                ``compressed_docs``, ``token_usage``, ``cost_info``,
                ``generation_time`` and ``error``; all are optional.
            execution_time: Total execution time in seconds (any value
                castable to float; otherwise rendered as N/A).

        Returns:
            Markdown string with a search-information table, optional token
            usage and cost tables, a performance table and, when present, the
            error message. Falsy metadata yields a placeholder line.
        """
        if not metadata:
            return "*No metadata available*"

        # Helpers
        def as_float(x, default=None):
            try:
                return float(x)
            except (TypeError, ValueError):
                return default

        def as_int(x, default=None):
            try:
                return int(float(x))
            except (TypeError, ValueError, OverflowError):
                # OverflowError: int(float('inf')) - treat like any bad value.
                return default

        def code_cell(value):
            """Make a value safe inside a backticked table cell."""
            # Newlines and pipes would break the table row; a backtick would
            # close the inline code span early (same rule as the error field).
            return str(value).replace("\n", " ").replace("|", "\\|").replace("`", "'")

        out = []
        out.append("### Search Metadata\n")

        # Basic info
        out.append("#### 📊 Search Information")
        out.append("| Property | Value |")
        out.append("|----------|-------|")
        out.append(f"| **Approach** | {metadata.get('approach', 'Unknown')} |")
        out.append(f"| **Query** | `{code_cell(metadata.get('query', 'N/A'))}` |")

        total_results = as_int(metadata.get('total_results'), 0)
        out.append(f"| **Results found** | {total_results} |")

        context_docs_count = as_int(metadata.get('context_docs_count'))
        out.append(f"| **Context docs** | {context_docs_count if context_docs_count is not None else 'N/A'} |")

        context_length = as_int(metadata.get('context_length'))
        out.append(
            f"| **Context length** | {context_length:,} chars |"
            if context_length is not None else
            "| **Context length** | N/A |"
        )

        if metadata.get('filters_used'):
            filters_str = str(metadata['filters_used']).replace('{', '').replace('}', '')
            out.append(f"| **Filters** | `{code_cell(filters_str)}` |")

        if 'compressed_docs' in metadata:
            out.append(f"| **Compressed docs** | {metadata['compressed_docs']} |")

        # Token usage
        out_tok = 0
        token_usage = metadata.get('token_usage') or {}
        if token_usage:
            in_tok = as_int(token_usage.get('input_tokens'), 0)
            out_tok = as_int(token_usage.get('output_tokens'), 0)
            # Fall back to input + output when no usable total is reported.
            tot_tok = as_int(token_usage.get('total_tokens'), in_tok + out_tok)

            out.append("\n#### 🎯 Token Usage")
            out.append("| Token Type | Count |")
            out.append("|------------|-------|")
            out.append(f"| **Input tokens** | {in_tok:,} |")
            out.append(f"| **Output tokens** | {out_tok:,} |")
            out.append(f"| **Total tokens** | {tot_tok:,} |")

        # Cost info
        cost_info = metadata.get('cost_info') or {}
        if cost_info:
            in_cost = as_float(cost_info.get('input_cost'), 0.0)
            out_cost = as_float(cost_info.get('output_cost'), 0.0)
            est_cost = as_float(cost_info.get('estimated_cost_usd'), in_cost + out_cost)

            out.append("\n#### 💰 Cost Analysis")
            out.append("| Cost Component | USD |")
            out.append("|----------------|-----|")
            out.append(f"| **Input cost** | ${in_cost:.6f} |")
            out.append(f"| **Output cost** | ${out_cost:.6f} |")
            out.append(f"| **Total cost** | **${est_cost:.6f}** |")

        # Performance
        out.append("\n#### ⏱️ Performance")
        out.append("| Metric | Time |")
        out.append("|--------|------|")

        gen_time = as_float(metadata.get('generation_time'))
        exec_time = as_float(execution_time)

        out.append(
            f"| **Generation time** | {gen_time:.2f} seconds |"
            if gen_time is not None else
            "| **Generation time** | N/A |"
        )
        out.append(
            f"| **Total execution** | {exec_time:.2f} seconds |"
            if exec_time is not None else
            "| **Total execution** | N/A |"
        )

        # Speed is only meaningful with token data and a positive duration;
        # out_tok was already cast in the token-usage section above.
        if token_usage and gen_time and gen_time > 0:
            tps = out_tok / gen_time
            out.append(f"| **Generation speed** | {tps:.1f} tokens/sec |")

        if metadata.get('error'):
            err = str(metadata['error']).replace("`", "'")
            out.append(f"\n⚠️ **Error:** `{err}`")

        return "\n".join(out)


    def format_timing_display(
        basic_time, advanced_time, total_time,
        basic_results, advanced_results,
        filter_msg="",
        basic_metadata=None, advanced_metadata=None
    ):
        """Build the plain-text timing, token and cost report for both searches.

        Every numeric input is coerced defensively, so strings, ``None`` or
        missing metadata degrade to zero instead of breaking the report.

        Args:
            basic_time: Seconds spent in the basic search.
            advanced_time: Seconds spent in the advanced search.
            total_time: Seconds spent in the whole run.
            basic_results: Number of documents found by the basic search.
            advanced_results: Number of documents found by the advanced search.
            filter_msg: Description of the active filters, shown verbatim.
            basic_metadata: Optional mapping; ``cost_info['estimated_cost_usd']``
                and ``token_usage['total_tokens']`` are read when present.
            advanced_metadata: Same layout as ``basic_metadata``.

        Returns:
            The report as a multi-line string with no leading indentation, or a
            one-line error message if formatting fails unexpectedly.
        """
        try:
            # --- safe casting helpers ---
            def as_float(x, default=0.0):
                """Coerce x to float; non-numeric values yield default."""
                try:
                    return float(x)
                except (TypeError, ValueError):
                    return default

            def as_int(x, default=0):
                """Coerce x to int via float so values such as '12.0' are accepted."""
                try:
                    return int(float(x))
                except (TypeError, ValueError, OverflowError):
                    # OverflowError: float() accepts infinities, int() rejects them
                    return default

            def read_metric(metadata, section, key, cast):
                """Return cast(metadata[section][key]); absent data yields the cast's default."""
                section_data = metadata.get(section) if metadata else None
                if not hasattr(section_data, "get"):
                    return cast(None)
                return cast(section_data.get(key))

            # timings
            basic_time = as_float(basic_time)
            advanced_time = as_float(advanced_time)
            total_time = as_float(total_time)

            # document counts: cast as well, otherwise a str/None count breaks
            # the docs/sec division below and the whole report is lost
            basic_results = as_int(basic_results)
            advanced_results = as_int(advanced_results)

            # costs
            basic_cost = read_metric(basic_metadata, 'cost_info', 'estimated_cost_usd', as_float)
            advanced_cost = read_metric(advanced_metadata, 'cost_info', 'estimated_cost_usd', as_float)
            total_cost = basic_cost + advanced_cost

            # tokens (forced to int so the ',' thousands format is valid)
            basic_tokens = read_metric(basic_metadata, 'token_usage', 'total_tokens', as_int)
            advanced_tokens = read_metric(advanced_metadata, 'token_usage', 'total_tokens', as_int)
            total_tokens = basic_tokens + advanced_tokens

            # which approach was faster
            if basic_time > 0 and advanced_time > 0:
                if basic_time < advanced_time:
                    faster = f"🏆 Basic search was {advanced_time/basic_time:.1f}x faster"
                elif advanced_time < basic_time:
                    faster = f"🏆 Advanced search was {basic_time/advanced_time:.1f}x faster"
                else:
                    faster = "⚡ Both approaches took similar time"
            else:
                faster = "⏱️ Timing comparison not available"

            # which approach was cheaper; mirrors the timing fallback so the
            # report never shows an empty bullet
            if basic_cost > 0 and advanced_cost > 0:
                if basic_cost < advanced_cost:
                    cost_comparison = f"💵 Basic search was ${advanced_cost - basic_cost:.6f} cheaper"
                elif advanced_cost < basic_cost:
                    cost_comparison = f"💵 Advanced search was ${basic_cost - advanced_cost:.6f} cheaper"
                else:
                    cost_comparison = "💵 Both approaches had similar costs"
            else:
                cost_comparison = "💵 Cost comparison not available"

            # throughput
            basic_speed = f"{basic_results/basic_time:.1f}" if basic_time > 0 else "N/A"
            advanced_speed = f"{advanced_results/advanced_time:.1f}" if advanced_time > 0 else "N/A"

            # Assembled line by line so the source indentation does not leak
            # into the text displayed to the user.
            report_lines = [
                "⏱️ EXECUTION TIMING & COST COMPARISON",
                "================================================",
                "",
                "📋 SEARCH CONFIGURATION",
                f"• {filter_msg}",
                "",
                "🔍 BASIC HYBRID SEARCH",
                f"• Processing Time: {basic_time:.2f} seconds",
                f"• Documents Found: {basic_results}",
                f"• Speed: {basic_speed} docs/sec",
                f"• Tokens Used: {basic_tokens:,}",
                f"• Cost: ${basic_cost:.6f}",
                "• Status: ✅ Complete",
                "",
                "🚀 ADVANCED COMPRESSION SEARCH",
                f"• Processing Time: {advanced_time:.2f} seconds",
                f"• Documents Found: {advanced_results}",
                f"• Speed: {advanced_speed} docs/sec",
                f"• Tokens Used: {advanced_tokens:,}",
                f"• Cost: ${advanced_cost:.6f}",
                "• Status: ✅ Complete",
                "",
                "📊 OVERALL PERFORMANCE",
                f"• Total Execution: {total_time:.2f} seconds",
                f"• Total Tokens: {total_tokens:,}",
                f"• Total Cost: ${total_cost:.6f}",
                "• Execution Mode: Sequential (Basic → Advanced)",
                f"• {faster}",
                f"• {cost_comparison}",
                "",
                "💡 PERFORMANCE NOTES:",
                "• Basic search: Fast initial results, lower cost",
                "• Advanced search: Enhanced quality, higher token usage",
                "• Costs shown are for GPT-5-Nano model",
                "• Sequential execution allows progressive viewing",
                "• Filters are optional and only applied when provided",
            ]
            return "\n".join(report_lines)

        except Exception as e:
            # UI boundary: report the problem in the textbox instead of raising
            return f"❌ Error formatting timing data: {e}"


    # Set example query functions
    def set_example_1():
        """Return the canned query about the 1907 inverted centers."""
        example_query = "Tell me all about the costa rica 1907 inverted centers?"
        return example_query
    
    def set_example_2():
        """Return the canned query about overprinted stamps with varieties or errors."""
        example_query = "Show me Costa Rica overprinted stamps with varieties or errors"
        return example_query
    
    def set_example_3():
        """Return the canned query about the 1934 airmail definitive issue."""
        example_query = "1934 airmail definitive issue with catalog values C15-27"
        return example_query
    
    def set_example_4():
        """Return the canned query about the first issue crack plate."""
        example_query = "Tell me all about the first issue crack plate?"
        return example_query
    
    def set_example_5():
        """Return the canned query about perforation errors and printing varieties."""
        example_query = "Costa Rica stamps with perforation errors or printing varieties"
        return example_query
    
    def set_example_6():
        """Return the canned query about mirror impression stamps."""
        example_query = "Research about all the mirror impression stamps of Costa Rica"
        return example_query

    # System information shown in the status footer at the bottom of the page;
    # the defaults apply when rag_system does not carry the corresponding key.
    collection_name = rag_system.get("collection_name", "Oxcart")
    total_docs = rag_system.get("total_documents", 0)
    total_chunks = rag_system.get("total_chunks", 0)

    # --- UI with improved philatelic filters ---
    # Layout, top to bottom: title, query/filter row, timing textbox,
    # result tabs (basic / advanced), event wiring, status footer.
    with gr.Blocks(
        title="OXCART RAG - Costa Rica Philatelic System",
        css="""
        .markdown-text {
            font-family: 'Inter', sans-serif;
        }
        table {
            border-collapse: collapse;
            width: 100%;
        }
        th, td {
            border: 1px solid #ddd;
            padding: 8px;
            text-align: left;
        }
        """
    ) as interface:
        gr.Markdown(
            "# 🇨🇷 OXCART RAG - Costa Rica Philatelic System\n\n"
            "Advanced search for Costa Rican stamps and postal history with sequential dual AI approaches."
        )

        with gr.Row():
            # Left column (scale=3): query box, search button and example buttons.
            with gr.Column(scale=3):
                query_input = gr.Textbox(
                    label="Your Costa Rica philatelic query",
                    placeholder="e.g., What Costa Rica stamps from 1907 have Scott number 68?",
                    lines=2,
                )

                search_btn = gr.Button("🔍 Search with Both Approaches (Sequential)", variant="primary")

                gr.Markdown("**Example Queries:**")

                # Six example buttons in two rows of three; wired to the
                # set_example_N callbacks further down.
                with gr.Row():
                    btn1 = gr.Button("📮 1907 inverted centers", variant="secondary")
                    btn2 = gr.Button("📮 Overprinted varieties", variant="secondary")
                    btn3 = gr.Button("📮 1934 airmail stamps", variant="secondary")

                with gr.Row():
                    btn4 = gr.Button("📮 First issue crack plate", variant="secondary")
                    btn5 = gr.Button("📮 Perforation errors", variant="secondary")
                    btn6 = gr.Button("📮 Mirror impression stamps", variant="secondary")

            # Right column (scale=1): optional filters passed to the search handler.
            with gr.Column(scale=1):
                gr.Markdown("**🔧 Optional Philatelic Filters**")
                gr.Markdown("*Leave empty to search all documents*")

                # Years are free-text boxes, so the handler receives strings
                # (empty string when the user leaves them blank).
                year_start = gr.Textbox(
                    label="Start Year (optional)", 
                    value="", 
                    placeholder="e.g., 1907",
                    info="Leave empty for no year filter"
                )
                year_end = gr.Textbox(
                    label="End Year (optional)", 
                    value="", 
                    placeholder="e.g., 1910",
                    info="Leave empty for no year filter"
                )

                gr.Markdown("*Note: Both years must be provided to apply year filter*")

                scott_numbers = gr.Textbox(
                    label="Scott Numbers (optional)", 
                    placeholder="e.g., 1,2,3,4,5 or 68,C15",
                    value="",  # Empty string by default
                    info="Comma-separated catalog numbers (leave empty for no filter)"
                )
                # Result cap per approach: 20 to 100 in steps of 10, default 30.
                max_results = gr.Slider(
                    minimum=20,
                    maximum=100,
                    value=30,
                    step=10,
                    label="Maximum results per approach",
                )

        # Timing display as Textbox for better updates
        # (read-only; it is the last entry of the handler outputs below).
        with gr.Row():
            with gr.Column():
                timing_display = gr.Textbox(
                    label="⏱️ Performance Timing & Cost Comparison",
                    lines=22,
                    interactive=False,
                    value="Run a search to see detailed timing and cost comparison between approaches\n\nFilters are optional - leave them empty to search all documents",
                    elem_id="timing-display"
                )

        # Tabbed output with HTML components
        # Both tabs share the same structure: answer on top, then documents
        # and metadata side by side.
        with gr.Tabs():
            with gr.TabItem("🔍 Basic Hybrid Search"):
                gr.Markdown("**Combines vector similarity with keyword matching (35% vector + 65% keyword)**")

                with gr.Row():
                    with gr.Column():
                        gr.Markdown("## AI Response - Basic Approach")
                        basic_answer_output = gr.HTML(
                            value="<p><em>Waiting for search...</em></p>",
                            elem_id="basic_answer"
                        )

                with gr.Row():
                    with gr.Column():
                        gr.Markdown("## Documents Found - Basic Search")
                        basic_search_output = gr.HTML(
                            value="<p><em>No results yet</em></p>",
                            elem_id="basic_search"
                        )

                    with gr.Column():
                        gr.Markdown("## Metadata - Basic Search")
                        basic_metadata_output = gr.HTML(
                            value="<p><em>No metadata yet</em></p>",
                            elem_id="basic_metadata"
                        )

            with gr.TabItem("🚀 Advanced Compression Search"):
                gr.Markdown("**Multi-query ensemble retrieval with AI-powered document compression**")

                with gr.Row():
                    with gr.Column():
                        gr.Markdown("## AI Response - Advanced Approach")
                        advanced_answer_output = gr.HTML(
                            value="<p><em>Waiting for search...</em></p>",
                            elem_id="advanced_answer"
                        )

                with gr.Row():
                    with gr.Column():
                        gr.Markdown("## Documents Found - Advanced Search")
                        advanced_search_output = gr.HTML(
                            value="<p><em>No results yet</em></p>",
                            elem_id="advanced_search"
                        )

                    with gr.Column():
                        gr.Markdown("## Metadata - Advanced Search")
                        advanced_metadata_output = gr.HTML(
                            value="<p><em>No metadata yet</em></p>",
                            elem_id="advanced_metadata"
                        )

        # Wire up events with new parameters
        # The search button and submitting the query textbox run the same
        # handler with identical inputs and outputs.
        # NOTE(review): gradio_sequential_search is defined outside this section;
        # presumably it produces seven values in the order listed in `outputs`
        # (three basic, three advanced, then the timing text) - confirm.
        search_btn.click(
            fn=gradio_sequential_search,
            inputs=[query_input, year_start, year_end, scott_numbers, max_results],
            outputs=[
                basic_answer_output, basic_search_output, basic_metadata_output,
                advanced_answer_output, advanced_search_output, advanced_metadata_output,
                timing_display
            ],
        )

        query_input.submit(
            fn=gradio_sequential_search,
            inputs=[query_input, year_start, year_end, scott_numbers, max_results],
            outputs=[
                basic_answer_output, basic_search_output, basic_metadata_output,
                advanced_answer_output, advanced_search_output, advanced_metadata_output,
                timing_display
            ],
        )

        # Example buttons
        # Each click only writes the canned query into query_input; no search
        # is chained to these events.
        btn1.click(fn=set_example_1, outputs=[query_input])
        btn2.click(fn=set_example_2, outputs=[query_input])
        btn3.click(fn=set_example_3, outputs=[query_input])
        btn4.click(fn=set_example_4, outputs=[query_input])
        btn5.click(fn=set_example_5, outputs=[query_input])
        btn6.click(fn=set_example_6, outputs=[query_input])

        # System information with cost details
        # The ':,' format spec below requires total_docs and total_chunks to be numeric.
        gr.Markdown(
            "---\n"
            f"**System Status:**\n"
            f"• Collection: {collection_name}\n"
            f"• Documents indexed: {total_docs:,}\n"
            f"• Total chunks: {total_chunks:,}\n"
            f"• Status: ✅ Operational\n\n"
            f"**Filter Options:**\n"
            f"• **All filters are OPTIONAL** - leave empty to search entire collection\n"
            f"• **Year filter**: Requires BOTH start and end year to activate\n"
            f"• **Scott numbers**: Can specify one or multiple catalog numbers\n"
            f"• **No filters**: Searches across all documents (recommended for general queries)\n\n"
            f"**Cost Tracking:**\n"
            f"• Model: GPT-5-Nano\n"
            f"• Input: $0.05 per 1M tokens\n"
            f"• Output: $0.40 per 1M tokens\n"
            f"• Cost breakdown shown in metadata\n\n"
            f"**Execution Mode:**\n"
            f"• Sequential execution: Basic search completes first, then Advanced\n"
            f"• Results display progressively as each search completes\n"
            f"• Token usage and costs tracked for each approach\n\n"
            f"**Search Approaches:**\n"
            f"• **Basic Search**: Hybrid semantic search optimized for exact catalog numbers\n"
            f"• **Advanced Search**: Multi-query ensemble with AI compression for complex queries"
        )

    # Hand the assembled (not yet launched) Blocks app back to the caller.
    return interface

In [None]:
# ---- Enhanced launcher ----
# Starts the Gradio UI when the RAG system initialised successfully; otherwise
# prints why the interface cannot be created and how to resolve it.
if rag_system and rag_system.get("success", False):
    print("\n" + "=" * 60)
    print("🚀 LAUNCHING COSTA RICA PHILATELIC RAG INTERFACE")
    print("=" * 60)

    gradio_app = create_gradio_interface(rag_system)

    # Port and public sharing are configurable through the environment (.env).
    GRADIO_PORT = int(os.getenv("GRADIO_PORT", 7860))
    GRADIO_SHARE = os.getenv("GRADIO_SHARE", "false").lower() == "true"

    print(f"⚙️ Port: {GRADIO_PORT}")
    print(f"🌍 Public URL: {'⚠️ Attempting...' if GRADIO_SHARE else '❌ Disabled (more secure)'}")

    try:
        print("🔄 Starting Gradio server...")

        if GRADIO_SHARE:
            print("⏳ Attempting to create public tunnel...")
            try:
                demo = gradio_app.launch(
                    server_port=GRADIO_PORT,
                    share=True,
                    inbrowser=False,
                    show_error=True,
                    prevent_thread_lock=False,
                    quiet=False,
                )

                # Blocks.launch() is documented to return an
                # (app, local_url, share_url) tuple, so the public URL is the
                # third element; probing the tuple for a .share_url attribute
                # never finds it. The getattr fallback keeps any object-style
                # return value working.
                if isinstance(demo, tuple) and len(demo) >= 3:
                    share_url = demo[2]
                else:
                    share_url = getattr(demo, "share_url", None)

                print("\n🎉 SUCCESS! Public tunnel created")
                print("🌐 AVAILABLE URLS:")
                print(f"   📱 Local: http://localhost:{GRADIO_PORT}")

                if share_url:
                    print(f"   🌍 Public: {share_url}")
                    print(f"\n🔗 **PUBLIC URL:** {share_url}")
                else:
                    print("   🌍 Public: Check Gradio output above ☝️")

            except Exception as share_error:
                # Best effort: fall back to a local-only server when the
                # public tunnel cannot be created.
                print(f"⚠️ Error creating public tunnel: {share_error}")
                print("🔄 Switching to local mode only...")

                demo = gradio_app.launch(
                    server_port=GRADIO_PORT,
                    share=False,
                    inbrowser=True,
                    show_error=True,
                    prevent_thread_lock=False
                )

                print("\n✅ LOCAL SERVER OPERATIONAL:")
                print(f"   📱 Local URL: http://localhost:{GRADIO_PORT}")
                print("   ⚠️ Public URL: Not available (tunnel error)")

        else:
            demo = gradio_app.launch(
                server_port=GRADIO_PORT,
                share=False,
                inbrowser=True,
                show_error=True,
                prevent_thread_lock=False
            )

            print("\n✅ LOCAL SERVER OPERATIONAL:")
            print(f"   📱 Local URL: http://localhost:{GRADIO_PORT}")
            print("   💡 For public URL, set GRADIO_SHARE=true in .env")

        print("\n📋 COSTA RICA PHILATELIC FEATURES:")
        print("   • Specialized Costa Rica stamp queries")
        print("   • Scott catalog number search")
        print("   • Variety and error detection")
        print("   • Dual search approaches for comprehensive results")
        print("   • Performance timing comparison")
        print("   • To stop: gr.close_all()")

        print(f"\n{'='*60}")
        print("🇨🇷 COSTA RICA PHILATELIC RAG INTERFACE READY!")
        print(f"{'='*60}")

    except Exception as e:
        # Notebook boundary: report the failure with remediation hints
        # instead of leaving a raw traceback.
        print(f"❌ Critical error launching Gradio: {e}")
        print("\n🔧 SUGGESTED SOLUTIONS:")
        print("   1. Run: gr.close_all()")
        print("   2. Change port: GRADIO_PORT=7861 in .env")
        print("   3. Verify no other services on the port")
        print("   4. Restart the notebook")

else:
    print("\n⚠️  Cannot create Gradio interface:")
    if not rag_system:
        print("   • RAG system not configured")
    else:
        print(f"   • RAG error: {rag_system.get('error', 'Unknown error')}")
    print("\n🔧 To resolve:")
    print("   1. Verify Weaviate is running")
    print("   2. Configure OPENAI_API_KEY in .env")
    print("   3. Run document indexing")
    print("   4. Restart this notebook")

In [None]:
#gr.close_all()