# Philatelic Gradio App with Weaviate
Interactive web interface for CR Philately

## 1. Setup & Imports
Load all the modules and functions

In [1]:
import os
import json
import glob
import time
from pathlib import Path
import re, time, math, hashlib
from typing import Dict, Any, List, Optional, Tuple
from collections import defaultdict
from langchain.callbacks import get_openai_callback
from datetime import datetime
import weaviate
from weaviate.classes import query as wv_query
from weaviate.classes.query import Filter as WvFilter
import gradio as gr
import re

import time

import markdown

# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Third-party imports
import pandas as pd


from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_weaviate import WeaviateVectorStore
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage
from langchain.retrievers import MultiQueryRetriever, EnsembleRetriever, ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor


from philatelic_weaviate import *

from philatelic_chunk_schema import *

print("✅ Basic imports completed")



Philatelic Weaviate Integration v2.1 cargado exitosamente
Funciones disponibles:
   - create_weaviate_client()
   - create_oxcart_collection()
   - index_philatelic_document()
   - search_chunks_semantic()
   - get_collection_stats()
✅ Basic imports completed


In [2]:
# Verify environment variables
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
WEAVIATE_URL = os.getenv('WEAVIATE_URL', 'http://localhost:8083')
PHILATELIC_JSONS_DIR = os.getenv('PHILATELIC_JSONS_DIR', './results/final_jsons')
COLLECTION_NAME = os.getenv('WEAVIATE_COLLECTION_NAME', 'Oxcart')

print(f"🔧 Configuration:")
print(f"   • Weaviate URL: {WEAVIATE_URL}")
print(f"   • JSONs Directory: {PHILATELIC_JSONS_DIR}")
print(f"   • Collection Name: {COLLECTION_NAME}")
print(f"   • OpenAI API Key: {'✅ Configured' if OPENAI_API_KEY else '❌ Missing configuration'}")

if not OPENAI_API_KEY:
    print("\\n⚠️  IMPORTANT: Configure your OPENAI_API_KEY in the .env file")
    print("   Copy .env.example to .env and add your API key")

# Verify that the JSONs directory exists
if not os.path.exists(PHILATELIC_JSONS_DIR):
    print(f"\\n⚠️  Directory {PHILATELIC_JSONS_DIR} not found")
    print("   Make sure you have processed documents with the Dolphin parser")
else:
    json_files = glob.glob(os.path.join(PHILATELIC_JSONS_DIR, '*_final.json'))
    print(f"\\n📁 Found {len(json_files)} philatelic JSON files")
    if json_files:
        print("   Examples:")
        for file in json_files[:3]:
            print(f"   • {os.path.basename(file)}")
        if len(json_files) > 3:
            print(f"   • ... and {len(json_files) - 3} more")

🔧 Configuration:
   • Weaviate URL: http://localhost:8083
   • JSONs Directory: ./results/final_jsons
   • Collection Name: Oxcart
   • OpenAI API Key: ✅ Configured
\n📁 Found 1424 philatelic JSON files
   Examples:
   • 1901 National Theater of Costa Rica Yankowski_final.json
   • 1947_Overprint_final.json
   • 1967 Postal and social Plan_Luis Escalante_final.json
   • ... and 1421 more


In [3]:
llm = ChatOpenAI(
            model="gpt-5-mini", 
            api_key=OPENAI_API_KEY, 
            temperature=1,  # 1 obligatorio para gpt-5-mini
            timeout=120.0,
            max_completion_tokens=2500,
            model_kwargs={
                "verbosity": "low",
                "reasoning_effort" : "low"
            }
            )
embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=OPENAI_API_KEY)

  if await self.run_code(code, result, async_=asy):


In [4]:
# ========================================================================================
# 📝 RAG PROMPT TEMPLATE - Professional Philatelic Consultation
# ========================================================================================

philatelic_rag_template = """You are a distinguished philatelic researcher specializing in Costa Rican stamps and postal history. Provide authoritative, well-structured analysis based exclusively on the provided source materials.

## 📚 SOURCE MATERIALS
{context}

## ❓ RESEARCH QUERY  
{question}

## 📋 RESPONSE FRAMEWORK

### 🎯 **READABILITY REQUIREMENTS**

**LENGTH CONTROL:**
• Target 800-1200 words maximum for comprehensive topics
• Use 400-600 words for straightforward queries
• Eliminate redundant explanations between sections
• Prioritize the most important information that directly answers the query

**CONCISE WRITING STYLE:**
• Use clear, direct sentences (15-20 words maximum per sentence)
• Avoid unnecessary qualifying phrases ("it should be noted that", "it is important to mention")
• Lead with conclusions, then provide supporting details
• Use active voice: "Scott C216 exists in two varieties" not "Two varieties of Scott C216 can be found"

**INFORMATION HIERARCHY:**
• Start with the most direct answer to the query
• Group related information to avoid scattering details
• Use parallel structure in lists and sections
• Eliminate overlapping content between sections

**FORMATTING & STRUCTURE:**
- Use clear hierarchical organization with descriptive headers using markdown
- Group related information under logical categories using ## and **bold subheadings**
- Use bullet points (•) for individual facts and varieties
- Include relevant emojis for major sections (🔍 📮 📚 🎯) to enhance readability
- Bold key terms, catalog numbers, and important details
- Your output is in markdown format

### 📝 **STRUCTURAL ORGANIZATION**

**1. EXECUTIVE SUMMARY (Required)**
• 2-3 sentences maximum
• Direct answer to the query
• Key finding or conclusion upfront

**2. CORE ANALYSIS SECTIONS**
Use these section headers as appropriate (not all required):
• **Catalog Details** - for Scott/Yvert numbers, varieties, specifications
• **Historical Context** - for background and significance  
• **Technical Specifications** - for printing, perforations, paper types
• **Collecting Notes** - for rarity, valuation, market insights
• **Related Issues** - for connections to other stamps or series

**3. KEY FINDINGS (Required)**
• Bullet points summarizing main discoveries
• Include specific catalog numbers and varieties
• Note any gaps in available information

### 🔗 **ENHANCED CITATION FORMAT**
• Every factual claim: (Source_Name, page XX)
• Multiple sources: (Source_A, page XX; Source_B, page YY)
• Direct quotes: "quoted text" (Source_Name, page XX)
• Uncertain information: "According to [Source], this appears to be..." (Source_Name, page XX)

### ⚡ **WRITING EFFICIENCY RULES**

**ELIMINATE THESE PATTERNS:**
• "It is worth noting that..."
• "It should be mentioned that..."
• "Furthermore, it is important to understand..."
• "In addition to the above information..."
• Repetitive introductory phrases

**USE THESE PATTERNS:**
• "Scott C216 exists in two varieties:" (direct statement)
• "The 1963 issue includes:" (immediate specifics)
• "Collectors should note:" (actionable information)
• "Sources differ on:" (clear conflict acknowledgment)

**PARAGRAPH STRUCTURE:**
• Topic sentence with main point
• 1-2 supporting sentences with specifics
• Citation at end of factual claims
• Maximum 4 sentences per paragraph

### 📊 **SECTION LENGTH GUIDELINES**
• Executive Summary: 50-75 words
• Each analysis section: 150-250 words maximum
• Key Findings: 75-100 words
• Total response: Stay within word count targets above

### 🎯 **TECHNICAL PRECISION**
• Lead with catalog numbers: "Scott C216 (10¢ green)" not "The 10¢ green stamp, cataloged as Scott C216"
• Use parallel structure: "Scott C216a (perf 12), Scott C216b (perf 11½)" 
• Group similar information: List all varieties together, not scattered
• Specific dates: "Issued March 15, 1963" not "Issued in early 1963"

### ✅ **QUALITY CONTROL CHECKLIST**
Before finalizing response, verify:
- [ ] Executive summary answers the query directly
- [ ] No redundant information between sections
- [ ] All sections stay within length limits
- [ ] Citations are complete and properly formatted
- [ ] Technical details are grouped logically
- [ ] Conclusion reinforces key findings without repetition

## 🚫 **CRITICAL RESTRICTIONS**
• **NO speculation** beyond source materials
• **NO redundant explanations** - if mentioned once, don't repeat
• **NO excessive hedging** - state facts confidently when sources support them
• **NO filler phrases** - every sentence must add value
• **NO circular explanations** - don't explain the same point multiple ways

---

**RESPONSE:**"""

# Create the prompt template
rag_prompt = PromptTemplate(
    template=philatelic_rag_template,
    input_variables=["context", "question"]
)

In [5]:
# ========================================================================================
# 📄 OPTIMIZED DOCUMENT FORMATTING - For Academic Citation Style
# ========================================================================================

def format_docs_for_rag(docs_results: List[Dict]) -> str:
    """Efficient document formatting optimized for academic citation style (Document Name, p. Page)"""
    
    if not docs_results:
        return "\nNo source documents available."
    
    # Group and sort documents by authority
    #doc_groups = {'catalog': [], 'literature': [], 'collection': [], 'reference': []}
    docs = []
    
    for i, doc in enumerate(docs_results, 1):
        #category, reliability = classify_document_authority(doc.metadata.get('doc_id', 'Unknown'))
        
        doc_info = {
            'doc_num': i,
            'doc_id': doc.metadata.get('doc_id', 'Unknown'),
            'page': doc.metadata.get('page_number', 'N/A'),
            'content': doc.page_content,
        }
        #doc_groups[category].append(doc_info)
        docs.append(doc_info)
    return docs

def create_rag_response(retriever_results: List[Dict], query: str) -> Dict:
    """Streamlined RAG chain execution with academic citation style and token tracking"""
    
    if not retriever_results:
        return {
            "response": "No documents found for this query.", 
            "generation_time": 0,
            "context_docs_count": 0,
            "context_length": 0,
            "token_usage": {
                "input_tokens": 0,
                "output_tokens": 0,
                "total_tokens": 0
            },
            "cost_info": {
                "estimated_cost_usd": 0,
                "input_cost": 0,
                "output_cost": 0
            }
        }
    
    # Format context efficiently for academic citations
    context = format_docs_for_rag(retriever_results)
    
    # Execute RAG chain with OpenAI callback for token tracking
    rag_chain = (
        {"context": lambda x: context, "question": RunnablePassthrough()}
        | rag_prompt | llm | StrOutputParser()
    )
    
    start_time = time.time()
    
    # Use OpenAI callback to track token usage
    with get_openai_callback() as cb:
        response = rag_chain.invoke(query)
        # Get token counts from callback
        input_tokens = cb.prompt_tokens
        output_tokens = cb.completion_tokens
        total_tokens = cb.total_tokens
        
        # OpenAI callback provides cost directly, but we'll calculate our own
        # based on GPT-5-mini pricing
    
    generation_time = round(time.time() - start_time, 2)
    
    # Calculate costs for GPT-5-mini
    # $0.05 per 1M input tokens, $0.40 per 1M output tokens
    cost_per_1m_input = 0.250
    cost_per_1m_output = 2.0
    
    # Calculate costs for GPT-4.1-nano
    # $0.10 per 1M input tokens, $0.40 per 1M output tokens
    # cost_per_1m_input = 0.10
    # cost_per_1m_output = 0.40
    
    # Convert to cost per token
    cost_per_input_token = cost_per_1m_input / 1_000_000
    cost_per_output_token = cost_per_1m_output / 1_000_000
    
    input_cost = input_tokens * cost_per_input_token
    output_cost = output_tokens * cost_per_output_token
    estimated_cost = input_cost + output_cost
    
    return {
        "response": response,
        "generation_time": generation_time,
        "context_docs_count": len(retriever_results),
        "context_length": len(context),        
        "token_usage": {
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "total_tokens": total_tokens
        },
        "cost_info": {
            "estimated_cost_usd": round(estimated_cost, 6),
            "input_cost": round(input_cost, 6),
            "output_cost": round(output_cost, 6)
        }
    }

## 2. Weaviate Client

In [6]:
# Connect to Weaviate
print("🔌 Connecting to Weaviate...")

try:
    client = create_weaviate_client(WEAVIATE_URL, OPENAI_API_KEY)
    print("✅ Connection successful")
    
    # Verify that Weaviate is working
    meta = client.get_meta()
    print(f"📊 Weaviate version: {meta.get('version', 'unknown')}")
    
    # Verify if collection exists
    try:
        collections = client.collections.list_all()
        collection_names = [col.name for col in collections]
        
        if COLLECTION_NAME in collection_names:
            collection = client.collections.get(COLLECTION_NAME)
            total_objects = collection.aggregate.over_all(total_count=True).total_count
            print(f"📊 Collection '{COLLECTION_NAME}' exists with {total_objects} documents")
        else:
            print(f"📝 Collection '{COLLECTION_NAME}' does not exist (will be created during indexing)")
    except Exception as e:
        print(f"⚠️ Could not verify collections: {e}")
        
except Exception as e:
    print(f"❌ Error connecting to Weaviate: {e}")
    print("💡 Make sure Weaviate is running:")
    print("   docker-compose up -d")
    client = None

🔌 Connecting to Weaviate...
Conectado a Weaviate en http://localhost:8083
✅ Connection successful
📊 Weaviate version: 1.32.4
⚠️ Could not verify collections: 'str' object has no attribute 'name'


## 3. Weaviate Search Tests

Test the function search_chunks_semantic


In [7]:
results = search_chunks_semantic(
                client, 
                "Costa Rica 1907 2 colones stamp with original gum. Scott 68 issue of 1907", 
                "Oxcart", 
                limit=50,
                filters=[],
                mode = "hybrid",
                alpha= 0.35
                
            )
            
print(f"   📊 Resultados: {len(results)}")

for j, result in enumerate(results, 1):
    print(f"\n      🏷️ #{j} (Score: {result['score']:.3f})")
    print(f"         📄 Documento: {result['doc_id']}")
    print(f"         📋 Tipo: {result['chunk_type']}")
    print(f"         📄 Página: {result['page_number']}")
    
    # Mostrar metadatos relevantes
    if result.get('catalog_systems'):
        print(f"         📖 Catálogos: {result['catalog_systems']}")
    if result.get('scott_numbers'):
        print(f"         🔢 Scott: {result['scott_numbers']}")
    if result.get('years'):
        print(f"         📅 Años: {result['years']}")
    if result.get('colors'):
        print(f"         🎨 Colores: {result['colors']}")
    if result.get('variety_classes'):
        print(f"         🔀 Variedades: {result['variety_classes']}")
    
    # Texto truncado
    text = result.get('text', '')
    # if len(text) > 200:
    #     text = text[:200] + "..."
    print(f"         📝 Texto: {text}")
    print("**********************************************************************************************************")

   📊 Resultados: 50

      🏷️ #1 (Score: 0.650)
         📄 Documento: OXCART116
         📋 Tipo: text
         📄 Página: 25
         📖 Catálogos: ['Scott']
         🔢 Scott: ['2, 3, 4', '32-34, 35–44', '4, 1', '64, 65, 66', '68', '143-146', '32-34', '35–44', '68, 143-146']
         📅 Años: [1907]
         🎨 Colores: ['red']
         📝 Texto: Got any ideas?\n\nSuggestions for the improvement of the OXCART Postal Sales are always welcome!\nCondición: centrado fine.\n\n![Figure](figures/OXCART116_page_025_figure_000.png)\nCondición: centrado good.\n\n192 193 194.\n\n![Figure](figures/OXCART116_page_025_figure_004.png)\nCondición: centrado good.\n\n![Figure](figures/OXCART116_page_025_figure_006.png)\nCondición: centrado good.\n\n196 197. 198. 199. 200.\n\n![Figure](figures/OXCART116_page_025_figure_012.png)\nCondición: centrado good.\n\n![Figure](figures/OXCART116_page_025_figure_013.png)\nCondición: centrado good.\n\n![Figure](figures/OXCART116_page_025_figure_014.png)\nCondición: centra

## 4. Advance Retriever Implementation

In [8]:
from typing import List, Tuple
from langchain_core.documents import Document
from langchain_core.runnables.config import RunnableConfig

def compress_documents_simple(documents: List[Document], query: str, llm) -> Tuple[List[Document], Dict]:
    """
    Simple document compression using LangChain's native batch processing with token tracking.
    Each document is processed individually with the same prompt.
    
    Returns:
        Tuple of (compressed_documents, token_usage_info)
    """
    if not documents:
        return [], {
            "input_tokens": 0,
            "output_tokens": 0,
            "total_tokens": 0
        }
    
    # Simple compression prompt for individual documents
    # Optimized compression prompt for philatelic documents
    compress_prompt_template = """You are an expert philatelist. Extract and summarize ONLY information that directly relates to the specific stamp or philatelic issue mentioned in the query.

    QUERY: {query}

    DOCUMENT:
    {document}

    CRITICAL INSTRUCTIONS:
    1. The query is asking about a SPECIFIC stamp, stamp issue, or philatelic item
    2. ONLY extract information if the document explicitly discusses that exact stamp/issue
    3. If the document discusses different stamps, different countries, different years, or different issues than what the query asks for - this is NOT relevant
    4. Do NOT extract general philatelic information unless it directly explains the queried stamp/issue
    5. Do NOT make connections or assumptions - the stamp/issue must be explicitly mentioned in the document

    WHAT TO EXTRACT (only if the specific stamp/issue is discussed):
    - Catalog numbers (Scott, Stanley Gibbons, Michel, etc.)
    - Denominations, colors, and design descriptions
    - Issue dates and printing details
    - Watermarks, perforations, and paper types
    - Varieties, errors, and printing methods
    - Historical context specific to this stamp/issue
    - Rarity, usage, or collecting information

    WHAT NOT TO DO:
    - Do NOT add facts about similar but different stamps
    - Do NOT include information about the same country but different issues
    - Do NOT generalize from other stamps to the one being queried
    - Do NOT assume anything not explicitly stated in the document

    If the document does NOT discuss the specific stamp or issue from the query, respond with exactly:
    NO_RELEVANT_CONTENT

    If the document DOES discuss the queried stamp/issue, respond with:
    RELEVANT CONTENT:
    [Your extracted summary here, covering ONLY the specific stamp/issue from the query]"""

    # Create individual prompts for each document
    prompts = []
    for doc in documents:
        prompt_text = compress_prompt_template.format(
            query=query, 
            document=doc.page_content
        )
        prompts.append([("user", prompt_text)])
    
    # Use LangChain's native batch processing with concurrency control and token tracking
    config = RunnableConfig(max_concurrency=10)  # Process 10 documents concurrently
    
    try:
        # Track token usage during compression
        with get_openai_callback() as cb:
            responses = llm.batch(prompts, config=config)
            
            # Get token counts from callback
            compression_token_usage = {
                "input_tokens": cb.prompt_tokens,
                "output_tokens": cb.completion_tokens,
                "total_tokens": cb.total_tokens
            }
        
        # Filter and create compressed documents
        compressed_docs = []
        for i, response in enumerate(responses):
            content = response.content.strip() if hasattr(response, 'content') else str(response).strip()
            
            # Only include documents that have relevant content
            if content and content != "NO_RELEVANT_CONTENT":
                compressed_doc = Document(
                    page_content=content,
                    metadata=documents[i].metadata
                )
                compressed_docs.append(compressed_doc)
        
        return compressed_docs, compression_token_usage
        
    except Exception as e:
        print(f"Error during batch compression: {e}")
        # Fallback: return original documents with zero token usage
        return documents, {
            "input_tokens": 0,
            "output_tokens": 0,
            "total_tokens": 0
        }

def search_stamps_with_compression(query, client, embeddings, llm, limit=100, 
                                 alpha=0.30, diversity_lambda=0.75):
    """
    Optimized philatelic search with simple batch document compression using LangChain's native batch processing.
    
    Args:
        query (str): The stamp query
        client: Weaviate client
        embeddings: Embedding model
        llm: Language model
        limit (int): Maximum documents to retrieve
        alpha (float): Hybrid search factor (0.30 = 30% vector, 70% keywords)
        diversity_lambda (float): MMR diversity factor (0.75 = good diversity)
    
    Returns:
        tuple: (compressed_docs, token_usage, cost_info)
    """  
    
    # Create vector store
    vector_store = WeaviateVectorStore(
        client=client,
        index_name=COLLECTION_NAME,
        text_key="text",
        embedding=embeddings
    )
    
    # Try to create hybrid retriever
    hybrid_kwargs = {"k": limit // 2}
    if alpha is not None:
        hybrid_kwargs["alpha"] = alpha
    
    # 1. Precision hybrid retriever (captures exact numbers + context)
    precision_retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs=hybrid_kwargs
    )
    
    # 2. Diversity MMR retriever (avoids duplicate stamps)
    diversity_retriever = vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": limit // 2, "lambda_mult": diversity_lambda}
    )
    
    # 3. Ensemble with dual strategy
    base_retriever = EnsembleRetriever(
        retrievers=[precision_retriever, diversity_retriever],
        weights=[0.7, 0.3]  # 70% precision + 30% diversity
    )
    
    # Specialized prompt for philatelic multi-query generation
    query_prompt = PromptTemplate(
        input_variables=["question"],
        template="""You are a specialized philatelic researcher expert in stamp catalogues and varieties.
Generate 3 strategically different versions of the question to capture comprehensive stamp information:

ORIGINAL: {question}

Create variations that target:
1. CATALOG PRECISION: Focus on exact catalog numbers, dates, and technical specifications
2. CONTEXTUAL SEARCH: Include related series, printings, varieties, and historical context  
3. TERMINOLOGY ALTERNATIVES: Use alternative philatelic terms, synonyms or related philatelic concepts

Consider these philatelic elements:
- Catalog systems: Scott, Michel, Yvert, SG, local catalogs
- Technical terms: definitive/commemorative, variety/error, overprint/surcharge
- Time references: issue dates, printing dates, first day covers
- Denominations: face values, colors, perforations

Alternative searches:
1.
2. 
3."""
    )
    
    # multiquery_llm = ChatOpenAI(
    #         model="gpt-5-nano", 
    #         api_key=OPENAI_API_KEY, 
    #         temperature=1,  # obligatorio para gpt-5-nano
    #         timeout=120.0,
    #         #max_completion_tokens=2500,
    #         model_kwargs={
    #             "verbosity": "low",
    #         })
    
    multiquery_llm = ChatOpenAI(
            model="gpt-4.1-nano", 
            api_key=OPENAI_API_KEY, 
            temperature=0.2,
            timeout=120.0,
           )
    
    # MultiQueryRetriever with specialized prompt
    multi_retriever = MultiQueryRetriever.from_llm(
        retriever=base_retriever,
        llm=multiquery_llm,
        prompt=query_prompt,
        parser_key="lines"
    )
    
    # Execute initial retrieval
    initial_results = multi_retriever.invoke(query)
       
    # compression_llm = ChatOpenAI(
    #         model="gpt-5-nano", 
    #         api_key=OPENAI_API_KEY, 
    #         temperature=1,  # obligatorio para gpt-5-nano
    #         timeout=120.0,
    #         model_kwargs={
    #             "verbosity": "low",
    #         })
    
    compression_llm = ChatOpenAI(
            model="gpt-4.1-mini", 
            api_key=OPENAI_API_KEY, 
            temperature=0.1,
            timeout=120.0
            )
    
    # Simple batch compression using LangChain's native batch processing with token tracking
    compressed_results, compression_token_usage = compress_documents_simple(initial_results, query, compression_llm)
    
    # Calculate costs for GPT-5-nano compression
    # $0.05 per 1M input tokens, $0.40 per 1M output tokens
    # cost_per_1m_input = 0.05
    # cost_per_1m_output = 0.40
    
    # Calculate costs for GPT-4.1-nano compression
    # $0.10 per 1M input tokens, $0.40 per 1M output tokens
    cost_per_1m_input = 0.10
    cost_per_1m_output = 0.40
    
    # Convert to cost per token
    cost_per_input_token = cost_per_1m_input / 1_000_000
    cost_per_output_token = cost_per_1m_output / 1_000_000
    
    input_cost = compression_token_usage["input_tokens"] * cost_per_input_token
    output_cost = compression_token_usage["output_tokens"] * cost_per_output_token
    estimated_cost = input_cost + output_cost
    
    compression_cost_info = {
        "estimated_cost_usd": round(estimated_cost, 6),
        "input_cost": round(input_cost, 6),
        "output_cost": round(output_cost, 6)
    }
    
    # Reorder by quality_score if it exists
    def get_quality_score(doc):
        return getattr(doc, 'metadata', {}).get('quality_score', 0.0)
    
    sorted_results = sorted(compressed_results, key=get_quality_score, reverse=True)
    return sorted_results, compression_token_usage, compression_cost_info

### Test the method search_stamps_with_compression

In [9]:
# # Test the optimized search_stamps_with_compression with batch processing
# print("🧪 Testing optimized batch compression...")

# # Test query focused on specific stamps
# test_query = "Costa Rica 1907 2 colones stamp with original gum Scott 68"

# print(f"🔍 Query: {test_query}")
# print("⏱️ Starting optimized search with batch compression...")

# import time
# start_time = time.time()

# try:
#     compressed_docs = search_stamps_with_compression(
#         query=test_query,
#         client=client, 
#         embeddings=embeddings, 
#         limit=30,
#         llm=llm,
#         alpha=0.30,  # 30% vectorial, 70% keywords for exact numbers
#         diversity_lambda=0.75  # 75% relevance, 25% diversity
#     )
    
#     end_time = time.time()
#     execution_time = end_time - start_time
    
#     print(f"✅ Batch compression completed in {execution_time:.2f} seconds")
#     print(f"📊 Retrieved and compressed {len(compressed_docs)} documents")
    
#     # Show sample results
#     for i, doc in enumerate(compressed_docs[:3], 1):
#         print(f"\\n📄 Document {i}:")
#         print(f"   Metadata: {getattr(doc, 'metadata', {})}")
#         content = getattr(doc, 'page_content', str(doc))
#         preview = content[:200] + "..." if len(content) > 200 else content
#         print(f"   Content: {preview}")
        
# except Exception as e:
#     print(f"❌ Error during batch compression test: {e}")
#     import traceback
#     traceback.print_exc()

## 5. Gradio Interface

In [10]:
def get_collection_info() -> str:
    """
    Get collection information to display in the interface.
    """
    if not client:
        return "❌ No Weaviate connection"
    
    try:
        stats = get_collection_stats(client, "Oxcart")
        if stats:
            info = f"📊 **Oxcart Collection Statistics:**\\n\\n"
            info += f"📦 **Total chunks:** {stats['total_chunks']:,}\\n"
            info += f"📄 **Documents:** {stats['total_documents']}\\n\\n"
            
            if stats.get('documents'):
                info += "**Indexed documents:**\\n"
                for doc_id, count in stats['documents'].items():
                    info += f"• {doc_id}: {count:,} chunks\\n"
            
            return info
        else:
            return "❌ Could not retrieve statistics"
    except Exception as e:
        return f"❌ Error: {e}"

print("✅ RAG functions defined")

✅ RAG functions defined


In [11]:
stats = get_collection_stats(client, "Oxcart")
stats['total_documents']
stats['total_chunks']

193180

In [12]:
# Estructura que usan tus funciones de búsqueda/respuesta
rag_system = {
    "success": True,
    "client": client,                    # para que search_and_answer pueda consultar
    "collection_name": COLLECTION_NAME,  # nombre de la colección
    "weaviate_url": WEAVIATE_URL,        # info para la UI
    "total_documents": stats['total_documents'],       # para mostrar estado
    "total_chunks": stats['total_chunks'],        # opcional en la UI
    "embeddings":embeddings,
    "llm":llm,
    # puedes añadir más campos que tu search_and_answer necesite
}

### Search Approaches

In [13]:
"""
Thresholded + boosted + diversified Weaviate retrieval
- Adds: min_score gating, domain boosts (Scott/year/quality), dedup, MMR, multi-mode fallback
"""


# =========================
# Utility helpers
# =========================

def _distance_to_similarity(distance: Optional[float], metric: str = "cosine") -> Optional[float]:
    if distance is None:
        return None
    d = float(distance)
    if metric == "cosine":
        return max(0.0, min(1.0, 1.0 - d))  # cosine distance -> similarity in [0..1]
    elif metric in ("l2", "euclidean"):
        return 1.0 / (1.0 + d)
    elif metric == "dot":  # heuristic
        return 1.0 - (d / 2.0)
    return None

def _norm_score(raw: Optional[float]) -> float:
    """Normalize to [0,1]. Weaviate hybrid/bm25 'score' is usually [0..1]; vector similarity from our converter is also [0..1]."""
    if raw is None:
        return 0.0
    return max(0.0, min(1.0, float(raw)))

def _text_hash(s: str) -> str:
    return hashlib.sha256(s.encode("utf-8", errors="ignore")).hexdigest()

def _extract_query_years(query: str) -> List[int]:
    years = []
    for y in re.findall(r"\b(18\d{2}|19\d{2}|20\d{2})\b", query):
        try:
            years.append(int(y))
        except:
            pass
    return years

def _year_overlap(query_years: List[int], hit_years: List[int]) -> bool:
    if not query_years or not hit_years:
        return False
    qs = set(int(y) for y in query_years if str(y).isdigit())
    hs = set(int(y) for y in hit_years if str(y).isdigit())
    return len(qs.intersection(hs)) > 0

def _boosts(hit: Dict[str, Any], query: str, requested_scotts: Optional[List[str]], query_years: List[int]) -> float:
    """
    Domain-aware boosts capped to 0.30 total.
    - Scott exact match: +0.15
    - Year overlap     : +0.08
    - Chunk quality    : up to +0.07
    """
    boost = 0.0

    # Scott boost
    if requested_scotts:
        scotts = {str(s).strip().lower() for s in (hit.get("scott_numbers") or [])}
        want   = {str(s).strip().lower() for s in requested_scotts}
        if scotts & want:
            boost += 0.15

    # Year overlap boost
    if _year_overlap(query_years, hit.get("years") or []):
        boost += 0.08

    # Quality boost
    q = hit.get("quality_score", 0.0)
    try:
        qn = max(0.0, min(1.0, float(q)))
        boost += 0.07 * qn
    except:
        pass

    return min(boost, 0.30)

def _passes_content_gates(hit: Dict[str, Any], min_chars: int) -> Tuple[bool, str]:
    t = (hit.get("text") or "").strip()
    if len(t) < min_chars:
        return False, f"too_short<{min_chars}"
    # Gentle philately guard to prevent off-topic noise
    if not re.search(r"\bstamp\b|\bperforat|\bwatermark|\bscott\b|\bsurcharge|\bissue\b", t, flags=re.I):
        return False, "weak_domain_signal"
    return True, "ok"

def _dedup(hits: List[Dict[str, Any]], max_per_doc: int = 2) -> List[Dict[str, Any]]:
    """Cap to N per (doc_id, page_number) and deduplicate by text hash."""
    by_doc = defaultdict(int)
    seen_hash = set()
    out = []
    for h in hits:
        key = (h.get("doc_id"), h.get("page_number"))
        hsh = _text_hash(h.get("text") or "")
        if hsh in seen_hash:
            h["_reject_reason"] = "dup_text"
            continue
        if by_doc[key] >= max_per_doc:
            h["_reject_reason"] = "doc_cap"
            continue
        by_doc[key] += 1
        seen_hash.add(hsh)
        out.append(h)
    return out

def _mmr_select(candidates: List[Dict[str, Any]], k: int, lambda_diversity: float = 0.7) -> List[Dict[str, Any]]:
    """
    Lightweight MMR using Jaccard similarity on token sets.
    Assumes 'final_score' exists.
    """
    if not candidates:
        return []
    chosen, rest = [], candidates[:]
    for c in rest:
        toks = set(re.findall(r"[a-z0-9]+", (c.get("text") or "").lower()))
        c["_tokset"] = toks
    while rest and len(chosen) < k:
        best, best_val = None, -1e9
        for c in rest:
            relevance = float(c.get("final_score", 0.0))
            diversity_bonus = 0.0
            if chosen:
                max_sim = 0.0
                for p in chosen:
                    inter = len(c["_tokset"].intersection(p["_tokset"]))
                    union = len(c["_tokset"].union(p["_tokset"])) or 1
                    jacc = inter / union
                    max_sim = max(max_sim, jacc)
                diversity_bonus = (1 - max_sim)  # prefer lower similarity
            val = lambda_diversity * relevance + (1 - lambda_diversity) * diversity_bonus
            if val > best_val:
                best, best_val = c, val
        chosen.append(best)
        rest.remove(best)
    for c in chosen:
        c.pop("_tokset", None)
    return chosen


# =========================
# Filters (Weaviate v4)
# =========================

def _build_filters(filters: Optional[Dict[str, Any]]) -> Optional[Any]:
    """
    Build Weaviate v4 Filter (where clause) from your light dict.
    Supported keys in `filters`:
      - "year_range": (start:int, end:int)
      - "scott_numbers": List[str]   (TEXT_ARRAY containsAny)
      - "catalog_system": "Scott"    (TEXT eq)
      - you can extend as needed
    """
    if not filters:
        return None
    clauses = []

    if "year_range" in filters and isinstance(filters["year_range"], (tuple, list)) and len(filters["year_range"]) == 2:
        ys, ye = filters["year_range"]
        # Prefer range-capable fields if present in your schema; if not, this will still work if you added year_start/year_end.
        try:
            c1 = WvFilter.by_property("year_start").greater_than_equal(ys)
            c2 = WvFilter.by_property("year_end").less_than_equal(ye)
            clauses.append(c1)
            clauses.append(c2)
        except Exception:
            # Fallback: if you only have INT_ARRAY 'years', we approximate with containsAny of all years in range (coarse)
            year_list = list(range(int(ys), int(ye) + 1))
            clauses.append(WvFilter.by_property("years").contains_any(year_list))

    if filters.get("catalog_system"):
        clauses.append(WvFilter.by_property("catalog_systems").contains_any([filters["catalog_system"]]))

    if filters.get("scott_numbers"):
        clauses.append(WvFilter.by_property("scott_numbers").contains_any(list(filters["scott_numbers"])))

    if not clauses:
        return None

    # AND all clauses
    where = clauses[0]
    for c in clauses[1:]:
        where = where & c
    return where


# =========================
# Main: thresholded + diversified retrieval
# =========================

def search_chunks_semantic(
    client,
    query: str,
    collection_name: str = "Oxcart",
    limit: int = 5,
    filters: Optional[Dict[str, Any]] = None,
    mode: str = "vector",          # kept for backward-compat; now we may try multi-stage if needed
    alpha: float = 0.35,           # for hybrid
    distance_metric: str = "cosine",
    # --- New safety/quality knobs (tunable) ---
    min_score: float = 0.55,       # threshold AFTER boosts (0..1)
    min_chars: int = 280,          # tiny snippet filter
    mmr_lambda: float = 0.7,       # 0.5..0.8 usually fine
    overfetch_factor: int = 3,     # fetch N×limit then gate
    k_min: int = 3,                # minimum contexts needed
    requested_scotts: Optional[List[str]] = None,  # domain boost
) -> List[Dict[str, Any]]:
    """
    Advanced retrieval with thresholding, boosts, dedup, MMR and multi-stage fallback.
    Returns a list of results with 'final_score' and 'stage' annotations.
    """
    # Prepare
    coll = client.collections.get(collection_name)
    f = _build_filters(filters)
    query_years = _extract_query_years(query)
    rejected_reasons = defaultdict(int)

    def _run(mode_local: str, label: str, hard_limit: int) -> List[Dict[str, Any]]:
        # Query Weaviate
        if mode_local == "hybrid":
            resp = coll.query.hybrid(
                query=query,
                alpha=alpha,
                limit=hard_limit,
                filters=f,
                return_properties=[
                    "chunk_id","chunk_type","text","text_original","doc_id","page_number",
                    "catalog_systems","catalog_numbers","scott_numbers","years","colors",
                    "topics_primary","variety_classes","has_catalog","has_prices","has_varieties",
                    "is_guanacaste","quality_score"
                ],
                return_metadata=wv_query.MetadataQuery(score=True, distance=True),
            )
        elif mode_local == "bm25":
            resp = coll.query.bm25(
                query=query,
                limit=hard_limit,
                filters=f,
                return_properties=[
                    "chunk_id","chunk_type","text","text_original","doc_id","page_number",
                    "catalog_systems","catalog_numbers","scott_numbers","years","colors",
                    "topics_primary","variety_classes","has_catalog","has_prices","has_varieties",
                    "is_guanacaste","quality_score"
                ],
                return_metadata=wv_query.MetadataQuery(score=True),
            )
        else:
            resp = coll.query.near_text(
                query=query,
                limit=hard_limit,
                filters=f,
                return_properties=[
                    "chunk_id","chunk_type","text","text_original","doc_id","page_number",
                    "catalog_systems","catalog_numbers","scott_numbers","years","colors",
                    "topics_primary","variety_classes","has_catalog","has_prices","has_varieties",
                    "is_guanacaste","quality_score"
                ],
                return_metadata=wv_query.MetadataQuery(distance=True),
            )

        raw_out = []
        for obj in (resp.objects or []):
            props = obj.properties or {}
            meta = getattr(obj, "metadata", None)
            distance = getattr(meta, "distance", None) if meta else None
            hybrid_score = getattr(meta, "score", None) if meta else None

            similarity = _distance_to_similarity(distance, metric=distance_metric)
            base_score = hybrid_score if hybrid_score is not None else (similarity if similarity is not None else 0.0)

            # De-duplicate figure markdown inside text_original (your original logic, preserved)
            figure_pattern = r'(!\[([^\]]*)\]\([^)]+\))'
            original_content = props.get("text_original", "") or props.get("text", "") or ""

            figures = re.findall(figure_pattern, original_content)
            seen_figures = set()
            unique_figures = []
            for fig in figures:
                img_path = re.search(r'\]\(([^)]+)\)', fig[0])
                if img_path:
                    img_identifier = img_path.group(1)
                    if img_identifier not in seen_figures:
                        seen_figures.add(img_identifier)
                        unique_figures.append(fig)

            existing_figures = set()
            for fig in unique_figures:
                if fig[0] in original_content:
                    img_path = re.search(r'\]\(([^)]+)\)', fig[0])
                    if img_path:
                        existing_figures.add(img_path.group(1))

            missing_figures = []
            for fig in unique_figures:
                img_path = re.search(r'\]\(([^)]+)\)', fig[0])
                if img_path and img_path.group(1) not in existing_figures:
                    missing_figures.append(fig[0])

            if missing_figures:
                figures_text = "\n\n" + "\n".join(missing_figures)
                original_content = original_content + figures_text

            raw_out.append({
                "uuid": str(obj.uuid),
                "score": base_score,             # original score (hybrid/bm25) or similarity (vector)
                "similarity": similarity,
                "distance": distance,
                "chunk_id": props.get("chunk_id", ""),
                "chunk_type": props.get("chunk_type", ""),
                "text": original_content,
                "doc_id": props.get("doc_id", ""),
                "page_number": props.get("page_number", 0),
                "catalog_systems": props.get("catalog_systems", []),
                "catalog_numbers": props.get("catalog_numbers", []),
                "scott_numbers": props.get("scott_numbers", []),
                "years": props.get("years", []),
                "colors": props.get("colors", []),
                "topics_primary": props.get("topics_primary", ""),
                "variety_classes": props.get("variety_classes", []),
                "has_catalog": props.get("has_catalog", False),
                "has_prices": props.get("has_prices", False),
                "has_varieties": props.get("has_varieties", False),
                "is_guanacaste": props.get("is_guanacaste", False),
                "quality_score": props.get("quality_score", 0.0),
                "mode": mode_local,
                "stage": label,
            })
        return raw_out

    def _gate_and_rank(raw_hits: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        gated = []
        for h in raw_hits:
            s_norm = _norm_score(h.get("score"))
            b = _boosts(h, query, requested_scotts, query_years)
            final = max(0.0, min(1.0, s_norm + b))
            ok, reason = _passes_content_gates(h, min_chars=min_chars)
            if not ok:
                h["_reject_reason"] = reason
                rejected_reasons[reason] += 1
                continue
            if final < min_score:
                h["_reject_reason"] = f"below_threshold<{min_score:.2f}>"
                rejected_reasons[h["_reject_reason"]] += 1
                continue

            h["norm_score"] = s_norm
            h["boost"] = b
            h["final_score"] = final
            gated.append(h)

        # dedup per doc & by text hash
        deduped = _dedup(gated, max_per_doc=2)
        # diversify with MMR
        diversified = _mmr_select(
            sorted(deduped, key=lambda x: x["final_score"], reverse=True),
            k=min(limit, len(deduped)),
            lambda_diversity=mmr_lambda
        )
        return diversified

    # -------------------------
    # Multi-stage retrieval
    # -------------------------
    # 1) Try the user-requested mode first (keeps backward compatibility)
    modes_order = [mode]
    for m in ("hybrid", "bm25", "vector"):
        if m not in modes_order:
            modes_order.append(m)

    gathered: List[Dict[str, Any]] = []
    for mi, m in enumerate(modes_order):
        raw = _run(m, f"S{mi+1}:{m}+filters", hard_limit=limit * overfetch_factor)
        gated = _gate_and_rank(raw)

        # Merge by uuid, keep best final_score
        heap = {g["uuid"]: g for g in gathered}
        for g in gated:
            if g["uuid"] not in heap or g["final_score"] > heap[g["uuid"]]["final_score"]:
                heap[g["uuid"]] = g
        gathered = list(heap.values())
        gathered.sort(key=lambda x: x["final_score"], reverse=True)
        gathered = gathered[:limit]
        if len(gathered) >= k_min:
            break

    # 2) Relax filters if still not enough (drop Scott, then year)
    if len(gathered) < k_min and filters:
        relaxed = dict(filters)
        if "scott_numbers" in relaxed:
            relaxed.pop("scott_numbers")
        elif "year_range" in relaxed:
            relaxed.pop("year_range")

        f_relaxed = _build_filters(relaxed)
        if f_relaxed is not None:
            for mi, m in enumerate(modes_order):
                # temporarily replace f for this pass
                _old_f = f
                try:
                    f = f_relaxed
                    raw = _run(m, f"Rx{mi+1}:{m}+relaxed", hard_limit=limit * overfetch_factor)
                    gated = _gate_and_rank(raw)
                    heap = {g["uuid"]: g for g in gathered}
                    for g in gated:
                        if g["uuid"] not in heap or g["final_score"] > heap[g["uuid"]]["final_score"]:
                            heap[g["uuid"]] = g
                    gathered = list(heap.values())
                    gathered.sort(key=lambda x: x["final_score"], reverse=True)
                    gathered = gathered[:limit]
                    if len(gathered) >= k_min:
                        break
                finally:
                    f = _old_f  # restore

    # You can inspect `rejected_reasons` here for debugging if needed.
    return gathered


In [14]:
def search_and_answer_basic(
    query: str,
    rag_system: Dict[str, Any],
    year_start: Optional[int] = None,
    year_end: Optional[int] = None,
    scott_numbers: Optional[List[str]] = None,
    max_results: int = 10,
) -> Dict[str, Any]:
    """
    Basic hybrid search approach with improved philatelic filters.
    All filters are OPTIONAL - only applied when provided.
    """
    # Validation
    if not rag_system or not rag_system.get("client"):
        return {
            "answer": "❌ Error: No Weaviate connection",
            "results": [],
            "metadata": {"error": "No Weaviate connection"}
        }
    
    client_wv = rag_system["client"]
    collection_name = rag_system.get("collection_name", "Oxcart")
    
    # Build philatelic filters only if values are provided
    filters = {}
    
    # Year range filter - ONLY if both years are provided and valid
    if year_start is not None and year_end is not None:
        try:
            # Ensure both are integers and valid
            start = int(year_start)
            end = int(year_end)
            # Ensure start <= end
            if start > end:
                start, end = end, start
            filters["year_range"] = (start, end)
            print(f"[DEBUG] Year filter applied: {start}-{end}")
        except (ValueError, TypeError) as e:
            print(f"[WARNING] Invalid year values, skipping year filter: {e}")
    
    # Scott numbers ONLY if provided and not empty
    if scott_numbers:
        print("Scott Numbers: ",scott_numbers)
        filters["catalog_system"] = "Scott"
        filters["scott_numbers"] = scott_numbers        
    
    # Log final filter status
    if not filters:
        print("[DEBUG] No filters applied - searching all documents")
    else:
        print(f"[DEBUG] Filters being used: {filters}")
    
    try:
        start_time = time.time()
        
        # Basic semantic search with philatelic filters
        # Pass None if no filters, not empty dict
        results = search_chunks_semantic(
            client=client_wv,
            query=query,
            collection_name=collection_name,
            limit=int(max_results),
            filters=filters if filters else None,  # Pass None if no filters
            mode="hybrid",
            alpha=0.35,
            min_score=0.55,
            min_chars=280,
            k_min=3,
        )
        
        # Convert to LangChain document format for RAG
        docs_for_rag = []
        for r in results:
            doc = type('Document', (), {
                'page_content': r.get('text', ''),
                'metadata': {
                    'doc_id': r.get('doc_id', 'N/A'),
                    'page_number': r.get('page_number', 'N/A'),
                    'chunk_type': r.get('chunk_type', 'N/A'),
                    'score': r.get('score', 0.0),
                    'scott_numbers': r.get('scott_numbers', []),
                    'years': r.get('years', []),
                    'catalog_systems': r.get('catalog_systems', [])
                }
            })()
            docs_for_rag.append(doc)
        
        # Generate RAG response using LangChain
        rag_response = create_rag_response(docs_for_rag, query)
        execution_time = time.time() - start_time
        
        # Build metadata with actual filters used
        metadata = {
            "approach": "Basic Hybrid Search",
            "query": query,
            "total_results": len(results),
            "max_results": int(max_results),
            "filters_used": filters if filters else "None (searching all documents)",
            "generation_time": execution_time,
            "context_docs_count": rag_response.get("context_docs_count", len(docs_for_rag)),
            "context_length": sum(len(d.page_content) for d in docs_for_rag),
            "token_usage": rag_response.get("token_usage", {}),
            "cost_info": rag_response.get("cost_info", {}),
        }
        
        return {
            "answer": rag_response.get("response", "No response generated"),
            "results": results,
            "metadata": metadata
        }
        
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"[ERROR] Basic search error: {str(e)}")
        print(f"[ERROR] Full traceback: {error_details}")
        print(f"[ERROR] Filters attempted: {filters}")
        
        return {
            "answer": f"❌ Basic search error: {str(e)}",
            "results": [],
            "metadata": {
                "error": str(e),
                "generation_time": 0,
                "filters_attempted": filters if filters else "None"
            }
        }

In [15]:
def search_and_answer_advanced(
    query: str,
    rag_system: Dict[str, Any],
    max_results: int = 10,
) -> Dict[str, Any]:
    """
    Advanced compression search approach - filters NOT applied (as requested).
    Now tracks both compression and RAG response token usage and costs.
    """
    # Validation
    if not rag_system or not rag_system.get("client"):
        return {
            "answer": "❌ Error: No Weaviate connection",
            "results": [],
            "metadata": {"error": "No Weaviate connection"}
        }

    client_wv = rag_system["client"]
    embeddings = rag_system.get("embeddings")
    llm = rag_system.get("llm")
    
    # NOTE: Advanced search does not apply filters as requested by user
    # This approach uses ensemble retrieval and compression instead

    try:
        start_time = time.time()
        
        # Advanced search with compression (no filters applied) - now returns token info
        compressed_docs, compression_token_usage, compression_cost_info = search_stamps_with_compression(
            query=query,
            client=client_wv,
            embeddings=embeddings,
            llm=llm,
            limit=max_results,
            alpha=0.30,
            diversity_lambda=0.75
        )
        
        # Generate RAG response using LangChain
        rag_response = create_rag_response(compressed_docs, query)
        
        # Extraer y preservar figuras de los documentos originales
        figure_pattern = r'(!\[([^\]]*)\]\([^)]+\))'

        for doc in compressed_docs:
            # Buscar figuras en el contenido original si está disponible
            original_content = doc.metadata.get('text_original', doc.page_content)
            
            # Extraer todas las figuras del contenido original
            figures = re.findall(figure_pattern, original_content)
            
            # Eliminar duplicados manteniendo el orden
            seen_figures = set()
            unique_figures = []
            for fig in figures:
                # Usar el path de la imagen como identificador único (ignorando el alt text)
                img_path = re.search(r'\]\(([^)]+)\)', fig[0])
                if img_path:
                    img_identifier = img_path.group(1)
                    if img_identifier not in seen_figures:
                        seen_figures.add(img_identifier)
                        unique_figures.append(fig)
            
            # Verificar qué figuras ya están en el contenido comprimido
            existing_figures = set()
            for fig in unique_figures:
                if fig[0] in doc.page_content:
                    img_path = re.search(r'\]\(([^)]+)\)', fig[0])
                    if img_path:
                        existing_figures.add(img_path.group(1))
            
            # Agregar solo las figuras que faltan
            missing_figures = []
            for fig in unique_figures:
                img_path = re.search(r'\]\(([^)]+)\)', fig[0])
                if img_path and img_path.group(1) not in existing_figures:
                    missing_figures.append(fig[0])
            
            # Si hay figuras faltantes, agregarlas al final
            if missing_figures:
                figures_text = "\n\n" + "\n".join(missing_figures)
                doc.page_content = doc.page_content + figures_text
            
            # Guardar las figuras únicas en metadata para acceso rápido
            doc.metadata['figures'] = [fig[0] for fig in unique_figures] if unique_figures else []
            doc.metadata['has_figures'] = len(unique_figures) > 0
        
        # Convert compressed docs to results format for display
        results = []
        for i, doc in enumerate(compressed_docs):
            result = {
                'doc_id': doc.metadata.get('doc_id', 'N/A'),
                'page_number': doc.metadata.get('page_number', 'N/A'),
                'chunk_type': doc.metadata.get('chunk_type', 'N/A'),
                'text': doc.page_content,
                'score': doc.metadata.get('quality_score', 0.0),
                'catalog_systems': doc.metadata.get('catalog_systems', []),
                'scott_numbers': doc.metadata.get('scott_numbers', []),
                'years': doc.metadata.get('years', []),
                'colors': doc.metadata.get('colors', []),
                'variety_classes': doc.metadata.get('variety_classes', []),
                'has_figures': doc.metadata.get('has_figures', False),  
                'figures': doc.metadata.get('figures', [])  
            }
            results.append(result)
        
        execution_time = time.time() - start_time
        
        # Combine compression and RAG token usage/costs
        rag_token_usage = rag_response.get("token_usage", {})
        rag_cost_info = rag_response.get("cost_info", {})
        
        # Create combined token usage structure
        combined_token_usage = {
            "compression": {
                "input_tokens": compression_token_usage.get("input_tokens", 0),
                "output_tokens": compression_token_usage.get("output_tokens", 0),
                "total_tokens": compression_token_usage.get("total_tokens", 0)
            },
            "rag_response": {
                "input_tokens": rag_token_usage.get("input_tokens", 0),
                "output_tokens": rag_token_usage.get("output_tokens", 0),
                "total_tokens": rag_token_usage.get("total_tokens", 0)
            },
            "total": {
                "input_tokens": (compression_token_usage.get("input_tokens", 0) + 
                               rag_token_usage.get("input_tokens", 0)),
                "output_tokens": (compression_token_usage.get("output_tokens", 0) + 
                                rag_token_usage.get("output_tokens", 0)),
                "total_tokens": (compression_token_usage.get("total_tokens", 0) + 
                               rag_token_usage.get("total_tokens", 0))
            }
        }
        
        # Create combined cost info structure
        combined_cost_info = {
            "compression": {
                "input_cost": compression_cost_info.get("input_cost", 0.0),
                "output_cost": compression_cost_info.get("output_cost", 0.0),
                "estimated_cost_usd": compression_cost_info.get("estimated_cost_usd", 0.0)
            },
            "rag_response": {
                "input_cost": rag_cost_info.get("input_cost", 0.0),
                "output_cost": rag_cost_info.get("output_cost", 0.0),
                "estimated_cost_usd": rag_cost_info.get("estimated_cost_usd", 0.0)
            },
            "total": {
                "input_cost": (compression_cost_info.get("input_cost", 0.0) + 
                             rag_cost_info.get("input_cost", 0.0)),
                "output_cost": (compression_cost_info.get("output_cost", 0.0) + 
                              rag_cost_info.get("output_cost", 0.0)),
                "estimated_cost_usd": (compression_cost_info.get("estimated_cost_usd", 0.0) + 
                                     rag_cost_info.get("estimated_cost_usd", 0.0))
            }
        }
        
        metadata = {
            "approach": "Advanced Compression Search",
            "query": query,
            "total_results": len(results),
            "compressed_docs": len(compressed_docs),
            "filters_used": "No filters (advanced approach)",
            "generation_time": execution_time,
            "context_docs_count": rag_response.get("context_docs_count", len(compressed_docs)),
            "docs_with_figures": sum(1 for r in results if r.get('has_figures', False)),
            "token_usage": combined_token_usage,
            "cost_info": combined_cost_info,
        }
        
        return {
            "answer": rag_response.get("response", "No response generated"),
            "results": results,
            "metadata": metadata
        }
        
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"[ERROR] Advanced search error: {str(e)}")
        print(f"[ERROR] Full traceback: {error_details}")
        
        return {
            "answer": f"❌ Advanced search error: {str(e)}",
            "results": [],
            "metadata": {
                "error": str(e),
                "generation_time": 0,
                "filters_attempted": "None"
            }
        }

In [16]:
# query = "Costa Rica 1907 2 colones stamp with original gum. Scott 68 issue of 1907"

# results = search_and_answer_basic(
#     query,
#     rag_system,
#     None,
#     None,
#     ["1","2"],
#     10,
# )

### Test Basic Approach

In [17]:
# # Enhanced test of search_chunks_semantic function
# def display_search_results(results, query, filters_used=None):
#     """
#     Enhanced display function for search results
#     """
#     print(f"🔍 SEARCH RESULTS")
#     print(f"=" * 60)
#     print(f"📝 Query: '{query}'")
#     if filters_used:
#         print(f"🔧 Filters applied: {filters_used}")
#     print(f"📊 Total results: {len(results)}")
#     print(f"=" * 60)
    
#     if not results:
#         print("❌ No results found")
#         return
    
#     for j, result in enumerate(results[:5], 1):  # Show top 5 results
#         print(f"\n🏷️ RESULT #{j} (Score: {result['score']:.4f})")
#         print(f"   📄 Document: {result['doc_id']}")
#         print(f"   📋 Chunk Type: {result['chunk_type']}")
#         print(f"   📄 Page: {result['page_number']}")
        
#         # Show metadata if available
#         metadata_items = [
#             ('📖 Catalog Systems', result.get('catalog_systems', [])),
#             ('🔢 Scott Numbers', result.get('scott_numbers', [])),
#             ('📅 Years', result.get('years', [])),
#             ('🎨 Colors', result.get('colors', [])),
#             ('🔀 Variety Classes', result.get('variety_classes', [])),
#         ]
        
#         for label, data in metadata_items:
#             if data:
#                 display_data = ', '.join(str(item) for item in data) if isinstance(data, list) else str(data)
#                 print(f"   {label}: {display_data}")
        
#         # Boolean flags
#         if result.get('has_varieties'):
#             print(f"   ✅ Has varieties")
#         if result.get('is_guanacaste'):
#             print(f"   🌎 Guanacaste province")
#         if result.get('has_technical_specs'):
#             print(f"   🔧 Has technical specs")
            
#         # Text preview
#         text = result.get('text', '')
#         preview = text[:300] + "..." if len(text) > 300 else text
#         print(f"   📝 Text preview: {preview}")
#         print(f"   {'─' * 50}")

# # Test 1: Basic search without filters (original test enhanced)
# print("🧪 TEST 1: Basic Hybrid Search (No Filters)")
# query = "Costa Rica 1907 2 colones stamp with original gum. Scott 68 issue of 1907"

# results = search_chunks_semantic(
#     client=client, 
#     query=query, 
#     collection_name="Oxcart", 
#     limit=20,
#     filters=[],  # No filters
#     mode="hybrid",
#     alpha=0.35
# )

# display_search_results(results, query)

# print(f"\n💡 This test shows unfiltered results. Now let's test with specific filters...")

In [18]:
# # Test: Combined Filters (Advanced Testing) - UPDATED with Multiple Scott Numbers
# print("🧪 TEST 6: Combined Filters (Advanced Testing)")
# print("=" * 80)

# # Test complex filter combinations for precise searches
# combined_tests = [
#     {
#         "name": "1907 stamps with varieties",
#         "query": "1907 Costa Rica stamps with varieties or errors",
#         "filters": {
#             "year_range": (1907, 1907)
#         }
#     },
#     {
#         "name": "1934 Costa Rica stamps",
#         "query": "List all 1934 Costa Rica Stamps",
#         "filters": {
#             "year_range": (1934, 1934)
#         }
#     },
#     {
#         "name": "Costa Rica First Issue Scott 1-5 (MULTIPLE SCOTT NUMBERS TEST)",
#         "query": "Costa Rica First Issue Scott 1 2 3 4 5",
#         "filters": {
#             "catalog_system": "Scott",
#             "scott_numbers": ["1", "2", "3", "4", "5"]  # TEST: Multiple Scott numbers as list
#         }
#     }
# ]

# for i, test in enumerate(combined_tests, 1):
#     print(f"\n🔬 COMBINED TEST {i}: {test['name']}")
#     print(f"{'─' * 60}")
#     print(f"🎯 Filters: {test['filters']}")
    
#     # Special logging for multiple Scott numbers test
#     if "scott_number" in test['filters'] and isinstance(test['filters']['scott_number'], list):
#         print(f"🔢 TESTING MULTIPLE SCOTT NUMBERS: {test['filters']['scott_number']}")
#         print(f"📝 Expected: Should find documents with ANY of these Scott numbers (OR logic)")
    
#     # Execute search with combined filters
#     results = search_chunks_semantic(
#         client=client,
#         query=test['query'],
#         collection_name="Oxcart",
#         limit=15,  # Increased limit for multiple Scott test
#         filters=test['filters'],
#         mode="hybrid",
#         alpha=0.35
#     )
    
#     display_search_results(results, test['query'], filters_used=test['filters'])
    
#     # Detailed validation of filter application
#     if results:
#         print(f"\n   🔍 FILTER VALIDATION:")
#         for filter_key, filter_value in test['filters'].items():
#             validation_count = 0
            
#             # Special handling for multiple Scott numbers
#             if filter_key == "scott_numbers" and isinstance(filter_value, list):
#                 print(f"      🔢 Checking for ANY Scott number from: {filter_value}")
#                 for result in results:
#                     result_scotts = result.get('scott_numbers', [])
#                     # Check if ANY of the requested Scott numbers is in the result
#                     if any(scott_num in result_scotts for scott_num in filter_value):
#                         validation_count += 1
#                 print(f"      ✅ Documents with ANY requested Scott number: {validation_count}/{len(results)}")
                
#                 # Show which specific Scott numbers were found
#                 found_scotts = set()
#                 for result in results:
#                     found_scotts.update(result.get('scott_numbers', []))
#                 matching_scotts = [s for s in filter_value if s in found_scotts]
#                 print(f"      📋 Requested Scott numbers found: {matching_scotts}")
#                 print(f"      📋 All Scott numbers in results: {sorted(found_scotts)}")
                
#             elif filter_key == "year_range":
#                 result_years = result.get('years', [])
#                 if any(filter_value[0] <= year <= filter_value[1] for year in result_years):
#                     validation_count += 1
#             elif filter_key == "catalog_system":
#                 if filter_value in result.get('catalog_systems', []):
#                     validation_count += 1
#             elif filter_key == "chunk_type":
#                 if result.get('chunk_type') == filter_value:
#                     validation_count += 1
#             elif filter_key in ["has_varieties", "is_guanacaste", "has_technical_specs"]:
#                 if result.get(filter_key) == filter_value:
#                     validation_count += 1
            
#             # Show validation for non-Scott filters
#             if filter_key != "scott_number":
#                 print(f"      ✅ {filter_key}: {validation_count}/{len(results)} results match")
    
#     print(f"\n{'═' * 80}")

### Gradio Interface

In [19]:
def format_timing_display(
          basic_time, advanced_time, total_time,
          basic_results, advanced_results,
          filter_msg="",
          basic_metadata=None, advanced_metadata=None
      ):
    """Enhanced timing display with cost comparison (robust casting)"""
    try:
        # --- helpers seguros ---
        def as_float(x, default=0.0):
            try:
                return float(x)
            except (TypeError, ValueError):
                return default

        def as_int(x, default=0):
            try:
                # evita ints tipo '1_234' si viniera así
                return int(float(x))
            except (TypeError, ValueError):
                return default

        # tiempos
        basic_time = as_float(basic_time)
        advanced_time = as_float(advanced_time)
        total_time = as_float(total_time)

        # costos - ARREGLO AQUÍ para manejar estructura anidada
        basic_cost = 0.0
        advanced_cost = 0.0

        if basic_metadata and 'cost_info' in basic_metadata:
            basic_cost = as_float(basic_metadata['cost_info'].get('estimated_cost_usd', 0))

        if advanced_metadata and 'cost_info' in advanced_metadata:
            # Detectar si es estructura anidada
            adv_cost_info = advanced_metadata['cost_info']
            if 'total' in adv_cost_info and isinstance(adv_cost_info['total'], dict):
                # Estructura anidada - usar el total
                advanced_cost = as_float(adv_cost_info['total'].get('estimated_cost_usd', 0))
            else:
                # Estructura simple
                advanced_cost = as_float(adv_cost_info.get('estimated_cost_usd', 0))

        total_cost = basic_cost + advanced_cost

        # tokens (¡forzar int!) - ARREGLO AQUÍ para manejar estructura anidada
        basic_tokens = 0
        advanced_tokens = 0

        if basic_metadata and 'token_usage' in basic_metadata:
            basic_tokens = as_int(basic_metadata['token_usage'].get('total_tokens', 0))

        if advanced_metadata and 'token_usage' in advanced_metadata:
            # Detectar si es estructura anidada
            adv_token_usage = advanced_metadata['token_usage']
            if 'total' in adv_token_usage and isinstance(adv_token_usage['total'], dict):
                # Estructura anidada - usar el total
                advanced_tokens = as_int(adv_token_usage['total'].get('total_tokens', 0))
            else:
                # Estructura simple
                advanced_tokens = as_int(adv_token_usage.get('total_tokens', 0))

        total_tokens = basic_tokens + advanced_tokens

        # faster
        if basic_time > 0 and advanced_time > 0:
            if basic_time < advanced_time:
                faster = f"🏆 Basic search was {advanced_time/basic_time:.1f}x faster"
            elif advanced_time < basic_time:
                faster = f"🏆 Advanced search was {basic_time/advanced_time:.1f}x faster"
            else:
                faster = "⚡ Both approaches took similar time"
        else:
            faster = "⏱️ Timing comparison not available"

        # cost effectiveness
        cost_comparison = ""
        if basic_cost > 0 and advanced_cost > 0:
            if basic_cost < advanced_cost:
                cost_comparison = f"💵 Basic search was ${advanced_cost - basic_cost:.6f} cheaper"
            elif advanced_cost < basic_cost:
                cost_comparison = f"💵 Advanced search was ${basic_cost - advanced_cost:.6f} cheaper"
            else:
                cost_comparison = "💵 Both approaches had similar costs"

        # speeds
        basic_speed = f"{basic_results/basic_time:.1f}" if basic_time > 0 else "N/A"
        advanced_speed = f"{advanced_results/advanced_time:.1f}" if advanced_time > 0 else "N/A"

        timing_display = f"""⏱️ EXECUTION TIMING & COST COMPARISON
            ================================================

            📋 SEARCH CONFIGURATION
            • {filter_msg}

            🔍 BASIC HYBRID SEARCH
            • Processing Time: {basic_time:.2f} seconds
            • Documents Found: {basic_results}
            • Speed: {basic_speed} docs/sec
            • Tokens Used: {basic_tokens:,}
            • Cost: ${basic_cost:.6f}
            • Status: ✅ Complete

            🚀 ADVANCED COMPRESSION SEARCH
            • Processing Time: {advanced_time:.2f} seconds
            • Documents Found: {advanced_results}
            • Speed: {advanced_speed} docs/sec
            • Tokens Used: {advanced_tokens:,}
            • Cost: ${advanced_cost:.6f}
            • Status: ✅ Complete

            📊 OVERALL PERFORMANCE
            • Total Execution: {total_time:.2f} seconds
            • Total Tokens: {total_tokens:,}
            • Total Cost: ${total_cost:.6f}
            • Execution Mode: Sequential (Basic → Advanced)
            • {faster}
            • {cost_comparison}

            💡 PERFORMANCE NOTES:
            • Basic search: Fast initial results, lower cost
            • Advanced search: Enhanced quality, higher token usage
            • Costs shown are for GPT-4.1-Nano(advanced seach with multiquery and compression) & GPT-5-mini (generation phase) models
            • Sequential execution allows progressive viewing
            • Filters are optional and only applied when provided"""
        return timing_display

    except Exception as e:
        return f"❌ Error formatting timing data: {e}"

In [20]:
# Función para convertir Markdown a HTML con corrección de rutas de imágenes
def markdown_to_html(text):
    """Convert markdown text to HTML with lazy base64 loading"""
    if not text:
        return "<p><em>No content</em></p>"
    
    import re
    import os
    import base64
    
    base_path = r"C:\Users\VM-SERVER\Desktop\Oxcart RAG\results\markdown\figures"
    
    def image_to_base64_lazy(match):
        alt_text = match.group(1)
        filename = match.group(2).split('/')[-1].split('\\')[-1]
        full_path = os.path.join(base_path, filename)
        
        if os.path.exists(full_path):
            try:
                with open(full_path, "rb") as img_file:
                    b64_string = base64.b64encode(img_file.read()).decode()
                    ext = filename.split('.')[-1].lower()
                    mime_type = f"image/{ext}" if ext != 'jpg' else "image/jpeg"
                    return f'<img style="max-width: 100%; height: auto; display: block; margin: 10px auto; border: 1px solid #ddd; border-radius: 4px;" alt="{alt_text}" src="data:{mime_type};base64,{b64_string}" />'
            except Exception as e:
                print(f"Error loading image {filename}: {e}")
                return f'<p>[Image not found: {filename}]</p>'
        else:
            return f'<p>[Image not found: {filename}]</p>'
    
    # Primero convertir markdown a HTML
    html = markdown.markdown(text, extensions=['tables', 'fenced_code'])
    
    # Luego reemplazar las imágenes en el HTML
    html = re.sub(
        r'<img[^>]*alt="([^"]*)"[^>]*src="[^"]*?([^/\\">]+\.(?:png|jpg|jpeg|gif))"[^>]*>',
        image_to_base64_lazy,
        html
    )
    
    return html

In [21]:
def format_search_results(results, approach_name):
    """Format search results for display in Markdown with figure handling"""
    if not results:
        return f"*No results found with {approach_name}*"

    lines = []
    lines.append(f"### {approach_name} Results")
    lines.append(f"**Found {len(results)} documents**\n")
    lines.append("---")
    
    for i, r in enumerate(results):
        doc_id = r.get("doc_id", "N/A")
        chunk_type_val = r.get("chunk_type", "N/A")
        page_number = r.get("page_number", "N/A")
        score = r.get("score", 0.0)
        catalogs = r.get("catalog_systems", [])
        scotts = r.get("scott_numbers", [])
        years = r.get("years", [])

        # Get full text (including figures)
        text = r.get("text", "")
        
        # Check if text contains figures
        has_figures = "![Figure]" in text or "![" in text
        
        # Extract just the text preview (without figures)
        import re
        text_without_figures = re.sub(r'!\[([^\]]*)\]\([^)]+\)', '', text).strip()
        preview = (text_without_figures[:300] + "...") if len(text_without_figures) > 300 else text_without_figures
        
        # Extract all figure references
        figure_pattern = r'(!\[([^\]]*)\]\([^)]+\))'
        figures = re.findall(figure_pattern, text)

        lines.append(f"\n#### 📄 Result {i+1}")
        lines.append(f"**Score:** `{score:.3f}`")
        
        if has_figures:
            lines.append("🖼️ **This result contains figures**\n")
        
        # Create a table for metadata
        lines.append("| Field | Value |")
        lines.append("|-------|-------|")
        lines.append(f"| Document | `{doc_id}` |")
        lines.append(f"| Type | {chunk_type_val} |")
        lines.append(f"| Page | {page_number} |")
        
        if catalogs:
            lines.append(f"| Catalogs | {', '.join(catalogs)} |")
        if scotts:
            lines.append(f"| Scott Numbers | **{', '.join(scotts)}** |")
        if years:
            lines.append(f"| Years | {', '.join(str(y) for y in years)} |")
        
        # Always show preview
        lines.append(f"\n**Preview:**")
        lines.append(f"> {preview}")
        
        # Always show figures if they exist
        if has_figures and figures:
            lines.append(f"\n**Figures in this result:**\n")
            for figure_match in figures:
                lines.append(figure_match[0])  # Add the complete figure markdown
        
        lines.append("\n---")
    
    return "\n".join(lines)

In [22]:
def format_metadata(metadata, execution_time):
    """Format metadata for display in Markdown including token usage and costs (supports both basic and advanced structures)."""
    if not metadata:
        return "*No metadata available*"

    # Helpers
    def as_float(x, default=None):
        try:
            return float(x)
        except (TypeError, ValueError):
            return default

    def as_int(x, default=None):
        try:
            return int(float(x))
        except (TypeError, ValueError):
            return default

    out = []
    out.append("### Search Metadata\n")

    # Basic info
    out.append("#### 📊 Search Information\n")
    out.append("| Property | Value |")
    out.append("|----------|-------|")
    out.append(f"| **Approach** | {metadata.get('approach', 'Unknown')} |")
    query_val = str(metadata.get('query', 'N/A')).replace("\n", " ")
    # (opcional) escapar pipes para no romper la tabla
    query_val = query_val.replace("|", "\\|")
    out.append(f"| **Query** | `{query_val}` |")

    total_results = as_int(metadata.get('total_results'), 0)
    out.append(f"| **Results found** | {total_results} |")

    context_docs_count = as_int(metadata.get('context_docs_count'))
    out.append(f"| **Context docs** | {context_docs_count if context_docs_count is not None else 'N/A'} |")

    context_length = as_int(metadata.get('context_length'))
    out.append(
        f"| **Context length** | {context_length:,} chars |"
        if context_length is not None else
        "| **Context length** | N/A |"
    )

    if metadata.get('filters_used'):
        filters_str = str(metadata['filters_used']).replace('{', '').replace('}', '')
        filters_str = filters_str.replace("|", "\\|")
        out.append(f"| **Filters** | `{filters_str}` |")

    if 'compressed_docs' in metadata:
        out.append(f"| **Compressed docs** | {metadata['compressed_docs']} |")

    out.append("")  # Add space after basic info table

    # Detect token usage structure type
    token_usage = metadata.get('token_usage') or {}
    cost_info = metadata.get('cost_info') or {}

    # Check if this is nested structure (advanced search) or simple structure (basic search)
    is_nested = (
        isinstance(token_usage, dict) and
        any(key in token_usage for key in ['compression', 'rag_response', 'total'])
    )

    if token_usage:
        if is_nested:
            # Advanced search - nested structure
            out.append("#### 🎯 Token Usage Breakdown\n")

            # Compression tokens
            compression_tokens = token_usage.get('compression', {})
            if compression_tokens:
                comp_in = as_int(compression_tokens.get('input_tokens'), 0)
                comp_out = as_int(compression_tokens.get('output_tokens'), 0)
                comp_total = as_int(compression_tokens.get('total_tokens'), (comp_in or 0) + (comp_out or 0))

                out.append("**Document Compression:**\n")
                out.append("| Token Type | Count |")
                out.append("|------------|-------|")
                out.append(f"| Input tokens | {comp_in:,} |")
                out.append(f"| Output tokens | {comp_out:,} |")
                out.append(f"| Total tokens | {comp_total:,} |")
                out.append("")  # Add space after table

            # RAG response tokens
            rag_tokens = token_usage.get('rag_response', {})
            if rag_tokens:
                rag_in = as_int(rag_tokens.get('input_tokens'), 0)
                rag_out = as_int(rag_tokens.get('output_tokens'), 0)
                rag_total = as_int(rag_tokens.get('total_tokens'), (rag_in or 0) + (rag_out or 0))

                out.append("**RAG Response Generation:**\n")
                out.append("| Token Type | Count |")
                out.append("|------------|-------|")
                out.append(f"| Input tokens | {rag_in:,} |")
                out.append(f"| Output tokens | {rag_out:,} |")
                out.append(f"| Total tokens | {rag_total:,} |")
                out.append("")  # Add space after table

            # Total tokens
            total_tokens = token_usage.get('total', {})
            if total_tokens:
                total_in = as_int(total_tokens.get('input_tokens'), 0)
                total_out = as_int(total_tokens.get('output_tokens'), 0)
                total_total = as_int(total_tokens.get('total_tokens'), (total_in or 0) + (total_out or 0))

                out.append("**Combined Total:**\n")
                out.append("| Token Type | Count |")
                out.append("|------------|-------|")
                out.append(f"| **Total input tokens** | **{total_in:,}** |")
                out.append(f"| **Total output tokens** | **{total_out:,}** |")
                out.append(f"| **Grand total tokens** | **{total_total:,}** |")
                out.append("")  # Add space after table

        else:
            # Basic search - simple structure
            in_tok = as_int(token_usage.get('input_tokens'), 0)
            out_tok = as_int(token_usage.get('output_tokens'), 0)
            tot_tok = as_int(token_usage.get('total_tokens'), (in_tok or 0) + (out_tok or 0))

            out.append("#### 🎯 Token Usage\n")
            out.append("| Token Type | Count |")
            out.append("|------------|-------|")
            out.append(f"| **Input tokens** | {in_tok:,} |")
            out.append(f"| **Output tokens** | {out_tok:,} |")
            out.append(f"| **Total tokens** | {tot_tok:,} |")
            out.append("")  # Add space after table

    # Cost info - handle both nested and simple structures
    if cost_info:
        is_cost_nested = (
            isinstance(cost_info, dict) and
            any(key in cost_info for key in ['compression', 'rag_response', 'total'])
        )

        if is_cost_nested:
            # Advanced search - nested cost structure
            out.append("#### 💰 Cost Analysis Breakdown\n")

            # Compression costs
            compression_cost = cost_info.get('compression', {})
            if compression_cost:
                comp_in_cost = as_float(compression_cost.get('input_cost'), 0.0)
                comp_out_cost = as_float(compression_cost.get('output_cost'), 0.0)
                comp_total_cost = as_float(compression_cost.get('estimated_cost_usd'),
                                        (comp_in_cost or 0.0) + (comp_out_cost or 0.0))

                out.append("**Document Compression:**\n")
                out.append("| Cost Component | USD |")
                out.append("|----------------|-----|")
                out.append(f"| Input cost | ${comp_in_cost:.6f} |")
                out.append(f"| Output cost | ${comp_out_cost:.6f} |")
                out.append(f"| Subtotal | ${comp_total_cost:.6f} |")
                out.append("")  # Add space after table

            # RAG response costs
            rag_cost = cost_info.get('rag_response', {})
            if rag_cost:
                rag_in_cost = as_float(rag_cost.get('input_cost'), 0.0)
                rag_out_cost = as_float(rag_cost.get('output_cost'), 0.0)
                rag_total_cost = as_float(rag_cost.get('estimated_cost_usd'),
                                        (rag_in_cost or 0.0) + (rag_out_cost or 0.0))

                out.append("**RAG Response Generation:**\n")
                out.append("| Cost Component | USD |")
                out.append("|----------------|-----|")
                out.append(f"| Input cost | ${rag_in_cost:.6f} |")
                out.append(f"| Output cost | ${rag_out_cost:.6f} |")
                out.append(f"| Subtotal | ${rag_total_cost:.6f} |")
                out.append("")  # Add space after table

            # Total costs
            total_cost = cost_info.get('total', {})
            if total_cost:
                total_in_cost = as_float(total_cost.get('input_cost'), 0.0)
                total_out_cost = as_float(total_cost.get('output_cost'), 0.0)
                total_total_cost = as_float(total_cost.get('estimated_cost_usd'),
                                        (total_in_cost or 0.0) + (total_out_cost or 0.0))

                out.append("**Combined Total:**\n")
                out.append("| Cost Component | USD |")
                out.append("|----------------|-----|")
                out.append(f"| **Total input cost** | **${total_in_cost:.6f}** |")
                out.append(f"| **Total output cost** | **${total_out_cost:.6f}** |")
                out.append(f"| **Grand total cost** | **${total_total_cost:.6f}** |")
                out.append("")  # Add space after table

        else:
            # Basic search - simple cost structure
            in_cost = as_float(cost_info.get('input_cost'), 0.0)
            out_cost = as_float(cost_info.get('output_cost'), 0.0)
            est_cost = as_float(cost_info.get('estimated_cost_usd'), (in_cost or 0.0) + (out_cost or 0.0))

            out.append("#### 💰 Cost Analysis\n")
            out.append("| Cost Component | USD |")
            out.append("|----------------|-----|")
            out.append(f"| **Input cost** | ${in_cost:.6f} |")
            out.append(f"| **Output cost** | ${out_cost:.6f} |")
            out.append(f"| **Total cost** | **${est_cost:.6f}** |")
            out.append("")  # Add space after table

    # Performance
    out.append("#### ⏱️ Performance\n")
    out.append("| Metric | Time |")
    out.append("|--------|------|")

    gen_time = as_float(metadata.get('generation_time'))
    if gen_time is not None:
        out.append(f"| **Generation time** | {gen_time:.2f}s |")

    # Error handling
    if metadata.get('error'):
        err = str(metadata['error']).replace("`", "'")
        out.append(f"\n⚠️ **Error:** `{err}`")

    return "\n".join(out)

In [23]:
def progress_card(title: str, percent: int, note: str = "") -> str:
    pct = max(0, min(100, int(percent)))
    note_html = f'<div class="vf-note">{note}</div>' if note else ""
    return f"""
    <div class="vf-progress-card" role="status" aria-live="polite" aria-atomic="true">
      <div class="vf-title">{title}</div>
      <progress class="vf-progress" value="{pct}" max="100"></progress>
      {note_html}
    </div>
    """

In [24]:
def run_basic_search(
    query: str,
    year_start: Optional[str],
    year_end: Optional[str],
    scott_numbers: Optional[str],
    max_results: int,
    st_ctx: Dict[str, Any],                # gr.State dict para pasar datos a la fase 2
    progress: gr.Progress = gr.Progress(track_tqdm=True),
):
    """
    Ejecuta SOLO la búsqueda básica, pinta resultados rápidos y deja todo listo
    para que la avanzada continúe en un job separado.
    Devuelve 7 outputs visibles + 1 estado (st_ctx).
    """
    # Validaciones mínimas
    progress(0.02, desc="Validating...")
    if not query or not query.strip():
        err = markdown_to_html("❌ Please enter a query")
        return err, "", "", err, "", "", "No timing data available", st_ctx

    # Filtrado opcional (idéntico a tu lógica original, pero resumido aquí)
    processed_year_start = None
    processed_year_end = None
    if year_start and year_end:
        try:
            ys = int(str(year_start).strip())
            ye = int(str(year_end).strip())
            if 1800 <= ys <= 2025 and 1800 <= ye <= 2025:
                processed_year_start, processed_year_end = ys, ye
        except Exception:
            pass

    processed_scott_numbers = None
    if scott_numbers and str(scott_numbers).strip():
        nums = [s.strip() for s in str(scott_numbers).split(",") if s.strip()]
        processed_scott_numbers = nums or None

    filters_status = []
    if processed_year_start and processed_year_end:
        filters_status.append(f"Years: {processed_year_start}-{processed_year_end}")
    if processed_scott_numbers:
        filters_status.append(f"Scott: {', '.join(processed_scott_numbers)}")
    filter_msg = "Filters applied: " + (", ".join(filters_status) if filters_status else "None (searching all documents)")
    st_ctx = {
        "t0": time.time(),
        "filter_msg": filter_msg,
    }

    # Mensajes iniciales
    progress(0.08, desc="Preparing search…")
    gr.Info("Starting quick search...\n (wait time less than 60 seconds)")

    loading_basic = progress_card("🔄 Running Basic Hybrid Search…", 10, filter_msg)
    loading_advanced = progress_card("⏳ Waiting for Basic to complete…", 5, "Deep Analysis will start automatically.")


    # Un primer retorno (stream) para pintar "waiting"
    yield (
        loading_basic,  # basic_answer_html
        "",             # basic_search_html
        "",             # basic_metadata_html
        loading_advanced,  # advanced_answer_html (placeholder)
        "",             # advanced_search_html
        "",             # advanced_metadata_html
        f"⏱️ Basic search in progress...\n{filter_msg}",  # timing
        st_ctx          # state
    )

    # --------- Ejecutar BÚSQUEDA BÁSICA ----------
    progress(0.35, desc="Quick Search…")
    t_basic_start = time.time()
    # Llama a tu función real:
    basic_results_data = search_and_answer_basic(  # type: ignore[name-defined]
        query=query,
        rag_system=rag_system,                     # usa el capturado en create_gradio_interface
        year_start=processed_year_start,
        year_end=processed_year_end,
        scott_numbers=processed_scott_numbers,
        max_results=int(max_results),
    )
    t_basic_end = time.time()

    basic_answer = basic_results_data["answer"]
    basic_results = basic_results_data["results"]
    basic_metadata = basic_results_data["metadata"]
    basic_execution_time = basic_metadata.get("generation_time", t_basic_end - t_basic_start)

    # Render
    basic_answer_html = markdown_to_html(basic_answer)
    basic_search_html = markdown_to_html(format_search_results(basic_results, "Basic Hybrid Search"))
    basic_metadata_html = markdown_to_html(format_metadata(basic_metadata, basic_execution_time))

    timing_partial = (
        "⏱️ EXECUTION TIMING (Partial)\n\n"
        "Basic Hybrid Search: ✅ COMPLETED\n"
        f"• Time: {basic_execution_time:.2f}s\n"
        f"• Results: {len(basic_results)}\n"
        f"• {filter_msg}\n\n"
        "Advanced Search: ⏳ STARTING...\n"
    )
    progress(0.6, desc="Crafting the Deep Research…")
    gr.Info("Done Quick Search \n Beginning advanced research (wait 3-5 minutes) …")

    # Actualiza el state para la fase 2
    st_ctx.update({
        "basic_metadata": basic_metadata,
        "basic_exec": float(basic_execution_time),
        "basic_results_count": int(len(basic_results)),
    })

    # Devuelve resultados básicos + placeholder para avanzada
    yield (
        basic_answer_html,
        basic_search_html,
        basic_metadata_html,
        progress_card("🚀 Deep Analysis running…", 70, filter_msg),
        "",
        "",
        timing_partial,
        st_ctx
    )



In [25]:
def run_advanced_search(
    query: str,
    max_results: int,
    st_ctx: Dict[str, Any],
    progress: gr.Progress = gr.Progress(track_tqdm=True),
):
    progress(0.05, desc="Beginning deep research…")
    if not query or not query.strip():
        err = markdown_to_html("❌ Please enter a query")
        # frame final directo (sin dejar pending)
        yield err, "", "", "No timing data available", st_ctx
        return

    t0 = st_ctx.get("t0", time.time())
    filter_msg = st_ctx.get("filter_msg", "Filters applied: None")
    basic_metadata = st_ctx.get("basic_metadata", {})
    basic_exec = float(st_ctx.get("basic_exec", 0.0))
    basic_results_count = int(st_ctx.get("basic_results_count", 0))

    # 👉 placeholder visible (reemplaza lo anterior)
    yield (
        progress_card("🚀 Deep Analysis running…", 70, filter_msg),
        "",  # advanced_search_html
        "",  # advanced_metadata_html
        f"⏱️ EXECUTION TIMING (Partial)\n\nBasic: ✅ {basic_exec:.2f}s  |  Advanced: ⏳ running…\n{filter_msg}",
        st_ctx
    )

    try:
        # --------- Ejecutar BÚSQUEDA AVANZADA ----------
        progress(0.4, desc="Deep analysis…")
        t_adv_start = time.time()
        advanced_results_data = search_and_answer_advanced(  # type: ignore[name-defined]
            query=query,
            rag_system=rag_system,
            max_results=int(max_results),
        )
        t_adv_end = time.time()

        advanced_answer = advanced_results_data.get("answer", "")
        advanced_results = advanced_results_data.get("results", [])
        advanced_metadata = advanced_results_data.get("metadata", {})
        advanced_execution_time = advanced_metadata.get("generation_time", t_adv_end - t_adv_start)

        # Render
        advanced_answer_html = markdown_to_html(advanced_answer or "ℹ️ No answer produced.")
        advanced_search_html = markdown_to_html(format_search_results(advanced_results, "Advanced Compression Search"))
        advanced_metadata_html = markdown_to_html(format_metadata(advanced_metadata, advanced_execution_time))

        total_execution_time = time.time() - t0
        timing_final = format_timing_display(
            basic_exec,
            float(advanced_execution_time),
            float(total_execution_time),
            basic_results_count,
            len(advanced_results),
            filter_msg,
            basic_metadata,
            advanced_metadata
        )

        progress(1.0, desc="Done")
        gr.Info("In-depth analysis completed ✅")

        # ✅ FRAME FINAL: usa YIELD (no return) para reemplazar la barra
        yield (
            advanced_answer_html,
            advanced_search_html,
            advanced_metadata_html,
            timing_final,
            st_ctx
        )

    except Exception as e:
        # ✅ Si algo falla, reemplaza también el placeholder
        err_html = markdown_to_html(f"❌ Advanced phase failed:\n\n```\n{e}\n```")
        yield err_html, "", "", "Advanced phase failed — see error above.", st_ctx


In [26]:
# THEME - Solo lo básico (paleta "sage")
philatelic_theme = gr.themes.Soft().set(
    button_primary_background_fill="#6b8e7f",
    button_primary_background_fill_hover="#4a6558",
    button_primary_text_color="#ffffff",
    button_secondary_background_fill="#f5f5f5",
    button_secondary_background_fill_hover="#e8e8e8",
    button_secondary_text_color="#1f2937",
    body_background_fill="#ffffff",
    input_background_fill="#ffffff",
    input_border_color="#bfc8c3",
    slider_color="#6b8e7f",
    # opcionales si tu versión lo permite:
    # link_text_color="#4a6558",
    # link_text_color_hover="#2c5530",
)



In [27]:
css = """
/* ========================================
   CONFIGURACIÓN BASE - TEMA FILATELIA
   ======================================== */

/* Paleta base */
:root{
  --vf-ink:#1f2937;           /* texto principal */
  --vf-muted:#475569;         /* texto secundario */
  --vf-accent:#6b8e7f;        /* verde suave (primario) */
  --vf-accent-dark:#4a6558;   /* verde oscuro (hover/foco) */
  --vf-surface:#f3f4f6;       /* superficies suaves */
  --vf-border:#cfd8d4;        /* bordes suaves */
}

/* Reset & tipografía base */
body, .gradio-container{
  font-family:'Georgia','Times New Roman',serif !important;
  font-size:20px !important;
  color:var(--vf-ink) !important;
  line-height:1.6 !important;
  -webkit-font-smoothing:antialiased !important;
  -moz-osx-font-smoothing:grayscale !important;
  text-rendering:optimizeLegibility !important;
}
.gradio-container *{ font-size:inherit !important; }
.gradio-container svg{ font-size:initial !important; }

/* Enlaces (por si los usas en Markdown) */
a{ color:var(--vf-accent) !important; text-decoration:none !important; }
a:hover{ text-decoration:underline !important; }

/* ========================================
   ENCABEZADOS
   ======================================== */
h1{
  color:var(--vf-ink) !important;
  font-weight:700 !important;
  font-size:36px !important;
  margin:0 0 20px !important;
  letter-spacing:.2px !important; /* nitidez en Windows */
}
h2,h3{
  color:var(--vf-ink) !important;
  font-weight:700 !important;
  font-size:26px !important;
  margin:20px 0 12px !important;
  letter-spacing:.2px !important;
}

/* ========================================
   TEXTO GENERAL
   ======================================== */
p,.markdown-text{ color:var(--vf-ink) !important; font-size:22px !important; line-height:1.7 !important; }
label{
  font-size:22px !important; font-weight:700 !important; color:var(--vf-ink) !important;
  margin-bottom:8px !important; display:block !important;
}

/* ========================================
   INPUTS Y FORMULARIOS
   ======================================== */
input[type="text"], textarea{
  font-size:22px !important; padding:16px !important; color:var(--vf-ink) !important;
  background:#fff !important; border:2px solid var(--vf-border) !important; border-radius:8px !important;
  transition:border-color .2s ease, box-shadow .2s ease !important;
}
input[type="text"]:focus, textarea:focus{
  border-color:var(--vf-accent) !important;
  outline:none !important;
  box-shadow:0 0 0 3px rgba(107,142,127,.18) !important;
}
input::placeholder, textarea::placeholder{ color:#9ca3af !important; font-size:20px !important; }
select{
  font-size:20px !important; padding:14px !important; color:var(--vf-ink) !important;
  background:#fff !important; border:2px solid var(--vf-border) !important; border-radius:8px !important;
}

/* ========================================
   SLIDER
   ======================================== */
.gr-box:has(input[type="range"]) > .block > .wrap > span:first-child{
  font-size:20px !important; font-weight:700 !important; display:block !important; margin-bottom:10px !important;
}
.gr-box:has(input[type="range"]) .wrap{
  display:flex !important; flex-direction:row !important; align-items:center !important; gap:15px !important; width:100% !important;
}
.gr-box input[type="number"]{
  width:80px !important; height:40px !important; padding:8px !important; font-size:18px !important;
  border:1px solid var(--vf-border) !important; border-radius:6px !important; background:#fff !important; font-weight:700 !important; text-align:center !important;
}
input[type="range"]{
  flex:1 !important; -webkit-appearance:none !important; height:8px !important;
  background:#e5e7eb !important; border-radius:9999px !important; outline:none !important;
}
input[type="range"]::-webkit-slider-thumb{
  -webkit-appearance:none !important; width:20px !important; height:20px !important;
  background:var(--vf-accent) !important; border-radius:50% !important; cursor:pointer !important;
  transition:transform .15s ease, background .15s ease !important;
}
input[type="range"]::-webkit-slider-thumb:hover{ background:#5a7a6b !important; transform:scale(1.03) !important; }
input[type="range"]::-moz-range-thumb{
  width:20px !important; height:20px !important; background:var(--vf-accent) !important;
  border-radius:50% !important; border:none !important; cursor:pointer !important;
}
.gr-box:has(input[type="range"]) input[readonly],
.gr-box:has(input[type="range"]) span:last-child{
  font-size:20px !important; font-weight:700 !important; color:var(--vf-ink) !important;
  background:#f5f5f5 !important; padding:10px 14px !important; border:1px solid var(--vf-border) !important; border-radius:6px !important; text-align:center !important; min-width:72px !important;
}

/* ========================================
   BOTONES
   ======================================== */
button{
  font-size:22px !important; font-weight:700 !important; padding:16px 32px !important; border-radius:10px !important;
  transition:transform .15s ease, box-shadow .15s ease, background .2s ease !important; cursor:pointer !important;
}
#main-search-btn, button.primary{
  background:linear-gradient(135deg, var(--vf-accent) 0%, #5a7a6b 100%) !important;
  color:#fff !important; border:2px solid #5a7a6b !important; letter-spacing:.3px !important;
}
#main-search-btn:hover, button.primary:hover{
  transform:translateY(-2px) !important; box-shadow:0 6px 12px rgba(107,142,127,.3) !important;
  background:linear-gradient(135deg, #5a7a6b 0%, var(--vf-accent-dark) 100%) !important;
}
button.secondary, button:not(.primary):not([role="tab"]):not([aria-expanded]){
  background:#f5f5f5 !important; color:var(--vf-ink) !important; border:2px solid var(--vf-border) !important;
}
button.secondary:hover, button:not(.primary):not([role="tab"]):not([aria-expanded]):hover{
  background:#e8e8e8 !important; border-color:var(--vf-accent) !important;
}
/* Focus visible (accesible) */
button:focus-visible, input:focus-visible, textarea:focus-visible, select:focus-visible{
  outline:3px solid rgba(107,142,127,.45) !important; outline-offset:2px !important;
}

/* ========================================
   TABS
   ======================================== */
button[role="tab"]{
  background:#f5f5f5 !important; color:var(--vf-ink) !important; border:1px solid var(--vf-border) !important;
  font-size:20px !important; padding:14px 20px !important; margin-right:4px !important; border-radius:10px 10px 0 0 !important;
}
button[role="tab"][aria-selected="true"]{
  background:var(--vf-accent) !important; color:#fff !important; border-color:var(--vf-accent) !important;
}
button[role="tab"]:hover:not([aria-selected="true"]){ background:#e8e8e8 !important; }

/* ========================================
   ACCORDIONS
   ======================================== */
.accordion{
  border:1px solid #e5e7eb !important; border-radius:12px !important; margin:16px 0 !important; background:#fff !important; overflow:hidden !important;
}
button[aria-expanded]{
  background:#fafafa !important; border:none !important; padding:16px 24px !important; width:100% !important; text-align:left !important;
  font-size:20px !important; font-weight:700 !important; color:var(--vf-ink) !important; transition:background .2s !important;
}
button[aria-expanded]:hover{ background:#f0f0f0 !important; }
button[aria-expanded="true"]{
  background:linear-gradient(135deg, #f0f5f3 0%, #e5ede9 100%) !important; color:#2c5530 !important;
}

/* ========================================
   PROGRESS Y ESTADOS
   ======================================== */
.progress-bar{
  background:linear-gradient(90deg, var(--vf-accent) 0%, #8fa89e 50%, var(--vf-accent) 100%) !important;
  height:4px !important; border-radius:2px !important; opacity:.95 !important;
}
.progress-container{ background:var(--vf-surface) !important; border-radius:6px !important; margin:10px 0 !important; }
.progress-text{ color:#4b5563 !important; font-size:16px !important; font-weight:600 !important; margin:8px 0 !important; }

/* <progress> nativo (para progress_card) */
progress{
  appearance:none; -webkit-appearance:none; width:100%; height:10px; border:none; background:transparent;
}
progress::-webkit-progress-bar{ background:#e5e7eb; border-radius:6px; }
progress::-webkit-progress-value{ background:linear-gradient(90deg, var(--vf-accent), var(--vf-accent-dark)); border-radius:6px; }
progress::-moz-progress-bar{ background:linear-gradient(90deg, var(--vf-accent), var(--vf-accent-dark)); border-radius:6px; }

/* Estado “generating” */
.generating{
  border:2px solid var(--vf-accent) !important;
  background:linear-gradient(135deg, #f0f5f3 0%, #e5ede9 100%) !important;
  animation:pulse-border 2s infinite !important;
}
@keyframes pulse-border{
  0%,100%{ border-color:var(--vf-accent); box-shadow:0 0 0 0 rgba(107,142,127,.4); }
  50%{ border-color:#8fa89e; box-shadow:0 0 0 4px rgba(107,142,127,.1); }
}

/* ========================================
   INDICADORES Y MISC
   ======================================== */
#status-indicator{
  background:linear-gradient(135deg, var(--vf-accent) 0%, #5a7a6b 100%) !important;
  color:#fff !important; padding:20px !important; border-radius:12px !important; text-align:center !important;
  font-size:18px !important; font-weight:700 !important; margin:20px 0 !important; box-shadow:0 4px 8px rgba(107,142,127,.2) !important;
}
#status-indicator.success{ background:linear-gradient(135deg, #7fb069 0%, #6b9956 100%) !important; }
.timer-display{
  font-family:'Courier New',monospace !important; font-size:16px !important; color:var(--vf-ink) !important;
  padding:10px !important; background:var(--vf-surface) !important; border-radius:6px !important; text-align:center !important;
}

/* ========================================
   UTILIDADES
   ======================================== */
.loading-text{ color:var(--vf-muted) !important; font-style:normal !important; font-size:18px !important; }
hr{ border:none !important; border-top:2px solid #e5e7eb !important; margin:30px 0 !important; }
.group{ background:#fff !important; border-radius:10px !important; padding:20px !important; margin:10px 0 !important; }
input[type="checkbox"]+label, input[type="radio"]+label{ font-size:20px !important; margin-left:8px !important; }
.results-container{ padding:20px !important; background:#fafafa !important; border-radius:10px !important; margin:10px 0 !important; }

/* Progress-card con contraste alto */
.vf-progress-card{ color:var(--vf-ink) !important; }
.vf-progress-card .vf-title{ font-weight:600; letter-spacing:.2px; margin-bottom:6px; color:var(--vf-ink) !important; }
.vf-progress-card .vf-note{ margin-top:6px; font-size:16px; color:var(--vf-ink) !important; opacity:.9 !important; }
.vf-progress{ width:100%; height:10px; background:transparent; appearance:none; -webkit-appearance:none; }
.vf-progress::-webkit-progress-bar{ background:#e7eeea !important; border-radius:6px; }
.vf-progress::-webkit-progress-value{ background:linear-gradient(90deg, var(--vf-accent), var(--vf-accent-dark)) !important; border-radius:6px; }
.vf-progress::-moz-progress-bar{ background:linear-gradient(90deg, var(--vf-accent), var(--vf-accent-dark)) !important; border-radius:6px; }

/* Quita panel gris SOLO al H2 del Step 1 (Group .input-section) */
.input-section .gr-box:has(.gr-markdown){
  background:transparent !important; border:none !important; box-shadow:none !important; padding:0 !important;
}
.input-section .gr-markdown h2, .input-section .gr-markdown h3{ margin-top:0 !important; color:var(--vf-ink) !important; }

/* Quitar pill azul de labels de inputs (Soft theme) */
.gradio-container label,
.gradio-container label span,
.gradio-container .label,
.gradio-container .label span,
.gradio-container .wrap > label,
.gradio-container .block .wrap > label {
  background: transparent !important;
  box-shadow: none !important;
  border: none !important;
}

/* Ajuste fino del texto del label */
.gradio-container label span,
.gradio-container .label span {
  padding: 0 !important;
  border-radius: 0 !important;
  color: var(--vf-ink) !important;
  font-weight: 700 !important;
  letter-spacing: .2px !important; /* mejora nitidez en Windows */
}

/* --- Quitar la barra gris del Step 1 (Group: .input-section) --- */

/* Aplana cualquier caja/panel que Gradio mete dentro del Group */
.input-section .gr-box,
.input-section .gr-panel,
.input-section .gr-box > .form,
.input-section .gr-box > .container,
.input-section .prose {
  background: transparent !important;
  border: none !important;
  box-shadow: none !important;
}

/* El bloque específico que contiene el Markdown del título */
.input-section .gr-box:has(.gr-markdown){
  background: transparent !important;
  border: none !important;
  box-shadow: none !important;
  padding: 0 !important;
}

/* Opcional: un separador sutil debajo del H2 para que no se vea “flotando” */
.input-section .gr-markdown h2{
  margin: 0 0 8px !important;
  padding-bottom: 8px !important;
  border-bottom: 1px solid #e5e7eb !important;
}

/* Evita “píldoras” o fondos raros en labels dentro del Step 1 */
.input-section label,
.input-section label span {
  background: transparent !important;
  box-shadow: none !important;
  border: none !important;
  padding: 0 !important;
  color: var(--vf-ink) !important;
}


/* Tarjeta blanca suave para el área de Step 1 (opcional) */
.input-section{
  background: #fff !important;
  border: 1px solid #eef2f0 !important;
  border-radius: 12px !important;
  padding: 16px !important;
}

/* --- Alto contraste para Markdown: evita itálica gris en .prose --- */
.gradio-container .prose em,
.gradio-container .prose i {
  font-style: normal !important;
  color: var(--vf-ink) !important;
  opacity: 1 !important;
}
.gradio-container .prose strong { color: var(--vf-ink) !important; }

/* --- Progress plano (sin degradado) y más visible --- */
:root{
  --vf-progress-bg:#e1e7e4;      /* pista */
  --vf-progress-fill:#5f7f72;    /* relleno sólido */
}

.vf-progress{
  width:100%; height:10px;
  appearance:none; -webkit-appearance:none;
  background:transparent; border:none;
}
.vf-progress::-webkit-progress-bar{
  background: var(--vf-progress-bg) !important;
  border-radius: 6px;
}
.vf-progress::-webkit-progress-value{
  background: var(--vf-progress-fill) !important;   /* <- sólido */
  border-radius: 6px;
}
.vf-progress::-moz-progress-bar{
  background: var(--vf-progress-fill) !important;   /* <- sólido */
  border-radius: 6px;
}

/* Por si algún progress sin clase aparece, aplica el mismo estilo */
progress{
  appearance:none; -webkit-appearance:none;
  width:100%; height:10px; background:transparent; border:none;
}
progress::-webkit-progress-bar{ background: var(--vf-progress-bg); border-radius:6px; }
progress::-webkit-progress-value{ background: var(--vf-progress-fill); border-radius:6px; }
progress::-moz-progress-bar{ background: var(--vf-progress-fill); border-radius:6px; }

/* Títulos/nota del card bien nítidos */
.vf-progress-card .vf-title{
  font-weight:600; letter-spacing:.2px; margin-bottom:6px; color:var(--vf-ink) !important;
}
.vf-progress-card .vf-note{
  margin-top:6px; font-size:16px; color:var(--vf-ink) !important; opacity:.92 !important;
}

/* El panel gris viene del .gr-box que envuelve el Markdown dentro del Group.
   Lo aplanamos SOLO cuando contiene #step1_title */
.input-section .gr-box:has(#step1_title) {
  background: transparent !important;
  border: none !important;
  box-shadow: none !important;
  padding: 0 !important;
}
/* Opcional: separador sutil bajo el H2 */
#step1_title .prose h2 {
  margin: 0 0 8px !important;
  padding-bottom: 8px !important;
  border-bottom: 1px solid #e5e7eb !important;
}

/* Fuerza alto contraste SOLO en los outputs donde metes el progress_card */
#basic_answer .vf-progress-card,
#advanced_answer .vf-progress-card,
#basic_answer .vf-progress-card *,
#advanced_answer .vf-progress-card * {
  color: var(--vf-ink) !important;
  opacity: 1 !important;
  font-style: normal !important;
  text-shadow: none !important;
}

/* Barra de progreso sólida y visible */
:root{
  --vf-progress-bg: #d9e3df;   /* pista */
  --vf-progress-fill: #3f5f52; /* relleno */
}
#basic_answer .vf-progress,
#advanced_answer .vf-progress {
  appearance: none; -webkit-appearance: none;
  width: 100%; height: 10px; border: none; background: transparent;
  opacity: 1 !important; filter: none !important;
}
#basic_answer .vf-progress::-webkit-progress-bar,
#advanced_answer .vf-progress::-webkit-progress-bar { 
  background: var(--vf-progress-bg) !important; border-radius: 6px; 
}
#basic_answer .vf-progress::-webkit-progress-value,
#advanced_answer .vf-progress::-webkit-progress-value { 
  background: var(--vf-progress-fill) !important; border-radius: 6px; 
}
#basic_answer .vf-progress::-moz-progress-bar,
#advanced_answer .vf-progress::-moz-progress-bar {
  background: var(--vf-progress-fill) !important; border-radius: 6px;
}

/* === Quitar fade de Gradio en outputs mientras están "pending" === */
#basic_answer .html-container.pending,
#advanced_answer .html-container.pending,
#basic_answer .pending,
#advanced_answer .pending {
  opacity: 1 !important;
  filter: none !important;
}

/* Asegura que el wrapper de markdown tampoco reduzca contraste */
#basic_answer .prose,
#advanced_answer .prose {
  opacity: 1 !important;
  color: var(--vf-ink) !important;
}

/* Por si el progress queda afectado por herencia/opacidad */
#basic_answer .vf-progress,
#advanced_answer .vf-progress {
  opacity: 1 !important;
  filter: none !important;
}

/* ==== STEP 1: elimina la franja gris del wrapper .styler y del block del título ==== */

/* Apaga los “gaps” grises solo en el styler que contiene #step1_title */
.styler:has(#step1_title) {
  --layout-gap: 0px !important;
  --form-gap-width: 0px !important;
}

/* Aplana cualquier fondo/sombra que el theme ponga alrededor del título y sus contenedores */
#step1_title.block,
#step1_title.block .svelte-vuh1yp,
#step1_title [data-testid="markdown"],
#step1_title [data-testid="markdown"] .prose,
#step1_title [data-testid="markdown"] .md {
  background: transparent !important;
  border: none !important;
  box-shadow: none !important;
  padding: 0 !important;
  margin: 0 !important;
}

/* El .form de Step 1 (el área del textarea) también sin fondo */
.styler:has(#step1_title) .form {
  background: transparent !important;
  border: none !important;
  box-shadow: none !important;
}

/* Título con separador sutil (opcional) */
#step1_title .prose h2 {
  margin: 0 0 8px !important;
  padding-bottom: 8px !important;
  border-bottom: 1px solid #e5e7eb !important;
}

/* ==== PROGRESS CARD: mata el fade del wrapper “pending” y sube contraste ==== */

/* Gradio desvanece outputs con .pending: quítalo donde pintas el progress_card */
#basic_answer .html-container.pending,
#advanced_answer .html-container.pending,
#basic_answer .pending,
#advanced_answer .pending {
  opacity: 1 !important;
  filter: none !important;
}

/* Evita que .prose herede opacidad baja/itálica del theme */
#basic_answer .prose, #advanced_answer .prose {
  opacity: 1 !important;
  color: var(--vf-ink) !important;
  font-style: normal !important;
  text-shadow: none !important;
}

/* Fuerza contraste dentro del card */
#basic_answer .vf-progress-card,
#advanced_answer .vf-progress-card,
#basic_answer .vf-progress-card * ,
#advanced_answer .vf-progress-card * {
  color: var(--vf-ink) !important;
  opacity: 1 !important;
  font-style: normal !important;
}

/* Barra sólida (sin degradado), más oscura para que se note bien */
:root{
  --vf-progress-bg: #cbd7d2;   /* pista un poco más marcada */
  --vf-progress-fill: #2f4a41; /* relleno sólido oscuro */
}
#basic_answer .vf-progress, #advanced_answer .vf-progress {
  appearance: none; -webkit-appearance: none;
  width: 100%; height: 10px; border: none; background: transparent;
}
#basic_answer .vf-progress::-webkit-progress-bar,
#advanced_answer .vf-progress::-webkit-progress-bar { 
  background: var(--vf-progress-bg) !important; border-radius: 6px; 
}
#basic_answer .vf-progress::-webkit-progress-value,
#advanced_answer .vf-progress::-webkit-progress-value { 
  background: var(--vf-progress-fill) !important; border-radius: 6px; 
}
#basic_answer .vf-progress::-moz-progress-bar,
#advanced_answer .vf-progress::-moz-progress-bar {
  background: var(--vf-progress-fill) !important; border-radius: 6px;
}

/* === STEP 1: mata la franja gris del bloque del título === */

/* 1) Anula cualquier fondo/borde/sombra del bloque #step1_title */
#step1_title.block {
  /* variables que usa el Soft theme para pintar paneles */
  --block-background-fill: transparent !important;
  --panel-background-fill: transparent !important;
  --section-background-fill: transparent !important;
  --block-label-background-fill: transparent !important;
  --block-title-background-fill: transparent !important;
  --block-border-width: 0px !important;

  background: transparent !important;
  background-image: none !important;
  border: none !important;
  box-shadow: none !important;
  padding: 0 !important;     /* el .padded agrega relleno */
  margin: 0 !important;
  overflow: visible !important; /* evita “tiras” de fondo por overflow */
}

/* 2) Por si el tema usa pseudo-elementos para el panel */
#step1_title.block::before,
#step1_title.block::after {
  content: none !important;
  display: none !important;
  background: transparent !important;
  box-shadow: none !important;
  border: none !important;
}

/* 3) Aplana wrappers internos que en tu DOM rodean al h2 */
#step1_title .svelte-vuh1yp,
#step1_title [data-testid="markdown"],
#step1_title [data-testid="markdown"] .prose,
#step1_title [data-testid="markdown"] .md {
  background: transparent !important;
  background-image: none !important;
  border: none !important;
  box-shadow: none !important;
  padding: 0 !important;
  margin: 0 !important;
}

/* 4) Si el gris venía del “gap” del layout, desactívalo en este bloque */
#step1_title.block[style*="--block-border-width"] {
  --block-border-width: 0px !important;
}
#step1_title.block {
  --layout-gap: 0px !important;
  --form-gap-width: 0px !important;
}

/* 5) La clase .padded puede dar fondo en algunos temas */
#step1_title.padded { background: transparent !important; }

/* 6) (Opcional) separador sutil bajo el H2 para no dejarlo “flotando” */
#step1_title .prose h2 {
  margin: 0 0 8px !important;
  padding-bottom: 8px !important;
  border-bottom: 1px solid #e5e7eb !important;
}

/* ==== 1) Mata el gap/fondo del contenedor que rodea al Step 1 ==== */
/* Pinta el contenedor en blanco y pone gap=0 para que no se vea una franja */
.styler:has(#step1_title) {
  background: #fff !important;
  gap: 0 !important;
  row-gap: 0 !important;
  column-gap: 0 !important;
  --layout-gap: 0 !important;
  --form-gap-width: 0 !important;
}
.styler:has(#step1_title)::before,
.styler:has(#step1_title)::after {
  content: none !important;
  background: #fff !important;
  box-shadow: none !important;
  border: none !important;
}

/* ==== 2) Aplana el panel del propio bloque del título ==== */
#step1_title {
  /* Si el theme usa variables para fondos/bordes del bloque, anúlalas aquí */
  --block-background-fill: transparent !important;
  --panel-background-fill: transparent !important;
  --section-background-fill: transparent !important;
  --block-label-background-fill: transparent !important;
  --block-title-background-fill: transparent !important;
  --block-border-width: 0px !important;

  background: #fff !important;        /* el bloque en sí, blanco sólido */
  border: none !important;
  box-shadow: none !important;
  padding: 0 !important;               /* .padded suele meter relleno */
  margin: 0 !important;
  overflow: visible !important;
}
#step1_title::before,
#step1_title::after {
  content: none !important;
  background: transparent !important;
  box-shadow: none !important;
  border: none !important;
}

/* Wrappers internos del markdown del H2: sin fondo alguno */
#step1_title .svelte-vuh1yp,
#step1_title [data-testid="markdown"],
#step1_title [data-testid="markdown"] .prose,
#step1_title [data-testid="markdown"] .md {
  background: transparent !important;
  background-image: none !important;
  border: none !important;
  box-shadow: none !important;
  padding: 0 !important;
  margin: 0 !important;
}

/* (Opcional) separador sutil bajo el H2 */
#step1_title .prose h2{
  margin: 0 0 8px !important;
  padding-bottom: 8px !important;
  border-bottom: 1px solid #e5e7eb !important;
}





"""

In [28]:
def create_gradio_interface(rag_system: Dict[str, Any]) -> gr.Blocks:
    """
    UI con dos fases encadenadas: rápida -> avanzada.
    Progress bar visible pero no intrusivo, accordions colapsables.
    """
    # Tema/CSS con fallback
    try:
        theme = philatelic_theme  # type: ignore[name-defined]
    except NameError:
        theme = gr.themes.Soft()

    try:
        css_styles = css
    except NameError:
        css_styles = """
        .progress-bar {
            background: #3b82f6 !important;
            height: 4px !important;
        }
        .generating {
            border: 2px solid #3b82f6 !important;
            background: #f0f9ff !important;
        }
        """

    # Ejemplos
    def set_example_1(): return "Tell me about the Costa Rica 1907 inverted centers"
    def set_example_2(): return "Show me Costa Rican overprinted stamps with varieties or errors"
    def set_example_3(): return "1934 airmail definitive issue with catalog values C15-C27"
    def set_example_4(): return "Tell me about the first issue crack plate varieties"
    def set_example_5(): return "Costa Rican stamps with perforation errors or printing varieties"
    def set_example_6(): return "Research mirror impression stamps from Costa Rica"

    collection_name = rag_system.get("collection_name", "Oxcart")
    total_docs = rag_system.get("total_documents", 0)
    total_chunks = rag_system.get("total_chunks", 0)

    with gr.Blocks(title="Costa Rica Philatelic Research System", theme=theme, css=css_styles) as interface:
        gr.Markdown(
            "# 🔍 Costa Rica Philatelic Research System\n"
            "**The smart way to research stamps** - AI-driven exploration of Costa Rican philately"
        )

        # =================== STEP 1: Query ===================
        with gr.Group(elem_classes=["input-section"]):
            gr.Markdown("## 📝 Step 1: Enter Your Question", elem_id="step1_title")
            query_input = gr.Textbox(
                label="What would you like to know about Costa Rican stamps?",
                placeholder="Type your question here... (e.g., What are the Guanacaste vertical overprints?)",
                lines=3,
                elem_id="main-query-input"
            )
            search_btn = gr.Button("🔍 SEARCH NOW", variant="primary", size="lg", elem_id="main-search-btn")

            with gr.Accordion("💡 Need ideas? Click here for example questions", open=False):
                gr.Markdown("Click any example below to use it:")
                with gr.Row():
                    with gr.Column():
                        btn1 = gr.Button("📮 1907 inverted centers", variant="secondary", size="sm")
                        btn2 = gr.Button("📮 Overprinted varieties", variant="secondary", size="sm")
                        btn3 = gr.Button("📮 1934 airmail stamps", variant="secondary", size="sm")
                    with gr.Column():
                        btn4 = gr.Button("📮 First issue crack plate", variant="secondary", size="sm")
                        btn5 = gr.Button("📮 Perforation errors", variant="secondary", size="sm")
                        btn6 = gr.Button("📮 Mirror impression stamps", variant="secondary", size="sm")

        # =================== Advanced Options ===================
        with gr.Accordion("⚙️ Advanced Options (Optional - Click to expand)", open=False, elem_classes=["accordion"]):
            gr.Markdown("**These filters are completely optional.** Leave them empty to search all documents.")
            with gr.Row():
                with gr.Column():
                    year_start = gr.Textbox(label="Filter by Start Year", value="", placeholder="e.g., 1907 (optional)")
                    year_end = gr.Textbox(label="Filter by End Year", value="", placeholder="e.g., 1910 (optional)")
                with gr.Column():
                    scott_numbers = gr.Textbox(
                        label="Filter by Scott Numbers",
                        placeholder="e.g., 1,2,3 or C15 (optional)",
                        value=""
                    )
                    max_results = gr.Slider(
                        minimum=20, maximum=100, value=60, step=10,
                        label="Number of results to show",
                        elem_id="max-results-slider"
                    )

        # =================== Results Section ===================
        gr.Markdown("---")
        gr.Markdown("## 📊 Step 2: View Your Results")

        # =================== QUICK SEARCH RESULTS (ACCORDION) ===================
        with gr.Accordion("📄 Quick Search Results - Fast vector & keyword matching", open=True, elem_classes=["accordion"]):
            gr.Markdown("### 💬 AI Answer:")
            basic_answer_output = gr.HTML(
                value="<p class='loading-text'>Answer will appear here after search</p>",
                elem_id="basic_answer"
            )
            
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 📚 Source Documents:")
                    basic_search_output = gr.HTML(
                        "<p class='loading-text'>Documents will appear here</p>",
                        elem_id="basic_search"
                    )
                with gr.Column(scale=1):
                    gr.Markdown("### ℹ️ Search Details:")
                    basic_metadata_output = gr.HTML(
                        "<p class='loading-text'>Details will appear here</p>",
                        elem_id="basic_metadata"
                    )

        # =================== DEEP ANALYSIS RESULTS (ACCORDION) ===================  
        with gr.Accordion("🚀 Deep Analysis Results - Comprehensive AI-powered analysis", open=True, elem_classes=["accordion"]):
            gr.Markdown("### 💬 AI Answer:")
            advanced_answer_output = gr.HTML(
                value="<p class='loading-text'>Answer will appear here after search</p>",
                elem_id="advanced_answer"
            )
            
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 📚 Source Documents:")
                    advanced_search_output = gr.HTML(
                        "<p class='loading-text' style='color: #6b7280; font-style: italic;'>Documents will appear here</p>",
                        elem_id="advanced_search"
                    )
                with gr.Column(scale=1):
                    gr.Markdown("### ℹ️ Search Details:")
                    advanced_metadata_output = gr.HTML(
                        "<p class='loading-text' style='color: #6b7280; font-style: italic;'>Details will appear here</p>",
                        elem_id="advanced_metadata"
                    )

        # =================== Additional Information ===================
        with gr.Accordion("📈 Search Performance & Cost Details", open=False, elem_classes=["accordion"]):
            timing_display = gr.Textbox(
                label="Detailed Timing & Cost Analysis",
                lines=30,
                interactive=False,
                value="Performance metrics will appear here after search...",
                elem_id="timing-display"
            )

        with gr.Accordion("ℹ️ System Information", open=False, elem_classes=["accordion"]):
            gr.Markdown(
                f"**System Status:** ✅ Operational\n\n"
                f"• **Collection:** {collection_name}\n"
                f"• **Documents:** {total_docs:,}\n"
                f"• **Chunks:** {total_chunks:,}\n\n"
                f"**About the Search Methods:**\n"
                f"• **Quick Results**: Fast hybrid search, good for specific catalog numbers\n"
                f"• **Deep Analysis**: Thorough AI analysis, best for complex research questions\n\n"
                f"**Tips for best results:**\n"
                f"• Most searches work best without filters\n"
                f"• Results appear automatically after clicking SEARCH\n"
                f"• Both search methods run to give you the best possible answers"
            )

        # ---------- Estados para pasar info entre fases ----------
        st_ctx = gr.State({})

        # ---------- Eventos SIN progress bar de Gradio ----------
        # Click en botón de búsqueda
        search_btn.click(
            fn=run_basic_search,
            inputs=[query_input, year_start, year_end, scott_numbers, max_results, st_ctx],
            outputs=[
                basic_answer_output, basic_search_output, basic_metadata_output,
                advanced_answer_output, advanced_search_output, advanced_metadata_output,
                timing_display, st_ctx
            ],
            show_progress=False  # DESHABILITAMOS completamente el progress bar
        ).then(
            fn=run_advanced_search,
            inputs=[query_input, max_results, st_ctx],
            outputs=[advanced_answer_output, advanced_search_output, advanced_metadata_output, timing_display, st_ctx],
            show_progress=False  # DESHABILITAMOS completamente el progress bar
        )

        # Enter en textbox
        query_input.submit(
            fn=run_basic_search,
            inputs=[query_input, year_start, year_end, scott_numbers, max_results, st_ctx],
            outputs=[
                basic_answer_output, basic_search_output, basic_metadata_output,
                advanced_answer_output, advanced_search_output, advanced_metadata_output,
                timing_display, st_ctx
            ],
            show_progress=False  # DESHABILITAMOS completamente el progress bar
        ).then(
            fn=run_advanced_search,
            inputs=[query_input, max_results, st_ctx],
            outputs=[advanced_answer_output, advanced_search_output, advanced_metadata_output, timing_display, st_ctx],
            show_progress=False  # DESHABILITAMOS completamente el progress bar
        )

        # Botones de ejemplo
        btn1.click(fn=set_example_1, outputs=[query_input])
        btn2.click(fn=set_example_2, outputs=[query_input])
        btn3.click(fn=set_example_3, outputs=[query_input])
        btn4.click(fn=set_example_4, outputs=[query_input])
        btn5.click(fn=set_example_5, outputs=[query_input])
        btn6.click(fn=set_example_6, outputs=[query_input])

        # Habilitar cola para mejor manejo de eventos
        interface.queue()

    return interface

In [29]:
# ---- Enhanced launcher ----
if rag_system and rag_system.get("success", False):
    print("\\n" + "=" * 60)
    print("🚀 LAUNCHING COSTA RICA PHILATELIC RAG INTERFACE")
    print("=" * 60)

    gradio_app = create_gradio_interface(rag_system)

    GRADIO_PORT = int(os.getenv("GRADIO_PORT", 7860))
    GRADIO_SHARE = os.getenv("GRADIO_SHARE", "false").lower() == "true"

    print(f"⚙️ Port: {GRADIO_PORT}")
    print(f"🌍 Public URL: {'⚠️ Attempting...' if GRADIO_SHARE else '❌ Disabled (more secure)'}")
    
    try:
        print("🔄 Starting Gradio server...")
        
        if GRADIO_SHARE:
            print("⏳ Attempting to create public tunnel...")
            try:
                demo = gradio_app.launch(
                    server_port=GRADIO_PORT,
                    share=True,
                    inbrowser=False,
                    show_error=True,
                    prevent_thread_lock=False,
                    quiet=False,
                )
                
                print("\\n🎉 SUCCESS! Public tunnel created")
                print(f"🌐 AVAILABLE URLS:")
                print(f"   📱 Local: http://localhost:{GRADIO_PORT}")
                
                if hasattr(demo, 'share_url') and demo.share_url:
                    print(f"   🌍 Public: {demo.share_url}")
                    print(f"\\n🔗 **PUBLIC URL:** {demo.share_url}")
                else:
                    print(f"   🌍 Public: Check Gradio output above ☝️")
                
            except Exception as share_error:
                print(f"⚠️ Error creating public tunnel: {share_error}")
                print("🔄 Switching to local mode only...")
                
                demo = gradio_app.launch(
                    server_port=GRADIO_PORT,
                    share=False,
                    inbrowser=True,
                    show_error=True,
                    prevent_thread_lock=False
                )
                
                print(f"\\n✅ LOCAL SERVER OPERATIONAL:")
                print(f"   📱 Local URL: http://localhost:{GRADIO_PORT}")
                print(f"   ⚠️ Public URL: Not available (tunnel error)")
                
        else:
            demo = gradio_app.launch(
                server_port=GRADIO_PORT,
                share=False,
                inbrowser=True,
                show_error=True,
                prevent_thread_lock=False
            )
            
            print(f"\\n✅ LOCAL SERVER OPERATIONAL:")
            print(f"   📱 Local URL: http://localhost:{GRADIO_PORT}")
            print(f"   💡 For public URL, set GRADIO_SHARE=true in .env")
        
        print(f"\\n📋 COSTA RICA PHILATELIC FEATURES:")
        print(f"   • Specialized Costa Rica stamp queries")
        print(f"   • Scott catalog number search")
        print(f"   • Variety and error detection")
        print(f"   • Dual search approaches for comprehensive results")
        print(f"   • Performance timing comparison")
        print(f"   • To stop: gr.close_all()")
        
        print(f"\\n{'='*60}")
        print(f"🇨🇷 COSTA RICA PHILATELIC RAG INTERFACE READY!")
        print(f"{'='*60}")
        
    except Exception as e:
        print(f"❌ Critical error launching Gradio: {e}")
        print("\\n🔧 SUGGESTED SOLUTIONS:")
        print("   1. Run: gr.close_all()")
        print("   2. Change port: GRADIO_PORT=7861 in .env")
        print("   3. Verify no other services on the port")
        print("   4. Restart the notebook")
        
else:
    print("\\n⚠️  Cannot create Gradio interface:")
    if not rag_system:
        print("   • RAG system not configured")
    else:
        print(f"   • RAG error: {rag_system.get('error', 'Unknown error')}")
    print("\\n🔧 To resolve:")
    print("   1. Verify Weaviate is running")
    print("   2. Configure OPENAI_API_KEY in .env") 
    print("   3. Run document indexing")
    print("   4. Restart this notebook")

🚀 LAUNCHING COSTA RICA PHILATELIC RAG INTERFACE
⚙️ Port: 7860
🌍 Public URL: ⚠️ Attempting...
🔄 Starting Gradio server...
⏳ Attempting to create public tunnel...


  from websockets.server import WebSocketServerProtocol


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://e5e8f81ca02757fa56.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


\n🎉 SUCCESS! Public tunnel created
🌐 AVAILABLE URLS:
   📱 Local: http://localhost:7860
   🌍 Public: Check Gradio output above ☝️
\n📋 COSTA RICA PHILATELIC FEATURES:
   • Specialized Costa Rica stamp queries
   • Scott catalog number search
   • Variety and error detection
   • Dual search approaches for comprehensive results
   • Performance timing comparison
   • To stop: gr.close_all()
🇨🇷 COSTA RICA PHILATELIC RAG INTERFACE READY!


In [30]:
#gr.close_all()

[DEBUG] No filters applied - searching all documents
