## Imports y configuración

In [None]:
import asyncio
import pandas as pd
from src.core.domains.products.catalog_service import CatalogRetriever, CATEGORY_DESCRIPTIONS
from src.chains.assistant.schemas import ConversationSlots, UserIntent

# Create the CatalogRetriever instance shared by every cell below.
# NOTE(review): the argument is the same string a later cell uses as the
# Qdrant collection name — presumably it selects that collection; confirm
# against CatalogRetriever's constructor.
catalog_retriever = CatalogRetriever("repuesto_motos_mundibot")
print("‚úÖ CatalogRetriever inicializado correctamente")

## Validar conexión y obtener categorías reales


In [None]:
# Probar conexi√≥n b√°sica
try:
    brands = await catalog_retriever.get_unique_brands()
    print(f"‚úÖ Conexi√≥n exitosa - {len(brands)} marcas encontradas")
    
    # Obtener categor√≠as reales de la colecci√≥n
    categories_with_desc = await catalog_retriever.get_unique_categories()
    real_categories = [cat['name'] for cat in categories_with_desc]
    
    print(f"‚úÖ {len(real_categories)} categor√≠as encontradas en la colecci√≥n")
    print("\nüìÇ Categor√≠as reales en la colecci√≥n:")
    for i, cat in enumerate(sorted(real_categories), 1):
        print(f"  {i:2d}. {cat}")
        
except Exception as e:
    print(f"‚ùå Error de conexi√≥n: {e}")

## Comparar categorías reales vs descripciones semánticas

In [None]:
# Coverage analysis: category names found in the collection versus the keys
# of CATEGORY_DESCRIPTIONS. Reads `real_categories` from the previous cell.
semantic_categories = set(CATEGORY_DESCRIPTIONS.keys())
real_categories_set = set(real_categories)

print("üîç AN√ÅLISIS DE COBERTURA DE CATEGOR√çAS")
print("=" * 60)

# Categories present in the collection but lacking a semantic description.
missing_descriptions = real_categories_set - semantic_categories
if missing_descriptions:
    print(f"\n‚ùå FALTAN DESCRIPCIONES ({len(missing_descriptions)}):")
    for cat in sorted(missing_descriptions):
        print(f"  ‚Ä¢ {cat}")
else:
    print("\n‚úÖ Todas las categor√≠as reales tienen descripci√≥n sem√°ntica")

# Descriptions whose category does not exist in the collection.
extra_descriptions = semantic_categories - real_categories_set
if extra_descriptions:
    print(f"\n‚ö†Ô∏è  DESCRIPCIONES EXTRA ({len(extra_descriptions)}):")
    for cat in sorted(extra_descriptions):
        print(f"  ‚Ä¢ {cat}")
else:
    print("\n‚úÖ No hay descripciones sem√°nticas extra")

# Categories present on both sides.
matching_categories = real_categories_set & semantic_categories
print(f"\n‚úÖ CATEGOR√çAS COINCIDENTES ({len(matching_categories)}):")
for cat in sorted(matching_categories):
    print(f"  ‚Ä¢ {cat}")

# Summary. The percentage is guarded: with no categories in the collection
# the original expression divided by zero and aborted the cell.
if real_categories_set:
    coverage_pct = len(matching_categories) / len(real_categories_set) * 100
else:
    coverage_pct = 0.0

print("\nüìä RESUMEN:")
print(f"  ‚Ä¢ Categor√≠as en colecci√≥n: {len(real_categories_set)}")
print(f"  ‚Ä¢ Descripciones sem√°nticas: {len(semantic_categories)}")
print(f"  ‚Ä¢ Coincidencias: {len(matching_categories)}")
print(f"  ‚Ä¢ Cobertura: {coverage_pct:.1f}%")

## Validar funcionalidad completa del CatalogRetriever

In [None]:
print("üß™ PRUEBAS DE FUNCIONALIDAD DEL CATALOGRETRIEVER")
print("=" * 60)

# Test 1: Marcas √∫nicas
try:
    brands = await catalog_retriever.get_unique_brands()
    print(f"‚úÖ get_unique_brands(): {len(brands)} marcas")
    print(f"   Primeras 5: {brands[:5]}")
except Exception as e:
    print(f"‚ùå get_unique_brands() fall√≥: {e}")

# Test 2: Modelos por marca (YAMAHA)
try:
    yamaha_models = await catalog_retriever.get_models_by_brand("YAMAHA")
    print(f"‚úÖ get_models_by_brand('YAMAHA'): {len(yamaha_models)} modelos")
    print(f"   Primeros 5: {yamaha_models[:5]}")
except Exception as e:
    print(f"‚ùå get_models_by_brand('YAMAHA') fall√≥: {e}")

# Test 3: Categor√≠as con descripciones
try:
    categories = await catalog_retriever.get_unique_categories()
    print(f"‚úÖ get_unique_categories(): {len(categories)} categor√≠as con descripciones")
    print(f"   Primera categor√≠a: {categories[0]['name']}")
    print(f"   Descripci√≥n: {categories[0]['description'][:80]}...")
except Exception as e:
    print(f"‚ùå get_unique_categories() fall√≥: {e}")

# Test 4: Subcategor√≠as por categor√≠a
try:
    frenos_subcats = await catalog_retriever.get_subcategories_by_category("FRENOS")
    print(f"‚úÖ get_subcategories_by_category('FRENOS'): {len(frenos_subcats)} subcategor√≠as")
    print(f"   Primeras 3: {frenos_subcats[:3]}")
except Exception as e:
    print(f"‚ùå get_subcategories_by_category('FRENOS') fall√≥: {e}")

# Test 5: Obtener todos los cat√°logos en paralelo
try:
    import time
    start_time = time.time()
    all_catalogs = await catalog_retriever.get_all_catalogs()
    end_time = time.time()
    
    print(f"‚úÖ get_all_catalogs(): {(end_time-start_time)*1000:.1f}ms")
    print(f"   Total marcas: {all_catalogs['total_brands']}")
    print(f"   Total categor√≠as: {all_catalogs['total_categories']}")
except Exception as e:
    print(f"‚ùå get_all_catalogs() fall√≥: {e}")

## Validar descripciones semánticas específicas

In [None]:
print("üìñ VALIDACI√ìN DE DESCRIPCIONES SEM√ÅNTICAS")
print("=" * 60)

# Hand-picked categories whose descriptions are printed in full below.
categorias_importantes = [
    "ELECTRICO / ENCENDIDO",
    "FRENOS",
    "MOTOR INTERNO",
    "TRANSMISION SECUNDARIA",
    "LLANTA",
    "CARROCERIA / PLASTICOS",
]

for category_name in categorias_importantes:
    # Guard clause: nothing more to report when no description exists.
    if category_name not in CATEGORY_DESCRIPTIONS:
        print(f"\n‚ùå **{category_name}** - SIN DESCRIPCI√ìN SEM√ÅNTICA")
        continue

    description = CATEGORY_DESCRIPTIONS[category_name]
    print(f"\nüîß **{category_name}**")
    print(f"   Longitud: {len(description)} caracteres")
    print(f"   Descripci√≥n: {description}")

    # Cross-check against the names loaded from the real collection
    # (`real_categories_set` is built in an earlier cell).
    in_collection = category_name in real_categories_set
    print(
        "   ‚úÖ Existe en la colecci√≥n"
        if in_collection
        else "   ‚ùå NO existe en la colecci√≥n"
    )

print(f"\nüìä Total descripciones validadas: {len(categorias_importantes)}")

## Test de performance y cache

In [None]:
print("‚ö° PRUEBAS DE PERFORMANCE Y CACHE")
print("=" * 60)

import time

# Test sin cache
catalog_retriever.clear_cache()
print("üóëÔ∏è Cache limpiado")

# Primera ejecuci√≥n (sin cache)
start_time = time.time()
brands_1 = await catalog_retriever.get_unique_brands(use_cache=True)
categories_1 = await catalog_retriever.get_unique_categories(use_cache=True)
time_1 = (time.time() - start_time) * 1000

print(f"‚è±Ô∏è  Primera ejecuci√≥n (sin cache): {time_1:.1f}ms")
print(f"   Marcas: {len(brands_1)}, Categor√≠as: {len(categories_1)}")

# Segunda ejecuci√≥n (con cache)
start_time = time.time()
brands_2 = await catalog_retriever.get_unique_brands(use_cache=True)
categories_2 = await catalog_retriever.get_unique_categories(use_cache=True)
time_2 = (time.time() - start_time) * 1000

print(f"‚è±Ô∏è  Segunda ejecuci√≥n (con cache): {time_2:.1f}ms")
print(f"   Marcas: {len(brands_2)}, Categor√≠as: {len(categories_2)}")

# Verificar que los resultados son id√©nticos
brands_match = brands_1 == brands_2
categories_match = len(categories_1) == len(categories_2)

print(f"\n‚úÖ Resultados id√©nticos: Marcas={brands_match}, Categor√≠as={categories_match}")
print(f"üöÄ Mejora de performance: {time_1/time_2:.1f}x m√°s r√°pido con cache")

In [None]:
# Celda 1: Validar configuraci√≥n de la colecci√≥n
from qdrant_client import QdrantClient
from src.core.domains.products.schemas import PRODUCTS_SCHEMA
import json

# Connect to the Qdrant instance on localhost:6333 and validate the
# collection: vector configuration, indexed payload fields compared with
# PRODUCTS_SCHEMA, and a sample of stored points.
client = QdrantClient("localhost", port=6333)
collection_name = "repuesto_motos_mundibot"

print("üîç VALIDANDO CONFIGURACI√ìN DE LA COLECCI√ìN")
print("=" * 60)

try:
    collection_info = client.get_collection(collection_name)

    print(f"üìä Informaci√≥n de la colecci√≥n '{collection_name}':")
    print(f"  ‚Ä¢ Estado: {collection_info.status}")
    print(f"  ‚Ä¢ Total de puntos: {collection_info.points_count:,}")
    print(f"  ‚Ä¢ Vectores configurados: {len(collection_info.config.params.vectors)}")

    # NOTE(review): .items() assumes the collection uses named vectors
    # (a dict of name -> params) — confirm for this collection.
    print(f"\nüéØ Configuraci√≥n de Vectores:")
    for vector_name, vector_config in collection_info.config.params.vectors.items():
        print(f"  ‚Ä¢ {vector_name}:")
        print(f"    - Tama√±o: {vector_config.size}")
        print(f"    - Distancia: {vector_config.distance}")

    # The payload index schema is a field of CollectionInfo itself, not of
    # its `.config`; reading it from `.config` raised AttributeError and sent
    # the whole cell to the except branch.
    payload_schema = collection_info.payload_schema
    indexed_fields = payload_schema or {}

    print(f"\nüìã Payload Schema Configurado:")
    if payload_schema:
        print(f"  ‚Ä¢ Total campos indexados: {len(payload_schema)}")

        # Compare indexed fields with the ones PRODUCTS_SCHEMA expects.
        expected_fields = set(PRODUCTS_SCHEMA.keys())
        configured_fields = set(payload_schema.keys())

        missing_fields = expected_fields - configured_fields
        extra_fields = configured_fields - expected_fields
        matching_fields = expected_fields & configured_fields

        print(f"  ‚Ä¢ Campos coincidentes: {len(matching_fields)}")
        print(f"  ‚Ä¢ Campos faltantes: {len(missing_fields)}")
        print(f"  ‚Ä¢ Campos extra: {len(extra_fields)}")

        if missing_fields:
            print(f"\n‚ùå CAMPOS FALTANTES:")
            for field in sorted(missing_fields):
                expected_type = PRODUCTS_SCHEMA[field]
                print(f"    ‚Ä¢ {field} ({expected_type})")

        if extra_fields:
            print(f"\n‚ö†Ô∏è  CAMPOS EXTRA (no en PRODUCTS_SCHEMA):")
            for field in sorted(extra_fields):
                configured_type = payload_schema[field]
                print(f"    ‚Ä¢ {field} ({configured_type})")

        print(f"\n‚úÖ CAMPOS CORRECTAMENTE CONFIGURADOS:")
        for field in sorted(matching_fields):
            expected_type = PRODUCTS_SCHEMA[field]
            configured_type = payload_schema[field]
            # NOTE(review): this compares string representations; whether
            # they can ever be equal depends on what PRODUCTS_SCHEMA stores —
            # verify against its definition.
            match_status = "‚úÖ" if str(expected_type) == str(configured_type) else "‚ö†Ô∏è"
            print(f"    {match_status} {field}: {configured_type} (esperado: {expected_type})")

    else:
        print("  ‚ùå No hay payload schema configurado")

    # Validate against a small sample of stored points.
    print(f"\nüîç VALIDANDO CON MUESTRA DE DATOS:")

    # The scroll result is indexed below as (points, ...); only the first
    # point is inspected.
    sample_points = client.scroll(
        collection_name=collection_name,
        limit=3,
        with_payload=True,
        with_vectors=False
    )

    if sample_points[0]:
        sample_point = sample_points[0][0]
        actual_payload = sample_point.payload

        print(f"  ‚Ä¢ Campos en datos reales: {len(actual_payload)}")
        print(f"  ‚Ä¢ Muestra de campos:")

        # Show at most the first 10 payload fields and whether each is indexed.
        for field, value in list(actual_payload.items())[:10]:
            value_type = type(value).__name__
            is_indexed = field in indexed_fields
            index_status = "üîç" if is_indexed else "‚ùå"
            print(f"    {index_status} {field}: {value_type} = {str(value)[:50]}...")

        # Fields checked individually for presence in the data and indexing.
        critical_fields = ["marca", "categoria", "precio", "es_llanta", "marcas_lista"]
        print(f"\nüéØ CAMPOS CR√çTICOS:")
        for field in critical_fields:
            if field in actual_payload:
                value = actual_payload[field]
                is_indexed = field in indexed_fields
                status = "‚úÖ" if is_indexed else "‚ùå"
                print(f"    {status} {field}: {type(value).__name__} = {value}")
            else:
                print(f"    ‚ùå {field}: FALTANTE en datos")

    else:
        print("  ‚ùå No hay datos en la colecci√≥n para validar")

except Exception as e:
    # Diagnostic cell: report any failure instead of stopping the notebook.
    print(f"‚ùå Error accediendo a la colecci√≥n: {e}")

In [None]:
# Cell 2: detailed inspection of the collection configuration. Relies on
# `client`, `collection_name` and `json` defined by the previous cell.
print("\nüîß INSPECCI√ìN DETALLADA DE CONFIGURACI√ìN")
print("=" * 60)


def _to_jsonable(value):
    """Fallback for json.dumps: convert objects json cannot encode natively.

    Objects exposing ``model_dump()`` or ``dict()`` are converted through
    that method; anything else is rendered with ``str()``.
    """
    for method_name in ("model_dump", "dict"):
        dump = getattr(value, method_name, None)
        if callable(dump):
            return dump()
    return str(value)


try:
    collection_info = client.get_collection(collection_name)

    # Plain-dict view of the configuration for pretty-printing.
    config_dict = {
        "collection_name": collection_name,
        "status": collection_info.status,
        "points_count": collection_info.points_count,
        "config": {
            "vectors": {},
            "payload_schema": {},
            "optimizer_config": collection_info.config.optimizer_config,
            "wal_config": collection_info.config.wal_config,
            "hnsw_config": collection_info.config.hnsw_config
        }
    }

    # Vector configuration, one entry per vector name.
    for vector_name, vector_config in collection_info.config.params.vectors.items():
        config_dict["config"]["vectors"][vector_name] = {
            "size": vector_config.size,
            "distance": str(vector_config.distance),
            "on_disk": getattr(vector_config, 'on_disk', None)
        }

    # The payload index schema is a field of CollectionInfo itself, not of
    # its `.config`; reading it from `.config` raised AttributeError.
    payload_schema = collection_info.payload_schema
    if payload_schema:
        for field_name, field_type in payload_schema.items():
            config_dict["config"]["payload_schema"][field_name] = str(field_type)

    # The optimizer/WAL/HNSW entries are client model objects, which
    # json.dumps cannot encode by itself; `default` converts them.
    print(json.dumps(config_dict, indent=2, ensure_ascii=False, default=_to_jsonable))

    # Usage statistics
    print(f"\nüìà ESTAD√çSTICAS DE USO:")
    print(f"  ‚Ä¢ Colecci√≥n: {collection_name}")
    print(f"  ‚Ä¢ Puntos totales: {collection_info.points_count:,}")
    print(f"  ‚Ä¢ Vectores por tipo:")

    for vector_name in config_dict["config"]["vectors"]:
        print(f"    - {vector_name}: {collection_info.points_count:,} vectores")

    print(f"  ‚Ä¢ Campos indexados: {len(config_dict['config']['payload_schema'])}")
    # NOTE(review): 1536 and 4 are hard-coded — presumably the dense embedding
    # size and bytes per float32 component; confirm against the vector
    # configuration printed above.
    print(f"  ‚Ä¢ Memoria estimada: ~{collection_info.points_count * 1536 * 4 / 1024 / 1024:.1f} MB (solo vectores densos)")

except Exception as e:
    # Diagnostic cell: report any failure instead of stopping the notebook.
    print(f"‚ùå Error en inspecci√≥n detallada: {e}")