In [1]:
%load_ext autoreload
%autoreload 2

import logging
import ipywidgets as widgets
from IPython.display import display, clear_output
from PIL import Image, ImageOps, ImageDraw

# Import architecture modules
from src.config import settings
from src.indexer.curator import DataCurator
from src.indexer.vector_db import VectorIndex
from src.retriever.engine import SearchEngine

# Route INFO-and-above log records into the notebook output, prefixed with a
# timestamp and the severity level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# basicConfig() does nothing when the root logger already has handlers, so the
# root level is also set explicitly.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Report which device the project settings selected.
print(f"Architecture Loaded. Running on {settings.DEVICE}")

Architecture Loaded. Running on cpu


In [None]:
def run_indexing_pipeline():
    """
    Run the two indexing stages back to back.

    Stage 1 (curation): writes the metadata file for ``settings.IMAGES_DIR``
    via ``DataCurator.process_directory``; skipped when
    ``settings.METADATA_PATH`` already exists.
    Stage 2 (storage): feeds that metadata file to ``VectorIndex.build_index``.

    Pipeline as described by the stage banners:
    Raw Images -> Florence-2 Captions -> SigLIP Embeddings -> LanceDB

    Returns:
        None. Progress is reported with ``print``.
    """
    print("--- 1. Starting Feature Extraction (Florence-2) ---")
    # NOTE(review): the curator is constructed even when curation is skipped
    # below; if construction is expensive, consider deferring it -- confirm.
    curator = DataCurator()
    metadata_path = settings.METADATA_PATH
    if metadata_path.exists():
        # An existing metadata file acts as the cache for the captioning stage.
        print(f"Metadata found at {metadata_path}. Skipping curation.")
    else:
        curator.process_directory(settings.IMAGES_DIR, settings.METADATA_PATH)

    print("\n--- 2. Starting Vector Storage (SigLIP + LanceDB) ---")
    # Unlike stage 1, this stage runs on every invocation.
    indexer = VectorIndex()
    indexer.build_index(settings.METADATA_PATH)
    print("\nIndexing Complete. System ready for retrieval.")

# Run the pipeline: curation is skipped when the metadata file already exists,
# then the vector index is built from that metadata file.
run_indexing_pipeline()

In [2]:
def render_grid(results, target_height=300):
    """
    Visualization Helper: Stitches search results into a horizontal grid.

    Every hit is loaded from disk, scaled to ``target_height`` pixels high
    (aspect ratio preserved), framed with a dark border and pasted left to
    right onto a single white canvas with its score drawn underneath. The
    canvas is shown with ``display`` and a numbered text summary is printed
    below it.

    Args:
        results: Sequence of mapping-like hits. Each hit is read through the
            keys ``'path'``, ``'score'``, ``'filename'`` and ``'caption'``.
        target_height: Height in pixels of each image before the border is
            added.

    Returns:
        None. A falsy ``results`` or a set of hits where no file could be
        loaded only prints a notice.

    Notes:
        Hits whose image cannot be loaded are logged and left out of both the
        grid and the summary, so entry ``[n]`` of the summary always describes
        the n-th tile from the left.
    """
    border_px = 4        # frame added on every side by ImageOps.expand
    label_band_px = 30   # strip below the tiles that holds the score text
    caption_chars = 128  # captions longer than this are truncated in the summary

    if not results:
        print("❌ No matches found.")
        return

    shown = []  # (hit, framed image, score label) for every hit that loaded

    for res in results:
        try:
            # Image.open is lazy and keeps the file open; convert() yields an
            # independent in-memory image, so the handle can be closed here.
            with Image.open(res['path']) as src:
                img = src.convert("RGB")

            # Resize preserving aspect ratio (never below 1px wide)
            aspect_ratio = img.width / img.height
            new_width = max(1, int(target_height * aspect_ratio))
            img = img.resize((new_width, target_height), Image.Resampling.LANCZOS)

            # Add visual border
            img = ImageOps.expand(img, border=border_px, fill='#333333')
            label = f"Conf: {res['score']:.3f}"
        except Exception as e:
            # Best effort: a bad path or unreadable file is logged, not fatal
            logging.error("Could not load image %s: %s", res['filename'], e)
            continue
        shown.append((res, img, label))

    if not shown:
        print("⚠️ Images found in DB but files could not be loaded from disk.")
        return

    # The border makes every tile taller than target_height; size the canvas
    # and place the labels from the real tile height so the text sits below
    # the frame instead of on top of it.
    tile_height = target_height + 2 * border_px
    total_width = sum(img.width for _, img, _ in shown)
    grid_img = Image.new('RGB', (total_width, tile_height + label_band_px), color='white')
    draw = ImageDraw.Draw(grid_img)

    x_offset = 0
    for _, img, label in shown:
        grid_img.paste(img, (x_offset, 0))
        # Draw confidence score below image
        draw.text((x_offset + 5, tile_height + 5), label, fill="black")
        x_offset += img.width

    display(grid_img)

    # Print textual context details below the image
    print("\n🔍 Result Details:")
    for i, (res, _, _) in enumerate(shown, start=1):
        caption = res['caption']
        if len(caption) > caption_chars:
            # Ellipsis only when text was actually cut off
            caption = caption[:caption_chars] + "..."
        print(f"[{i}] {res['filename']} | {caption}")

In [None]:
from src.retriever.engine import SearchEngine
# Retrieval backend queried by the search handler.
engine = SearchEngine()

style = {'description_width': 'initial'}
search_box = widgets.Text(
    placeholder='E.g. "red dress" or "suit | office"',
    # NOTE(review): newer ipywidgets releases may show this markup literally
    # unless HTML descriptions are enabled -- confirm against the installed version.
    description='<b>Search:</b>',
    style=style,
    layout=widgets.Layout(width='60%'),
    # Sync `value` only on Enter / focus loss. With the default (True) a
    # `value` observer fires on every keystroke and would run one search per
    # typed character.
    continuous_update=False,
)

# Output area that search results are rendered into.
out = widgets.Output()

def _parse_query(query_str):
    """Split ``'text | filter'`` into ``(text, filter_or_None)``, trimming whitespace."""
    text_part, sep, filter_part = query_str.partition("|")
    text_query = text_part.strip()
    # Normalize filter; an empty or missing filter is reported as None so the
    # engine sees the same value as for a query without any '|'.
    context = filter_part.strip().lower() if sep else ""
    return text_query, (context or None)


def on_search_submit(change):
    """
    Observer for the search box: run a search for the new value and render it.

    The value may carry an optional filter after the first ``|``
    (``"suit | office"``). Text and filter are stripped; the filter is
    lower-cased. Blank input (empty, whitespace only, or just ``|``) is
    ignored without touching the output area.

    Args:
        change: Change notification whose ``new`` attribute holds the current
            text of the search box.

    Returns:
        None. Results are rendered into the ``out`` widget via ``render_grid``.
    """
    query_str = (change.new or "").strip()
    if not query_str:
        return

    text_query, context = _parse_query(query_str)
    if not text_query and context is None:
        # Nothing but separators/whitespace was entered.
        return

    with out:
        clear_output()

        print(f"🔎 Searching for: '{text_query}'" + (f" (Filter: '{context}')" if context else ""))

        results = engine.search(text_query, context_filter=context, k=5)
        render_grid(results)


# Invoke the handler whenever the text box's `value` trait changes.
search_box.observe(handler=on_search_submit, names='value')

# Show a short usage hint, then the input box with the results area below it.
print("Retriever Ready. Enter query below (use '|' for filters).")
display(search_box, out)


✅ Retriever Ready. Enter query below (use '|' for filters).


Text(value='', description='<b>Search:</b>', layout=Layout(width='60%'), placeholder='E.g. "red dress" or "sui…

Output()