In [1]:
import tkinter as tk
from tkinter import filedialog, messagebox
from PIL import Image, ImageTk, ImageOps
import os
import torch
import torch.nn.functional as F
import numpy as np
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer
from transformers import SiglipProcessor, SiglipModel
from sklearn.metrics.pairwise import cosine_similarity
from io import BytesIO
import requests
from dotenv import load_dotenv
from deep_translator import GoogleTranslator

load_dotenv()

# ----- Models -----
e5_model = SentenceTransformer("intfloat/e5-large-v2")
_siglip_name = "google/siglip-so400m-patch14-384"
_siglip_device = "cpu"
_siglip_processor = SiglipProcessor.from_pretrained(_siglip_name)
_siglip_model = SiglipModel.from_pretrained(_siglip_name).to(_siglip_device).eval()

# ----- Qdrant -----
COLLECTION_NAME = "products"
qdrant = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_API_KEY")
)

# ----- Utilities -----
def pad_to_square(img: Image.Image, fill_color=(255, 255, 255)) -> Image.Image:
    w, h = img.size
    size = max(w, h)
    return ImageOps.pad(img, (size, size), color=fill_color, centering=(0.5, 0.5))

def cosine_sim(a, b):
    return float(cosine_similarity([a], [b])[0][0])

def get_text_embedding(text):
    input_text = f"query: {text.strip()}"
    embedding = e5_model.encode(input_text, normalize_embeddings=True)
    return embedding.tolist()

def get_image_embedding(img: Image.Image):
    img = pad_to_square(img.convert("RGB"))
    inputs = _siglip_processor(images=img, return_tensors="pt").to(_siglip_device)
    with torch.no_grad():
        image_emb = _siglip_model.get_image_features(**inputs)
        image_emb = F.normalize(image_emb, p=2, dim=-1)
    return image_emb[0].cpu().tolist()

# ----- Qdrant Search -----
ALPHA = 0.8

def search_qdrant(query_vector, search_type):
    search_vector_name = {
        "text": "text_gen_desc",
        "image": "image_cropped"
    }[search_type]

    search_result = qdrant.search(
        collection_name=COLLECTION_NAME,
        query_vector=(search_vector_name, query_vector),
        limit=50,
        search_params={"hnsw_ef": 128},
        with_payload=True,
        with_vectors = True
    )

    reranked = []
    for item in search_result:
        vecs = item.vector if isinstance(item.vector, dict) else {search_vector_name: item.vector}

        if search_type == "text":
            v1 = vecs.get("text_gen_desc", [])
            v2 = vecs.get("text_raw_desc", [])
            
        else:
            v1 = vecs.get("image_cropped", [])
            v2 = vecs.get("image_original", [])
            

        sim1 = cosine_sim(query_vector, v1)
        sim2 = cosine_sim(query_vector, v2)

        item.score = ALPHA * sim1 + (1 - ALPHA) * sim2
        reranked.append(item)

    reranked.sort(key=lambda x: x.score, reverse=True)
    return reranked[:3]

# ----- GUI -----
# ----- Build UI (modern, threaded) -----
import tkinter.ttk as ttk
import threading, queue, webbrowser

# keep references to images to avoid GC
_img_cache = {}
_result_queue = queue.Queue()
_search_lock = threading.Lock()

def open_url(url: str):
    if url:
        webbrowser.open_new_tab(url)

def set_busy(is_busy: bool):
    if is_busy:
        search_btn_text.config(state="disabled")
        search_btn_image.config(state="disabled")
        text_entry.config(state="disabled")
        progress.grid(row=3, column=0, columnspan=3, pady=(6, 6), sticky="ew")
        progress.start(10)
        status_var.set("Searching…")
    else:
        search_btn_text.config(state="normal")
        search_btn_image.config(state="normal")
        text_entry.config(state="normal")
        progress.stop()
        progress.grid_forget()
        status_var.set("Ready")

def clear_results():
    for w in results_container.winfo_children():
        w.destroy()

def create_result_card(parent, title, img_url, product_url):
    card = ttk.Frame(parent, padding=8, style="Card.TFrame")
    card.grid_propagate(False)

    # Title
    ttl = ttk.Label(card, text=title or "No title", style="Title.TLabel", wraplength=220, justify="center")
    ttl.grid(row=0, column=0, sticky="ew", pady=(0, 6))

    # Image
    if img_url:
        try:
            r = requests.get(img_url, timeout=12)
            r.raise_for_status()
            pil_img = Image.open(BytesIO(r.content)).convert("RGB").resize((220, 220))
            tk_img = ImageTk.PhotoImage(pil_img)
            _img_cache[img_url] = tk_img  # prevent GC
            img_lbl = ttk.Label(card, image=tk_img)
            img_lbl.grid(row=1, column=0, pady=(0, 6))
        except Exception:
            ttk.Label(card, text="Image not available", style="Dim.TLabel").grid(row=1, column=0, pady=(0, 6))
    else:
        ttk.Label(card, text="Image not available", style="Dim.TLabel").grid(row=1, column=0, pady=(0, 6))

    # Clickable product URL
    if product_url:
        link = ttk.Label(card, text="View product ↗", style="Link.TLabel", cursor="hand2")
        link.bind("<Button-1>", lambda e, u=product_url: open_url(u))
        link.grid(row=2, column=0, pady=(0, 2))
        # small muted raw URL (optional)
        ttk.Label(card, text=product_url, style="Url.TLabel", wraplength=220, justify="center").grid(row=3, column=0)

    return card

# override your earlier show_results with a richer one
def show_results(results):
    clear_results()
    if not results:
        ttk.Label(results_container, text="No matches found.", style="Dim.TLabel").grid(row=0, column=0, padx=8, pady=8)
        return

    cols = 3  # grid columns
    for i, r in enumerate(results):
        title = r.payload.get("title", "No title")
        img_url = r.payload.get("image_url")
        product_url = r.payload.get("product_url")  # uses your payload key
        card = create_result_card(results_container, title, img_url, product_url)
        card.configure(width=240, height=340)
        row, col = divmod(i, cols)
        card.grid(row=row, column=col, padx=8, pady=8, sticky="nsew")

    for c in range(cols):
        results_container.grid_columnconfigure(c, weight=1)

def run_search_async(query_vector, mode):
    def worker():
        try:
            hits = search_qdrant(query_vector, mode)  # uses your existing logic
            _result_queue.put(("ok", hits))
        except Exception as e:
            _result_queue.put(("err", str(e)))
    threading.Thread(target=worker, daemon=True).start()

def poll_results():
    try:
        status, payload = _result_queue.get_nowait()
    except queue.Empty:
        root.after(80, poll_results)
        return

    set_busy(False)

    if status == "ok":
        show_results(payload)
    else:
        messagebox.showerror("Search Error", payload)

def on_search_text():
    if _search_lock.locked():
        return
    q = text_entry.get().strip()
    if not q:
        messagebox.showwarning("Input error", "Please enter text")
        return
    try:
        q = GoogleTranslator(source='auto', target='en').translate(q)
    except Exception as e:
        print("Translation error:", e)
    def embed_then_search():
        try:
            vec = get_text_embedding(q)
            _result_queue.put(("emb_ok", ("text", vec)))
        except Exception as e:
            _result_queue.put(("err", str(e)))

    with _search_lock:
        set_busy(True)
        clear_results()
        threading.Thread(target=embed_then_search, daemon=True).start()
        root.after(80, poll_embeddings_then_search)

def on_search_image():
    if _search_lock.locked():
        return
    path = filedialog.askopenfilename(filetypes=[("Image files", "*.png;*.jpg;*.jpeg;*.webp;*.bmp")])
    if not path:
        return

    def embed_then_search():
        try:
            img = Image.open(path)
            vec = get_image_embedding(img)
            _result_queue.put(("emb_ok", ("image", vec)))
        except Exception as e:
            _result_queue.put(("err", str(e)))

    with _search_lock:
        set_busy(True)
        clear_results()
        threading.Thread(target=embed_then_search, daemon=True).start()
        root.after(80, poll_embeddings_then_search)

def poll_embeddings_then_search():
    try:
        status, payload = _result_queue.get_nowait()
    except queue.Empty:
        root.after(80, poll_embeddings_then_search)
        return

    if status == "emb_ok":
        mode, vec = payload
        run_search_async(vec, mode)
        root.after(80, poll_results)
    elif status == "err":
        set_busy(False)
        messagebox.showerror("Embedding Error", payload)
    else:
        root.after(80, poll_embeddings_then_search)

# --- UI scaffold ---
root = tk.Tk()
root.title("Hodomak – Local Fashion Search")
root.geometry("920x760")
root.minsize(820, 680)

style = ttk.Style()
try:
    style.theme_use("clam")
except Exception:
    pass
style.configure("Title.TLabel", font=("Segoe UI", 11, "bold"))
style.configure("Dim.TLabel", foreground="#666")
style.configure("Link.TLabel", foreground="#1a73e8")
style.configure("Url.TLabel", font=("Segoe UI", 8), foreground="#888")
style.configure("Card.TFrame", relief="groove")

container = ttk.Frame(root, padding=12)
container.pack(fill="both", expand=True)

# Header
header = ttk.Frame(container)
header.grid(row=0, column=0, sticky="ew", pady=(0, 10))
header.grid_columnconfigure(1, weight=1)
ttk.Label(header, text="Hodomak Search Engine", font=("Segoe UI", 16, "bold")).grid(row=0, column=0, sticky="w")
status_var = tk.StringVar(value="Ready")
ttk.Label(header, textvariable=status_var, style="Dim.TLabel").grid(row=0, column=2, sticky="e")

# Controls
controls = ttk.Frame(container)
controls.grid(row=1, column=0, sticky="ew", pady=(0, 8))
controls.grid_columnconfigure(1, weight=1)

ttk.Label(controls, text="Search by Text", font=("Segoe UI", 11, "bold")).grid(row=0, column=0, sticky="w")
text_entry = ttk.Entry(controls)
text_entry.grid(row=0, column=1, sticky="ew", padx=8)
search_btn_text = ttk.Button(controls, text="Search", command=on_search_text)
search_btn_text.grid(row=0, column=2, padx=(6, 0))
text_entry.bind("<Return>", lambda e: on_search_text())

ttk.Label(controls, text="or", style="Dim.TLabel").grid(row=1, column=1, pady=6)
search_btn_image = ttk.Button(controls, text="Upload Image & Search", command=on_search_image)
search_btn_image.grid(row=2, column=1, pady=(0, 6))

# Progress bar (hidden by default)
progress = ttk.Progressbar(container, mode="indeterminate")

# Scrollable results
results_wrap = ttk.Frame(container)
results_wrap.grid(row=4, column=0, sticky="nsew")
container.grid_rowconfigure(4, weight=1)
container.grid_columnconfigure(0, weight=1)

canvas = tk.Canvas(results_wrap, highlightthickness=0)
vsb = ttk.Scrollbar(results_wrap, orient="vertical", command=canvas.yview)
results_container = ttk.Frame(canvas)

results_container.bind("<Configure>", lambda e: canvas.configure(scrollregion=canvas.bbox("all")))
canvas.create_window((0, 0), window=results_container, anchor="nw")
canvas.configure(yscrollcommand=vsb.set)

canvas.pack(side="left", fill="both", expand=True)
vsb.pack(side="right", fill="y")

set_busy(False)
root.mainloop()



  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
  search_result = qdrant.search(
  search_result = qdrant.search(
  search_result = qdrant.search(
  search_result = qdrant.search(
  search_result = qdrant.search(
  search_result = qdrant.search(
