# Cell 1: Cấu hình và import

In [42]:
import os
import json
import pandas as pd
from PIL import Image
import torch
import psycopg2
from transformers import CLIPModel, CLIPProcessor
from llama_index.vector_stores.milvus import MilvusVectorStore
from llama_index.core import StorageContext
from llama_index.vector_stores.milvus.utils import BM25BuiltInFunction
from deep_translator import GoogleTranslator
import torch.nn.functional as F
from pydantic import ConfigDict
from llama_index.core.embeddings import BaseEmbedding
from pymilvus import MilvusClient, DataType

# Config
DB_PARAMS = {
    "dbname": "video_hybrid_csv",  # Updated database name
    "user": "postgres",
    "password": "123",
    "host": "localhost",
    "port": "5432"
}

JSON_DIR = r"D:\Big_project_2025\Video_Similarity_Search\data_hybrid\media-info"
VIDEO_DIR = r"D:\Big_project_2025\Video_Similarity_Search\data_hybrid\video"
FRAME_DIR = r"D:\Big_project_2025\Video_Similarity_Search\data_hybrid\keyframes"
CSV_DIR = r"D:\Big_project_2025\Video_Similarity_Search\data_hybrid\csv"
MODEL_DIR = r"D:\Big_project_2025\huggingface_cache"

URI = "http://localhost:19530"
COLLECTION_NAME = "video_hybrid_search"

# Cell 2: Khởi tạo CLIP và Custom Embedding

In [2]:
# Initialize CLIP Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=MODEL_DIR).to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=MODEL_DIR)

# Custom CLIP Embedding class
class CustomCLIPEmbedding(BaseEmbedding):
    model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")

    def __init__(self, model, processor, device):
        super().__init__()
        self.model = model
        self.processor = processor
        self.device = device

    def _get_text_embedding(self, text: str) -> list:
        inputs = self.processor(text=[text], return_tensors="pt", padding=True, truncation=True, max_length=77).to(self.device)
        with torch.no_grad():
            text_features = self.model.get_text_features(**inputs)
        return F.normalize(text_features, p=2, dim=1)[0].cpu().numpy().tolist()

    def _get_query_embedding(self, query: str) -> list:
        return self._get_text_embedding(query)

    async def _aget_query_embedding(self, query: str) -> list:
        return self._get_text_embedding(query)

    def _get_text_embeddings(self, texts: list) -> list:
        inputs = self.processor(text=texts, return_tensors="pt", padding=True, truncation=True, max_length=77).to(self.device)
        with torch.no_grad():
            text_features = self.model.get_text_features(**inputs)
        return F.normalize(text_features, p=2, dim=1).cpu().numpy().tolist()

# Initialize custom embedding
clip_embedding = CustomCLIPEmbedding(model, processor, device)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


# Cell 3: Khởi tạo Milvus và PostgreSQL

In [None]:
# Initialize Milvus
bm25_function = BM25BuiltInFunction(
    analyzer_params={
        "tokenizer": "standard",
        "filter": [
            "lowercase",
            {"type": "length", "max": 40},
            {"type": "stop", "stop_words": ["of", "to", "and"]}
        ],
    },
    enable_match=True,
)
milvus_client = MilvusClient(uri=URI)
vector_store = MilvusVectorStore(
    uri=URI,
    collection_name=COLLECTION_NAME,
    dim=512,
    enable_sparse=True,
    sparse_embedding_function=bm25_function,
    hybrid_ranker="RRFRanker",
    hybrid_ranker_params={"k": 60},
    similarity_metric="IP",
    index_config={"index_type": "HNSW", "metric_type": "IP", "params": {"M": 16, "efConstruction": 200}},
    sparse_index_config={"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP"},
    overwrite=True,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Initialize PostgreSQL
conn = psycopg2.connect(**DB_PARAMS)
cur = conn.cursor()
cur.execute("""
    CREATE TABLE IF NOT EXISTS videos (
        id SERIAL PRIMARY KEY,
        video_path TEXT UNIQUE,
        title TEXT,
        description TEXT
    );
    CREATE TABLE IF NOT EXISTS frame_mappings (
        id SERIAL PRIMARY KEY,
        video_id INTEGER REFERENCES videos(id),
        frame_path TEXT UNIQUE,
        pts_time FLOAT,
        frame_idx INTEGER,
        fps FLOAT,
        milvus_id TEXT
    );
""")
conn.commit()

# Cell 4: Helper Functions

In [46]:
def encode_image(image_path):
    try:
        image = Image.open(image_path)
        inputs = processor(images=image, return_tensors="pt").to(device)
        with torch.no_grad():
            image_features = model.get_image_features(**inputs)
        return image_features[0].cpu().numpy()
    except Exception as e:
        print(f"Error encoding {image_path}: {e}")
        return None

def encode_images_batch(image_paths):
    try:
        images = [Image.open(path) for path in image_paths]
        inputs = processor(images=images, return_tensors="pt").to(device)
        with torch.no_grad():
            image_features = model.get_image_features(**inputs)
        return image_features.cpu().numpy()
    except Exception as e:
        print(f"Error encoding batch {image_paths}: {e}")
        return None

def time_to_seconds(time_str):
    try:
        minutes, seconds = map(int, time_str.split(':'))
        return minutes * 60 + seconds
    except ValueError:
        print(f"Invalid time format: {time_str}")
        return None

def format_time(seconds):
    m = int(seconds // 60)
    s = int(seconds % 60)
    return f"{m}:{s:02d}"

def group_timestamps(timestamps, gap_threshold=15.0):
    if not timestamps:
        return []
    timestamps = sorted(timestamps)
    ranges = []
    start = timestamps[0]
    end = timestamps[0]
    for t in timestamps[1:]:
        if t - end <= gap_threshold:
            end = t
        else:
            ranges.append((start, end))
            start = t
            end = t
    ranges.append((start, end))
    return ranges

def keyframe_path_from_frame_idx(video_path, frame_idx, csv_dir, frame_root):
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    csv_path = os.path.join(csv_dir, f"{video_name}.csv")
    dir_path = os.path.join(frame_root, video_name)
    
    if not os.path.exists(csv_path):
        return None
    try:
        df = pd.read_csv(csv_path)
        if not all(c in df.columns for c in ["n", "frame_idx"]):
            return None
        idx = (df["frame_idx"] - int(frame_idx)).abs().idxmin()
        n_val = int(df.loc[idx, "n"])
        candidates = [os.path.join(dir_path, f"{stem}.{ext}") 
                     for stem in [n_val, f"{n_val:03d}", f"{n_val:04d}", frame_idx, f"{int(frame_idx):03d}", f"{int(frame_idx):04d}"]
                     for ext in ("jpg", "jpeg", "png")]
        for p in candidates:
            if os.path.exists(p):
                return p
    except Exception:
        return None
    return None

def get_frame_idx_from_time(video_path, time_start, time_end, csv_dir):
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    csv_path = os.path.join(csv_dir, f"{video_name}.csv")
    
    if not os.path.exists(csv_path):
        print(f"CSV not found: {csv_path}")
        return None
    
    try:
        df = pd.read_csv(csv_path)
        if not all(col in df.columns for col in ['pts_time', 'frame_idx', 'fps']):
            print(f"CSV {csv_path} missing required columns")
            return None
    except Exception as e:
        print(f"Error reading CSV {csv_path}: {e}")
        return None
    
    start_sec = time_to_seconds(time_start)
    end_sec = time_to_seconds(time_end)
    if start_sec is None or end_sec is None:
        return None
    
    target_time = (start_sec + end_sec) / 2
    df['time_diff'] = abs(df['pts_time'] - target_time)
    closest_row = df.loc[df['time_diff'].idxmin()]
    return int(closest_row['frame_idx'])

# Cell 5: Indexing Pipeline

In [28]:
import os, json
import pandas as pd
from PIL import Image
from pymilvus import connections, utility, Collection, FieldSchema, CollectionSchema, DataType

# Biến toàn cục
COLLECTION_NAME = "video_hybrid_search"
FRAME_DIR = r"D:\Big_project_2025\Video_Similarity_Search\data_hybrid\keyframes"
CSV_DIR = r"D:\Big_project_2025\Video_Similarity_Search\data_hybrid\csv"
VIDEO_DIR = r"D:\Big_project_2025\Video_Similarity_Search\data_hybrid\video"
JSON_DIR = r"D:\Big_project_2025\Video_Similarity_Search\data_hybrid\media-info"

# Hàm insert vào Milvus
def insert_to_milvus(collection, data):
    try:
        collection.insert([data])
        print(f"✅ Inserted {data['id']} into Milvus")
        return data["id"]
    except Exception as e:
        print(f"❌ Milvus insert error for {data.get('frame_path', '')}: {e}")
        return None

# Hàm insert vào PostgreSQL
def insert_to_postgres(video_id, frame_path, pts_time, frame_idx, fps, milvus_id):
    try:
        cur.execute("""
            INSERT INTO frame_mappings (video_id, frame_path, pts_time, frame_idx, fps, milvus_id)
            VALUES (%s, %s, %s, %s, %s, %s)
            ON CONFLICT (frame_path) DO NOTHING
        """, (video_id, frame_path, pts_time, frame_idx, fps, milvus_id))
        print(f"✅ Inserted {frame_path} into Postgres")
    except Exception as e:
        print(f"❌ Postgres insert error: {e}")

# Hàm chính
def index_videos():
    # Kết nối Milvus
    try:
        connections.connect(host='localhost', port=19530)
        print(f"🔍 Milvus version: {utility.get_server_version()}")
    except Exception as e:
        print(f"❌ Milvus connection error: {e}")
        return

    # Tạo collection
    if utility.has_collection(COLLECTION_NAME):
        utility.drop_collection(COLLECTION_NAME)
        print(f"🔧 Dropped existing collection {COLLECTION_NAME}")

    fields = [
        FieldSchema("id", DataType.VARCHAR, max_length=100, is_primary=True, auto_id=False),
        FieldSchema("vector", DataType.FLOAT_VECTOR, dim=512),
        FieldSchema("frame_path", DataType.VARCHAR, max_length=512),
        FieldSchema("video_id", DataType.VARCHAR, max_length=100)
    ]
    schema = CollectionSchema(fields, enable_dynamic_field=True)

    try:
        collection = Collection(COLLECTION_NAME, schema)
        collection.create_index("vector", {
            "index_type": "HNSW", "metric_type": "IP", "params": {"M": 16, "efConstruction": 200}
        })
        collection.load()
        print(f"✅ Collection {COLLECTION_NAME} ready")
    except Exception as e:
        print(f"❌ Collection setup error: {e}")
        return

    # Index metadata
    for json_file in os.listdir(JSON_DIR):
        if not json_file.endswith(".json"):
            continue
        try:
            with open(os.path.join(JSON_DIR, json_file), "r", encoding="utf-8") as f:
                metadata = json.load(f)

            video_name = os.path.splitext(json_file)[0]
            video_path = os.path.join(VIDEO_DIR, f"{video_name}.mp4").replace("\\", "/")
            text_content = "\n".join([
                metadata.get("title", ""),
                metadata.get("description", ""),
                " ".join(metadata.get("keywords", []))
            ])
            dense_emb = clip_embedding._get_text_embedding(text_content)

            insert_to_milvus(collection, {
                "id": video_name,
                "vector": dense_emb,
                "frame_path": "",
                "video_id": video_name
            })

            cur.execute("""
                INSERT INTO videos (video_path, title, description)
                VALUES (%s, %s, %s)
                ON CONFLICT (video_path) DO NOTHING
            """, (video_path, metadata.get("title", ""), metadata.get("description", "")))
            conn.commit()
        except Exception as e:
            print(f"❌ Metadata error in {json_file}: {e}")

    # Index frames
    for key_frame_dir in os.listdir(FRAME_DIR):
        frame_dir = os.path.join(FRAME_DIR, key_frame_dir)
        csv_path = os.path.join(CSV_DIR, f"{key_frame_dir}.csv")
        video_path = os.path.join(VIDEO_DIR, f"{key_frame_dir}.mp4").replace("\\", "/")

        if not (os.path.isdir(frame_dir) and os.path.exists(csv_path) and os.path.exists(video_path)):
            print(f"⚠️ Missing data for {key_frame_dir}, skipping")
            continue

        try:
            df = pd.read_csv(csv_path)
            cur.execute("""
                INSERT INTO videos (video_path, title, description)
                VALUES (%s, %s, %s)
                ON CONFLICT (video_path) DO NOTHING
                RETURNING id
            """, (video_path, os.path.basename(video_path), "Video demo"))
            row = cur.fetchone()
            video_id = row[0] if row else cur.execute("SELECT id FROM videos WHERE video_path=%s", (video_path,)) or cur.fetchone()[0]

            print(f"🎥 Indexing frames for {key_frame_dir} (video_id={video_id})")
            for frame_file in os.listdir(frame_dir):
                if not frame_file.lower().endswith((".jpg", ".jpeg", ".png")):
                    continue
                try:
                    frame_path = os.path.join(frame_dir, frame_file).replace("\\", "/")
                    frame_idx = int(os.path.splitext(frame_file)[0])
                    row = df[df["n"] == frame_idx]
                    if row.empty:
                        continue
                    pts_time = float(row["pts_time"].values[0])
                    fps = int(row["fps"].values[0])
                    emb = encode_image(frame_path)
                    if emb is None:
                        continue
                    milvus_id = insert_to_milvus(collection, {
                        "id": f"frame_{key_frame_dir}_{frame_idx}",
                        "vector": emb.tolist(),
                        "frame_path": frame_path,
                        "video_id": key_frame_dir
                    })
                    if milvus_id:
                        insert_to_postgres(video_id, frame_path, pts_time, frame_idx, fps, milvus_id)
                except Exception as e:
                    print(f"❌ Frame error in {frame_file}: {e}")
            conn.commit()
            print(f"✅ Done indexing {key_frame_dir}")
        except Exception as e:
            print(f"❌ Error processing {key_frame_dir}: {e}")

# Chạy index
index_videos()


🔍 Milvus version: v2.4.7
🔧 Dropped existing collection video_hybrid_search
✅ Collection video_hybrid_search ready
✅ Inserted L26_V001 into Milvus
✅ Inserted L26_V002 into Milvus
✅ Inserted L26_V003 into Milvus
🎥 Indexing frames for L26_V001 (video_id=1)
✅ Inserted frame_L26_V001_1 into Milvus
✅ Inserted D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/001.jpg into Postgres
✅ Inserted frame_L26_V001_2 into Milvus
✅ Inserted D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/002.jpg into Postgres
✅ Inserted frame_L26_V001_3 into Milvus
✅ Inserted D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/003.jpg into Postgres
✅ Inserted frame_L26_V001_4 into Milvus
✅ Inserted D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/004.jpg into Postgres
✅ Inserted frame_L26_V001_5 into Milvus
✅ Inserted D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/005.jpg into Postgres
✅ Inse

# Cell 6: Search Pipeline

In [47]:
import os
import pandas as pd
from deep_translator import GoogleTranslator
# === Hàm hỗ trợ ===
def format_time(seconds):
    m = int(seconds // 60)
    s = int(seconds % 60)
    return f"{m}:{s:02d}"

def group_timestamps(timestamps, gap_threshold=15.0):
    if not timestamps:
        return []
    timestamps = sorted(timestamps)
    ranges = []
    start = timestamps[0]
    end = timestamps[0]
    for t in timestamps[1:]:
        if t - end <= gap_threshold:
            end = t
        else:
            ranges.append((start, end))
            start = t
            end = t
    ranges.append((start, end))
    return ranges

def get_frame_idx_from_time(video_path, time_start, time_end, csv_dir):
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    csv_path = os.path.join(csv_dir, f"{video_name}.csv")
    if not os.path.exists(csv_path):
        return None
    try:
        df = pd.read_csv(csv_path)
        if not all(col in df.columns for col in ['pts_time', 'frame_idx']):
            return None
        start_sec = int(time_start.split(":")[0]) * 60 + int(time_start.split(":")[1])
        end_sec = int(time_end.split(":")[0]) * 60 + int(time_end.split(":")[1])
        target_time = (start_sec + end_sec) / 2
        df['time_diff'] = abs(df['pts_time'] - target_time)
        closest_row = df.loc[df['time_diff'].idxmin()]
        return int(closest_row['frame_idx'])
    except:
        return None

def keyframe_path_from_frame_idx(video_path, frame_idx, csv_dir, frame_root):
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    csv_path = os.path.join(csv_dir, f"{video_name}.csv")
    dir_path = os.path.join(frame_root, video_name)

    if not os.path.exists(csv_path):
        return None

    try:
        df = pd.read_csv(csv_path)
        if not all(c in df.columns for c in ["n", "frame_idx"]):
            return None

        # Tìm giá trị gần nhất của frame_idx
        idx = (df["frame_idx"] - int(frame_idx)).abs().idxmin()
        n_val = int(df.loc[idx, "n"])

        # Tạo danh sách tên ảnh có thể có
        candidates = []
        for stem in [n_val, frame_idx, f"{n_val:03d}", f"{frame_idx:03d}", f"{n_val:04d}", f"{frame_idx:04d}"]:
            for ext in ("jpg", "jpeg", "png"):
                candidates.append(os.path.join(dir_path, f"{stem}.{ext}"))

        for p in candidates:
            if os.path.exists(p):
                return p
    except Exception as e:
        print(f"❌ Lỗi đọc CSV hoặc tìm ảnh: {e}")
        return None

    return None


# === Hàm chính ===
def search_videos_by_text(text_query, top_k=20, gap_threshold=15.0):
    text_en = GoogleTranslator(source='vi', target='en').translate(text_query)

    dense_emb = clip_embedding._get_query_embedding(text_en)

    results = milvus_client.search(
        collection_name=COLLECTION_NAME,
        data=[dense_emb],
        limit=top_k,
        output_fields=["frame_path", "video_id"]
    )

    frame_paths = []
    video_ids = set()
    for hit in results[0]:
        entity = hit["entity"]
        frame_path = entity.get("frame_path")
        video_id = entity.get("video_id")
        if frame_path:
            frame_paths.append(frame_path)
        video_ids.add(video_id)

    output = []
    if frame_paths:
        cur.execute(f"""
            SELECT v.video_path, v.title, fm.pts_time, fm.frame_path
            FROM frame_mappings fm
            JOIN videos v ON fm.video_id = v.id
            WHERE fm.frame_path = ANY(%s)
        """, (frame_paths,))
        output.extend(cur.fetchall())

    for vid in video_ids:
        cur.execute("""
            SELECT video_path, title, NULL, NULL
            FROM videos
            WHERE video_path LIKE %s
        """, (f"%{vid}%",))
        row = cur.fetchone()
        if row:
            output.append(row)

    grouped_results = {}
    for video_path, title, pts_time, frame_path in output:
        if video_path not in grouped_results:
            grouped_results[video_path] = {"title": title, "timestamps": [], "frames": []}
        if pts_time is not None:
            grouped_results[video_path]["timestamps"].append((pts_time, frame_path))

    final_results = []
    for video_path, data in grouped_results.items():
        time_groups = group_timestamps([t[0] for t in data["timestamps"]], gap_threshold)
        ranges = []
        for start, end in time_groups:
            ranges.append((format_time(start), format_time(end)))
        final_results.append({
            "video_path": video_path,
            "title": data["title"],
            "time_ranges": ranges
        })

    return final_results

def format_search_results(search_results, csv_dir):
    grouped_results = {}
    for res in search_results:
        video_path = res['video_path']
        video_name = os.path.splitext(os.path.basename(video_path))[0]
        if video_path not in grouped_results:
            grouped_results[video_path] = (video_name, [])

        for time_start, time_end in res['time_ranges']:
            frame_idx = get_frame_idx_from_time(video_path, time_start, time_end, csv_dir)
            if frame_idx is not None:
                grouped_results[video_path][1].append((frame_idx, time_start, time_end))

    return grouped_results

In [48]:
# === Chạy demo ===
if __name__ == "__main__":
    text_vi = "ếch"
    search_results = search_videos_by_text(text_vi, top_k=20, gap_threshold=15.0)
    results = format_search_results(search_results, csv_dir=CSV_DIR)

    print(f"\nKết quả tìm kiếm cho: \"{text_vi}\"")
    for video_path, (video_name, frames) in results.items():
        print(f"\nVideo: {video_name}")
        print(f"Path: {video_path}")
        for frame_idx, time_start, time_end in frames:
            img_path = keyframe_path_from_frame_idx(
                video_path=video_path,
                frame_idx=frame_idx,
                csv_dir=CSV_DIR,
                frame_root=FRAME_DIR
            )
            img_path_str = img_path if img_path else "⚠️ Không tìm thấy ảnh"
            print(f"Frame Idx: {frame_idx} -- Xuất hiện từ {time_start} đến {time_end} -- Path: {img_path_str}")



Kết quả tìm kiếm cho: "ếch"

Video: L26_V001
Path: D:/Big_project_2025/Video_Similarity_Search/data_hybrid/video/L26_V001.mp4
Frame Idx: 224 -- Xuất hiện từ 0:08 đến 0:08 -- Path: D:\Big_project_2025\Video_Similarity_Search\data_hybrid\keyframes\L26_V001\009.jpg
Frame Idx: 1929 -- Xuất hiện từ 1:14 đến 1:20 -- Path: D:\Big_project_2025\Video_Similarity_Search\data_hybrid\keyframes\L26_V001\035.jpg
Frame Idx: 2716 -- Xuất hiện từ 1:41 đến 1:58 -- Path: D:\Big_project_2025\Video_Similarity_Search\data_hybrid\keyframes\L26_V001\054.jpg
Frame Idx: 4050 -- Xuất hiện từ 2:42 đến 2:42 -- Path: D:\Big_project_2025\Video_Similarity_Search\data_hybrid\keyframes\L26_V001\077.jpg
Frame Idx: 5199 -- Xuất hiện từ 3:27 đến 3:27 -- Path: D:\Big_project_2025\Video_Similarity_Search\data_hybrid\keyframes\L26_V001\096.jpg

Video: L26_V002
Path: D:/Big_project_2025/Video_Similarity_Search/data_hybrid/video/L26_V002.mp4
Frame Idx: 135 -- Xuất hiện từ 0:03 đến 0:08 -- Path: D:\Big_project_2025\Video_Simil

# ê

In [55]:
from deep_translator import GoogleTranslator
from pymilvus import Collection

# =========================
# Hàm dịch tiếng Việt -> Anh
# =========================
def preprocess_text(text, src="vi", tgt="en"):
    try:
        return GoogleTranslator(source=src, target=tgt).translate(text)
    except Exception as e:
        print(f"❌ Translate error: {e}")
        return text

# =========================
# Hàm tìm kiếm hybrid
# =========================
def search_videos_hybrid(query_text, top_k=10):
    """
    Tìm kiếm hybrid (CLIP + BM25) trong Milvus + Postgres
    Format output gọn đẹp, gom theo video
    """
    try:
        # Dịch query sang tiếng Anh cho CLIP
        query_text_en = preprocess_text(query_text, src="vi", tgt="en")

        # Encode query
        dense_emb = clip_embedding._get_text_embedding(query_text_en)

        # Chuẩn bị collection
        collection = Collection(COLLECTION_NAME)
        collection.load()

        # Search vector
        results = collection.search(
            data=[dense_emb],
            anns_field="vector",
            param={"metric_type": "IP", "params": {"ef": 64}},
            limit=top_k,
            output_fields=["frame_path", "video_id"]
        )

        # Gom kết quả
        output = {}
        for hit in results[0]:
            frame_path = hit.entity.get("frame_path", "")
            video_id = hit.entity.get("video_id", "")

            # Nếu là metadata (không phải frame) thì bỏ qua
            if not frame_path:
                continue

            # Frame index
            try:
                frame_idx = int(os.path.splitext(os.path.basename(frame_path))[0])
            except:
                frame_idx = -1

            # Lấy pts_time từ Postgres
            cur.execute("""
                SELECT v.video_path, f.pts_time, f.fps
                FROM frame_mappings f
                JOIN videos v ON f.video_id = v.id
                WHERE f.frame_path = %s
            """, (frame_path,))
            row = cur.fetchone()
            if not row:
                continue

            video_path, pts_time, fps = row

            # Gom theo video
            if video_id not in output:
                output[video_id] = {
                    "video_path": video_path,
                    "frames": []
                }

            time_str = format_time(pts_time)
            output[video_id]["frames"].append({
                "frame_idx": frame_idx,
                "time_start": time_str,
                "time_end": time_str,
                "frame_path": frame_path
            })

        # In kết quả
        print(f'Kết quả tìm kiếm cho: "{query_text}"\n')
        for video, data in output.items():
            print(f"Video: {video}")
            print(f"Path: {data['video_path']}")
            for f in data["frames"]:
                print(f"Frame Idx: {f['frame_idx']} -- Xuất hiện từ {f['time_start']} đến {f['time_end']} -- Path: {f['frame_path']}")
            print()

        return output

    except Exception as e:
        print(f"❌ Search error: {e}")
        return {}


In [57]:
search_videos_hybrid("thịt gà", top_k=10)


Kết quả tìm kiếm cho: "thịt gà"

Video: L26_V001
Path: D:/Big_project_2025/Video_Similarity_Search/data_hybrid/video/L26_V001.mp4
Frame Idx: 37 -- Xuất hiện từ 1:20 đến 1:20 -- Path: D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/037.jpg
Frame Idx: 112 -- Xuất hiện từ 4:03 đến 4:03 -- Path: D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/112.jpg
Frame Idx: 96 -- Xuất hiện từ 3:27 đến 3:27 -- Path: D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/096.jpg
Frame Idx: 9 -- Xuất hiện từ 0:08 đến 0:08 -- Path: D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/009.jpg

Video: L26_V002
Path: D:/Big_project_2025/Video_Similarity_Search/data_hybrid/video/L26_V002.mp4
Frame Idx: 95 -- Xuất hiện từ 3:37 đến 3:37 -- Path: D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V002/095.jpg
Frame Idx: 9 -- Xuất hiện từ 0:08 đến 0:08 -- Path: D:/Big_project_2025/Video_Similarity_S

{'L26_V001': {'video_path': 'D:/Big_project_2025/Video_Similarity_Search/data_hybrid/video/L26_V001.mp4',
  'frames': [{'frame_idx': 37,
    'time_start': '1:20',
    'time_end': '1:20',
    'frame_path': 'D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/037.jpg'},
   {'frame_idx': 112,
    'time_start': '4:03',
    'time_end': '4:03',
    'frame_path': 'D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/112.jpg'},
   {'frame_idx': 96,
    'time_start': '3:27',
    'time_end': '3:27',
    'frame_path': 'D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/096.jpg'},
   {'frame_idx': 9,
    'time_start': '0:08',
    'time_end': '0:08',
    'frame_path': 'D:/Big_project_2025/Video_Similarity_Search/data_hybrid/keyframes/L26_V001/009.jpg'}]},
 'L26_V002': {'video_path': 'D:/Big_project_2025/Video_Similarity_Search/data_hybrid/video/L26_V002.mp4',
  'frames': [{'frame_idx': 95,
    'time_start': '3:37',
    'time_end':