# Video Frame Search System with BLIP & Pinecone

This notebook sets up a complete video semantic search engine that:
- Extracts frames from videos
- Generates captions using BLIP
- Stores embeddings in Pinecone
- Enables natural language search

---


 ## Step 1: Setup - Clone Repository & Install Dependencies



In [55]:
# Clone the repository
!git clone https://github.com/pranavacchu/capstone-BLIP.git
%cd capstone-BLIP

# Install dependencies
print("📦 Installing dependencies... This will take 3-5 minutes")
!pip install -q opencv-python-headless pillow numpy pandas tqdm python-dotenv
!pip install -q torch torchvision transformers sentence-transformers
!pip install -q pinecone FlagEmbedding

print("\n✅ Installation complete!")

# Check GPU availability
import torch
if torch.cuda.is_available():
    print(f"\n🚀 GPU detected: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    print("\n⚠️ No GPU detected. Using CPU (slower but works)")

Cloning into 'capstone-BLIP'...
remote: Enumerating objects: 62, done.[K
remote: Counting objects: 100% (62/62), done.[K
remote: Compressing objects: 100% (46/46), done.[K
remote: Total 62 (delta 27), reused 51 (delta 16), pack-reused 0 (from 0)[K
Receiving objects: 100% (62/62), 94.06 KiB | 6.72 MiB/s, done.
Resolving deltas: 100% (27/27), done.
/content/capstone-BLIP/capstone-BLIP/capstone-BLIP/capstone-BLIP/capstone-BLIP/capstone-BLIP
📦 Installing dependencies... This will take 3-5 minutes

✅ Installation complete!

🚀 GPU detected: Tesla T4
   Memory: 15.8 GB


In [56]:
"""
Hotfix script to fix known issues in the video search system
Run this in Colab after cloning the repository

Each hotfix is a guarded text substitution on a repository source file:
it is skipped when its signature is already present, and recorded as failed
when the expected anchor text cannot be found. The final message reflects
what actually happened instead of always reporting success.
"""

print("🔧 Applying hotfixes...")

# Names of hotfixes whose anchor text was not found in the target file.
failed_hotfixes = []

# Hotfix 1: Add deduplicate_embeddings method to TextEmbeddingGenerator
print("   - Adding deduplicate_embeddings method...")

with open('embedding_generator.py', 'r', encoding='utf-8') as f:
    content = f.read()

if 'def deduplicate_embeddings' not in content:
    # The new method is inserted immediately before this existing method.
    marker = '    def get_embedding_statistics'
    if marker in content:
        # NOTE(review): the inserted code relies on List, EmbeddedFrame, np,
        # logger and self.normalize already existing in embedding_generator.py
        # - confirm against the target file.
        dedupe_method = '''    def deduplicate_embeddings(self,
                              embedded_frames: List[EmbeddedFrame],
                              similarity_threshold: float = 0.95) -> List[EmbeddedFrame]:
        """
        Remove duplicate embeddings based on similarity threshold

        Args:
            embedded_frames: List of EmbeddedFrame objects
            similarity_threshold: Minimum similarity to consider as duplicate (0.0 to 1.0)

        Returns:
            List of unique EmbeddedFrame objects
        """
        if not embedded_frames:
            return []

        if len(embedded_frames) <= 1:
            return embedded_frames

        logger.info(f"Deduplicating {len(embedded_frames)} embeddings with threshold {similarity_threshold}")

        # Convert to numpy array for efficient computation
        embeddings = np.array([ef.embedding for ef in embedded_frames])

        # Track which embeddings to keep
        keep_mask = np.ones(len(embedded_frames), dtype=bool)

        # Compare each embedding with subsequent ones
        for i in range(len(embeddings)):
            if not keep_mask[i]:
                continue

            # Compute similarity with all subsequent embeddings
            for j in range(i + 1, len(embeddings)):
                if not keep_mask[j]:
                    continue

                # Compute cosine similarity
                if self.normalize:
                    # If normalized, use dot product
                    similarity = np.dot(embeddings[i], embeddings[j])
                else:
                    # Compute cosine similarity manually
                    similarity = np.dot(embeddings[i], embeddings[j]) / (
                        np.linalg.norm(embeddings[i]) * np.linalg.norm(embeddings[j])
                    )

                # Mark as duplicate if similarity exceeds threshold
                if similarity >= similarity_threshold:
                    keep_mask[j] = False

        # Filter embeddings based on keep mask
        unique_frames = [ef for ef, keep in zip(embedded_frames, keep_mask) if keep]

        removed_count = len(embedded_frames) - len(unique_frames)
        logger.info(f"Removed {removed_count} duplicate embeddings, kept {len(unique_frames)} unique")

        return unique_frames

'''
        content = content.replace(marker, dedupe_method + marker)

        with open('embedding_generator.py', 'w', encoding='utf-8') as f:
            f.write(content)
        print("   ✓ Added deduplicate_embeddings method")
    else:
        failed_hotfixes.append('deduplicate_embeddings')
        print("   ⚠ Could not find insertion point")
else:
    print("   ✓ deduplicate_embeddings already exists")

# Hotfix 2: Fix Grounding DINO dtype mismatch
print("   - Fixing Grounding DINO dtype mismatch...")

with open('object_detector.py', 'r', encoding='utf-8') as f:
    content = f.read()

# The presence of this line is used as the "already patched" signature.
if 'model_dtype = next(self.model.parameters()).dtype' not in content:
    old_code = '''            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Run detection
            with torch.no_grad():
                outputs = self.model(**inputs)'''

    new_code = '''            # Ensure dtype matches model weights (fp16 on CUDA)
            model_dtype = next(self.model.parameters()).dtype
            casted_inputs = {}
            for k, v in inputs.items():
                v = v.to(self.device)
                if hasattr(v, 'dtype') and v.dtype.is_floating_point:
                    v = v.to(model_dtype)
                casted_inputs[k] = v

            # Run detection
            with torch.no_grad():
                outputs = self.model(**casted_inputs)'''

    if old_code in content:
        content = content.replace(old_code, new_code)
        with open('object_detector.py', 'w', encoding='utf-8') as f:
            f.write(content)
        print("   ✓ Fixed Grounding DINO dtype mismatch")
    else:
        failed_hotfixes.append('grounding_dino_dtype')
        print("   ⚠ Could not find code to replace in object_detector.py")
else:
    print("   ✓ Grounding DINO dtype fix already applied")

# Report the real outcome: success only when no hotfix was skipped for a
# missing anchor.
if failed_hotfixes:
    print(f"\n⚠️ {len(failed_hotfixes)} hotfix(es) could not be applied: {', '.join(failed_hotfixes)}")
    print("   Video processing may fail until these are resolved")
else:
    print("\n✅ Hotfixes applied successfully!")
    print("   You can now proceed with video processing")


🔧 Applying hotfixes...
   - Adding deduplicate_embeddings method...
   ✓ deduplicate_embeddings already exists
   - Fixing Grounding DINO dtype mismatch...
   ✓ Grounding DINO dtype fix already applied

✅ Hotfixes applied successfully!
   You can now proceed with video processing


In [57]:
# === RELOAD MODULES AFTER HOTFIX ===
import sys

print("🔄 Reloading modules to apply hotfixes...")

# Drop any already-imported project modules from the import cache so the next
# `import` statement loads them from disk again.
for stale_module in (
    'embedding_generator',
    'object_detector',
    'video_search_engine',
    'caption_generator',
    'frame_extractor',
    'pinecone_manager',
    'object_caption_pipeline',
):
    sys.modules.pop(stale_module, None)

print("✅ Module cache cleared. Proceed to Step 2.")

🔄 Reloading modules to apply hotfixes...
✅ Module cache cleared. Proceed to Step 2.


In [58]:
# Install Grounding DINO dependencies
import subprocess
import sys

print("Installing Grounding DINO and dependencies...")
print("This may take 2-3 minutes...")

# Install each package with the interpreter that runs this kernel, so the
# packages land in the environment the notebook actually imports from, and
# remember which installs failed instead of assuming success.
failed_packages = []
for package in ('timm', 'supervision'):
    completed = subprocess.run(
        [sys.executable, '-m', 'pip', 'install', '-q', package],
        check=False,
    )
    if completed.returncode != 0:
        failed_packages.append(package)

if failed_packages:
    print(f"\n⚠️ Failed to install: {', '.join(failed_packages)}")
    print("   Object detection may not work until these packages are installed")
else:
    print("\nGrounding DINO dependencies installed!")
    print("Models will be downloaded automatically from Hugging Face on first use")

Installing Grounding DINO and dependencies...
This may take 2-3 minutes...

Grounding DINO dependencies installed!
Models will be downloaded automatically from Hugging Face on first use


## Step 2: Configure Pinecone API Key

Enter your Pinecone credentials below:
- **API Key**: Your Pinecone API key
- **Index Host**: Your index URL (from Pinecone dashboard)

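Expected format (placeholders only — never save real credentials in the notebook, because it is stored and shared as plain text):
```
API Key: pcsk_<your-api-key>
Host: https://<index-name>-<project-id>.svc.<region>.pinecone.io
```

**Security note:** if a real API key was ever saved in this notebook, treat it as compromised and rotate it in the Pinecone console.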

In [59]:
import os
from getpass import getpass

# Set your Pinecone credentials
# Secrets must not be written into the notebook source: a saved notebook is
# plain text and exposes them to everyone it is shared with. Values are taken
# from the environment when set; otherwise they are prompted for (the API key
# prompt does not echo what is typed).
PINECONE_API_KEY = (os.environ.get("PINECONE_API_KEY") or getpass("Pinecone API key: ")).strip()
PINECONE_HOST = (os.environ.get("PINECONE_HOST") or input("Pinecone index host URL: ")).strip()
PINECONE_ENVIRONMENT = os.environ.get("PINECONE_ENVIRONMENT", "us-east-1")

if not PINECONE_API_KEY or not PINECONE_HOST:
    raise ValueError("Both a Pinecone API key and an index host URL are required")

# Write to .env file
# NOTE(review): presumably the project modules load these values from .env
# (python-dotenv is installed in the setup cell) - confirm.
with open('.env', 'w') as f:
    f.write(f"PINECONE_API_KEY={PINECONE_API_KEY}\n")
    f.write(f"PINECONE_HOST={PINECONE_HOST}\n")
    f.write(f"PINECONE_ENVIRONMENT={PINECONE_ENVIRONMENT}\n")

# Restrict the file to the current user since it now contains a secret.
os.chmod('.env', 0o600)

print("✅ Configuration saved!")

✅ Configuration saved!


##  Step 3: Test Connection to Pinecone



In [60]:
from video_search_engine import VideoSearchEngine

print("🔌 Connecting to Pinecone...")
engine = VideoSearchEngine()

# Fetch index statistics; missing keys fall back to the defaults given below.
stats = engine.get_index_stats()

# Assemble the whole report first, then emit it with a single print call.
report_lines = [
    "\n✅ Successfully connected to Pinecone!",
    "\n📊 Database Statistics:",
    "   Index: capstone",
    f"   Total vectors: {stats.get('total_vectors', 0):,}",
    f"   Dimension: {stats.get('dimension', 1024)}",
    "   Capacity: Serverless",
]
print("\n".join(report_lines))

🔌 Connecting to Pinecone...

✅ Successfully connected to Pinecone!

📊 Database Statistics:
   Index: capstone
   Total vectors: 35
   Dimension: 1024
   Capacity: Serverless


## Step 4: Upload a Video File



In [61]:
from google.colab import files
import os
import subprocess
import sys
from urllib.parse import urlparse, parse_qs

import cv2

DIRECT_DOWNLOAD_TIMEOUT_S = 300   # 5 minutes, applied to wget and to curl
YOUTUBE_DOWNLOAD_TIMEOUT_S = 600  # 10 minutes
# TLS certificate verification stays on by default. Set to True only for a
# host whose certificate is known to be broken and whose content you trust.
ALLOW_INSECURE_TLS = False


def _is_nonempty_file(path):
    """Return True when `path` exists and is larger than zero bytes."""
    return os.path.exists(path) and os.path.getsize(path) > 0


def _discard(path):
    """Delete `path` if present so failed or stale downloads are never reused."""
    if os.path.exists(path):
        os.remove(path)


def _download_direct(video_url):
    """Download a direct video link with wget, falling back to curl.

    Args:
        video_url: Full URL of the video file.

    Returns:
        The local filename on success, otherwise None. Any partial file left
        behind by a failed attempt is removed.
    """
    parsed_url = urlparse(video_url)
    # Requiring an explicit scheme also guarantees the URL cannot start with
    # '-' and be mistaken for a command-line option by wget/curl.
    if parsed_url.scheme not in ('http', 'https', 'ftp'):
        print("❌ Unsupported URL. Please enter a full http://, https:// or ftp:// link.")
        return None

    # Use URL filename if it has an extension, otherwise use default
    url_filename = os.path.basename(parsed_url.path)
    if url_filename and '.' in url_filename:
        video_filename = url_filename
    else:
        video_filename = "downloaded_video.mp4"

    print(f"⬇️ Downloading from URL...")
    print(f"   Target file: {video_filename}")

    wget_cmd = ['wget', '-O', video_filename, video_url, '-q', '--show-progress']
    curl_cmd = ['curl', '-L', '-o', video_filename, video_url, '--silent', '--show-error']
    if ALLOW_INSECURE_TLS:
        wget_cmd.append('--no-check-certificate')
        curl_cmd.append('--insecure')

    try:
        result = subprocess.run(
            wget_cmd, capture_output=True, text=True, timeout=DIRECT_DOWNLOAD_TIMEOUT_S
        )
        if result.returncode == 0 and _is_nonempty_file(video_filename):
            print(f"✅ Downloaded successfully: {video_filename}")
            return video_filename

        if result.returncode != 0:
            print(f"❌ Download failed: wget returned code {result.returncode}")
        else:
            print(f"❌ Download failed: File is empty")

        # Try alternative method with curl
        print("\n🔄 Trying alternative download method (curl)...")
        result2 = subprocess.run(
            curl_cmd, capture_output=True, text=True, timeout=DIRECT_DOWNLOAD_TIMEOUT_S
        )
        if result2.returncode == 0 and _is_nonempty_file(video_filename):
            print(f"✅ Downloaded successfully with curl: {video_filename}")
            return video_filename

        print(f"❌ Alternative download also failed")
        print("   Please check if the URL is accessible and try again")

    except subprocess.TimeoutExpired:
        print("❌ Download timed out (>5 minutes). File may be too large.")
    except Exception as e:
        print(f"❌ Download error: {e}")

    # Every path that reaches here failed: do not leave a partial file behind.
    _discard(video_filename)
    return None


def _download_youtube(youtube_url):
    """Download a YouTube video (or short) to youtube_video.mp4 using yt-dlp.

    Args:
        youtube_url: URL accepted by yt-dlp.

    Returns:
        The local filename on success, otherwise None.
    """
    if youtube_url.startswith('-'):
        # Would be parsed by yt-dlp as an option rather than a URL.
        print("❌ Invalid YouTube URL")
        return None

    print("⬇️ Downloading from YouTube...")
    print("   Installing yt-dlp (if needed)...")

    # Install yt-dlp if not present, into the kernel's own environment.
    subprocess.run([sys.executable, '-m', 'pip', 'install', '-q', 'yt-dlp'], check=False)

    video_filename = "youtube_video.mp4"
    # By default yt-dlp does not overwrite an existing output file and still
    # exits successfully, so a video left over from an earlier run would be
    # silently reused for a different URL. Remove it first.
    _discard(video_filename)

    try:
        print(f"   Fetching video info...")

        # Download with yt-dlp
        result = subprocess.run(
            [
                'yt-dlp',
                '-f', 'best[ext=mp4]/best',  # Best quality MP4
                '-o', video_filename,
                '--no-playlist',
                '--quiet',
                '--progress',
                youtube_url
            ],
            capture_output=True,
            text=True,
            timeout=YOUTUBE_DOWNLOAD_TIMEOUT_S
        )

        if result.returncode == 0 and os.path.exists(video_filename):
            if os.path.getsize(video_filename) > 0:
                print(f"✅ Downloaded successfully: {video_filename}")
                return video_filename
            print(f"❌ Download failed: File is empty")
        else:
            print(f"❌ YouTube download failed")
            if result.stderr:
                print(f"   Error: {result.stderr[:300]}")
            print("\n💡 Troubleshooting tips:")
            print("   - Make sure the video is public and not age-restricted")
            print("   - Try using Option 1 to upload the video manually")
            print("   - Check if the URL is correct")

    except subprocess.TimeoutExpired:
        print("❌ Download timed out (>10 minutes).")
    except Exception as e:
        print(f"❌ Download error: {e}")

    _discard(video_filename)
    return None


def _probe_video(path):
    """Open `path` with OpenCV and read its basic properties.

    Returns:
        (fps, frame_count, duration_seconds), or None when OpenCV cannot open
        the file. The capture handle is always released.
    """
    cap = cv2.VideoCapture(path)
    try:
        if not cap.isOpened():
            return None
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()
    # Guard against a reported FPS of zero to avoid dividing by zero.
    duration = frame_count / fps if fps > 0 else 0
    return fps, frame_count, duration


print("📤 Choose how to get your video:\n")
print("1. Upload from computer (recommended for small files < 100MB)")
print("2. Download from URL (direct video file)")
print("3. Download from YouTube URL\n")

choice = input("Enter choice (1/2/3): ").strip()
video_path = None

if choice == "1":
    print("\n📁 Please select your video file...")
    uploaded = files.upload()
    if uploaded:
        # Only the first uploaded file is used.
        video_path = next(iter(uploaded))
        print(f"✅ Uploaded: {video_path}")
    else:
        print("❌ No file uploaded")

elif choice == "2":
    video_url = input("\nEnter video URL (direct link to .mp4, .avi, etc.): ").strip()
    if not video_url:
        print("❌ No URL provided")
    else:
        video_path = _download_direct(video_url)

elif choice == "3":
    youtube_url = input("\nEnter YouTube URL (video or shorts): ").strip()
    if not youtube_url:
        print("❌ No URL provided")
    else:
        video_path = _download_youtube(youtube_url)

else:
    print("⚠️ Invalid choice. Please choose option 1, 2, or 3.")

# Validate the video file
if video_path:
    if os.path.exists(video_path):
        file_size = os.path.getsize(video_path) / (1024*1024)  # MB
        print(f"\n📹 Video ready: {video_path} ({file_size:.1f} MB)")

        # Verify it's a valid video file
        video_info = _probe_video(video_path)
        if video_info is not None:
            fps, frame_count, duration = video_info
            print(f"   Duration: {duration:.1f} seconds")
            print(f"   FPS: {fps:.1f}")
            print(f"   Total frames: {frame_count:,}")
        else:
            print("\n⚠️ Warning: Unable to read video file. It may be corrupted.")
            print("   Please try a different video or URL.")
            video_path = None
    else:
        print(f"\n❌ Error: File not found at {video_path}")
        video_path = None

if not video_path:
    print("\n❌ No valid video file available. Please run this cell again.")

📤 Choose how to get your video:

1. Upload from computer (recommended for small files < 100MB)
2. Download from URL (direct video file)
3. Download from YouTube URL

Enter choice (1/2/3): 3

Enter YouTube URL (video or shorts): https://www.youtube.com/shorts/QhlroYnundk
⬇️ Downloading from YouTube...
   Installing yt-dlp (if needed)...
   Fetching video info...
✅ Downloaded successfully: youtube_video.mp4

📹 Video ready: youtube_video.mp4 (2.2 MB)
   Duration: 7.6 seconds
   FPS: 30.0
   Total frames: 228


In [62]:
# Show the menu in a single call; the trailing blank line separates it from the prompt.
print(
    "Choose your captioning method:\n\n"
    "1. Standard BLIP (faster, general scene captions)\n"
    "2. Object Detection + BLIP (slower, object-focused)\n"
)

# An empty answer selects option 1; anything other than "2" means standard BLIP.
method_choice = input("Enter choice (1/2, default=1): ").strip()
if not method_choice:
    method_choice = "1"
use_object_detection = method_choice == "2"

if not use_object_detection:
    print("\nUsing Standard BLIP captioning")
else:
    print("\nUsing Object Detection + BLIP pipeline")
    print("   Detects objects: bags, laptops, helmets, phones, etc.")

Choose your captioning method:

1. Standard BLIP (faster, general scene captions)
2. Object Detection + BLIP (slower, object-focused)

Enter choice (1/2, default=1): 2

Using Object Detection + BLIP pipeline
   Detects objects: bags, laptops, helmets, phones, etc.


## Step 5: Process the Video

This will:
1. Extract frames from the video (removing redundant frames)
2. Generate captions using BLIP AI model
3. Create embeddings for semantic search
4. Upload to Pinecone database

**Expected time:**
- 1 minute video: ~2-3 minutes with GPU
- 5 minute video: ~8-10 minutes with GPU
- CPU mode: 3-5x slower

In [63]:
import time
from datetime import datetime

# Check prerequisites explicitly so a skipped cell produces an accurate
# message instead of being reported as a video-processing error.
if not globals().get('video_path'):
    print("❌ Please upload a video first (run the previous cell)")
elif 'engine' not in globals():
    print("❌ Please connect to Pinecone first (run Step 3)")
else:
    # The captioning-method cell is optional; fall back to standard BLIP when
    # it has not been run.
    detect_objects = bool(globals().get('use_object_detection', False))

    # Set video name
    video_name = input("Enter a name for this video (or press Enter for auto-name): ").strip()
    if not video_name:
        video_name = f"video_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    print(f"\n🎬 Processing video: {video_name}")
    print("⏳ This will take a few minutes... Please wait.\n")
    print("=" * 60)

    started_at = time.time()

    try:
        # Process the video
        stats = engine.process_video(
            video_path=video_path,
            video_name=video_name,
            save_frames=False,  # Set to True to save frames
            upload_to_pinecone=True,
            use_object_detection=detect_objects
        )
    except Exception as e:
        print(f"\n❌ Error processing video: {e}")
        print("\nTroubleshooting tips:")
        print("- If GPU memory error: Restart runtime and try again")
        print("- If video format error: Convert video to MP4 format")
    else:
        # Reporting runs outside the try block and reads every key with a
        # default, so a missing statistic cannot turn a successful run into an
        # "Error processing video" message.
        processing_time = time.time() - started_at
        frames_with_captions = stats.get('frames_with_captions', 0)
        embeddings_uploaded = stats.get('embeddings_uploaded', 0)

        print("\n" + "=" * 60)
        print("\n✅ VIDEO PROCESSING COMPLETE!\n")
        print(f"📊 Processing Statistics:")
        print(f"   Video name: {video_name}")
        print(f"   Frames extracted: {stats.get('total_frames_extracted', 0):,}")
        print(f"   Frames with captions: {frames_with_captions:,}")
        print(f"   Captions before dedupe: {stats.get('captions_before_dedupe', frames_with_captions):,}")
        print(f"   Unique embeddings: {stats.get('embeddings_generated', 0):,}")
        print(f"   ✅ Actually uploaded: {embeddings_uploaded:,}")
        print(f"   Processing time: {processing_time/60:.1f} minutes")
        print(f"\n   Frame reduction: {stats.get('frame_reduction_percent', 0):.1f}%")

        if not embeddings_uploaded:
            print("\n⚠️ No embeddings were uploaded; searches will not return this video.")

        # Save video_name for next steps
        processed_video_name = video_name

Enter a name for this video (or press Enter for auto-name): test4

🎬 Processing video: test4
⏳ This will take a few minutes... Please wait.



Extracting frames: 100%|██████████| 228/228 [00:01<00:00, 142.93it/s]
Processing frames:   0%|          | 0/5 [00:00<?, ?it/s]ERROR:object_detector:Error during object detection: mat1 and mat2 must have the same dtype, but got Float and Half
ERROR:object_detector:Float32 retry failed: GroundingDinoProcessor.post_process_grounded_object_detection() got an unexpected keyword argument 'box_threshold'
Processing frames:  20%|██        | 1/5 [00:01<00:04,  1.12s/it]ERROR:object_detector:Error during object detection: GroundingDinoProcessor.post_process_grounded_object_detection() got an unexpected keyword argument 'box_threshold'
Processing frames:  40%|████      | 2/5 [00:01<00:02,  1.05it/s]ERROR:object_detector:Error during object detection: GroundingDinoProcessor.post_process_grounded_object_detection() got an unexpected keyword argument 'box_threshold'
Processing frames:  60%|██████    | 3/5 [00:02<00:01,  1.12it/s]ERROR:object_detector:Error during object detection: GroundingDinoProce

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Uploading to Pinecone: 100%|██████████| 1/1 [00:00<00:00,  1.39it/s]




✅ VIDEO PROCESSING COMPLETE!

📊 Processing Statistics:
   Video name: test4
   Frames extracted: 5
   Frames with captions: 5
   Captions before dedupe: 5
   Unique embeddings: 3
   ✅ Actually uploaded: 3
   Processing time: 0.3 minutes

   Frame reduction: 0.0%


## Step 6: Search Your Video!

Now you can search for content using natural language queries.

**Example queries:**
- "person walking"
- "black bag"
- "someone talking on phone"
- "car driving"
- "red shirt"

The system will return timestamps where that content appears!

In [40]:
# Single search query
query = input("🔍 Enter your search query: ")

print(f"\nSearching for: '{query}'...")
print("=" * 60)

results = engine.search(query=query, top_k=5, similarity_threshold=0.5)

if not results:
    # Empty result: list ways to broaden the search.
    print("\n❌ No results found. Try:")
    for suggestion in (
        "   - Different search terms",
        "   - More general queries",
        "   - Lowering the similarity threshold",
    ):
        print(suggestion)
else:
    print(f"\n✅ Found {len(results)} results:\n")

    # One four-line entry per hit, followed by a blank separator line.
    for rank, hit in enumerate(results, start=1):
        print(f"{rank}. ⏱️ Timestamp: {hit['time_formatted']}")
        print(f"   📝 Caption: {hit['caption']}")
        print(f"   📊 Confidence: {hit['similarity_score']:.1%}")
        print(f"   🎥 Video: {hit['video_name']}")
        print()

🔍 Enter your search query: white

Searching for: 'white'...

✅ Found 5 results:

1. ⏱️ Timestamp: 00:00.03
   📝 Caption: A white tiled floor.
   📊 Confidence: 68.4%
   🎥 Video: test

2. ⏱️ Timestamp: 00:19.55
   📝 Caption: This is a person.
   📊 Confidence: 61.8%
   🎥 Video: bag_ad2

3. ⏱️ Timestamp: 00:00.03
   📝 Caption: A black and white photo of a woman in a white dress.
   📊 Confidence: 54.6%
   🎥 Video: bag_ad2

4. ⏱️ Timestamp: 00:11.51
   📝 Caption: The inside of a bag with a pattern on it.
   📊 Confidence: 54.5%
   🎥 Video: bag_ad2

5. ⏱️ Timestamp: 00:15.95
   📝 Caption: A purse with a handle.
   📊 Confidence: 53.8%
   🎥 Video: bag_ad2



## Step 7: Batch Search (Multiple Queries)

Search for multiple things at once!

In [41]:
# Define multiple queries
queries = [
    "person walking",
    "someone sitting",
    "black bag",
    "outdoor scene",
    "person talking"
]

print("🔍 Running batch search...\n")
print("=" * 60)

batch_results = engine.batch_search(queries, top_k=3)

for query, results in batch_results.items():
    print(f"\n📌 Query: '{query}'")
    print(f"   Found {len(results)} results")

    if results:
        for result in results[:2]:  # Show top 2
            caption = result['caption']
            # Add the ellipsis only when the caption was actually cut, so a
            # short caption is not shown as "This is a person....".
            if len(caption) > 50:
                caption = caption[:50] + "..."
            print(f"   └─ {result['time_formatted']} - {caption} ({result['similarity_score']:.0%})")
    else:
        print("   └─ No results")

print("\n" + "=" * 60)

🔍 Running batch search...


📌 Query: 'person walking'
   Found 3 results
   └─ 00:19.55 - This is a person.... (82%)
   └─ 00:04.80 - Two people walking in front of a large building.... (69%)

📌 Query: 'someone sitting'
   Found 3 results
   └─ 00:19.55 - This is a person.... (84%)
   └─ 00:16.08 - The person is holding a guitar.... (70%)

📌 Query: 'black bag'
   Found 3 results
   └─ 00:11.51 - The inside of a bag with a pattern on it.... (75%)
   └─ 00:15.95 - A purse with a handle.... (73%)

📌 Query: 'outdoor scene'
   Found 3 results
   └─ 00:19.55 - This is a person.... (72%)
   └─ 00:33.07 - A man is seen in this surveillance image from a su... (68%)

📌 Query: 'person talking'
   Found 3 results
   └─ 00:19.55 - This is a person.... (80%)
   └─ 00:16.08 - The person is holding a guitar.... (63%)



## Step 8: Advanced Search with Filters

Search with additional filters:
- Filter by specific video
- Search within time range
- Adjust confidence threshold

In [42]:
# Advanced search example
query = input("Enter search query: ")

# Optional: Filter by time window (in seconds)
use_time_filter = input("Filter by time range? (y/n): ").strip().lower() == 'y'

time_window = None
if use_time_filter:
    try:
        window_start = float(input("Start time (seconds): "))
        window_end = float(input("End time (seconds): "))
    except ValueError:
        # Non-numeric input: continue unfiltered rather than crash the cell.
        print("⚠️ Times must be numbers; searching without a time filter.")
    else:
        # The chained comparison also rejects NaN, which compares False.
        if 0 <= window_start <= window_end:
            time_window = (window_start, window_end)
        else:
            print("⚠️ Invalid time range (need 0 <= start <= end); searching without a time filter.")

# Optional: Filter by video name
# Offered only when the processing cell has set `processed_video_name`.
video_filter = None
if 'processed_video_name' in globals():
    filter_video = input(f"Search only in '{processed_video_name}'? (y/n): ").strip().lower() == 'y'
    if filter_video:
        video_filter = processed_video_name

# Perform search
print(f"\n🔍 Searching with filters...")
results = engine.search(
    query=query,
    top_k=10,
    similarity_threshold=0.4,  # Lower threshold for more results
    video_filter=video_filter,
    time_window=time_window
)

if results:
    print(f"\n✅ Found {len(results)} results:\n")
    for i, result in enumerate(results, 1):
        caption = result['caption']
        # Add the ellipsis only when the caption was actually cut.
        if len(caption) > 60:
            caption = caption[:60] + "..."
        print(f"{i}. {result['time_formatted']} - {caption} ({result['similarity_score']:.1%})")
else:
    print("\n❌ No results found. Try removing filters or using a more general query.")

Enter search query: white


KeyboardInterrupt: Interrupted by user

## Step 9: Interactive Search Interface


In [None]:
print("🎯 INTERACTIVE VIDEO SEARCH")
print("=" * 60)
print("Enter your search queries (type 'quit' to exit)\n")

# Prompt repeatedly until the user types one of the exit words.
while True:
    query = input("\n🔍 Search: ").strip()

    # Blank input: prompt again.
    if not query:
        continue

    if query.lower() in ('quit', 'exit', 'q'):
        print("\n👋 Goodbye!")
        break

    results = engine.search(query, top_k=5)

    if not results:
        print("\n❌ No results found. Try a different query.")
        continue

    print(f"\n✅ Found {len(results)} results:")
    for rank, hit in enumerate(results, start=1):
        score = hit['similarity_score']
        # Colour marker by score band: above 0.7, above 0.5, everything else.
        if score > 0.7:
            score_emoji = "🟢"
        elif score > 0.5:
            score_emoji = "🟡"
        else:
            score_emoji = "🟠"
        print(f"\n{rank}. {score_emoji} {hit['time_formatted']} ({score:.0%})")
        print(f"   {hit['caption']}")