In [None]:
# ============================================================================
# STEP 1: Check CLIP Model Availability
# ============================================================================
# Hands the single known-compatible CLIP model id to the project loader and
# reports whether an image-embedding model came back.
# NOTE(review): `embedding_generator` is a project module, and the repository
# is only cloned in STEP 3a further down -- confirm it is importable here.

print('Checking available CLIP model candidates...')
from embedding_generator import load_image_model

# Model ids offered to the loader; only the known compatible one is listed.
candidates = ['clip-ViT-B-32']

image_model = load_image_model(candidates)

# Any truthy return value is treated as "a model was loaded".
load_status = (
    "‚úÖ Successfully loaded an image model."
    if image_model
    else "‚ùå No compatible image model could be loaded. Please check your configuration."
)
print(load_status)

In [None]:
# ============================================================================
# STEP 2: Set CLIP Model & Runtime Config Overrides
# ============================================================================
# Overrides the CLIP model id on the shared project Config class for the rest
# of this notebook session and echoes the value that is now in effect.

from video_search_config import Config

# 'clip-ViT-B-32' is the id recommended here for image embeddings with
# sentence-transformers (do NOT use 'openai/clip-vit-base-patch32').
setattr(Config, 'CLIP_MODEL_NAME', 'clip-ViT-B-32')
print('Notebook override: Config.CLIP_MODEL_NAME =', Config.CLIP_MODEL_NAME)

# STEP 3: Install Dependencies & Clone Repository
# This section handles repo setup and dependency installation.

In [None]:
# ============================================================================
# STEP 3a: Clone Repository & Install Dependencies
# ============================================================================
# The shell / IPython magics below are commented out so the file stays valid
# Python; un-comment them in a notebook to actually clone and install.
# NOTE(review): the recorded output shows a nested
# .../capstone-BLIP/capstone-BLIP/... working directory, which suggests the
# clone + %cd lines were executed more than once -- run them only once.

# Clone the repository
# !git clone -b image-embedding https://github.com/pranavacchu/capstone-BLIP.git
# %cd capstone-BLIP

# Install dependencies
print("üì¶ Installing dependencies... This will take 3-5 minutes")
# !pip install -q opencv-python-headless pillow numpy pandas tqdm python-dotenv
# !pip install -q torch torchvision transformers sentence-transformers
# !pip install -q pinecone FlagEmbedding

print("\n‚úÖ Installation complete!")

# Report which accelerator (if any) torch can see.
import torch

if not torch.cuda.is_available():
    print("\n‚ö†Ô∏è No GPU detected. Using CPU (slower but works)")
else:
    gpu_index = 0  # only the first CUDA device is reported
    print(f"\nüöÄ GPU detected: {torch.cuda.get_device_name(gpu_index)}")
    total_memory_gb = torch.cuda.get_device_properties(gpu_index).total_memory / 1e9
    print(f"   Memory: {total_memory_gb:.1f} GB")

In [None]:
# ============================================================================
# STEP 3b (Optional): Install Object Detection Dependencies (Grounding DINO)
# ============================================================================
# Skip this if you don't need object detection; standard BLIP captioning is faster.
# The install commands themselves are commented out below; as written this
# cell only prints progress messages and imports `subprocess`.

for _message in (
    "Installing Grounding DINO and dependencies...",
    "This may take 2-3 minutes...",
):
    print(_message)
import subprocess

# !pip install -q supervision timm
# subprocess.run(['pip', 'install', '-q', 'timm'], check=False)
# subprocess.run(['pip', 'install', '-q', 'supervision'], check=False)

for _message in (
    "\nGrounding DINO dependencies installed!",
    "Models will be downloaded automatically from Hugging Face on first use",
):
    print(_message)

# STEP 4: Configure Pinecone & Environment
# Set your Pinecone API key and connection details here.

# STEP 4a: Set Pinecone API Credentials

In [None]:
# ============================================================================
# STEP 4a (cont'd): Configure Pinecone
# ============================================================================
# Resolves the Pinecone connection settings and persists them to ./.env.
#
# Each setting is read from the process environment when it is set there, so
# real credentials never have to be typed into (and saved with) the notebook.
# The literals below are only fallbacks; edit them if you prefer to configure
# the values in the cell itself.
import os

PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "your_api_key_here")
PINECONE_HOST = os.environ.get("PINECONE_HOST", "https://your-host.pinecone.io")
PINECONE_ENVIRONMENT = os.environ.get("PINECONE_ENVIRONMENT", "us-east-1")

# True while the API key is still the untouched placeholder.
_using_placeholder_key = PINECONE_API_KEY == "your_api_key_here"

if _using_placeholder_key and os.path.exists('.env'):
    # Re-running the cell with placeholder values must not clobber a .env
    # file that may already hold working credentials.
    print("Keeping existing .env (PINECONE_API_KEY is still the placeholder value).")
else:
    # Write to .env file
    with open('.env', 'w', encoding='utf-8') as f:
        f.write(f"PINECONE_API_KEY={PINECONE_API_KEY}\n")
        f.write(f"PINECONE_HOST={PINECONE_HOST}\n")
        f.write(f"PINECONE_ENVIRONMENT={PINECONE_ENVIRONMENT}\n")

    print("‚úÖ Configuration saved!")
    if _using_placeholder_key:
        print("WARNING: PINECONE_API_KEY is still the placeholder; set a real key before uploading.")

Cloning into 'capstone-BLIP'...
remote: Enumerating objects: 136, done.[K
remote: Counting objects: 100% (136/136), done.[K
remote: Compressing objects: 100% (96/96), done.[K
remote: Total 136 (delta 83), reused 93 (delta 40), pack-reused 0 (from 0)[K
Receiving objects: 100% (136/136), 189.59 KiB | 6.32 MiB/s, done.
Resolving deltas: 100% (83/83), done.
/content/capstone-BLIP/capstone-BLIP/capstone-BLIP/capstone-BLIP
üì¶ Installing dependencies... This will take 3-5 minutes

‚úÖ Installation complete!

üöÄ GPU detected: Tesla T4
   Memory: 15.8 GB


# STEP 5 (Optional): Start React API Server for Frontend Integration
# Skip if you only want to use the notebook. Keep running in background if using React UI.

In [None]:
# ============================================================================
# STEP 5a (Optional): Install & Start React API Server (FastAPI + ngrok)
# ============================================================================
# Only run if you want to use the React frontend integration.
# This cell will block and keep the server running. Run in a separate session if needed.
#
# As shipped, the install and authentication lines are commented out, so the
# cell only announces that the step is skipped.

# !pip install -q fastapi uvicorn pyngrok nest-asyncio python-multipart

import os

# pyngrok is an optional dependency: the install line above is disabled by
# default, so a missing package must not break the "skip" path.
try:
    from pyngrok import ngrok
except ImportError:
    ngrok = None

# Set your ngrok authtoken (get it from: https://dashboard.ngrok.com/get-started/your-authtoken)
# The environment variable wins so the token need not be stored in the notebook.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "your_ngrok_token_here")
# ngrok.set_auth_token(NGROK_AUTH_TOKEN)
# print("‚úÖ ngrok authenticated successfully!")

# NOTE: Uncomment below and provide a real token to start the server
# For local development, you can skip this and just use the notebook directly.

print("‚è≠Ô∏è  Skipping React API server setup (optional step)")
print("   To enable: provide NGROK_AUTH_TOKEN and uncomment the above lines")

‚úÖ Configuration saved!


In [None]:
# ============================================================================
# STEP 6: Upload Video File
# ============================================================================
# Choose how to provide your video: upload from computer, URL, or YouTube
#
# Leaves the result in `video_path`: the path of a file that exists on disk,
# or None when nothing usable was obtained.

import os
import shutil
import subprocess
from urllib.parse import urlparse, parse_qs

# google.colab only exists inside Colab; options 2 and 3 must still work elsewhere.
try:
    from google.colab import files
except ImportError:
    files = None

print("üì§ Choose how to get your video:\n")
print("1. Upload from computer (recommended for small files < 100MB)")
print("2. Download from URL (direct video file)")
print("3. Download from YouTube URL\n")

choice = input("Enter choice (1/2/3): ").strip()
video_path = None

if choice == "1":
    if files is None:
        print("Upload is only available inside Google Colab (google.colab could not be imported).")
    else:
        print("\nüìÅ Upload your video file...")
        uploaded = files.upload()
        # Only the first uploaded file is used.
        video_path = next(iter(uploaded), None)
        if video_path is not None:
            print(f"‚úÖ Uploaded: {video_path}")

elif choice == "2":
    url = input("Enter video URL: ").strip()
    print(f"\n‚¨áÔ∏è  Downloading from {url}...")
    download_target = "downloaded_video.mp4"
    downloaded_ok = False
    if urlparse(url).scheme not in ("http", "https", "ftp"):
        # Also stops input starting with '-' from being parsed as a wget option.
        print("Unsupported URL (expected http://, https:// or ftp://).")
    elif shutil.which('wget') is None:
        print("wget is not installed or not on PATH.")
    else:
        completed = subprocess.run(['wget', '-q', url, '-O', download_target], check=False)
        # `wget -O` creates the output file before fetching, so existence alone
        # does not prove success: require a zero exit status and a non-empty file.
        downloaded_ok = (
            completed.returncode == 0
            and os.path.exists(download_target)
            and os.path.getsize(download_target) > 0
        )
        if not downloaded_ok and os.path.exists(download_target):
            os.remove(download_target)  # drop the empty/partial leftover
    if downloaded_ok:
        video_path = download_target
        print(f"‚úÖ Downloaded: {video_path}")
    else:
        print("‚ùå Failed to download")

elif choice == "3":
    yt_url = input("Enter YouTube URL: ").strip()
    print("\n‚ñ∂Ô∏è  Downloading from YouTube...")
    # !pip install -q yt-dlp
    download_target = "youtube_video.mp4"
    downloaded_ok = False
    if urlparse(yt_url).scheme not in ("http", "https"):
        print("Unsupported URL (expected http:// or https://).")
    elif shutil.which('yt-dlp') is None:
        print("yt-dlp is not installed or not on PATH (install it with: pip install yt-dlp).")
    else:
        completed = subprocess.run(
            ['yt-dlp', '-f', 'best[ext=mp4]', yt_url, '-o', download_target],
            check=False,
        )
        downloaded_ok = completed.returncode == 0 and os.path.exists(download_target)
    # Success is only reported when the file is really there.
    if downloaded_ok:
        video_path = download_target
        print(f"‚úÖ Downloaded: {video_path}")
    else:
        print("‚ùå Failed to download")

else:
    print("Invalid choice")

# Validate the video file
if video_path:
    if os.path.exists(video_path):
        file_size = os.path.getsize(video_path) / (1024**2)
        print(f"\n‚úÖ Video file found: {video_path}")
        print(f"   Size: {file_size:.2f} MB")
    else:
        print(f"\n‚ùå File not found: {video_path}")
        video_path = None

if not video_path:
    print("\n‚ö†Ô∏è  No video file available. Please upload or download a video first.")

In [None]:
# =========================================================================
# STEP 7: Initialize Enhanced Components (Captioning, Embeddings, Temporal)
# =========================================================================
# Builds the caption generator, the embedding generator and (optionally) the
# temporal bootstrapper from the toggles defined in this cell.
# NOTE(review): the later cells visible in this notebook construct
# VideoSearchEngine() with no arguments and never reference caption_gen,
# embedding_gen or temporal_bootstrapper -- confirm these objects are consumed.

divider = "=" * 70
print("üîß INITIALIZING ENHANCED COMPONENTS")
print(divider)

from caption_generator import BlipCaptionGenerator
from embedding_generator import TextEmbeddingGenerator, MultimodalEmbeddingGenerator
from temporal_bootstrapping import TemporalBootstrapper
from video_search_config import Config

# ---- Tunable settings ----------------------------------------------------
captioning_model = "base"  # or "large", "tiny", "object-focused"
enable_dual_embeddings = True          # caption + image embeddings
enable_temporal_bootstrapping = True   # temporal smoothing of confidences
generate_captions_variants = True      # several caption variants + CLIP reranking
num_variants = 3

# Short model alias -> checkpoint id passed to the caption generator.
model_map = {
    "base": "Salesforce/blip-image-captioning-base",
    "large": "Salesforce/blip-image-captioning-large",
    "tiny": "Salesforce/blip-image-captioning-tiny",
    "object-focused": "Salesforce/blip-image-captioning-large"
}

# ---- Caption generator ---------------------------------------------------
# An unknown alias falls back to the base checkpoint.
if captioning_model in model_map:
    logger_model = model_map[captioning_model]
else:
    logger_model = "Salesforce/blip-image-captioning-base"
print(f"\nüìù Caption generator: {captioning_model} ({logger_model})")

caption_kwargs = dict(
    model_name=logger_model,
    model_type=captioning_model,
    batch_size=8,
    use_gpu=True,
    max_length=50,
    num_beams=4,
    generate_multiple_captions=generate_captions_variants,
    captions_per_frame=num_variants,
    compute_confidence=True,
    # Rerank the caption variants against the image with CLIP
    enable_clip_rerank=True,
    clip_rerank_model=Config.CLIP_MODEL_NAME,
)
caption_gen = BlipCaptionGenerator(**caption_kwargs)
print("‚úÖ Caption generator ready")

# ---- Embedding generator -------------------------------------------------
text_embedding_model = "intfloat/multilingual-e5-large"
if not enable_dual_embeddings:
    print("\nüìÑ Text embeddings only")
    embedding_gen = TextEmbeddingGenerator(
        model_name=text_embedding_model,
        batch_size=32,
        use_gpu=True,
    )
else:
    print("\nüñºÔ∏è  Multimodal embeddings enabled (caption + image)")
    embedding_gen = MultimodalEmbeddingGenerator(
        caption_model=text_embedding_model,
        image_model=Config.CLIP_MODEL_NAME or "clip-ViT-B-32",
        batch_size=32,
        use_gpu=True,
    )
print("‚úÖ Embedding generator ready")

# ---- Temporal bootstrapper (stays None when disabled) --------------------
temporal_bootstrapper = None
if enable_temporal_bootstrapping:
    print("\n‚è±Ô∏è  Temporal bootstrapping enabled")
    bootstrap_kwargs = dict(
        temporal_window=5,
        confidence_threshold=0.5,
        consistency_weight=0.3,
        smoothing_sigma=1.5,
    )
    temporal_bootstrapper = TemporalBootstrapper(**bootstrap_kwargs)
    print("‚úÖ Temporal bootstrapper ready")

print("\n" + divider)
print("‚úÖ All components initialized!")


In [None]:
# ============================================================================
# STEP 8: Process Video End-to-End
# ============================================================================
# This step:
# 1. Extracts frames from video (removing redundant ones)
# 2. Generates captions using BLIP
# 3. Creates embeddings (caption + image if enabled)
# 4. Computes confidence scores with temporal bootstrapping
# 5. Uploads to Pinecone for semantic search
#
# Expected time: 2-5 minutes per minute of video (with GPU)
#
# Requires `video_path` from STEP 6. Prompts for a video name and date, runs
# engine.process_video(...) and prints the statistics it returns.

from datetime import datetime
from video_search_engine import VideoSearchEngine

# Force-enable object detection for object-focused captions.
# Defined before the guard so the later search cell can read the flag even
# when processing is skipped.
use_object_detection = True

if 'video_path' not in locals() or not video_path:
    print("‚ùå No video file available. Please run STEP 6 first.")
else:
    print("\n" + "=" * 80)
    print("üé¨ VIDEO PROCESSING PIPELINE")
    print("=" * 80)
    print(f"Video file: {video_path}")
    print("=" * 80)

    # Initialize engine
    engine = VideoSearchEngine()

    # Get video name and date
    video_name = input("\nüìù Enter a name for this video (e.g., 'camera1_office'): ").strip() or "video_upload"

    today_str = datetime.today().strftime("%Y-%m-%d")
    date_input = input("üìÖ Enter video date (YYYY-MM-DD, default=today): ").strip()
    if date_input:
        try:
            # Round-trip through strptime to validate and normalise the value
            # (e.g. "2025-1-5" becomes "2025-01-05").
            video_date = datetime.strptime(date_input, "%Y-%m-%d").strftime("%Y-%m-%d")
        except ValueError:
            print(f"Invalid date '{date_input}' (expected YYYY-MM-DD); using today's date {today_str}.")
            video_date = today_str
    else:
        video_date = today_str

    print("\nüéØ Object detection + captioning is ENABLED by default for this run.")

    # Process video
    print("\n‚è≥ Processing video (this may take a few minutes)...")
    stats = engine.process_video(
        video_path=video_path,
        video_name=video_name,
        video_date=video_date,
        save_frames=False,
        upload_to_pinecone=True,
        use_object_detection=use_object_detection
    )
    # process_video's return contract is not visible from this notebook; fall
    # back to an empty mapping so the summary prints zeros instead of raising.
    stats = stats or {}

    print("\n" + "=" * 80)
    print("‚úÖ VIDEO PROCESSING COMPLETE")
    print("=" * 80)
    print(f"\nÔøΩ Processing Statistics:")
    print(f"   Total frames extracted: {stats.get('total_frames_extracted', 0)}")
    print(f"   Frames with captions: {stats.get('frames_with_captions', 0)}")
    print(f"   Embeddings uploaded: {stats.get('embeddings_uploaded', 0)}")
    print(f"   Processing time: {stats.get('processing_time_seconds', 0):.1f} seconds")
    print(f"   Frame reduction: {stats.get('frame_reduction_percent', 0):.1f}%")
    print("\n‚úÖ Your video is now indexed and ready for search!")

üì§ Choose how to get your video:

1. Upload from computer (recommended for small files < 100MB)
2. Download from URL (direct video file)
3. Download from YouTube URL

Enter choice (1/2/3): 3

Enter YouTube URL (video or shorts): https://www.youtube.com/shorts/QhlroYnundk
‚¨áÔ∏è Downloading from YouTube...
   Installing/updating yt-dlp...
   Fetching video (this may take a minute)...
‚úÖ Downloaded successfully: youtube_video.mp4

üìπ Video ready: youtube_video.mp4 (0.7 MB)
   Duration: 7.6 seconds
   FPS: 30.0
   Total frames: 228


# STEP 9: Search Videos Using Natural Language Queries
# Use natural language to find moments in your video!

In [None]:
# ============================================================================
# STEP 9a: Initialize Search Engine & Run First Query
# ============================================================================
# Reuses the engine from STEP 8 when present (otherwise builds a new one),
# prompts for a natural-language query and prints up to five matches.

from video_search_engine import VideoSearchEngine

# Check if engine is already initialized from previous steps
if 'engine' not in locals() or engine is None:
    print("[INFO] Initializing VideoSearchEngine for search...")
    engine = VideoSearchEngine()
else:
    print("[INFO] Using existing VideoSearchEngine instance.")

print("üîç VIDEO SEMANTIC SEARCH")
print("=" * 60)
print("Enter natural language queries to find moments in your video")
print("Examples: 'person walking', 'red object', 'someone talking'\n")

query = ""
if not hasattr(engine, 'search') or not callable(engine.search):
    # Validate the engine first so the user is not asked for a query that
    # cannot be executed.
    print("‚ùå The engine object does not have a working 'search' method. Please check initialization.")
else:
    # First search query
    query = input("üîç Enter your search query: ").strip()

    if not query:
        print("No query provided.")
    else:
        print(f"\nSearching for: '{query}'...")
        print("=" * 60)

        try:
            results = engine.search(
                query=query,
                top_k=5,
                similarity_threshold=0.5
            )
            if results:
                print(f"\n‚úÖ Found {len(results)} results:\n")
                for i, result in enumerate(results, 1):
                    # Missing fields fall back to neutral defaults instead of raising.
                    time_formatted = result.get('time_formatted', '00:00')
                    similarity = result.get('similarity_score', 0)
                    caption = result.get('caption', 'N/A')

                    print(f"{i}. ‚è±Ô∏è  {time_formatted} (similarity: {similarity:.3f})")
                    print(f"   Caption: {caption}")
                    if result.get('thumbnail_path'):
                        print(f"   Thumbnail: {result.get('thumbnail_path')}")
                    print()
            else:
                print("‚ùå No results found. Try a different query or ensure videos are indexed.")
        except Exception as e:
            # Deliberately broad: report the failure and keep the notebook usable.
            print(f"‚ùå Search failed: {e}")

Enter a name for this video (or press Enter for auto-name): testfr


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.



üé¨ Processing video: testfr
üìÖ Video date: 2025-11-10
‚è≥ This will take a few minutes... Please wait.



preprocessor_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Extracting frames: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 228/228 [00:01<00:00, 184.89it/s]



üéØ OBJECT DETECTION + CAPTIONING MODE


preprocessor_config.json:   0%|          | 0.00/457 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/82.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/689M [00:00<?, ?B/s]




üì∏ Processing Frame: e6d072b6ba7b (t=0.03s)
   Found 1 objects


Processing frames:  17%|‚ñà‚ñã        | 1/6 [00:03<00:19,  3.94s/it]

   ‚îú‚îÄ Object 1: Backpack
   ‚îÇ  Caption: Backpack: A The back of a backpack on a green field.
   ‚îÇ  Namespace: backpack
   ‚îÇ  Confidence: 73.55%
   ‚îî‚îÄ ‚úì Generated 1 valid caption(s)

üì∏ Processing Frame: 6d8f1a3acd55 (t=1.50s)
   Found 2 objects
   ‚îú‚îÄ Object 1: Backpack
   ‚îÇ  Caption: Backpack: An A man with a backpack walking down the street.
   ‚îÇ  Namespace: backpack
   ‚îÇ  Confidence: 66.31%


Processing frames:  33%|‚ñà‚ñà‚ñà‚ñé      | 2/6 [00:04<00:08,  2.22s/it]

   ‚îú‚îÄ Object 2: Duffel
   ‚îÇ  Caption: Duffel: An a red backpack sitting on top of a green field.
   ‚îÇ  Namespace: duffel_bag
   ‚îÇ  Confidence: 26.02%
   ‚îî‚îÄ ‚úì Generated 2 valid caption(s)

üì∏ Processing Frame: 601addaeab1f (t=3.80s)
   Found 1 objects


Processing frames:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 3/6 [00:05<00:04,  1.57s/it]

   ‚îú‚îÄ Object 1: Backpack
   ‚îÇ  Caption: Backpack: An A person with a backpack on their back.
   ‚îÇ  Namespace: backpack
   ‚îÇ  Confidence: 69.57%
   ‚îî‚îÄ ‚úì Generated 1 valid caption(s)

üì∏ Processing Frame: 37cc49bdf07d (t=4.80s)
   Found 2 objects
   ‚îú‚îÄ Object 1: Backpack (skipped - no caption generated)


Processing frames:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 4/6 [00:06<00:02,  1.40s/it]

   ‚îú‚îÄ Object 2: Coat Jacket (skipped - no caption generated)

üì∏ Processing Frame: c91ef6f95a7b (t=6.43s)
   Found 2 objects
   ‚îú‚îÄ Object 1: Backpack (skipped - duplicate)


Processing frames:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 5/6 [00:08<00:01,  1.34s/it]

   ‚îú‚îÄ Object 2: File
   ‚îÇ  Caption: File: An a white refrigerator sitting on top of a wooden floor.
   ‚îÇ  Namespace: folder
   ‚îÇ  Confidence: 29.89%
   ‚îî‚îÄ ‚úì Generated 1 valid caption(s)

üì∏ Processing Frame: 810b80eb847b (t=7.23s)
   Found 4 objects
   ‚îú‚îÄ Object 1: Backpack (skipped - no caption generated)
   ‚îú‚îÄ Object 2: Suitcase Luggage (skipped - no caption generated)
   ‚îú‚îÄ Object 3: File (skipped - duplicate)


Processing frames: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6/6 [00:09<00:00,  1.60s/it]

   ‚îú‚îÄ Object 4: Coat Jacket
   ‚îÇ  Caption: Coat Jacket: An A man in a red jacket is walking down the street.
   ‚îÇ  Namespace: coat_jacket
   ‚îÇ  Confidence: 28.04%
   ‚îî‚îÄ ‚úì Generated 1 valid caption(s)





Batches:   0%|          | 0/1 [00:00<?, ?it/s]


‚òÅÔ∏è  UPLOADING TO PINECONE VECTOR DATABASE

üìÅ Namespace: videos:2025-11-10:backpack
   Uploading 2 vectors...


Uploading to Pinecone: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  1.05it/s]


   ‚úì Uploaded 2 vectors
   Sample caption: Backpack: A The back of a backpack on a green field....

üìÅ Namespace: videos:2025-11-10:duffel_bag
   Uploading 2 vectors...


Uploading to Pinecone: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  3.03it/s]


   ‚úì Uploaded 2 vectors
   Sample caption: Backpack: An A man with a backpack walking down the street....

üìÅ Namespace: videos:2025-11-10:folder
   Uploading 1 vectors...


Uploading to Pinecone: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  4.35it/s]


   ‚úì Uploaded 1 vectors
   Sample caption: File: An a white refrigerator sitting on top of a wooden floor....

üìÅ Namespace: videos:2025-11-10:coat_jacket
   Uploading 1 vectors...


Uploading to Pinecone: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  4.48it/s]


   ‚úì Uploaded 1 vectors
   Sample caption: Coat Jacket: An A man in a red jacket is walking down the street....

‚úÖ UPLOAD COMPLETE: 6 vectors uploaded for 'testfr'
   Sample vector IDs: e6d072b6ba7b_obj0_emb, 6d8f1a3acd55_obj1_emb, 6d8f1a3acd55_obj2_emb...



‚úÖ VIDEO PROCESSING COMPLETE!

üìä Processing Statistics:
   Video name: testfr
   Video date: 2025-11-10
   Frames extracted: 6
   Frames with captions: 6
   Captions before dedupe: 6
   Unique embeddings: 6
   ‚úÖ Actually uploaded: 6
   Processing time: 1.5 minutes

   Frame reduction: 0.0%


## Step 9: Search Your Video!

Now you can search for content using natural language queries.

**Example queries:**
- "person walking"
- "black bag"
- "someone talking on phone"
- "car driving"
- "red shirt"

The system will return timestamps where that content appears!

In [23]:
# Single search query with optional namespace filtering
#
# Prompts for a query and, when object detection is enabled, an optional
# object namespace. With a namespace the query goes straight to
# engine.pinecone_manager.query(...); otherwise engine.search(...) is used.
query = input("üîç Enter your search query: ").strip()

# STEP 8 only defines this flag when it runs; default to False instead of
# raising NameError when this cell is used on its own.
use_object_detection = globals().get('use_object_detection', False)

namespace_filter = ""
results = []

if not query:
    print("No query provided.")
else:
    # Optional: Search in specific object namespace
    if use_object_detection:
        print("\nüìÅ Available namespaces:")
        print("   - backpack, bag, duffel_bag")
        print("   - laptop, tablet")
        print("   - helmet, bottle, folder, umbrella")
        print("   - coat_jacket, suitcase_luggage")
        print("   - (leave empty to search all)")

        namespace_filter = input("\nFilter by namespace (optional): ").strip().lower()

    print(f"\nSearching for: '{query}'...")
    if namespace_filter:
        print(f"Namespace filter: {namespace_filter}")
    print("=" * 60)

    # Perform search with namespace if specified
    if namespace_filter and use_object_detection:
        # Search in specific namespace using Pinecone directly
        # NOTE(review): the upload log recorded in this notebook shows
        # namespaces of the form 'videos:<date>:<object>', while the bare
        # object name is passed here -- confirm which form query() expects.
        query_embedding = engine.embedding_generator.encode_query(query)
        search_results = engine.pinecone_manager.query(
            query_vector=query_embedding,
            top_k=5,
            namespace=namespace_filter,
            include_metadata=True
        )

        # Convert the returned match objects into the same dict layout that
        # the printing loop below reads.
        results = [
            {
                "timestamp": match.timestamp,
                "caption": match.caption,
                "similarity_score": match.score,
                "frame_id": match.frame_id,
                "video_name": match.video_name,
                "time_formatted": engine._format_timestamp(match.timestamp),
            }
            for match in search_results
        ]
    else:
        # Standard search across all namespaces
        results = engine.search(
            query=query,
            top_k=5,
            similarity_threshold=0.5
        )

    if results:
        print(f"\n‚úÖ Found {len(results)} results:\n")

        for i, result in enumerate(results, 1):
            # .get with defaults (as in STEP 9a) so a missing field in a
            # result does not abort the listing with KeyError.
            print(f"{i}. ‚è±Ô∏è Timestamp: {result.get('time_formatted', '00:00')}")
            print(f"   üìù Caption: {result.get('caption', 'N/A')}")
            print(f"   üìä Confidence: {result.get('similarity_score', 0):.1%}")
            print(f"   üé• Video: {result.get('video_name', 'N/A')}")
            print()
    else:
        print("\n‚ùå No results found. Try:")
        print("   - Different search terms")
        print("   - More general queries")
        print("   - Lowering the similarity threshold")
        if namespace_filter:
            print(f"   - Searching without namespace filter (currently: {namespace_filter})")

üîç Enter your search query: bag

üìÅ Available namespaces:
   - backpack, bag, duffel_bag
   - laptop, tablet
   - helmet, bottle, folder, umbrella
   - coat_jacket, suitcase_luggage
   - (leave empty to search all)

Filter by namespace (optional):  backpack

Searching for: 'bag'...
Namespace filter: backpack

‚úÖ Found 2 results:

1. ‚è±Ô∏è Timestamp: 00:03.80
   üìù Caption: Backpack: An A person with a backpack on their back.
   üìä Confidence: 81.1%
   üé• Video: test

2. ‚è±Ô∏è Timestamp: 00:00.03
   üìù Caption: Backpack: A The back of a backpack on a green field.
   üìä Confidence: 80.0%
   üé• Video: test



## ✅ Workflow Complete

You have successfully set up and used the Video Search Pipeline!

### What Happened

1. **Cloned repo & installed dependencies** — Set up the BLIP and Pinecone stack
2. **Configured Pinecone** — Set API credentials and connected to vector DB
3. **Optional: Launched React API server** — For frontend integration (kept running in background)
4. **Configured enhanced settings** — Selected captioning model, enabled dual embeddings and temporal bootstrapping
5. **Uploaded video** — From computer, URL, or YouTube
6. **Processed video end-to-end** — Extracted frames → captions → embeddings → uploaded to Pinecone
7. **Searched results** — Ran natural language queries using fusion search (text + image indices)

### Key Features Used

✅ **Dual Embeddings**: Both caption (text) and image (CLIP) embeddings stored and queried  
✅ **Fusion Search**: Combines text and image index results with configurable weights  
✅ **Temporal Bootstrapping**: Confidence scoring with temporal smoothing  
✅ **CLIP Dedupe**: Optional semantic frame deduplication (reduces redundancy)  
✅ **Thumbnails**: Saved and indexed in Pinecone metadata for UI display  

### For Next Time

- To process more videos, start again from "STEP 6: Upload Video File"
- To run more searches, just re-run the search cell and enter new queries
- To modify settings (model, weights, thresholds), restart and update the enhanced configuration cell

### Backend Architecture

```
Video → Extract Frames → Caption (BLIP) → Embeddings (dual: caption + image)
      → Temporal Bootstrap (confidence) → Prepare Payload
      → Upload to Pinecone (combined + text + image indices)
      → Query-time Fusion → Score Normalization & Ranking → Results
```

Enjoy exploring your videos! 🚀
