# AIC Video Retrieval System - Setup & Installation

This notebook sets up the complete AIC video retrieval system from GitHub and installs all dependencies.
It's designed to run independently on any cloud platform (Colab, Kaggle, etc.).

## Features
- 🚀 Self-contained setup from GitHub
- 📦 Automatic dependency installation
- 🔧 Environment validation
- 💾 GPU detection and setup
- 🗂️ Directory structure creation

In [None]:
# Check if we're running in Colab/Kaggle or other cloud platform
import os
import sys
import subprocess
from pathlib import Path

# Detect environment
# Colab is recognised by its `google.colab` package being loaded (or the
# COLAB_GPU variable being set); Kaggle by the KAGGLE_URL_BASE variable.
IN_COLAB = 'google.colab' in sys.modules
IN_KAGGLE = 'kaggle' in os.environ.get('KAGGLE_URL_BASE', '')
IN_CLOUD = IN_COLAB or IN_KAGGLE or 'COLAB_GPU' in os.environ

print(f"Environment detected:")
print(f"  Google Colab: {IN_COLAB}")
print(f"  Kaggle: {IN_KAGGLE}")
print(f"  Cloud Platform: {IN_CLOUD}")

# Set working directory
# `/content` is Colab's scratch directory; Kaggle kernels write to
# `/kaggle/working` instead. Everywhere else the current directory is used.
if IN_KAGGLE:
    _preferred_work_dir = Path('/kaggle/working')
elif IN_CLOUD:
    _preferred_work_dir = Path('/content')
else:
    _preferred_work_dir = Path.cwd()

# Fall back to the current directory when the platform default is missing, so
# the later `os.chdir(WORK_DIR)` cannot fail on an unexpected image layout.
WORK_DIR = _preferred_work_dir if _preferred_work_dir.is_dir() else Path.cwd()
print(f"  Working directory: {WORK_DIR}")

## Step 1: Clone Repository

In [None]:
# Configuration
REPO_URL = "https://github.com/danielqvu/AIC_FTML_dev.git"  # Update this to your repo URL
REPO_NAME = "AIC_FTML_dev"
REPO_DIR = WORK_DIR / REPO_NAME

# Clone or update repository
if REPO_DIR.exists():
    print(f"Repository already exists at {REPO_DIR}")
    print("Updating repository...")
    os.chdir(REPO_DIR)
    !git pull origin main
else:
    print(f"Cloning repository to {REPO_DIR}")
    os.chdir(WORK_DIR)
    !git clone {REPO_URL}
    
# Change to repo directory
os.chdir(REPO_DIR)
print(f"Current directory: {os.getcwd()}")

# Add to Python path
if str(REPO_DIR) not in sys.path:
    sys.path.insert(0, str(REPO_DIR))
print("✅ Repository setup complete")

## Step 2: System Information & GPU Setup

In [None]:
# Check system information
print("=== System Information ===")
!python --version
!pip --version

# Check GPU availability
print("\n=== GPU Information ===")
try:
    import torch
    print(f"PyTorch version: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"CUDA version: {torch.version.cuda}")
        print(f"GPU count: {torch.cuda.device_count()}")
        for i in range(torch.cuda.device_count()):
            print(f"  GPU {i}: {torch.cuda.get_device_name(i)}")
            print(f"    Memory: {torch.cuda.get_device_properties(i).total_memory / 1e9:.1f} GB")
except ImportError:
    print("PyTorch not yet installed")

# Memory information
print("\n=== Memory Information ===")
!free -h 2>/dev/null || echo "Memory info not available on this system"

# Disk space
print("\n=== Disk Space ===")
!df -h . | head -2

## Step 3: Install Dependencies

In [None]:
# Install system dependencies
print("Installing system dependencies...")
if IN_COLAB:
    !apt-get update -q
    !apt-get install -y ffmpeg libsm6 libxext6 libfontconfig1 libxrender1

print("\n=== Installing Python Dependencies ===")

# Upgrade pip first
!pip install --upgrade pip setuptools wheel

# Install requirements
if Path("requirements.txt").exists():
    print("Installing from requirements.txt...")
    !pip install -r requirements.txt
else:
    print("No requirements.txt found. Installing core dependencies...")
    # Core dependencies for the AIC system
    core_packages = [
        "torch",
        "torchvision", 
        "transformers",
        "sentence-transformers",
        "faiss-cpu",  # or faiss-gpu if available
        "pandas",
        "numpy",
        "pillow",
        "opencv-python",
        "tqdm",
        "scikit-learn",
        "matplotlib",
        "seaborn",
        "ipywidgets",
        "jupyterlab"
    ]
    
    for package in core_packages:
        print(f"Installing {package}...")
        !pip install {package}

print("✅ Dependencies installation complete")

## Step 4: Install GPU-Optimized FAISS (if available)

In [None]:
# Try to install GPU FAISS if CUDA is available
def _run_pip(*pip_args):
    """Run pip inside the kernel's interpreter and report whether it succeeded.

    Args:
        *pip_args: Arguments passed to ``python -m pip`` (e.g. "install", "pkg").

    Returns:
        True when pip exits with status 0, False otherwise. pip's combined
        stdout/stderr is echoed so the cell still shows the install log.
    """
    completed = subprocess.run(
        [sys.executable, "-m", "pip", *pip_args],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    print(completed.stdout, end="")
    return completed.returncode == 0


try:
    import torch
    if torch.cuda.is_available():
        print("CUDA detected. Installing GPU-optimized FAISS...")
        # Remove any existing FAISS build first, as the original cell did.
        _run_pip("uninstall", "-y", "faiss-cpu", "faiss-gpu")
        if _run_pip("install", "faiss-gpu"):
            print("✅ GPU FAISS installed")
        else:
            # pip failures do not raise, so the exit status is checked
            # explicitly; restore the CPU build so `import faiss` still works.
            print("Warning: Could not install GPU FAISS: pip exited with an error")
            print("Falling back to CPU FAISS")
            if not _run_pip("install", "faiss-cpu"):
                print("❌ Could not reinstall faiss-cpu; FAISS is currently not installed")
    else:
        print("No CUDA available. Using CPU FAISS.")
except Exception as e:
    print(f"Warning: Could not install GPU FAISS: {e}")
    print("Falling back to CPU FAISS")

## Step 5: Validate Installation

In [None]:
# Verify that every third-party dependency can be imported
print("=== Testing Core Imports ===")
try:
    import torch
    import torchvision
    import transformers
    import sentence_transformers
    import faiss
    import pandas as pd
    import numpy as np
    from PIL import Image
    import cv2
    print("✅ All core imports successful")

    # Report the version of each library that exposes one
    print(f"\nVersions:")
    versioned_libs = (
        ("PyTorch", torch),
        ("Transformers", transformers),
        ("Sentence Transformers", sentence_transformers),
        ("FAISS", faiss),
        ("OpenCV", cv2),
    )
    for lib_label, lib in versioned_libs:
        print(f"  {lib_label}: {lib.__version__}")

except ImportError as import_error:
    print(f"❌ Import error: {import_error}")

# Verify that the repository's own modules can be imported
print("\n=== Testing Project Imports ===")
try:
    # Project-level settings module
    import config
    print("✅ Config module imported")

    # Expose the `src` directory on the import path as well
    src_dir = Path.cwd() / "src"
    sys.path.append(str(src_dir))
    from src.models.clip_encoder import CLIPEncoder
    print("✅ CLIP encoder imported")

    from src.indexing.vector_index import VectorIndex
    print("✅ Vector index imported")

except ImportError as import_error:
    print(f"⚠️ Project import warning: {import_error}")
    print("This may be normal if some modules are missing")

## Step 6: Setup Directory Structure

In [None]:
# Create necessary directories
print("=== Setting up Directory Structure ===")

directories = [
    "data",
    "data/dataset_metadata", 
    "artifacts",
    "output",
    "logs",
    "temp"
]

for directory in directories:
    dir_path = Path(directory)
    dir_path.mkdir(parents=True, exist_ok=True)
    print(f"✅ Created/verified: {directory}")

# Show current directory structure
print("\n=== Current Project Structure ===")
!find . -type d -name ".*" -prune -o -type d -print | head -20

## Step 7: Configuration Test

In [None]:
# Test configuration and create simple config if needed
print("=== Configuration Test ===")

# Template for a minimal config.py, written only when no config.py exists.
# torch is imported inside the template because DEVICE depends on it; if torch
# is unavailable the generated config falls back to CPU instead of failing.
config_content = """
from pathlib import Path

# Directories
ARTIFACT_DIR = Path("./artifacts")
DATA_DIR = Path("./data")
OUTPUT_DIR = Path("./output")

# Model settings
MODEL_NAME = "openai/clip-vit-base-patch32"
BATCH_SIZE = 32
try:
    import torch
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
except ImportError:
    DEVICE = "cpu"

# Search settings
DEFAULT_K = 100
RERANK_TOP_K = 1000
"""

try:
    import config
except Exception as config_error:
    print("⚠️ Config module not found or has issues")
    print(f"  Reason: {type(config_error).__name__}: {config_error}")

    # Mode "x" refuses to overwrite: an existing (possibly hand-edited)
    # config.py that merely fails to import must not be replaced silently.
    try:
        with open("config.py", "x", encoding="utf-8") as f:
            f.write(config_content)
    except FileExistsError:
        print("  Existing config.py left untouched; fix the error above and re-run this cell")
    else:
        print("✅ Created basic config.py")
else:
    print(f"✅ Config loaded")
    # getattr keeps a config without these optional names from being treated
    # as broken.
    print(f"  Artifact dir: {getattr(config, 'ARTIFACT_DIR', 'Not set')}")
    print(f"  Model name: {getattr(config, 'MODEL_NAME', 'Not set')}")

print("\n🎉 Setup complete! You can now proceed to the next notebooks.")
print("\nNext steps:")
print("1. Use 02_data_processing.ipynb to download and process data")
print("2. Use 03_search_and_evaluation.ipynb to test search functionality")
print("3. Use 04_training_and_reranking.ipynb to improve search results")

## Quick Health Check

In [None]:
# Final health check
print("=== Final Health Check ===")

# Check Python environment
print(f"Python: {sys.version}")
print(f"Working directory: {os.getcwd()}")

# Check if key files exist (paths are relative to the current directory)
key_files = [
    "config.py",
    "src/models/clip_encoder.py",
    "src/indexing/vector_index.py",
    "smart_pipeline.py",
    "search.py"
]

print("\nKey files check:")
for file_path in key_files:
    exists = Path(file_path).exists()
    status = "✅" if exists else "❌"
    print(f"  {status} {file_path}")

# Check GPU setup
print("\nGPU Status:")
try:
    import torch
    if torch.cuda.is_available():
        device = torch.cuda.get_device_name(0)
        # total_memory is reported in bytes; show it in decimal gigabytes
        memory = torch.cuda.get_device_properties(0).total_memory / 1e9
        print(f"  ✅ GPU available: {device} ({memory:.1f}GB)")
    else:
        print(f"  ⚠️ No GPU available, using CPU")
except Exception as gpu_error:
    # Catch Exception rather than everything so Ctrl-C still interrupts the
    # cell, and show the cause instead of hiding it.
    print(f"  ❌ Could not check GPU status")
    print(f"     Reason: {type(gpu_error).__name__}: {gpu_error}")

print("\n" + "="*50)
print("🚀 SETUP COMPLETE! Ready for video retrieval tasks.")
print("="*50)