# Generate Synthetic Depth Maps using Depth-Anything-V2

**Purpose**: Generate synthetic depth maps from RGB images using the Depth-Anything-V2 model

**Model**: Depth-Anything-V2-Large (from HuggingFace)

**Expected Time**: 20-30 minutes on GPU for ~400 images

**Output**: Synthetic depth maps for train/val/test splits

**Usage**: Prerequisite for A.4a (Synthetic Depth Only) and A.4b (RGB+Synthetic Depth)

---

## 1. Setup & Environment

In [None]:
import os
import sys
from pathlib import Path
import time

# Detect whether the notebook runs on Kaggle (its input mount exists) or locally
IS_KAGGLE = os.path.exists('/kaggle/input')
print(f"Running on: {'Kaggle' if IS_KAGGLE else 'Local'}")

# Resolve the working root and the RGB dataset location for this environment
if not IS_KAGGLE:
    BASE_PATH = Path(r'd:\Work\Assisten Dosen\Anylabel\Experiments')
    RGB_DATASET_PATH = BASE_PATH.joinpath('datasets', 'ffb_localization')
else:
    BASE_PATH = Path('/kaggle/working')
    RGB_DATASET_PATH = Path('/kaggle/input/ffb-localization')  # Adjust to your Kaggle dataset

# Raw synthetic depth maps are written below the working root
SYNTHETIC_DEPTH_PATH = BASE_PATH.joinpath('datasets', 'depth_synthetic_da2_raw')

# Switch the process into the working root, then report the resolved locations
os.chdir(BASE_PATH)
print(f"Working directory: {os.getcwd()}")
print(f"RGB dataset: {RGB_DATASET_PATH}")
print(f"Output path: {SYNTHETIC_DEPTH_PATH}")

In [None]:
# Install dependencies
# NOTE: the leading "!" is an IPython/Jupyter shell escape, so this line only
# works when executed as a notebook cell, not as plain Python.
!pip install -q transformers torch torchvision pillow numpy opencv-python tqdm

In [None]:
# Import libraries
import torch
import numpy as np
import cv2
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
from tqdm.auto import tqdm

# Report the runtime and pick the inference device (CUDA when present, else CPU)
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if not torch.cuda.is_available():
    # CPU fallback keeps the notebook runnable without a GPU
    print("⚠️ GPU not available, using CPU (will be slow)")
    device = torch.device('cpu')
else:
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
    device = torch.device('cuda')

## 2. Load Depth-Anything-V2 Model

Loading from HuggingFace: `depth-anything/Depth-Anything-V2-Large`

In [None]:
# Checkpoint identifier passed to both from_pretrained() calls below
MODEL_NAME = "depth-anything/Depth-Anything-V2-Large"

print(f"Loading model: {MODEL_NAME}")
print("This may take a few minutes on first run...\n")

# Image processor that belongs to the checkpoint
processor = AutoImageProcessor.from_pretrained(MODEL_NAME)

# Load the weights, move them to the selected device and switch to eval mode;
# nn.Module.to() and nn.Module.eval() both return the module, so they chain.
model = AutoModelForDepthEstimation.from_pretrained(MODEL_NAME).to(device).eval()

print("✅ Model loaded successfully")
print(f"Model device: {next(model.parameters()).device}")

## 3. Dataset Scanning

In [None]:
# Scan RGB images
def scan_rgb_images(base_path, extensions=('.png', '.jpg', '.jpeg')):
    """Collect the RGB image files of the train/val/test splits.

    Looks in ``base_path / 'images' / <split>`` for each split and prints a
    per-split count followed by the grand total.

    Args:
        base_path: Dataset root (a ``pathlib.Path``) that contains ``images/``.
        extensions: File suffixes to accept. Matching is case-insensitive so
            the result does not depend on whether the filesystem is
            case-sensitive.

    Returns:
        dict mapping each split name to a list of image paths. Files are
        grouped in the order given by ``extensions`` and sorted within each
        group; a split whose directory is missing maps to an empty list.
    """
    # De-duplicate while preserving order so a file is never listed twice
    wanted = list(dict.fromkeys(ext.lower() for ext in extensions))

    images = {}
    for split in ('train', 'val', 'test'):
        split_path = base_path / 'images' / split
        if not split_path.is_dir():
            print(f"⚠️ Path not found: {split_path}")
            images[split] = []
            continue

        # Bucket regular files by lower-cased suffix; directories are ignored
        by_ext = {ext: [] for ext in wanted}
        for entry in split_path.iterdir():
            suffix = entry.suffix.lower()
            if suffix in by_ext and entry.is_file():
                by_ext[suffix].append(entry)

        img_files = [path for ext in wanted for path in sorted(by_ext[ext])]
        images[split] = img_files
        print(f"{split.upper()}: {len(img_files)} images")

    total = sum(len(v) for v in images.values())
    print(f"\nTotal images: {total}")
    return images

# Build the split -> image-path mapping that the generation and
# visualization cells iterate over.
print("Scanning RGB images...\n")
rgb_images = scan_rgb_images(RGB_DATASET_PATH)

## 4. Generate Synthetic Depth Maps

Process each RGB image and generate the corresponding depth map.

In [None]:
def generate_depth(rgb_image_path, model, processor, device):
    """Run the depth model on one RGB image and return a full-size depth map.

    Args:
        rgb_image_path: Path of the image file to open.
        model: Depth-estimation model; it is called with the processor's
            tensors and its output must expose ``predicted_depth``.
        processor: Image processor, called as
            ``processor(images=..., return_tensors="pt")``.
        device: Torch device the input tensors are moved to.

    Returns:
        NumPy array with the prediction resized to the image's
        (height, width) using bicubic interpolation.
    """
    rgb = Image.open(rgb_image_path).convert('RGB')
    target_hw = (rgb.height, rgb.width)

    # Preprocess, then move every tensor onto the inference device
    batch = processor(images=rgb, return_tensors="pt")
    model_inputs = {}
    for name, tensor in batch.items():
        model_inputs[name] = tensor.to(device)

    # Forward pass without autograd bookkeeping
    with torch.no_grad():
        raw_depth = model(**model_inputs).predicted_depth

    # Bicubic interpolate() needs a 4-D input, hence the extra axis at dim 1
    # (presumably predicted_depth is (batch, H, W) - confirm against the model).
    resized = torch.nn.functional.interpolate(
        raw_depth.unsqueeze(1),
        size=target_hw,
        mode="bicubic",
        align_corners=False,
    )

    # Drop the singleton axes and hand back a host-side array
    return resized.squeeze().cpu().numpy()

def save_depth(depth_np, output_path):
    """Save a depth map as a min-max normalized 16-bit image.

    The map is rescaled per image so that its minimum becomes 0 and its
    maximum becomes 65535, then written with ``cv2.imwrite``. The parent
    directory is created if needed.

    Args:
        depth_np: NumPy array with the depth values of one image.
        output_path: Destination ``pathlib.Path``; the file format is chosen
            by OpenCV from the suffix.

    Raises:
        ValueError: If the depth map contains NaN or infinite values.
        OSError: If OpenCV reports that the file could not be written.
    """
    depth_min = depth_np.min()
    depth_max = depth_np.max()
    depth_range = depth_max - depth_min

    # NaN/inf would otherwise be cast to arbitrary uint16 values
    if not np.isfinite(depth_range):
        raise ValueError(f"Depth map for {output_path} contains non-finite values")

    if depth_range > 0:
        depth_normalized = (depth_np - depth_min) / depth_range
        depth_u16 = (depth_normalized * 65535).astype(np.uint16)
    else:
        # Flat map: avoid 0/0 and store an all-zero image instead
        depth_u16 = np.zeros(depth_np.shape, dtype=np.uint16)

    output_path.parent.mkdir(parents=True, exist_ok=True)

    # imwrite signals failure through its return value, not an exception
    if not cv2.imwrite(str(output_path), depth_u16):
        raise OSError(f"cv2.imwrite failed to write {output_path}")
    
# Confirmation that this cell ran and the helper functions are now defined
print("Helper functions defined ✓")

In [None]:
# Process all images: run the model on every scanned RGB file, write one depth
# map per image under SYNTHETIC_DEPTH_PATH/<split>/, and report a summary.
start_time = time.time()
stats = {'success': 0, 'failed': 0}

for split, img_list in rgb_images.items():
    if not img_list:
        print(f"Skipping {split} (no images)\n")
        continue

    print(f"\n{'='*60}")
    print(f"Processing {split.upper()} set ({len(img_list)} images)")
    print(f"{'='*60}\n")

    for rgb_path in tqdm(img_list, desc=f"{split}"):
        try:
            # Generate depth
            depth_np = generate_depth(rgb_path, model, processor, device)

            # Always write a .png: reusing a .jpg name would route the uint16
            # data through OpenCV's JPEG encoder, which does not store 16-bit
            # images, and the verification step only counts *.png files.
            output_path = (SYNTHETIC_DEPTH_PATH / split / rgb_path.name).with_suffix('.png')
            save_depth(depth_np, output_path)

            stats['success'] += 1

        except Exception as e:
            # Deliberate best-effort: one bad image must not abort the whole run
            print(f"\n❌ Failed: {rgb_path.name} - {str(e)}")
            stats['failed'] += 1

elapsed_time = time.time() - start_time

print(f"\n{'='*60}")
print("GENERATION COMPLETE")
print(f"{'='*60}")
print(f"Success: {stats['success']}")
print(f"Failed: {stats['failed']}")
print(f"Total time: {elapsed_time/60:.2f} minutes")
# Guard the average: with zero successes the division would raise
if stats['success']:
    print(f"Avg time per image: {elapsed_time/stats['success']:.2f} seconds")
else:
    print("Avg time per image: n/a (no images were processed successfully)")
print(f"\nOutput saved to: {SYNTHETIC_DEPTH_PATH}")

## 5. Verification & Visualization

In [None]:
# Count the PNG depth maps that exist for each split
print("Verifying generated depth maps...\n")

for split in ('train', 'val', 'test'):
    depth_path = SYNTHETIC_DEPTH_PATH / split

    # Guard clause: nothing was written for this split
    if not depth_path.exists():
        print(f"{split.upper()}: No output directory")
        continue

    depth_files = list(depth_path.glob('*.png'))
    print(f"{split.upper()}: {len(depth_files)} depth maps generated")

In [None]:
# Visualize sample
import matplotlib.pyplot as plt

def visualize_rgb_depth_pair(rgb_path, depth_path):
    """Show an RGB image and its synthetic depth map side by side.

    Args:
        rgb_path: Path of the RGB image file.
        depth_path: Path of the depth map file; it is read unchanged so the
            stored bit depth is kept.

    Raises:
        OSError: If either file cannot be read by OpenCV.
    """
    # cv2.imread returns None instead of raising when a file cannot be read
    rgb = cv2.imread(str(rgb_path))
    if rgb is None:
        raise OSError(f"Could not read RGB image: {rgb_path}")
    # OpenCV loads colour images as BGR; matplotlib expects RGB
    rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)

    depth = cv2.imread(str(depth_path), cv2.IMREAD_UNCHANGED)
    if depth is None:
        raise OSError(f"Could not read depth map: {depth_path}")

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    axes[0].imshow(rgb)
    axes[0].set_title('RGB Image')
    axes[0].axis('off')

    im = axes[1].imshow(depth, cmap='turbo')
    axes[1].set_title('Synthetic Depth')
    axes[1].axis('off')
    plt.colorbar(im, ax=axes[1], label='Depth (relative)')

    plt.tight_layout()
    plt.show()

# Show up to 3 samples from the train set
train_rgb = rgb_images['train']
if train_rgb:
    print("Sample visualizations from train set:\n")
    for i, rgb_path in enumerate(train_rgb[:3]):
        same_name = SYNTHETIC_DEPTH_PATH / 'train' / rgb_path.name
        # Prefer a .png depth file (16-bit capable), and fall back to a file
        # that carries the exact RGB file name.
        candidates = (same_name.with_suffix('.png'), same_name)
        depth_path = next((c for c in candidates if c.exists()), None)
        if depth_path is None:
            # Report the gap instead of skipping silently
            print(f"Sample {i+1}: {rgb_path.name} - depth map not found, skipping")
            continue
        print(f"Sample {i+1}: {rgb_path.name}")
        visualize_rgb_depth_pair(rgb_path, depth_path)
else:
    print("No train images to visualize")

## 6. Next Steps

After generating synthetic depth maps:

1. Run `prepare_synthetic_depth_data.py` to organize and process depth maps
2. This will create the `depth_synthetic_da2` dataset (3-channel, 0-255 normalized)
3. Then you can run A.4a (Synthetic Depth Only) and A.4b (RGB+Synthetic Depth) experiments

**Output Location**:
- Raw synthetic depth (16-bit): `datasets/depth_synthetic_da2_raw/`
- Processed (3-channel, 8-bit): `datasets/depth_synthetic_da2/` (after running prepare script)

---

## Summary

This notebook:
1. ✅ Loaded Depth-Anything-V2-Large model
2. ✅ Generated synthetic depth maps for all RGB images
3. ✅ Saved as 16-bit PNG files
4. ✅ Visualized samples

**Performance**: Depth-Anything-V2 is a state-of-the-art monocular depth estimation model that can generate high-quality depth maps from single RGB images.

**Next**: Run the preparation script (`prepare_synthetic_depth_data.py`) to normalize and format the depth maps for YOLO training.