# Depth estimation with Depth-Anything-V2-Large

Depth Anything V2 is trained on 595K synthetic labeled images and 62M+ real unlabeled images, providing the most capable monocular depth estimation (MDE) model with the following features:
- more fine-grained details than Depth Anything V1
- more robust than Depth Anything V1 and SD-based models (e.g., Marigold, Geowizard)
- more efficient (10x faster) and more lightweight than SD-based models
- impressive fine-tuned performance with our pre-trained models

## Installation

Run the following commands from this folder:

```bash
git clone https://huggingface.co/spaces/depth-anything/Depth-Anything-V2
cp -r Depth-Anything-V2/depth_anything_v2 .
pip install -r Depth-Anything-V2/requirements.txt
```

## Usage

Download the [model](https://huggingface.co/depth-anything/Depth-Anything-V2-Large/resolve/main/depth_anything_v2_vitl.pth?download=true) first and put it in a `checkpoints` directory inside this folder, so that it is available as `checkpoints/depth_anything_v2_vitl.pth`.

```python
# Minimal usage example: load the checkpoint and run inference on one image.
import cv2
import torch

from depth_anything_v2.dpt import DepthAnythingV2

# Build the model with the 'vitl' encoder configuration.
model = DepthAnythingV2(encoder='vitl', features=256, out_channels=[256, 512, 1024, 1024])
# The checkpoint is deserialized onto the CPU (map_location='cpu').
model.load_state_dict(torch.load('checkpoints/depth_anything_v2_vitl.pth', map_location='cpu'))
# Switch the module to evaluation mode before inference.
model.eval()

# Replace 'your/image/path' with the path of the image to process.
raw_img = cv2.imread('your/image/path')
depth = model.infer_image(raw_img) # HxW raw depth map
```



**For more details, please refer to [the official instructions](https://huggingface.co/depth-anything/Depth-Anything-V2-Large).**

### Imports

In [None]:
import cv2
import os
import os.path as osp
import torch
from tqdm import tqdm
import pickle

from depth_anything_v2.dpt import DepthAnythingV2

%load_ext autoreload
%autoreload 2

In [None]:
# Run on CUDA when PyTorch reports an available GPU, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Large variant (encoder='vitl')
vitl_config = dict(
    encoder='vitl',
    features=256,
    out_channels=[256, 512, 1024, 1024],
)
model = DepthAnythingV2(**vitl_config)

# The checkpoint is deserialized onto the CPU; the module is moved to
# `device` only after the weights have been loaded.
model.load_state_dict(
    torch.load('checkpoints/depth_anything_v2_vitl.pth', map_location='cpu')
)
model.eval()
model.to(device)

print("Model loaded!")

In [None]:
# Input/output locations used by the rest of the notebook.
# Pickled annotations; the keys of the loaded object are used as video names.
ANNOTATIONS_PATH = "../../resources/annotations_public.pkl"
# Folder holding the `<video_name>.mp4` files.
VIDEOS_ROOT = "/home/marek/datasets/coool-benchmark"          # <---- UPDATE THIS ONE
RESULTS_FOLDER = "../../resources/depth-estimation"  # Folder to save depth captures

# Create the output folder together with any missing parents.
# `exist_ok=True` makes the call idempotent and avoids the check-then-create
# race of a separate existence test; it also raises FileExistsError if the
# path exists but is not a directory instead of silently carrying on.
os.makedirs(RESULTS_FOLDER, exist_ok=True)

In [None]:
def load_pickle(file_path: str) -> dict:
    """Read the pickle file at ``file_path`` and return the deserialized object.

    NOTE: unpickling can execute arbitrary code, so only call this on files
    that come from a trusted source.
    """
    with open(file_path, mode='rb') as pickle_file:
        loaded = pickle.load(pickle_file)
    return loaded


In [None]:
# Save one depth map per `int(fps)` frames (roughly one per second of footage)
# for every video listed in the annotations.
# Relies on names defined in earlier cells: `model`, `load_pickle`,
# `ANNOTATIONS_PATH`, `VIDEOS_ROOT` and `RESULTS_FOLDER`.
annotations = load_pickle(ANNOTATIONS_PATH)

# Check if video folder exists
if not osp.exists(VIDEOS_ROOT):
    raise FileNotFoundError(f"Videos folder does not exist: {VIDEOS_ROOT}")

# The annotation keys are the video names (file stems of the .mp4 files).
video_names = sorted(annotations)
if not video_names:
    raise ValueError("No videos found in the annotations.")

# Process each video
for video_name in tqdm(video_names):
    video_path = osp.join(VIDEOS_ROOT, f"{video_name}.mp4")

    if not osp.exists(video_path):
        print(f"Warning: Video file not found: {video_path}. Skipping.")
        continue

    video_stream = cv2.VideoCapture(video_path)
    # try/finally guarantees the capture handle is released on every exit
    # path, including the early `continue`s below and unexpected exceptions.
    try:
        if not video_stream.isOpened():
            print(f"Error: Video {video_name} could not be opened. Skipping.")
            continue

        fps = video_stream.get(cv2.CAP_PROP_FPS)
        # Number of frames between two saved depth maps. A reported rate
        # below 1 (including 0 or NaN) would otherwise produce a modulo by
        # zero, so such videos are skipped with a warning instead.
        frame_step = int(fps) if fps >= 1 else 0
        if frame_step < 1:
            print(
                f"Warning: Video {video_name} reports an unusable frame rate "
                f"({fps}). Skipping."
            )
            continue

        frame = 0
        while video_stream.isOpened():
            ret, frame_image = video_stream.read()
            if not ret:
                break

            # Process frame at 1-second intervals
            if frame % frame_step == 0:
                # Best-effort per frame: a failure is reported and the loop
                # moves on to the next frame.
                try:
                    depth = model.infer_image(frame_image)
                    img_path = osp.join(RESULTS_FOLDER, f"{video_name}_{frame}.jpeg")
                    # NOTE(review): `depth` is written exactly as returned by
                    # `infer_image`, without rescaling - confirm the JPEG
                    # encoder handles its dtype/value range as intended.
                    # `cv2.imwrite` signals failure through its return value
                    # rather than by raising, so check it explicitly.
                    if not cv2.imwrite(img_path, depth):
                        print(f"Error: Could not write depth image: {img_path}")
                except Exception as e:
                    print(f"Error processing frame {frame} of video {video_name}: {e}")

            frame += 1
    finally:
        video_stream.release()

print("Processing complete.")