In [1]:
# Setup
# Install the pinned timm release into the running kernel's environment
# (%pip, unlike !pip, targets the kernel's interpreter).
# NOTE(review): presumably 0.6.7 is the version the ZoeDepth/MiDaS backbone
# needs -- confirm before bumping the pin.
%pip install timm==0.6.7
# One-time checkout of the ZoeDepth sources plus its sanity check; left
# commented out so re-running this cell does not clone again.
# !git clone https://github.com/isl-org/ZoeDepth.git
# !cd ZoeDepth && python sanity.py

# Append ZoeDepth to path so `import zoedepth...` resolves to the local clone.
# The entry is relative, so it is resolved against the kernel's current
# working directory at import time.
import sys
sys.path.append('ZoeDepth')

Note: you may need to restart the kernel to use updated packages.


In [35]:
# Imports
# From URL
from zoedepth.utils.misc import colorize
import matplotlib.pyplot as plt
import cv2
import torch
import pathlib
import os
from tqdm import tqdm
import numpy as np

# Constants
# All paths are derived from the kernel's working directory: GIT_ROOT is two
# directory levels above it, and the dataset lives under <GIT_ROOT>/data/AIED2024.
CWD = pathlib.Path.cwd()
GIT_ROOT = CWD.parent.parent
DATA_DIR = GIT_ROOT.joinpath("data", "AIED2024")

In [2]:
# Load the ZoeDepth model through torch.hub and move it to the compute device.
# References:
# https://huggingface.co/spaces/shariqfarooq/ZoeDepth/blob/962e87ab4eb0e419610ddf512841dec2558dcf7c/geometry.py#L3

# Querying the MiDaS hub entry with force_reload=True triggers a fresh download
# of the MiDaS repo into the torch.hub cache on every run of this cell.
# NOTE(review): presumably a workaround for a stale/incompatible cached MiDaS
# checkout -- confirm whether it is still needed, since it costs a download.
torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True)  # Triggers fresh download of MiDaS repo
repo = "isl-org/ZoeDepth"

##### Select model
# Exactly one variant is loaded; the alternatives are kept commented out so the
# variant can be switched by moving the comment markers.
# Zoe_N
# model_zoe_n = torch.hub.load(repo, "ZoeD_N", pretrained=True)

# # Zoe_K
# model_zoe_k = torch.hub.load(repo, "ZoeD_K", pretrained=True)

# Zoe_NK
model_zoe_nk = torch.hub.load(repo, "ZoeD_NK", pretrained=True)

##### sample prediction
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
# `zoe` is the handle the inference cell below calls `infer_pil` on.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
zoe = model_zoe_nk.to(DEVICE)


Downloading: "https://github.com/intel-isl/MiDaS/zipball/master" to /home/nicole/.cache/torch/hub/master.zip
Using cache found in /home/nicole/.cache/torch/hub/isl-org_ZoeDepth_main


img_size [384, 512]


Using cache found in /home/nicole/.cache/torch/hub/intel-isl_MiDaS_master
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Params passed to Resize transform:
	width:  512
	height:  384
	resize_target:  True
	keep_aspect_ratio:  True
	ensure_multiple_of:  32
	resize_method:  minimal
Using pretrained resource url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt
Loaded successfully
Overwriting config with config_version kitti
img_size [384, 768]


Using cache found in /home/nicole/.cache/torch/hub/isl-org_ZoeDepth_main
Using cache found in /home/nicole/.cache/torch/hub/intel-isl_MiDaS_master


Params passed to Resize transform:
	width:  768
	height:  384
	resize_target:  True
	keep_aspect_ratio:  True
	ensure_multiple_of:  32
	resize_method:  minimal
Using pretrained resource url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt
Loaded successfully
img_size [384, 512]


Using cache found in /home/nicole/.cache/torch/hub/isl-org_ZoeDepth_main
Using cache found in /home/nicole/.cache/torch/hub/intel-isl_MiDaS_master


Params passed to Resize transform:
	width:  512
	height:  384
	resize_target:  True
	keep_aspect_ratio:  True
	ensure_multiple_of:  32
	resize_method:  minimal
Using pretrained resource url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt
Loaded successfully


In [39]:
# # Local file
# from PIL import Image
# image = Image.open("/path/to/image.jpg").convert("RGB")  # load
# depth_numpy = zoe.infer_pil(image)  # as numpy

# depth_pil = zoe.infer_pil(image, output_type="pil")  # as 16-bit PIL Image

# depth_tensor = zoe.infer_pil(image, output_type="tensor")  # as torch tensor

# Run ZoeDepth over every frame of the input video, preview the RGB|depth
# side-by-side image, and (optionally) record two videos:
#   * rgbd.mp4  -- original frame and colorized depth, horizontally stacked
#   * depth.mp4 -- 8-bit grayscale depth, linearly scaled from [0, DEPTH_MAX_M]

# Load video
vid_file = DATA_DIR / "block-a-blue-day1-first-group-cam1.mp4"
if not vid_file.exists():
    # Explicit exception instead of `assert`, which is stripped under `python -O`.
    raise FileNotFoundError(f"Input video not found: {vid_file}")
cap = cv2.VideoCapture(str(vid_file))
if not cap.isOpened():
    raise RuntimeError(f"OpenCV could not open video: {vid_file}")
fps = cap.get(cv2.CAP_PROP_FPS)
LENGTH = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# LENGTH = 10

# Take the frame size from the stream itself: a VideoWriter whose declared
# size does not match the frames it is given does not produce a valid video.
FRAME_WIDTH = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
FRAME_HEIGHT = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

RECORD = True
SHOW_PREVIEW = True  # set False on headless machines where cv2.imshow is unavailable

# Depth value mapped to white (255) in depth.mp4; larger values are clipped.
# NOTE(review): assumes `depth` is metric and that 10 suits this indoor
# footage -- tune to the scene / confirm against the model's output range.
DEPTH_MAX_M = 10.0

# Reset
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

out = None
out_depth = None
try:
    if RECORD:
        # Define the codec and create VideoWriter objects
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output_file = DATA_DIR / 'rgbd.mp4'
        out = cv2.VideoWriter(str(output_file), fourcc, fps, (FRAME_WIDTH * 2, FRAME_HEIGHT))
        depth_output_file = DATA_DIR / 'depth.mp4'
        out_depth = cv2.VideoWriter(str(depth_output_file), fourcc, fps, (FRAME_WIDTH, FRAME_HEIGHT), isColor=False)
        if not (out.isOpened() and out_depth.isOpened()):
            raise RuntimeError(f"Could not open output video writers in {DATA_DIR}")

    for i in tqdm(range(LENGTH)):

        # Load frame (OpenCV decodes to BGR channel order)
        ret, frame = cap.read()
        if not ret:
            break

        # Generate depth. The model's own example feeds it an RGB image, so
        # convert from OpenCV's BGR before inference.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        depth = zoe.infer_pil(frame_rgb)
        colored_depth = colorize(depth) # [h,w,4]

        # Stitch together. Everything shown/written through OpenCV must be BGR,
        # so drop alpha and swap channels of the colorized map.
        colored_depth_bgr = cv2.cvtColor(colored_depth, cv2.COLOR_RGBA2BGR)
        img = cv2.hconcat([frame, colored_depth_bgr])

        if SHOW_PREVIEW:
            cv2.imshow('frame', img)
            # cv2.imshow('frame', frame)
            # cv2.imshow('depth', colored_depth)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        if RECORD:
            out.write(img)
            # The grayscale writer takes 8-bit frames by default, so scale the
            # depth map into [0, 255] and cast to uint8 (not uint16).
            depth_u8 = (np.clip(depth / DEPTH_MAX_M, 0.0, 1.0) * 255).astype(np.uint8)
            out_depth.write(depth_u8)
finally:
    # Always free the capture, flush the writers and close the preview window,
    # even when the loop is interrupted or an exception is raised.
    cap.release()
    if out is not None:
        out.release()
    if out_depth is not None:
        out_depth.release()
    cv2.destroyAllWindows()

# # Example URL
# URL = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS4W8H_Nxk_rs3Vje_zj6mglPOH7bnPhQitBH8WkqjlqQVotdtDEG37BsnGofME3_u6lDk&usqp=CAU"

# depth = zoe.infer_pil(image)

# colored_depth = colorize(depth)
# fig, axs = plt.subplots(1,2, figsize=(15,7))
# for ax, im, title in zip(axs, [image, colored_depth], ['Input', 'Predicted Depth']):
#   ax.imshow(im)
#   ax.axis('off')
#   ax.set_title(title)

# # Save raw
# from zoedepth.utils.misc import save_raw_16bit
# fpath = "/path/to/output.png"
# save_raw_16bit(depth, fpath)

# # Colorize output
# from zoedepth.utils.misc import colorize

# colored = colorize(depth)

# # save colored output
# fpath_colored = "/path/to/output_colored.png"
# Image.fromarray(colored).save(fpath_colored)

100%|██████████| 13464/13464 [1:22:27<00:00,  2.72it/s]
