# Step 0: Install dependencies

In [6]:
# Install the third-party packages for this notebook into the running kernel's
# environment (%pip, unlike !pip, targets the environment the kernel uses).
%pip install torch timm opencv-python matplotlib

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 23.0
[notice] To update, run: python.exe -m pip install --upgrade pip


# Step 1: Import Dependencies

In [7]:
import cv2 as cv
import time
import torch
import timm
import numpy as np

# Step 2: Initialize the model using PyTorch

Select a model by uncommenting exactly one of the `model_type` lines below and leaving the others commented out.

In [8]:
# Select exactly one of the following models (keep the other two commented out):
#model_type = "DPT_Large" # Highest quality model
#model_type = "DPT_Hybrid" # Average quality model
model_type = "MiDaS_small" # Lowest quality model

# Use one consistently-cased repo identifier for every torch.hub call.
# torch.hub names its cache directory after this string, so mixing
# 'intel-isl/midas' and 'intel-isl/MiDaS' yields two differently named cache
# directories (two copies of the repo on a case-sensitive filesystem).
MIDAS_REPO = 'intel-isl/MiDaS'

# Download the model (or reuse the cached copy)
midas = torch.hub.load(MIDAS_REPO, model_type, pretrained=True)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

midas.to(device)
midas.eval()  # switch the model to evaluation mode before running predictions

# Download the input transformations published in the same repo
midas_transform = torch.hub.load(MIDAS_REPO, 'transforms')

# The two DPT models share one transform; every other model type uses the small one
if model_type in ("DPT_Large", "DPT_Hybrid"):
    transform = midas_transform.dpt_transform
else:
    transform = midas_transform.small_transform

Using cache found in C:\Users\kiric/.cache\torch\hub\intel-isl_midas_master
Using cache found in C:\Users\kiric/.cache\torch\hub\intel-isl_MiDaS_master


# Step 3: Create Depth Map

In [9]:
def create_depth_map(frame):
    """Run the MiDaS model on one frame and return a colour-mapped depth image.

    Uses the notebook-level ``midas`` model, ``transform`` and ``device``.

    Args:
        frame: Image in BGR channel order (it is converted with COLOR_BGR2RGB).

    Returns:
        The uint8 image produced by ``cv.applyColorMap`` with the JET colour
        map; the prediction is resized to the input frame's height and width
        before colouring.
    """
    # Convert BGR -> RGB before handing the frame to the MiDaS transform
    rgb_frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
    target_size = rgb_frame.shape[:2]

    # Preprocess the frame and move the resulting batch to the model's device
    model_input = transform(rgb_frame).to(device)

    # Inference only: no gradients are needed
    with torch.no_grad():
        raw_prediction = midas(model_input)
        # Resize the prediction back to the frame's (height, width)
        resized_prediction = torch.nn.functional.interpolate(
            raw_prediction.unsqueeze(1),
            size=target_size,
            mode='bicubic',
            align_corners=False,
        ).squeeze()

    # Bring the result back to host memory as a numpy array
    depth_values = resized_prediction.cpu().numpy()

    # Min-max scale to the 0-1 range, then stretch to 8-bit for the colour map
    unit_depth = cv.normalize(depth_values, None, 0, 1, norm_type=cv.NORM_MINMAX, dtype=cv.CV_32F)
    depth_bytes = (unit_depth * 255).astype(np.uint8)

    return cv.applyColorMap(depth_bytes, cv.COLORMAP_JET)

# Step 4: Choose an Input Source for the MiDaS Model

You can run the model on images, on a video, or on the live feed from a camera connected to your computer.

### Webcam Solution

In [10]:
# Open the default camera (device index 0)
capture = cv.VideoCapture(0)

if not capture.isOpened():
    print("Could not open the webcam (device index 0).")

# try/finally guarantees the camera is released and the windows are closed
# even if an exception is raised while processing a frame.
try:
    while capture.isOpened():

        # Capture frame; stop when the camera fails to deliver one, because
        # `frame` is then None and cannot be processed.
        status, frame = capture.read()
        if not status or frame is None:
            print("Failed to read a frame from the webcam; stopping.")
            break

        # Time only the depth estimation. perf_counter() is a high-resolution
        # clock meant for measuring short intervals.
        start = time.perf_counter()
        depth_map = create_depth_map(frame)
        total_time = time.perf_counter() - start

        # Guard against a zero interval so the FPS computation cannot divide by zero
        fps = 1 / total_time if total_time > 0 else 0.0

        # Draw the same overlay on both images, then show each in its own window
        for window_name, image in (('Webcam', frame), ('Depth Map', depth_map)):
            cv.putText(image, "Press 'Spacebar' to quit", (10, 30), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
            cv.putText(image, f'FPS: {int(fps)}', (20,70), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
            cv.imshow(window_name, image)

        # Close the windows on Spacebar (mask to the low byte before comparing)
        if (cv.waitKey(5) & 0xFF) == ord(' '):
            break
finally:
    # Release device and destroy all windows
    capture.release()
    cv.destroyAllWindows()
