# Download Dependencies
MiDaS requires the `timm` package.

In [None]:
pip install timm

Collecting timm
  Downloading timm-0.5.4-py3-none-any.whl (431 kB)
[?25l[K     |▊                               | 10 kB 34.8 MB/s eta 0:00:01[K     |█▌                              | 20 kB 41.2 MB/s eta 0:00:01[K     |██▎                             | 30 kB 38.0 MB/s eta 0:00:01[K     |███                             | 40 kB 28.0 MB/s eta 0:00:01[K     |███▉                            | 51 kB 21.6 MB/s eta 0:00:01[K     |████▋                           | 61 kB 24.4 MB/s eta 0:00:01[K     |█████▎                          | 71 kB 25.4 MB/s eta 0:00:01[K     |██████                          | 81 kB 26.3 MB/s eta 0:00:01[K     |██████▉                         | 92 kB 28.1 MB/s eta 0:00:01[K     |███████▋                        | 102 kB 30.1 MB/s eta 0:00:01[K     |████████▍                       | 112 kB 30.1 MB/s eta 0:00:01[K     |█████████▏                      | 122 kB 30.1 MB/s eta 0:00:01[K     |█████████▉                      | 133 kB 30.1 MB/s eta 0:00:01

# Connect to Google Drive

In [None]:
import os
from google.colab import drive
drive.mount('/content/gdrive')

os.chdir('/content/gdrive/My Drive/detectron')
!ls

Mounted at /content/gdrive
coco_test  coco_train  coco_validate  output


# Import

In [None]:
import cv2
import torch
import numpy as np

import matplotlib.pyplot as plt

# Load model
Load a model (see [https://github.com/intel-isl/MiDaS/#Accuracy](https://github.com/intel-isl/MiDaS/#Accuracy) for an overview)

In [None]:
# Available MiDaS variants (set model_type to one of these):
#   "DPT_Large"    MiDaS v3 - Large     (highest accuracy, slowest inference speed)
#   "DPT_Hybrid"   MiDaS v3 - Hybrid    (medium accuracy, medium inference speed)
#   "MiDaS_small"  MiDaS v2.1 - Small   (lowest accuracy, highest inference speed)
model_type = "DPT_Large"

# Fetch the selected model (code and weights) through torch.hub.
midas = torch.hub.load(repo_or_dir="intel-isl/MiDaS", model=model_type)

Downloading: "https://github.com/intel-isl/MiDaS/archive/master.zip" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt" to /root/.cache/torch/hub/checkpoints/dpt_large-midas-2f21e586.pt


  0%|          | 0.00/1.28G [00:00<?, ?B/s]

Move model to GPU if available

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the chosen device and switch it to inference mode.
# Both calls return the module, so the model summary is still the cell output.
midas.to(device).eval()

DPTDepthModel(
  (pretrained): Module(
    (model): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=1024, out_features=3072, bias=True)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=1024, out_features=1024, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (drop_path): Identity()
          (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=1024, out_features=4096, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (fc2): Linear(in_features=4

Load the transforms that resize and normalize the image for the large or small model

In [None]:
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

# The two DPT variants share one preprocessing pipeline; any other model
# type uses the small-model pipeline.
transform = (
    midas_transforms.dpt_transform
    if model_type in ("DPT_Large", "DPT_Hybrid")
    else midas_transforms.small_transform
)

Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


# Utility Function

In [None]:
def NormalizeDepth(depth):
  """Rescale a depth map in place so that its values span [0, 255].

  The smallest value in `depth` is mapped to 0 and the largest to 255;
  everything in between is scaled linearly.

  Args:
    depth: writable NumPy array of depth values. It is modified in place.

  Returns:
    None. The normalised values are written back into `depth`.

  Notes:
    * The extrema are taken from the data itself, so maps whose values all
      lie above 1000 or below -1000 are normalised correctly.
    * A constant map has zero range; it is set to all zeros rather than
      dividing by zero.
    * An empty array is left untouched.
  """
  if depth.size == 0:
    return

  lowest = depth.min()
  span = depth.max() - lowest

  if span == 0:
    # Every pixel has the same value: there is no contrast to stretch.
    depth[...] = 0
    return

  # Vectorised equivalent of the per-pixel formula (value - min) / range * 255.
  depth[...] = (depth - lowest) / span * 255

# Depth Estimation on all images

In [None]:
def EstimateDepth(path, out_path):
  """Estimate a depth map for every image in `path` and save it to `out_path`.

  Each depth map is written under the same file name as its source image.
  Files that already have a same-named counterpart in `out_path` are
  skipped, so an interrupted run can simply be restarted.

  Uses the notebook-level `midas`, `transform` and `device` objects and the
  `NormalizeDepth` helper.

  Args:
    path: directory containing the input images; sub-directories are ignored.
    out_path: directory that receives the depth images; created if missing.
  """
  os.makedirs(out_path, exist_ok=True)

  # A set makes the "already processed?" check O(1) per file.
  already_done = set(os.listdir(out_path))

  for item in os.listdir(path):
    filename = os.path.join(path, item)
    if os.path.isdir(filename):
      continue

    if item in already_done:
      print(item + " - depth already estimated, moving on...")
      continue

    img = cv2.imread(filename)
    if img is None:
      # cv2.imread returns None (rather than raising) when the file is not
      # a readable image; skip it instead of crashing in cvtColor.
      print(item + " - could not be read as an image, skipping...")
      continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    input_batch = transform(img).to(device)

    with torch.no_grad():
      prediction = midas(input_batch)

    # Resize the prediction back to the original image resolution.
    prediction = torch.nn.functional.interpolate(
      prediction.unsqueeze(1),
      size=img.shape[:2],
      mode="bicubic",
      align_corners=False,
      ).squeeze()

    output = prediction.cpu().numpy()

    NormalizeDepth(output)

    # Convert explicitly to 8-bit before saving instead of relying on
    # imwrite's implicit conversion of float data.
    depth_u8 = np.clip(np.rint(np.nan_to_num(output)), 0, 255).astype(np.uint8)

    dimg = cv2.cvtColor(depth_u8, cv2.COLOR_GRAY2RGB)
    if not cv2.imwrite(os.path.join(out_path, item), dimg):
      print(item + " - failed to write depth image")

Predict and resize to original resolution

In [None]:
# Generate depth maps for the training images (paths are relative to the
# current working directory).
EstimateDepth(path="coco_train/data", out_path="coco_train/depth")

Test_5 112.jpg - depth already estimated, moving on...
Test_26 245.jpg - depth already estimated, moving on...
Test_26 100.jpg - depth already estimated, moving on...
Test_17 020.jpg - depth already estimated, moving on...
Test_14 096.jpg - depth already estimated, moving on...
Test_16 231.jpg - depth already estimated, moving on...
Test_21 100.jpg - depth already estimated, moving on...
Test_16 015.jpg - depth already estimated, moving on...
Test_24 011.jpg - depth already estimated, moving on...
Test_20 039.jpg - depth already estimated, moving on...
acrylic_batch_1_112.jpg - depth already estimated, moving on...
Test_21 177.jpg - depth already estimated, moving on...
Test_16 274.jpg - depth already estimated, moving on...
Test_25 044.jpg - depth already estimated, moving on...
Test_2 204.jpg - depth already estimated, moving on...
Test_24 044.jpg - depth already estimated, moving on...
Test_3 233.jpg - depth already estimated, moving on...
Test_3 256.jpg - depth already estimated, m

### References
[Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer](https://arxiv.org/abs/1907.01341)

[Vision Transformers for Dense Prediction](https://arxiv.org/abs/2103.13413)