# Depth Predictions

In [1]:
import cv2
import sys
import time
import numpy as np

# Extend the import search path so the project modules imported below can be
# found (presumably they live under ../tools - confirm).
# NOTE(review): the entry is relative to the working directory, and the
# os.chdir call further down changes that directory; confirm that imports made
# in later cells still resolve as intended.
sys.path.append('../tools')

from monodepth2.infer import load_model
from fps_utils import run_first_phase_model
from tracktor_utils import tracker_obj

import os

# Switch the working directory to the parent folder; later cells use paths
# relative to it (e.g. './samples/mot16.webm').
# NOTE(review): this call is relative, so re-running this cell moves up one
# more directory each time - run it exactly once per kernel session.
os.chdir('../')

In [2]:
# Clip used by every benchmark below; the path is relative to the working
# directory selected in the setup cell.
video = "./samples/mot16.webm"

## Monodepth FPS

In [3]:
# Load the monodepth networks once, outside the timed loop.
encoder, depth_decoder, (feed_width, feed_height) = load_model("mono+stereo_1024x320")

# Bundle everything run_first_phase_model receives for the monodepth benchmark.
inference = {
    'name': 'monodepth',
    'encoder': encoder,
    'depth_decoder': depth_decoder,
    'input_size': (feed_width, feed_height),
}

-> Loading model from  models/mono+stereo_1024x320
   Loading pretrained encoder
   Loading pretrained decoder


In [4]:
# Open the benchmark clip and read its stream metadata.
cap = cv2.VideoCapture(video)
if not cap.isOpened():
    # Fail fast: with an unopened capture the timing loop below is skipped
    # silently and the reported FPS would be meaningless.
    raise IOError(f"Could not open video: {video}")

w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))  # frame rate of the source clip, not of the model
n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # frame count as reported by the container

In [5]:
# Time the monodepth model over the whole clip. Frame decoding (cap.read) sits
# inside the timed region, so the measured FPS includes decode cost.
frames_processed = 0
start = time.perf_counter()  # monotonic clock, suited to measuring intervals

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read failure): stop timing.
            break

        run_first_phase_model(frame, inference)
        frames_processed += 1

    duration = time.perf_counter() - start
finally:
    # Release the capture even if the model raises mid-benchmark.
    cap.release()

# Use the number of frames actually processed rather than the container's
# reported frame count, which may not match what was decoded.
n_frames = frames_processed

In [6]:
# Throughput: frames in the clip divided by the wall-clock seconds of the loop above.
monodepth_fps = n_frames / duration

# Report the frame geometry (height x width) alongside the measured rate.
report = f"frame Size: {h}x{w}\nFPS: {monodepth_fps}"
print(report)

frame Size: 540x960
FPS: 21.85879561908524


## Mannequin FPS

In [7]:
from mannequinchallenge.infer import infer_depth as mannequin_infer

In [8]:
# Warm-up: push one random frame through the model so that the one-time
# network loading happens here rather than inside the timed loop.
# NOTE(review): no model dict is passed, so this presumably selects the
# mannequin model by default - confirm in fps_utils.
warmup_frame = np.random.randint(255, size=(900, 800, 3), dtype=np.uint8)
run_first_phase_model(warmup_frame);

./monoculardepth/mannequinchallenge/checkpoints/test_local/best_depth_Ours_Bilinear_inc_3_net_G.pth
---------- Networks initialized -------------
DataParallel(
  (module): HourglassModel(
    (seq): Sequential(
      (0): Conv2d(3, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
      (3): Channels4(
        (list): ModuleList(
          (0): Sequential(
            (0): AvgPool2d(kernel_size=2, stride=2, padding=0)
            (1): inception[[32], [3, 32, 32], [5, 32, 32], [7, 32, 32]]
            (2): inception[[32], [3, 32, 32], [5, 32, 32], [7, 32, 32]]
            (3): Channels3(
              (list): ModuleList(
                (0): Sequential(
                  (0): AvgPool2d(kernel_size=2, stride=2, padding=0)
                  (1): inception[[32], [3, 32, 32], [5, 32, 32], [7, 32, 32]]
                  (2): inception[[64], [3, 32, 64], [5, 32, 64], [7, 

In [9]:
# Re-open the benchmark clip from the start for the mannequin run.
cap = cv2.VideoCapture(video)
if not cap.isOpened():
    # Fail fast: with an unopened capture the timing loop below is skipped
    # silently and the reported FPS would be meaningless.
    raise IOError(f"Could not open video: {video}")

w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))  # frame rate of the source clip, not of the model
n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # frame count as reported by the container

In [10]:
# Time the first-phase model (called without a model dict) over the whole clip.
# Frame decoding (cap.read) sits inside the timed region, so the measured FPS
# includes decode cost.
frames_processed = 0
start = time.perf_counter()  # monotonic clock, suited to measuring intervals

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read failure): stop timing.
            break

        run_first_phase_model(frame)
        frames_processed += 1

    duration = time.perf_counter() - start
finally:
    # Release the capture even if the model raises mid-benchmark.
    cap.release()

# Use the number of frames actually processed rather than the container's
# reported frame count, which may not match what was decoded.
n_frames = frames_processed

In [11]:
# Throughput: frames in the clip divided by the wall-clock seconds of the loop above.
mannequin_fps = n_frames / duration

# Report the frame geometry (height x width) alongside the measured rate.
report = f"frame Size: {h}x{w}\nFPS: {mannequin_fps}"
print(report)

frame Size: 540x960
FPS: 5.656180823425704


# Segmentation/Object Tracking Predictions

In [6]:
from fps_utils import run_second_phase_model

## YOLACT FPS

In [13]:
# Warm-up: push one random frame through the second-phase model so that the
# one-time model loading happens here rather than inside the timed loop.
warmup_frame = np.random.randint(255, size=(900, 800, 3), dtype=np.uint8)
run_second_phase_model(warmup_frame)

-> Loading model from  models/yolact_plus_resnet50_54_800000.pth
   Loading pretrained model


In [14]:
# Re-open the benchmark clip from the start for the YOLACT run.
cap = cv2.VideoCapture(video)
if not cap.isOpened():
    # Fail fast: with an unopened capture the timing loop below is skipped
    # silently and the reported FPS would be meaningless.
    raise IOError(f"Could not open video: {video}")

w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))  # frame rate of the source clip, not of the model
n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # frame count as reported by the container

In [15]:
# Time the second-phase model (called without a tracker) over the whole clip.
# Frame decoding (cap.read) sits inside the timed region, so the measured FPS
# includes decode cost.
frames_processed = 0
start = time.perf_counter()  # monotonic clock, suited to measuring intervals

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read failure): stop timing.
            break

        run_second_phase_model(frame)
        frames_processed += 1

    duration = time.perf_counter() - start
finally:
    # Release the capture even if the model raises mid-benchmark.
    cap.release()

# Use the number of frames actually processed rather than the container's
# reported frame count, which may not match what was decoded.
n_frames = frames_processed

In [16]:
# Throughput: frames in the clip divided by the wall-clock seconds of the loop above.
yolact_fps = n_frames / duration

# Report the frame geometry (height x width) alongside the measured rate.
report = f"frame Size: {h}x{w}\nFPS: {yolact_fps}"
print(report)

frame Size: 540x960
FPS: 5.845065278487446


## Tracker FPS

In [11]:
# Build the tracker before timing so its construction is not part of the
# measured loop. NOTE(review): the path presumably points at the
# tracking_wo_bnw checkout relative to the working directory - confirm.
tracker_root = "./tracking_wo_bnw"
tracker = tracker_obj(tracker_root)

# Reset the tracker before the benchmark run.
tracker.reset()

In [12]:
# Re-open the benchmark clip from the start for the tracker run.
cap = cv2.VideoCapture(video)
if not cap.isOpened():
    # Fail fast: with an unopened capture the timing loop below is skipped
    # silently and the reported FPS would be meaningless.
    raise IOError(f"Could not open video: {video}")

w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))  # frame rate of the source clip, not of the model
n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # frame count as reported by the container

In [13]:
# Time the second-phase model with the tracker attached over the whole clip.
# Frame decoding (cap.read) sits inside the timed region, so the measured FPS
# includes decode cost.
frames_processed = 0
start = time.perf_counter()  # monotonic clock, suited to measuring intervals

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read failure): stop timing.
            break

        run_second_phase_model(frame, tracker)
        frames_processed += 1

    duration = time.perf_counter() - start
finally:
    # Release the capture even if the model or tracker raises mid-benchmark.
    cap.release()

# Use the number of frames actually processed rather than the container's
# reported frame count, which may not match what was decoded.
n_frames = frames_processed

In [14]:
# Throughput: frames in the clip divided by the wall-clock seconds of the loop above.
tracker_fps = n_frames / duration

# Report the frame geometry (height x width) alongside the measured rate.
report = f"frame Size: {h}x{w}\nFPS: {tracker_fps}"
print(report)

frame Size: 540x960
FPS: 2.7322334210962897
