In [4]:
import cv2
import torch
import numpy as np

# Depth-map export for a video file: run the MiDaS small model on every frame
# and write the min-max normalized prediction to a grayscale video.

# Load MiDaS model
midas = torch.hub.load('intel-isl/MiDaS', 'MiDaS_small')
midas.to('cpu')
midas.eval()

# Load MiDaS transforms
transforms = torch.hub.load('intel-isl/MiDaS', 'transforms')
transform = transforms.small_transform

# Path to the video file
video_path = '/Users/adil/Desktop/Codes/Image Captioning/videos/video.mp4'
output_video_path = 'output_depth_map_video.mp4'

# Set to True to watch the input frame and its depth map while processing
# (press 'q' in one of the windows to stop early).
show_preview = False

# Create VideoCapture object
cap = cv2.VideoCapture(video_path)
out = None

# try/finally guarantees the capture and the writer are released even when the
# cell fails or is interrupted, so the output file is always finalized.
try:
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video file: {video_path}")

    # Get the width, height, and frames per second (fps) of the video
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS) or 30  # Default to 30fps if the container reports none

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for mp4 files
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height), isColor=False)
    if not out.isOpened():
        raise RuntimeError(f"Could not open video writer for: {output_video_path}")

    while True:
        ret, frame = cap.read()
        if not ret:
            # End of the video (or a read failure): stop processing.
            break

        # Transforming the image: OpenCV delivers BGR, the transform is fed RGB
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        imgbatch = transform(img).to('cpu')

        # Only the inference needs gradient tracking disabled
        with torch.no_grad():
            prediction = midas(imgbatch)

            # Resize the prediction back to the frame's height and width
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode='bicubic',
                align_corners=False
            )

        # Drop the batch and channel axes explicitly; a bare squeeze() would also
        # collapse a frame dimension of size 1.
        # NOTE(review): assumes the transform yields a single-image batch - confirm.
        output = prediction.squeeze(1).squeeze(0).cpu().numpy()

        # Scale to 0..255 per frame and convert to 8-bit for the grayscale writer
        output_normalized = cv2.normalize(output, None, 0, 255, cv2.NORM_MINMAX)
        output_normalized = np.uint8(output_normalized)

        # Write the frame to the output video
        out.write(output_normalized)

        if show_preview:
            cv2.imshow('Depth Prediction', output_normalized)
            cv2.imshow('CV2Frame', frame)
            # waitKey is only polled while windows are shown; without a window
            # it cannot receive key presses.
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release everything if the job is finished
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()


Using cache found in /Users/adil/.cache/torch/hub/intel-isl_MiDaS_master


Loading weights:  None


Using cache found in /Users/adil/.cache/torch/hub/rwightman_gen-efficientnet-pytorch_master
Using cache found in /Users/adil/.cache/torch/hub/intel-isl_MiDaS_master


In [1]:
import cv2
import torch
import numpy as np

# Live depth estimation from a mobile camera stream: run the MiDaS small model
# on each received frame, show the frame and its depth map, and record the
# min-max normalized depth maps to a grayscale video.

# Load MiDaS model
midas = torch.hub.load('intel-isl/MiDaS', 'MiDaS_small')
midas.to('cpu')
midas.eval()

# Load MiDaS transforms
transforms = torch.hub.load('intel-isl/MiDaS', 'transforms')
transform = transforms.small_transform

# Mobile camera stream URL
address = 'http://192.168.1.9:8081/video'

# Create VideoCapture object for the mobile stream
cap = cv2.VideoCapture()
cap.open(address)

out = None

# try/finally guarantees the stream and the output file are released even when
# the cell fails or the kernel is interrupted.
try:
    # Check if the stream opened successfully. Raise instead of calling exit():
    # inside a notebook exit() shuts the kernel down rather than just stopping the cell.
    if not cap.isOpened():
        raise RuntimeError("Error: Could not open video stream")

    # Get the width, height, and frames per second (fps) of the video (you can update fps if known)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS) or 30  # Default to 30fps if the stream does not provide fps

    # Define the codec and output path for saving the depth map output
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for mp4 files
    output_video_path = 'output_depth_map_mobile_stream.mp4'

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame")
            break

        if out is None:
            # Create the writer from the first decoded frame: the stream may not
            # report its dimensions, and the writer's frame size should match
            # the frames actually written.
            height, width = frame.shape[:2]
            out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height), isColor=False)
            if not out.isOpened():
                # Keep the live preview running even if recording is unavailable.
                print(f"Warning: Could not open video writer for {output_video_path}; depth maps will not be saved")

        # Transforming the frame for MiDaS: OpenCV delivers BGR, the transform is fed RGB
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        imgbatch = transform(img).to('cpu')

        # Only the inference needs gradient tracking disabled
        with torch.no_grad():
            # Make depth prediction using MiDaS
            prediction = midas(imgbatch)

            # Interpolate to the size of the original image
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode='bicubic',
                align_corners=False
            )

        # Drop the batch and channel axes explicitly; a bare squeeze() would also
        # collapse a frame dimension of size 1.
        # NOTE(review): assumes the transform yields a single-image batch - confirm.
        output = prediction.squeeze(1).squeeze(0).cpu().numpy()

        # Convert depth map to a visual format (0..255, 8-bit)
        output_normalized = cv2.normalize(output, None, 0, 255, cv2.NORM_MINMAX)
        output_normalized = np.uint8(output_normalized)

        # Write the depth map to the output video
        out.write(output_normalized)

        # Display the original frame and the depth map in separate windows
        cv2.imshow('Original Frame', frame)
        cv2.imshow('Depth Map', output_normalized)

        # Press 'q' in one of the windows to stop
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release the video stream and output file
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()

Using cache found in /Users/adil/.cache/torch/hub/intel-isl_MiDaS_master
  from .autonotebook import tqdm as notebook_tqdm
Using cache found in /Users/adil/.cache/torch/hub/rwightman_gen-efficientnet-pytorch_master


Loading weights:  None


Using cache found in /Users/adil/.cache/torch/hub/intel-isl_MiDaS_master
[mjpeg @ 0x13f562670] overread 7
[mjpeg @ 0x13f562670] overread 4
[mjpeg @ 0x13f562670] overread 8
[mjpeg @ 0x13f562670] overread 8
[mjpeg @ 0x13f562670] overread 8
[mjpeg @ 0x13f562670] overread 8
[mjpeg @ 0x13f562670] overread 8


KeyboardInterrupt: 

: 