In [2]:
from transformers import pipeline
from PIL import Image
import requests
import numpy as np
import cv2

# Build the Hugging Face depth-estimation pipeline once and bind it to `pipe`.
pipe = pipeline(
    task="depth-estimation",
    model="LiheYoung/depth-anything-small-hf",
)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Render one grayscale depth video per input clip.
#
# For every clip `data/MOVIE/videos/<i>.npy`, each frame is run through `pipe`,
# the resulting depth map is normalised to [0, 1] and written as a 3-channel
# uint8 frame to `depth/image_<i>_depth.mp4` (mp4v codec, 12 fps).

for i in range(1):
    data = np.load('data/MOVIE/videos/'+str(i)+'.npy')

    name = 'depth/image_'+str(i)+'_depth.mp4'

    # The writer is opened on the first frame so that its frame size matches
    # the array that is actually written; cv2.VideoWriter silently drops
    # frames whose size differs from the size it was opened with.
    # Consequence: a clip with no frames produces no output file.
    vid = None
    try:
        for frame in data:
            image = Image.fromarray(frame)

            # inference
            depth = np.array(pipe(image)["depth"])

            # Normalise to [0, 1]; an all-zero map stays all-zero instead of
            # turning into NaN through a division by zero.
            peak = depth.max()
            depth = depth/peak if peak > 0 else np.zeros(depth.shape)

            gray = depth*255
            save = np.stack((gray, gray, gray), 2).astype(np.uint8)

            if vid is None:
                height, width = save.shape[:2]
                frameSize = (width, height)  # OpenCV expects (width, height)
                vid = cv2.VideoWriter(name, cv2.VideoWriter_fourcc(*'mp4v'), 12, frameSize)
                if not vid.isOpened():
                    # e.g. the output directory is missing or the codec is unavailable
                    raise IOError("could not open video writer for " + name)

            vid.write(save)
    finally:
        # Release the writer even when inference fails part-way through a clip.
        if vid is not None:
            vid.release()

# Sobel Filter

In [56]:
from skimage import filters, color, morphology
import skimage
import os
import cv2
import matplotlib.pyplot as plt
from skimage.filters import threshold_multiotsu

# Sobel-edge and multi-Otsu segmentation of the estimated depth maps.
#
# For every clip `data/MOVIE/videos/<i>.npy`, the selected frames are run
# through `pipe` and four images are written to `depth/images/<i>/`:
#   <j>_sobel_raw.jpg           normalised Sobel gradient magnitude (false colour)
#   <j>_sobel_segmentation.jpg  connected non-edge regions (false colour)
#   <j>_depth.jpg               normalised depth map (grayscale)
#   <j>_multi_otsu.jpg          multi-Otsu classes of the depth map (false colour)

# Number of frames processed per clip. The default of 1 matches the earlier
# behaviour of stopping after the first frame; set to None to process all frames.
MAX_FRAMES_PER_VIDEO = 1

# Per-channel gains used to spread values over distinct colours.
FALSE_COLOR_GAINS = (234, 345, 567)


def _false_color(values):
    """Map a 2-D array to a 3-channel uint8 image using per-channel gains.

    Each channel is ``values * gain`` truncated to an integer and wrapped
    modulo 256. The wrap is done explicitly on integers so the result does not
    depend on how the platform casts out-of-range floats to uint8.
    """
    channels = [(values * gain).astype(np.int64) % 256 for gain in FALSE_COLOR_GAINS]
    return np.stack(channels, 2).astype(np.uint8)


for i in range(1):
    data = np.load('data/MOVIE/videos/'+str(i)+'.npy')

    # Create the output directory (and any missing parents). An existing
    # directory is fine; any other failure is raised instead of being swallowed.
    out_dir = "depth/images/"+str(i)+"/"
    os.makedirs(out_dir, exist_ok=True)

    for j, frame in enumerate(data[:MAX_FRAMES_PER_VIDEO]):
        image = Image.fromarray(frame)

        # inference
        depth = np.array(pipe(image)["depth"])
        depth_peak = depth.max()
        # Normalise to [0, 1]; a flat all-zero map stays zero instead of NaN.
        depth = depth/depth_peak if depth_peak > 0 else np.zeros(depth.shape)

        # SOBEL
        gX = cv2.Sobel(src=depth, ddepth=cv2.CV_64F, dx=1, dy=0, ksize=3)
        gY = cv2.Sobel(src=depth, ddepth=cv2.CV_64F, dx=0, dy=1, ksize=3)
        # Take magnitudes before blending: blending the signed gradients first
        # lets gX and gY of opposite sign cancel, which erases some diagonal edges.
        sobel = cv2.addWeighted(np.absolute(gX), 0.5, np.absolute(gY), 0.5, 0)
        sobel_peak = sobel.max()
        if sobel_peak > 0:
            sobel /= sobel_peak
        sobel_raw = sobel.copy()

        # Edge threshold: midpoint between the mean and the median response.
        edge_threshold = (sobel.mean() + np.median(sobel))/2
        sobel = np.digitize(sobel, bins=[edge_threshold]).astype(np.uint8)
        # Edge pixels (value 1) are the background, so the connected non-edge
        # areas become the labelled regions; +1 moves the background label off 0.
        regions = skimage.morphology.label(sobel, background=1)+1

        cv2.imwrite(out_dir+str(j)+"_sobel_raw.jpg", _false_color(sobel_raw))
        cv2.imwrite(out_dir+str(j)+"_sobel_segmentation.jpg", _false_color(regions))
        gray = depth*255
        save = np.stack((gray, gray, gray), 2).astype(np.uint8)
        cv2.imwrite(out_dir+str(j)+"_depth.jpg", save)

        # multiotsu
        thresholds = threshold_multiotsu(depth)
        # Bin the depth map itself: the thresholds were computed on `depth`,
        # not on the RGB input frame.
        regions = np.digitize(depth, bins=thresholds)
        cv2.imwrite(out_dir+str(j)+"_multi_otsu.jpg", _false_color(regions))

    print("end")


[[[2 2 2]
  [2 2 2]
  [2 2 2]
  ...
  [2 2 2]
  [2 2 2]
  [2 2 2]]

 [[2 2 2]
  [2 2 2]
  [2 2 2]
  ...
  [2 2 2]
  [2 2 2]
  [2 2 2]]

 [[2 2 2]
  [2 2 2]
  [2 2 2]
  ...
  [2 2 2]
  [2 2 2]
  [2 2 2]]

 ...

 [[2 2 2]
  [2 2 2]
  [2 2 2]
  ...
  [2 2 2]
  [2 2 2]
  [2 2 2]]

 [[2 2 2]
  [2 2 2]
  [2 2 2]
  ...
  [2 2 2]
  [2 2 2]
  [2 2 2]]

 [[2 2 2]
  [2 2 2]
  [2 2 2]
  ...
  [2 2 2]
  [2 2 2]
  [2 2 2]]]
end


In [53]:
import cv2
import torch
import urllib.request
import matplotlib.pyplot as plt

# MiDaS variant to load from torch hub.
# "DPT_Large" = MiDaS v3 - Large (highest accuracy, slowest inference speed)
model_type = "DPT_Large"

# Use the GPU when torch can see one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the network, move it to the chosen device and switch it to eval mode.
midas = torch.hub.load("intel-isl/MiDaS", model_type)
midas.to(device)
midas.eval()

# Input transforms published in the same hub repository.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

Using cache found in /user/georg.eckardt/.cache/torch/hub/intel-isl_MiDaS_master
Using cache found in /user/georg.eckardt/.cache/torch/hub/intel-isl_MiDaS_master


In [54]:
# Render one grayscale MiDaS depth video per input clip.
#
# For every clip `data/MOVIE/videos/<i>.npy`, each frame is transformed, run
# through `midas`, resized back to the frame's height/width, normalised to
# [0, 1] and written as a 3-channel uint8 frame to
# `depth/image_<i>_depth_2.mp4` (mp4v codec, 12 fps).

# The input transform depends only on the model type, so choose it once
# instead of once per frame.
if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

for i in range(10):
    data = np.load('data/MOVIE/videos/'+str(i)+'.npy')

    name = 'depth/image_'+str(i)+'_depth_2.mp4'

    # The writer is opened on the first frame so that its frame size matches
    # the array that is actually written; cv2.VideoWriter silently drops
    # frames whose size differs from the size it was opened with.
    # Consequence: a clip with no frames produces no output file.
    vid = None
    try:
        for frame in data:
            img = np.array(Image.fromarray(frame))

            input_batch = transform(img).to(device)

            with torch.no_grad():
                prediction = midas(input_batch)

                # Resize the prediction back to the frame's (height, width).
                prediction = torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                ).squeeze()

            depth = prediction.cpu().numpy()

            # Normalise by the peak, skipping the division when the peak is not
            # positive (avoids NaN/inf), then clip: bicubic resampling can
            # overshoot below 0 and negative values would wrap in the uint8 cast.
            peak = depth.max()
            if peak > 0:
                depth = depth/peak
            depth = np.clip(depth, 0.0, 1.0)

            gray = depth*255
            save = np.stack((gray, gray, gray), 2).astype(np.uint8)

            if vid is None:
                height, width = save.shape[:2]
                frameSize = (width, height)  # OpenCV expects (width, height)
                vid = cv2.VideoWriter(name, cv2.VideoWriter_fourcc(*'mp4v'), 12, frameSize)
                if not vid.isOpened():
                    # e.g. the output directory is missing or the codec is unavailable
                    raise IOError("could not open video writer for " + name)

            vid.write(save)
    finally:
        # Release the writer even when inference fails part-way through a clip.
        if vid is not None:
            vid.release()