## Gluon

In [None]:
# Monodepth2 (GluonCV) demo: estimate a disparity map for one image, save a
# colour-mapped version to 'test_output.png', and display it.
import numpy as np

import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.image as mpimg
import mxnet as mx
import PIL.Image as pil
from matplotlib import pyplot as plt
from mxnet.gluon.data.vision import transforms
import gluoncv

# using cpu
ctx = mx.cpu(0)

url = 'https://raw.githubusercontent.com/KuangHaofei/GluonCV_Test/master/monodepthv2/tutorials/test_img.png'
filename = 'test_img.png'
# With overwrite=False the sample image is fetched only when it is missing,
# so the cell works on a fresh checkout without re-downloading on every run.
gluoncv.utils.download(url, path=filename, overwrite=False)

img = pil.open(filename).convert('RGB')

plt.imshow(img)
plt.show()

# Remember the source resolution so the prediction can be scaled back to it.
original_width, original_height = img.size
feed_height = 192
feed_width = 640

# `pil.LA` is not a PIL attribute; LANCZOS is the resampling filter meant here.
img = img.resize((feed_width, feed_height), pil.LANCZOS)
# Convert the PIL image to a batched float tensor on `ctx`, as done in the
# GluonCV Monodepth2 demo; the network cannot consume a PIL image directly.
img = transforms.ToTensor()(mx.nd.array(img)).expand_dims(0).as_in_context(context=ctx)

model = gluoncv.model_zoo.get_model('monodepth2_resnet18_kitti_stereo_640x192',
                                    pretrained_base=False, ctx=ctx, pretrained=True)

outputs = model.predict(img)
disp = outputs[("disp", 0)]
# Scale the disparity map back up to the original image resolution.
disp_resized = mx.nd.contrib.BilinearResize2D(disp, height=original_height, width=original_width)

disp_resized_np = disp_resized.squeeze().as_in_context(mx.cpu()).asnumpy()
# Clip the colour scale at the 95th percentile so a few extreme disparities
# do not wash out the rest of the map.
vmax = np.percentile(disp_resized_np, 95)
normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(), vmax=vmax)
mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
# to_rgba returns RGBA floats; drop alpha and convert to 8-bit RGB.
colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] * 255).astype(np.uint8)
im = pil.fromarray(colormapped_im)
im.save('test_output.png')

# Read the saved file back to display exactly what was written to disk.
disp_map = mpimg.imread('test_output.png')
plt.imshow(disp_map)
plt.show()

## Hugging Face

In [2]:
from matplotlib import pyplot as plt
import torch
import numpy as np
import PIL.Image as pil

# image = pil.open('images/inputs/rooms/real/sofa.png')

In [7]:
# Batch depth estimation with GLPN (vinvino02/glpn-nyu): every file in
# `folder_dir` is run through the model and a depth image with the same file
# name is written to `output_dir`.
# Relies on `pil`, `torch`, `np` and `plt` from the notebook's imports cell.
import os

from transformers import GLPNImageProcessor, GLPNForDepthEstimation

processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")

# One directory is used both for listing and for opening files. Previously
# the listing used a machine-specific absolute path while the files were
# opened through this relative path, so the two could silently diverge.
folder_dir = '../images/inputs/rooms/real'
output_dir = '../images/outputs/final-outputs/depth/glpn'
# plt.imsave does not create missing directories.
os.makedirs(output_dir, exist_ok=True)

# sorted() makes the processing order deterministic across platforms.
for image in sorted(os.listdir(folder_dir)):
    input_path = os.path.join(folder_dir, image)
    if not os.path.isfile(input_path):
        # Sub-directories cannot be opened as images.
        continue

    img = pil.open(input_path)
    # prepare image for the model
    inputs = processor(images=img, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # interpolate to original size (PIL size is (width, height); torch wants
    # (height, width), hence the reversal)
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=img.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    # visualize the prediction: rescale so the maximum depth maps to 255
    output = prediction.squeeze().cpu().numpy()
    formatted = (output * 255 / np.max(output)).astype("uint8")
    depth = pil.fromarray(formatted)
    plt.imsave(os.path.join(output_dir, image), depth)
    # plt.imshow(depth)

In [18]:
# Batch depth estimation with DPT (Intel/dpt-hybrid-midas): every file in
# `folder_dir` is run through the model and a depth image with the same file
# name is written to `output_dir`.
# Relies on `pil`, `torch`, `np` and `plt` from the notebook's imports cell.
import os

from transformers import DPTForDepthEstimation, DPTImageProcessor

model = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas")
processor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")

# One directory is used both for listing and for opening files. Previously
# the listing used a machine-specific absolute path while the files were
# opened through this relative path, so the two could silently diverge.
folder_dir = '../images/inputs/rooms/real'
output_dir = '../images/outputs/final-outputs/depth/midas'
# plt.imsave does not create missing directories.
os.makedirs(output_dir, exist_ok=True)

# sorted() makes the processing order deterministic across platforms.
for image in sorted(os.listdir(folder_dir)):
    input_path = os.path.join(folder_dir, image)
    if not os.path.isfile(input_path):
        # Sub-directories cannot be opened as images.
        continue

    img = pil.open(input_path)
    # prepare image for the model
    inputs = processor(images=img, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # interpolate to original size (PIL size is (width, height); torch wants
    # (height, width), hence the reversal)
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=img.size[::-1],
        mode="bicubic",
        align_corners=False
    )

    # visualize the prediction: rescale so the maximum depth maps to 255
    output = prediction.squeeze().cpu().numpy()
    formatted = (output * 255 / np.max(output)).astype("uint8")
    depth = pil.fromarray(formatted)
    plt.imsave(os.path.join(output_dir, image), depth)
    # plt.imshow(depth)

KeyboardInterrupt: 

In [None]:
# Single-image depth estimation with DPT-Large (Intel/dpt-large); the result
# is saved to 'images/outputs/depth-dpt-large.png' and displayed.
# Relies on `pil`, `torch`, `np` and `plt` from the notebook's imports cell.
import os

from transformers import DPTImageProcessor, DPTForDepthEstimation

processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")

# Load the input explicitly. Without this, `image` is either undefined on a
# fresh kernel or is a leftover file-name string from an earlier loop cell,
# and both the processor call and `image.size` below would misbehave.
# NOTE(review): path taken from the commented-out loader in the imports cell;
# adjust it if the notebook's working directory differs.
image = pil.open('images/inputs/rooms/real/sofa.png')

# prepare image for the model
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)
    predicted_depth = outputs.predicted_depth

# interpolate to original size (PIL size is (width, height); torch wants
# (height, width), hence the reversal)
prediction = torch.nn.functional.interpolate(
    predicted_depth.unsqueeze(1),
    size=image.size[::-1],
    mode="bicubic",
    align_corners=False,
)

# visualize the prediction: rescale so the maximum depth maps to 255
output = prediction.squeeze().cpu().numpy()
formatted = (output * 255 / np.max(output)).astype("uint8")
depth = pil.fromarray(formatted)
# plt.imsave does not create missing directories.
os.makedirs('images/outputs', exist_ok=True)
plt.imsave('images/outputs/depth-dpt-large.png', depth)
plt.imshow(depth)

In [None]:
# Disabled experiment: the commented-out code below runs depth estimation with
# the "facebook/dpt-dinov2-small-nyu" checkpoint and saves the result to
# 'images/outputs/depth-dpt-dinov2-small-nyu.png'. If re-enabled, it needs
# `image` to be a loaded PIL image (it reads `image.size`), plus `torch`,
# `np`, `pil` and `plt` in scope.

# from transformers import AutoImageProcessor, DPTForDepthEstimation

# image_processor = AutoImageProcessor.from_pretrained("facebook/dpt-dinov2-small-nyu")
# model = DPTForDepthEstimation.from_pretrained("facebook/dpt-dinov2-small-nyu")

# # prepare image for the model
# inputs = image_processor(images=image, return_tensors="pt")

# with torch.no_grad():
#     outputs = model(**inputs)
#     predicted_depth = outputs.predicted_depth

# # interpolate to original size
# prediction = torch.nn.functional.interpolate(
#     predicted_depth.unsqueeze(1),
#     size=image.size[::-1],
#     mode="bicubic",
#     align_corners=False,
# )

# # visualize the prediction
# output = prediction.squeeze().cpu().numpy()
# formatted = (output * 255 / np.max(output)).astype("uint8")
# depth = pil.fromarray(formatted)
# plt.imsave('images/outputs/depth-dpt-dinov2-small-nyu.png', depth)
# plt.imshow(depth)