In [1]:
import cv2
import torch

# --- Depth model -----------------------------------------------------------
# Fetch the MiDaS network from torch.hub; `model_type` selects the entry point.
model_type = "DPT_Large"
midas = torch.hub.load("intel-isl/MiDaS", model_type)

# Prefer CUDA when torch reports it as available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Place the network on the chosen device and switch it to evaluation mode.
midas.to(device)
midas.eval()

# --- Preprocessing ---------------------------------------------------------
# The same hub repo exposes a "transforms" entry point; use its DPT transform.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
transform = midas_transforms.dpt_transform

# Load the input image. cv2.imread returns None (it does not raise) when the
# file is missing or cannot be decoded, so fail here with a clear message
# instead of letting cvtColor crash on a None input.
image_path = "/home/nitesh/.local/share/ov/pkg/isaac-sim-4.0.0/maniRL/images/image.jpg"
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# OpenCV decodes to BGR channel order; convert to RGB before applying the
# MiDaS transform, then move the resulting batch to the model's device.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
input_batch = transform(img).to(device)

# Run the model with autograd disabled, then resize its output to the first
# two dimensions of `img` so the depth map lines up with the image pixels.
with torch.no_grad():
    prediction = midas(input_batch)

    # interpolate is given the prediction with an extra axis inserted at dim 1;
    # all singleton axes are dropped again afterwards.
    prediction = prediction.unsqueeze(1)
    prediction = torch.nn.functional.interpolate(
        prediction, size=img.shape[:2], mode="bicubic", align_corners=False
    )
    prediction = prediction.squeeze()

# Bring the result back to host memory as a NumPy array.
depth = prediction.cpu().numpy()

# Normalize the depth map to the [0, 1] range.
depth_min = depth.min()
depth_max = depth.max()
depth_range = depth_max - depth_min
if depth_range > 0:
    depth_normalized = (depth - depth_min) / depth_range
else:
    # A constant prediction would divide by zero and fill the map with NaN;
    # use an all-zero map instead so the threshold comparison stays well defined.
    depth_normalized = depth - depth_min

# Threshold on the normalized map (adjust as needed). Raising it puts more
# pixels into `mask`, i.e. more pixels get blacked out further down.
# NOTE(review): MiDaS is documented to predict relative *inverse* depth
# (larger value = closer to the camera) - confirm; if so, values above the
# threshold are the nearest objects.
threshold = 0.5

# `mask` is the inverse of "above threshold": it selects every pixel whose
# normalized value does NOT exceed the threshold.
mask = ~(depth_normalized > threshold)

# Black out every pixel selected by `mask`, working on a copy so `img` is
# left untouched.
result = img.copy()
result[mask] = [0, 0, 0]  # black

# Number of pixels selected by `mask`, i.e. the pixels that were just blacked
# out (not the ones left visible).
# NOTE(review): if the count of pixels that remain visible is what is wanted,
# use (~mask).sum() instead - confirm the intended meaning.
num_pixels = mask.sum()

# Save the result. `result` is in RGB order (img was converted with
# COLOR_BGR2RGB after loading) while cv2.imwrite expects BGR, so convert back
# before writing; otherwise the saved file has red and blue swapped.
output_path = "/home/nitesh/.local/share/ov/pkg/isaac-sim-4.0.0/maniRL/images/image_goal_output.jpg"
cv2.imwrite(output_path, cv2.cvtColor(result, cv2.COLOR_RGB2BGR))

Using cache found in /home/nitesh/.cache/torch/hub/intel-isl_MiDaS_master
Using cache found in /home/nitesh/.cache/torch/hub/intel-isl_MiDaS_master


True

In [2]:
num_pixels

18321