In [1]:
import torch
import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import transforms
from transformers import DPTFeatureExtractor, DPTForDepthEstimation, SegformerFeatureExtractor, SegformerForSemanticSegmentation

In [2]:
# Choose the compute device once; both models below are moved onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Semantic segmentation model, used for road detection.
SEG_CHECKPOINT = "nvidia/segformer-b5-finetuned-cityscapes-1024-1024"
seg_feature_extractor = SegformerFeatureExtractor.from_pretrained(SEG_CHECKPOINT)
seg_model = SegformerForSemanticSegmentation.from_pretrained(SEG_CHECKPOINT).to(device)

# Depth estimation model.
DEPTH_CHECKPOINT = "Intel/dpt-large"
depth_feature_extractor = DPTFeatureExtractor.from_pretrained(DEPTH_CHECKPOINT)
depth_model = DPTForDepthEstimation.from_pretrained(DEPTH_CHECKPOINT).to(device)

def get_road_mask(image_path, target_size, road_class_index=0):
    """Segment an image and return a binary road mask resized to ``target_size``.

    Args:
        image_path: Path of the image file to segment.
        target_size: ``(width, height)`` of the returned mask; passed to
            ``cv2.resize`` as its ``dsize`` argument.
        road_class_index: Label index that counts as "road" in the
            segmentation output. Defaults to 0, the index this notebook uses
            for the Cityscapes-finetuned checkpoint.

    Returns:
        PIL.Image.Image: Single-channel 8-bit mask that is 255 where the
        predicted class equals ``road_class_index`` and 0 everywhere else.
    """
    # Open inside a context manager so the file handle is released promptly;
    # convert() hands back an independent, fully decoded RGB copy.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    inputs = seg_feature_extractor(images=image, return_tensors="pt")
    # The tensors must live on the same device as the model.
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        logits = seg_model(**inputs).logits

    # argmax over dim 1 picks the winning class per pixel; [0] selects the
    # single image in the batch without touching the spatial axes.
    segmentation = logits.argmax(dim=1)[0].cpu().numpy()

    # 255 for road pixels, 0 for everything else.
    road_mask = np.where(segmentation == road_class_index, 255, 0).astype(np.uint8)

    # Nearest-neighbour interpolation keeps the mask strictly binary.
    road_mask = cv2.resize(road_mask, tuple(target_size), interpolation=cv2.INTER_NEAREST)
    return Image.fromarray(road_mask)

def get_depth_map(image_path):
    """Estimate depth for an image and render it as a viridis-coloured RGB image.

    The image is converted to RGB before inference, as ``get_road_mask`` does,
    so grayscale, palette and RGBA files go through the same code path.

    Args:
        image_path: Path of the image file.

    Returns:
        PIL.Image.Image: RGB image with the same width and height as the
        input, whose colours encode the min-max normalised predicted depth.
    """
    # Open inside a context manager so the file handle is released promptly;
    # convert() hands back an independent, fully decoded RGB copy.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    pixel_values = depth_feature_extractor(images=image, return_tensors="pt").pixel_values
    # The tensor must live on the same device as the model.
    pixel_values = pixel_values.to(device)

    with torch.no_grad():
        predicted_depth = depth_model(pixel_values).predicted_depth

    # Resize the prediction back to the input resolution. PIL reports
    # (width, height) while interpolate() wants (height, width), hence [::-1].
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    # Drop the batch and channel axes explicitly; a blanket squeeze() would
    # also drop a spatial axis of length 1.
    depth = prediction[0, 0].cpu().numpy()

    # Min-max normalise to [0, 1] for the colormap. A constant depth map has
    # zero range; map it to 0 instead of dividing by zero and producing NaNs.
    depth_min = depth.min()
    depth_range = depth.max() - depth_min
    if depth_range > 0:
        normalized_depth = (depth - depth_min) / depth_range
    else:
        normalized_depth = np.zeros_like(depth)

    # The colormap returns RGBA floats in [0, 1]; scale to 8-bit and drop alpha.
    depth_colored = (plt.cm.viridis(normalized_depth) * 255).astype(np.uint8)
    return Image.fromarray(depth_colored[:, :, :3])

def overlay_road_on_depth(depth_image, road_mask, highlight_color=(180, 230, 100)):
    """Paint the road pixels of a depth map with a flat highlight colour.

    Args:
        depth_image: RGB depth visualisation (PIL image).
        road_mask: Single-channel mask image with the same width and height as
            ``depth_image``; every pixel greater than 0 is treated as road.
        highlight_color: RGB triple written over road pixels. Defaults to a
            soft yellow-green, ``(180, 230, 100)``.

    Returns:
        PIL.Image.Image: A new image; the inputs are not modified.

    Raises:
        ValueError: If the mask and the depth image differ in size.
    """
    # np.array() copies the pixel data, so the caller's image stays untouched.
    depth_array = np.array(depth_image)
    road_pixels = np.array(road_mask) > 0

    if road_pixels.shape != depth_array.shape[:2]:
        raise ValueError(
            f"road_mask shape {road_pixels.shape} does not match "
            f"depth image shape {depth_array.shape[:2]}"
        )

    depth_array[road_pixels] = np.asarray(highlight_color)
    return Image.fromarray(depth_array)

def process_image(image_path):
    """Run the whole pipeline for one image: depth map, road mask, overlay.

    Returns the depth visualisation with the detected road pixels highlighted.
    """
    depth_map = get_depth_map(image_path)
    # Request the mask at the depth map's (width, height) so the two images
    # line up pixel for pixel.
    road_mask = get_road_mask(image_path, depth_map.size)
    return overlay_road_on_depth(depth_map, road_mask)

# Add a helper function to check if GPU is available and show memory usage
def print_gpu_info():
    """Print the name and current memory usage of CUDA device 0, or a CPU notice."""
    if not torch.cuda.is_available():
        print("GPU not available. Running on CPU.")
        return

    bytes_per_mb = 1024**2
    print(f"GPU Available: {torch.cuda.get_device_name(0)}")
    allocated_mb = torch.cuda.memory_allocated(0) / bytes_per_mb
    print(f"GPU Memory Allocated: {allocated_mb:.2f} MB")
    reserved_mb = torch.cuda.memory_reserved(0) / bytes_per_mb
    print(f"GPU Memory Reserved: {reserved_mb:.2f} MB")

  return func(*args, **kwargs)
Some weights of DPTForDepthEstimation were not initialized from the model checkpoint at Intel/dpt-large and are newly initialized: ['neck.fusion_stage.layers.0.residual_layer1.convolution1.bias', 'neck.fusion_stage.layers.0.residual_layer1.convolution1.weight', 'neck.fusion_stage.layers.0.residual_layer1.convolution2.bias', 'neck.fusion_stage.layers.0.residual_layer1.convolution2.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Example usage (disabled; uncomment to preview a single image).
# The path is relative to the notebook's working directory - adjust as needed.
# final_image = process_image("data/export/1478020197691441017_jpg.rf.8e8772a66971d66aa5d60270715723f9.jpg")
# final_image.show()

In [3]:
import os
import pandas as pd
from pathlib import Path
from PIL import Image

In [5]:
# Define paths (modify these)
image_dir = "data/export/"  # Directory containing input images
depth_output_folder = "data/export/depth/"  # Directory to save depth images
output_csv_path = "data/export/objDepth.csv"  # Output CSV file

# Ensure depth output folder exists
os.makedirs(depth_output_folder, exist_ok=True)

# File extensions (lower-case) that are treated as input images.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

# One record per successfully processed image; the following cell writes
# these records to a CSV file.
data = []
i = 1  # 1-based count of successfully processed images, used in the log line

# os.listdir() returns entries in arbitrary order; sort them so the run order
# and the row order of the resulting CSV are reproducible.
for image_name in sorted(os.listdir(image_dir)):
    image_path = os.path.join(image_dir, image_name)

    # Skip directories (with the paths above, the depth output folder sits
    # inside image_dir) and anything that is not an image file.
    if not os.path.isfile(image_path) or not image_name.lower().endswith(IMAGE_EXTENSIONS):
        continue

    try:
        # Depth visualisation with the road overlay applied
        depth_image = process_image(image_path)

        # Fail early with a clear message if the pipeline returned something
        # that cannot be saved as an image.
        if not isinstance(depth_image, Image.Image):
            raise TypeError(f"process_image did not return a PIL Image for {image_name}")

        # Always save as PNG, named after the input file's stem.
        depth_image_filename = f"depth_{Path(image_path).stem}.png"
        depth_image_path = os.path.join(depth_output_folder, depth_image_filename)
        depth_image.save(depth_image_path)

        # Remember which output belongs to which input
        data.append({"filename": image_path, "depth_file_path": depth_image_path})

        print(f"Processed {i}: {image_name}")
        i += 1

    except Exception as e:
        # Best-effort batch run: report the failure and continue with the
        # remaining images instead of aborting.
        print(f"Error processing {image_name}: {e}")


Processed 1: 1478019952686311006_jpg.rf.54e2d12dbabc46be3c78995b6eaf3fee.jpg
Processed 2: 1478019952686311006_jpg.rf.JLSB3LP2Q4RuGHYKqfF6.jpg
Processed 3: 1478019953180167674_jpg.rf.8a816c9d7e9b423a63ed6ecd4a663e47.jpg
Processed 4: 1478019953180167674_jpg.rf.azslsZnM8FLQPu3QWLTl.jpg
Processed 5: 1478019953689774621_jpg.rf.2e4b7ae29c3379da1282e85cff4c1745.jpg
Processed 6: 1478019953689774621_jpg.rf.UpOkvtBppZZaLfvVZ0JX.jpg
Processed 7: 1478019954186238236_jpg.rf.cbpP963e2sN7rHIZMJtD.jpg
Processed 8: 1478019954186238236_jpg.rf.f8ac6ad31e400d4918ff394f903bb95c.jpg
Processed 9: 1478019954685370994_jpg.rf.049fda8ca86adf1e404b41955eb4aba8.jpg
Processed 10: 1478019954685370994_jpg.rf.Jah8pEPis5n1x6iQCtmT.jpg
Processed 11: 1478019955185244088_jpg.rf.gJpj2eCO1Dd7Sic9WlhE.jpg
Processed 12: 1478019955679801306_jpg.rf.e0c1afab5b4a00d9812e56907cd1b9bf.jpg
Processed 13: 1478019955679801306_jpg.rf.QnW1eAO23mmDm9ap1gm7.jpg
Processed 14: 1478019956186247611_jpg.rf.9ad113e7eb74453890500935ec00590a.jpg
P

In [6]:
# Save to CSV
# Name the columns explicitly so the file still gets a header row when no
# image was processed; a header-less empty file cannot be parsed by
# pd.read_csv.
df = pd.DataFrame(data, columns=["filename", "depth_file_path"])
df.to_csv(output_csv_path, index=False)

print(f"Processing complete. Depth images saved in '{depth_output_folder}', CSV saved to '{output_csv_path}'.")

Processing complete. Depth images saved in 'data/export/depth/', CSV saved to 'data/export/objDepth.csv'.


In [None]:
# Input/output locations used from here on; replace them with the actual paths for your data.
csv_path = "data/export/annotations.csv"  # CSV containing image metadata (schema not visible here - TODO confirm)
depth_output_folder = "data/export/depth/"  # Depth image folder; same value as in the depth-generation cell above
output_csv_path = "data/export/objDepth.csv"  # Updated CSV file; same path the earlier export cell already wrote to