In [2]:
import cv2
import numpy as np

# Load images
top_view_image = cv2.imread(r"F:\Anuj\image\TopView.jpg")
left_view_image = cv2.imread(r"F:\Anuj\image\LeftView.jpg")
right_view_image = cv2.imread(r"F:\Anuj\image\RightView.jpg")

# Known dimensions of the reference coin (in cm)
coin_diameter_cm = 2.5
coin_thickness_cm = 0.2

def detect_coin(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (11, 11), 0)
    circles = cv2.HoughCircles(blurred, cv2.HOUGH_GRADIENT, dp=1.2, minDist=50,
                               param1=50, param2=30, minRadius=10, maxRadius=100)
    if circles is not None:
        circles = np.round(circles[0, :]).astype("int")
        for (x, y, r) in circles:
            # We are making a rough assumption on coin radius range based on the expected coin size
            if 10 < r < 50:  # Adjust these values if needed
                return (x, y, r)
    return None

def calculate_scale(coin_radius_px, coin_diameter_cm):
    return coin_diameter_cm / (2 * coin_radius_px)

def estimate_dimension(scale, point1, point2):
    distance_px = np.linalg.norm(np.array(point1) - np.array(point2))
    return distance_px * scale

# Detect coin in top view to establish scale
coin_top = detect_coin(top_view_image)
if coin_top is None:
    print("Coin not detected in top view image")
else:
    coin_radius_top_px = coin_top[2]
    scale_top = calculate_scale(coin_radius_top_px, coin_diameter_cm)
    print(f"Scale (Top View): {scale_top} cm/px")

# Detect coin in left view to establish scale
coin_left = detect_coin(left_view_image)
if coin_left is None:
    print("Coin not detected in left view image")
else:
    coin_radius_left_px = coin_left[2]
    scale_left = calculate_scale(coin_radius_left_px, coin_diameter_cm)
    print(f"Scale (Left View): {scale_left} cm/px")

# Detect coin in right view to establish scale
coin_right = detect_coin(right_view_image)
if coin_right is None:
    print("Coin not detected in right view image")
else:
    coin_radius_right_px = coin_right[2]
    scale_right = calculate_scale(coin_radius_right_px, coin_diameter_cm)
    print(f"Scale (Right View): {scale_right} cm/px")

# Check if all scales are defined
if 'scale_top' in locals() and 'scale_left' in locals() and 'scale_right' in locals():
    # Assuming object spans from (x1, y1) to (x2, y2) in the top view
    object_top_corners = [(50, 50), (150, 150)]  # Example points; replace with actual object detection
    object_length_px = estimate_dimension(scale_top, object_top_corners[0], object_top_corners[1])
    object_width_px = estimate_dimension(scale_top, object_top_corners[0], object_top_corners[1])
    print(f"Object Length: {object_length_px} cm")
    print(f"Object Width: {object_width_px} cm")

    # Assuming object spans from (x1, y1) to (x2, y2) in the left view
    object_left_corners = [(50, 50), (150, 200)]  # Example points; replace with actual object detection
    object_height_px = estimate_dimension(scale_left, object_left_corners[0], object_left_corners[1])
    print(f"Object Height: {object_height_px} cm")

    # Calculate volume (assuming object is a rectangular prism)
    object_volume = object_length_px * object_width_px * object_height_px
    print(f"Estimated Object Volume: {object_volume} cubic cm")

    # Visualization (Optional)
    cv2.circle(top_view_image, (coin_top[0], coin_top[1]), coin_top[2], (0, 255, 0), 2)
    cv2.circle(left_view_image, (coin_left[0], coin_left[1]), coin_left[2], (0, 255, 0), 2)
    cv2.circle(right_view_image, (coin_right[0], coin_right[1]), coin_right[2], (0, 255, 0), 2)

    cv2.imshow("Top View with Coin", top_view_image)
    cv2.imshow("Left View with Coin", left_view_image)
    cv2.imshow("Right View with Coin", right_view_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("Could not detect the coin in one or more views, hence volume estimation cannot proceed.")


Scale (Top View): 0.028409090909090908 cm/px
Scale (Left View): 0.025510204081632654 cm/px
Scale (Right View): 0.026041666666666668 cm/px
Object Length: 4.017652165832656 cm
Object Width: 4.017652165832656 cm
Object Height: 4.598917443193864 cm
Estimated Object Volume: 74.23355893585135 cubic cm


In [11]:
import cv2
import math

def estimate_depth(image_path, apparent_size_pixels, baseline_distance_meters, angle_degrees):
    # Load the image
    image = cv2.imread(image_path)
    
    # Resize image for better visualization (optional)
    scale_percent = 50  # adjust as needed
    width = int(image.shape[1] * scale_percent / 100)
    height = int(image.shape[0] * scale_percent / 100)
    resized_image = cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
    
    # Display the resized image
    cv2.imshow('Resized Image', resized_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    # Convert angle to radians
    angle_radians = math.radians(angle_degrees)
    
    # Calculate distance using trigonometry
    distance = baseline_distance_meters * math.tan(angle_radians) / (apparent_size_pixels / 2)
    
    return distance

# Example usage:
image_path = r"F:\Anuj\image\LeftView.jpg"  # Replace with your image file path
apparent_size_pixels = 200  # Example: measured apparent size of the object in pixels
baseline_distance_meters = 1.0  # Example: known distance between two points on the object in meters
angle_degrees = 30.0  # Example: angle formed by the lines from the camera to the two points

estimated_distance = estimate_depth(image_path, apparent_size_pixels, baseline_distance_meters, angle_degrees)
print(f"Estimated distance to the object: {estimated_distance} meters")


Estimated distance to the object: 0.005773502691896257 meters


In [17]:
import cv2
import numpy as np

def estimate_depth_and_volume(image_path, object_width_cm, object_height_cm, focal_length_pixels):
    # Load image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not read the image at {image_path}")
        return None, None
    
    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Threshold the image to get binary image
    _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
    
    # Find contours
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Assume the largest contour is the object
    if len(contours) == 0:
        print("Error: No object detected in the image.")
        return None, None
    
    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)
    
    # Calculate apparent size in pixels (assuming the object is rectangular)
    apparent_size_pixels = max(w, h)
    
    # Estimate depth using perspective properties
    object_width_pixels = w
    object_height_pixels = h
    
    distance = (object_width_cm * focal_length_pixels) / object_width_pixels
    
    # Estimate volume assuming object is a rectangular prism
    object_volume = (object_width_cm / 100) * (object_height_cm / 100) * (distance / 100)  # Convert cm to meters
    
    return distance, object_volume

# Example usage:
image_path = r"F:\Anuj\image\LeftView.jpg"  # Replace with your image file path
object_width_cm = 4  # Example: width of the object in centimeters
object_height_cm = 15  # Example: height of the object in centimeters
focal_length_pixels = 480  # Example: focal length of the camera in pixels

estimated_distance, estimated_volume = estimate_depth_and_volume(image_path, object_width_cm, object_height_cm, focal_length_pixels)

if estimated_distance is not None and estimated_volume is not None:
    print(f"Estimated distance to the object: {estimated_distance:.2f} meters")
    print(f"Estimated volume of the object: {estimated_volume:.2f} cubic meters")


Estimated distance to the object: 6.49 meters
Estimated volume of the object: 0.00 cubic meters


In [18]:
!pip install opencv-python scikit-image




In [19]:
import cv2
import numpy as np
from skimage.segmentation import slic
from skimage.measure import regionprops
from skimage.color import label2rgb
from sklearn.cluster import DBSCAN

# Load image
image = cv2.imread(r"F:\Anuj\DiabTrain\biriyani\biriyanitrain (67).jpg")

# Step 1: Selective Search
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(image)
ss.switchToSelectiveSearchFast()
rects = ss.process()

# Draw initial bounding boxes
output = image.copy()
for (x, y, w, h) in rects[:100]:
    cv2.rectangle(output, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv2.imshow("Initial Bounding Boxes", output)
cv2.waitKey(0)

# Step 2: Bounding Box Clustering
boxes = np.array(rects[:100])
db = DBSCAN(eps=30, min_samples=2).fit(boxes)
labels = db.labels_

# Draw clustered bounding boxes
for label in set(labels):
    if label == -1:
        continue
    mask = (labels == label)
    cluster_boxes = boxes[mask]
    x1 = np.min(cluster_boxes[:, 0])
    y1 = np.min(cluster_boxes[:, 1])
    x2 = np.max(cluster_boxes[:, 0] + cluster_boxes[:, 2])
    y2 = np.max(cluster_boxes[:, 1] + cluster_boxes[:, 3])
    cv2.rectangle(output, (x1, y1), (x2, y2), (255, 0, 0), 2)

cv2.imshow("Clustered Bounding Boxes", output)
cv2.waitKey(0)

# Step 3: Saliency Maps
# Using OpenCV's saliency API
saliency = cv2.saliency.StaticSaliencySpectralResidual_create()
(success, saliency_map) = saliency.computeSaliency(image)
saliency_map = (saliency_map * 255).astype("uint8")

cv2.imshow("Saliency Map", saliency_map)
cv2.waitKey(0)

# Step 4: Segmentation by GrabCut
mask = np.zeros(image.shape[:2], np.uint8)
rect = (50, 50, image.shape[1]-100, image.shape[0]-100)
bgdModel = np.zeros((1, 65), np.float64)
fgdModel = np.zeros((1, 65), np.float64)
cv2.grabCut(image, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
output_image = image * mask2[:, :, np.newaxis]

cv2.imshow("Segmented Image", output_image)
cv2.waitKey(0)

# Step 5: Non-Maximum Suppression (NMS)
def nms(boxes, overlapThresh):
    if len(boxes) == 0:
        return []

    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    pick = []
    x1 = boxes[:,0]
    y1 = boxes[:,1]
    x2 = boxes[:,2] + boxes[:,0]
    y2 = boxes[:,3] + boxes[:,1]

    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)

    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        overlap = (w * h) / area[idxs[:last]]

        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))

    return boxes[pick].astype("int")

# Apply NMS on the initial rects
picked_boxes = nms(boxes, 0.3)

# Draw final bounding boxes after NMS
for (x, y, w, h) in picked_boxes:
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv2.imshow("Final Bounding Boxes after NMS", image)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [29]:
import torch
import cv2
import numpy as np
from torchvision.transforms import Compose, Resize, ToTensor, Normalize

# Load MiDaS model
model_type = "DPT_Large"  # Can also be "DPT_Hybrid", "MiDaS_small", etc.
midas = torch.hub.load("intel-isl/MiDaS", model_type)

# Load transforms
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
if model_type in ["DPT_Large", "DPT_Hybrid"]:
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

# Load and prepare the image
image_path = r"F:\Anuj\28.jpg"  # Replace with your image path
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
print(f"Loaded image shape: {image.shape}")

# Apply MiDaS transform
input_batch = transform(image).unsqueeze(0)
print(f"Shape after transform and unsqueeze: {input_batch.shape}")

# Remove the extra dimension
input_batch = input_batch.squeeze(1)
print(f"Shape after squeezing: {input_batch.shape}")

# Move the input and model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
midas.to(device)
input_batch = input_batch.to(device)
print(f"Shape after moving to device: {input_batch.shape}")

# Perform depth estimation
with torch.no_grad():
    prediction = midas(input_batch)

# Resize prediction to original image size
prediction = torch.nn.functional.interpolate(
    prediction.unsqueeze(1),
    size=image.shape[:2],
    mode="bicubic",
    align_corners=False,
).squeeze()
print(f"Shape after interpolation: {prediction.shape}")

depth_map = prediction.cpu().numpy()

# Normalize the depth map for visualization
depth_min = depth_map.min()
depth_max = depth_map.max()
depth_map_normalized = (depth_map - depth_min) / (depth_max - depth_min)

# Save and display the depth map
depth_map_normalized = (depth_map_normalized * 255).astype(np.uint8)
cv2.imwrite("depth_map.png", depth_map_normalized)
cv2.imshow("Depth Map", depth_map_normalized)
cv2.waitKey(0)
cv2.destroyAllWindows()


Using cache found in C:\Users\Anuj/.cache\torch\hub\intel-isl_MiDaS_master
Using cache found in C:\Users\Anuj/.cache\torch\hub\intel-isl_MiDaS_master


Loaded image shape: (335, 500, 3)
Shape after transform and unsqueeze: torch.Size([1, 1, 3, 384, 576])
Shape after squeezing: torch.Size([1, 3, 384, 576])
Shape after moving to device: torch.Size([1, 3, 384, 576])
Shape after interpolation: torch.Size([335, 500])
