In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
import timm
import numpy as np
import cv2
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# 1. Load MiDaS Model for Depth Estimation
def load_midas_model():
    """
    Loads the MiDaS model for depth estimation.

    Returns:
        model (torch.nn.Module): The loaded MiDaS model.
        transform (torchvision.transforms.Compose): The transformation applied to input images.
    """
    model = torch.hub.load("intel-isl/MiDaS", "MiDaS_small")
    model.eval()
    transform = torch.hub.load("intel-isl/MiDaS", "transforms").small_transform
    return model, transform

In [4]:
from torch.utils.data import Dataset
from torchvision import transforms
import os
from PIL import Image

class FoodSegDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.images = sorted(os.listdir(image_dir))
        self.masks = sorted(os.listdir(mask_dir))
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.masks[idx])
        
        image = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path).convert("L")  # Convert to grayscale
        
        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)
        
        # Binarize the mask
        mask = (mask > 0.5).float()
        
        return image, mask

In [5]:
class UNet(nn.Module):
    """
    UNet is a convolutional neural network architecture for image segmentation.
    
    Args:
        in_channels (int): Number of input channels. Default is 3.
        out_channels (int): Number of output channels. Default is 1.
    
    Attributes:
        encoder (nn.Module): Encoder part of the UNet model.
        decoder (nn.Module): Decoder part of the UNet model.
    
    Methods:
        forward(x): Forward pass of the UNet model.
    """
    def __init__(self, in_channels=3, out_channels=1):
        super(UNet, self).__init__()
        self.encoder = timm.create_model("resnet34", features_only=True, pretrained=True)
        self.decoder = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, out_channels, kernel_size=1)
        )
    
    def forward(self, x):
        """
        Forward pass of the UNet model.
        
        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, in_channels, height, width).
        
        Returns:
            torch.Tensor: Output tensor of shape (batch_size, out_channels, height, width).
        """
        enc_outs = self.encoder(x)
        out = self.decoder(enc_outs[-1])
        return torch.sigmoid(out)

In [6]:
def load_unet_model():
    """
    Load the pre-trained UNet model for depth estimation.

    Returns:
        model (UNet): The loaded UNet model.
    """
    model = UNet()
    try:
        model.load_state_dict(torch.load("path_to_pretrained_unet.pth", map_location='cpu'))
    except FileNotFoundError:
        print("Pre-trained U-Net model not found. Using untrained model.")
    model.eval()
    return model

In [7]:
def estimate_depth(model, transform, image):
    """
    Estimates the depth of an image using a given model and transformation.

    Parameters:
    model (torch.nn.Module): The depth estimation model.
    transform (torchvision.transforms.Compose): The transformation to be applied to the image.
    image (PIL.Image.Image): The input image.

    Returns:
    numpy.ndarray: The estimated depth map.
    """
    input_batch = transform(image)  # Add batch dimension
    print("Input Batch Shape",input_batch.shape)
    with torch.no_grad():
        depth = model(input_batch)
        print("Depth Shape",depth.shape)
        # Fix: Ensure depth has the correct shape [batch_size, channels, height, width]
        if len(depth.shape) == 5:
            
            depth = depth.squeeze(1)  # Remove the extra dimension
            
        depth = depth.squeeze().cpu().numpy()
        depth = cv2.resize(depth, (image.shape[1], image.shape[0]))
    return depth

In [8]:
def segment_food(unet_model, image):
    print("Image Shape",image.shape)
    """
    Segment food in an image using a U-Net model.

    Parameters:
    - unet_model (torch.nn.Module): The U-Net model used for segmentation.
    - image (PIL.Image.Image): The input image to be segmented.

    Returns:
    - numpy.ndarray: The binary mask indicating the segmented food regions.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((256, 256)),
    ])
    input_tensor = transform(image)
    
    print("Input Tensor Shape",input_tensor.shape)
    input_tensor = input_tensor.unsqueeze(0)
    with torch.no_grad():
        mask = unet_model(input_tensor)
        mask = mask.squeeze().cpu().numpy()
        mask = cv2.resize(mask, (image.shape[1], image.shape[0]))
        return mask > 0.5

In [20]:
import torch
import torchvision.transforms as T
import torchvision.transforms.functional as F

import torch
import torch.nn.functional as F
import numpy as np

def gaussian_edge_blur(image, kernel_size=5, sigma=1.0):
    """
    Apply Gaussian blur and edge enhancement on a NumPy array image.

    Args:
        image (np.ndarray): Input image as a NumPy array (H x W or H x W x C).
        kernel_size (int): Size of the Gaussian kernel.
        sigma (float): Standard deviation for Gaussian kernel.

    Returns:
        np.ndarray: Processed image as a NumPy array.
    """
    # Ensure the input image is a 3D tensor (C x H x W)
    if image.ndim == 2:  # Grayscale image
        image = image[np.newaxis, ...]  # Add channel dimension
    elif image.ndim == 3:  # Color image
        image = image.transpose(2, 0, 1)  # Convert H x W x C to C x H x W
    else:
        raise ValueError("Input image must be 2D (H x W) or 3D (H x W x C).")

    # Convert to PyTorch tensor
    image_tensor = torch.from_numpy(image).float().unsqueeze(0)  # Add batch dimension

    # Apply Gaussian Blur
    blurred_image = F.gaussian_blur(image_tensor, kernel_size=(kernel_size, kernel_size), sigma=[sigma, sigma])

    # Define Sobel operators
    sobel_x = torch.tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=torch.float32)
    sobel_y = sobel_x.T

    sobel_x = sobel_x.unsqueeze(0).unsqueeze(0)  # Shape (1, 1, 3, 3)
    sobel_y = sobel_y.unsqueeze(0).unsqueeze(0)  # Shape (1, 1, 3, 3)

    # Apply Sobel operators for edge detection
    edges_x = F.conv2d(blurred_image, sobel_x, padding=1)
    edges_y = F.conv2d(blurred_image, sobel_y, padding=1)
    edges = torch.sqrt(edges_x**2 + edges_y**2)

    # Combine blurred image and edges
    result = blurred_image * (1 - edges)

    # Convert back to NumPy array
    result = result.squeeze(0).numpy()  # Remove batch dimension
    if result.shape[0] > 1:  # Color image
        result = result.transpose(1, 2, 0)  # Convert C x H x W to H x W x C

    return result


In [21]:
def calculate_volume(depth_map, mask, pixel_area=0.1):
    volume = np.sum(depth_map[mask]) * pixel_area
    return volume

In [22]:
def main():
    """
    Main function to perform depth estimation and volume calculation for a given image.
    
    Args:
        image_path (str): The path to the input image file.
    """
    # Load Image
    image_path = 'colorful_plate.jpg'
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    blurred_image = gaussian_edge_blur(image, kernel_size=5, sigma=1.0)
    midas_model, midas_transform = load_midas_model()
    unet_model = load_unet_model()
    
    food_mask = segment_food(unet_model, image)
    
    depth_map = estimate_depth(midas_model, midas_transform, blurred_image)
    
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.title("Segmented Food")
    plt.imshow(food_mask, cmap='gray')
    
    plt.subplot(1, 2, 2)
    plt.title("Depth Map")
    plt.imshow(depth_map, cmap='viridis')
    plt.show()
    
    volume = calculate_volume(depth_map, food_mask)
    print(f"Estimated Volume: {volume:.2f} cubic units")

In [23]:
main()

AttributeError: module 'torch.nn.functional' has no attribute 'gaussian_blur'