In [None]:
import os
import cv2
import numpy as np
import rootutils
from PIL import Image
from dotenv import load_dotenv
from pathlib import Path

# Locate the project root (a parent directory containing '.git' or
# 'pyproject.toml', searched starting from the current working directory) and
# add it to the Python path. This has to run before the `src` import below,
# which only resolves once the project root is importable.
rootutils.setup_root(
    os.path.abspath(''), indicator=['.git', 'pyproject.toml'], pythonpath=True
)

# Project-local helpers; importable only after setup_root() has run.
from src.data.components.utils import list_files, find_file_by_name

# Load variables from a .env file into the process environment.
# NOTE(review): 'lear_outline_data_path', read via os.environ further down,
# is presumably supplied this way - confirm against the project's .env.
load_dotenv()

In [None]:
def paste_image(background, image, mask, position=(0, 0), scale_factor=1.0, use_bbox=True):
    """
    Paste the image onto the background at the specified position with scaling.

    The paste is a per-pixel blend ``image * mask + background * (1 - mask)``,
    clipped to the part of the image that overlaps the background. The input
    ``background`` is never modified; a copy is returned.

    If use_bbox=True, ``position`` refers to the top-left corner of the bounding
    box of the largest connected region of the mask (thresholded at 0.5 on its
    first channel) rather than the top-left corner of the full image.

    Args:
        background: Background image (RGB), array of shape (H, W, C)
        image: Image to paste (RGB), same number of channels as the background
        mask: Mask (3-channel), same height/width as ``image``, with blend
            weights expected in the range [0, 1]
        position: Tuple (x, y) for the position to paste (may be negative or
            extend past the background; the paste is clipped)
        scale_factor: Float to scale the image and mask (1.0 = original size)
        use_bbox: If True, position is relative to bounding box (offset is applied)

    Returns:
        tuple: (numpy.ndarray: Combined image, numpy.ndarray: Updated mask)
            Both have the shape and dtype of ``background``. Mask values are
            cast to that dtype on assignment, so with an integer background
            fractional mask weights are truncated. If the image would land
            completely outside the background, the unmodified background copy
            and an all-zero mask are returned.
    """
    # Make a copy of the background to avoid modifying the original
    result = background.copy()

    # Create a new (empty) mask with the same size and dtype as the background
    result_mask = np.zeros_like(background)

    # Top-left corner of the seat's bounding box inside the image; stays (0, 0)
    # when bbox positioning is disabled or the mask has no foreground.
    bbox_x, bbox_y = 0, 0
    if use_bbox:
        mask_binary = (mask[:, :, 0] > 0.5).astype(np.uint8)
        contours, _ = cv2.findContours(mask_binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if contours:
            # Use the largest contour by area so small specks don't move the box
            largest_contour = max(contours, key=cv2.contourArea)
            bbox_x, bbox_y, _, _ = cv2.boundingRect(largest_contour)

    # Default to the unscaled inputs. Previously these names were only assigned
    # inside the scaling branch, so scale_factor == 1.0 raised UnboundLocalError.
    seat_image, seat_mask = image, mask

    # Scale the seat image and mask if needed
    if scale_factor != 1.0:
        h, w = image.shape[:2]
        new_h, new_w = int(h * scale_factor), int(w * scale_factor)
        seat_image = cv2.resize(image, (new_w, new_h))
        seat_mask = cv2.resize(mask, (new_w, new_h))

        # The bounding box was measured on the unscaled mask; scale it too
        bbox_x = int(bbox_x * scale_factor)
        bbox_y = int(bbox_y * scale_factor)

    # Extract seat and background dimensions
    seat_h, seat_w = seat_image.shape[:2]
    bg_h, bg_w = background.shape[:2]

    # Calculate coordinates for pasting
    x, y = position

    if use_bbox:
        # Shift so the top-left of the bounding box (not of the full image)
        # lands on the requested position
        x = x - bbox_x
        y = y - bbox_y

    # Make sure we're not trying to paste completely outside the background
    if x + seat_w <= 0 or y + seat_h <= 0 or x >= bg_w or y >= bg_h:
        print(f"Warning: Position {(x, y)} would place the seat completely outside the background")
        return result, result_mask  # Nothing to paste, return the background and empty mask as is

    # Clip the paste rectangle to the background bounds
    x_start = max(0, x)
    y_start = max(0, y)
    x_end = min(x + seat_w, bg_w)
    y_end = min(y + seat_h, bg_h)

    # Offsets into the seat image (non-zero when the paste starts off-screen)
    seat_x_offset = x_start - x
    seat_y_offset = y_start - y

    # Region of interest (ROI) in the background
    bg_roi = result[y_start:y_end, x_start:x_end]

    # Corresponding parts of the seat image and mask
    seat_roi = seat_image[seat_y_offset:seat_y_offset + (y_end - y_start),
                          seat_x_offset:seat_x_offset + (x_end - x_start)]
    mask_roi = seat_mask[seat_y_offset:seat_y_offset + (y_end - y_start),
                         seat_x_offset:seat_x_offset + (x_end - x_start)]

    # Ensure dimensions match (defensive: image and mask sizes could differ)
    min_h = min(bg_roi.shape[0], seat_roi.shape[0], mask_roi.shape[0])
    min_w = min(bg_roi.shape[1], seat_roi.shape[1], mask_roi.shape[1])

    if min_h <= 0 or min_w <= 0:
        # Nothing to blend if dimensions are invalid
        return result, result_mask

    bg_roi = bg_roi[:min_h, :min_w]
    seat_roi = seat_roi[:min_h, :min_w]
    mask_roi = mask_roi[:min_h, :min_w]

    # Blend the seat with the background using the mask as per-pixel weights
    blended_roi = seat_roi * mask_roi + bg_roi * (1 - mask_roi)

    # Write the blended region back (cast to the background's dtype on assignment)
    result[y_start:y_start + min_h, x_start:x_start + min_w] = blended_roi

    # Record the pasted mask in the same region of the output mask
    result_mask[y_start:y_start + min_h, x_start:x_start + min_w] = mask_roi

    return result, result_mask

In [None]:
# --- Paths -------------------------------------------------------------------
# Resolve the dataset root once and fail with a clear message when it is missing
# (Path(None) would otherwise raise an opaque TypeError).
data_root = os.environ.get('lear_outline_data_path')
if data_root is None:
    raise RuntimeError(
        "Environment variable 'lear_outline_data_path' is not set "
        "(define it in the environment or in the .env file)"
    )
data_root = Path(data_root)

seat_outline_photos_path = data_root / 'train' / 'images'
seat_outline_mask_path = data_root / 'train' / 'labels'
output_path = data_root / 'synth' / 'train'
output_images_path = output_path / 'images'
output_masks_path = output_path / 'masks'

# cv2.imwrite does not create missing directories; it just returns False.
for out_dir in (output_images_path, output_masks_path):
    out_dir.mkdir(parents=True, exist_ok=True)

image_paths = list_files(seat_outline_photos_path, file_extensions=['.bmp', '.jpg', '.png'])

# --- Background ----------------------------------------------------------------
# cv2.imread returns None instead of raising when the file cannot be read.
seat_background = cv2.imread('seat_background.bmp', cv2.IMREAD_COLOR)
if seat_background is None:
    raise FileNotFoundError("Could not read background image 'seat_background.bmp'")
seat_background_rgb = cv2.cvtColor(seat_background, cv2.COLOR_BGR2RGB)

# --- Augmentation parameters -----------------------------------------------------
# Range (in background pixels) for the top-left corner of the seat's bounding box.
# NOTE(review): the 1200 / 2000 margins presumably reserve room for the seat's
# width / height on this particular background - confirm.
x_min, x_max = 200, seat_background_rgb.shape[1] - 1200
y_min, y_max = 100, seat_background_rgb.shape[0] - 2000
if x_max < x_min or y_max < y_min:
    raise ValueError(
        f"Background of size {seat_background_rgb.shape[1]}x{seat_background_rgb.shape[0]} "
        f"is too small for the placement margins: x in [{x_min}, {x_max}], y in [{y_min}, {y_max}]"
    )

scale_min, scale_max = 0.85, 0.95

# Fixed seed so that re-running the notebook regenerates the same dataset.
SEED = 42
rng = np.random.default_rng(SEED)

# --- Generation ----------------------------------------------------------------
for i, image_path in enumerate(image_paths):
    mask_path = find_file_by_name(seat_outline_mask_path, image_path.stem)

    image = cv2.imread(str(image_path), cv2.IMREAD_COLOR)
    if image is None:
        raise FileNotFoundError(f"Could not read image '{image_path}'")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    mask_gray = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
    if mask_gray is None:
        raise FileNotFoundError(f"Could not read mask '{mask_path}' for image '{image_path}'")
    # Normalise to [0, 1] and replicate to 3 channels, as paste_image expects
    mask = np.stack([mask_gray / 255.0] * 3, axis=-1)

    # Upper bounds are inclusive, hence the +1 (Generator.integers is half-open)
    random_x = int(rng.integers(x_min, x_max + 1))
    random_y = int(rng.integers(y_min, y_max + 1))
    scale = rng.uniform(scale_min, scale_max)

    combined_image, mask_n = paste_image(
        seat_background_rgb,
        image,
        mask,
        position=(random_x, random_y),
        scale_factor=scale,
        use_bbox=True,
    )

    # Show the first sample as a visual sanity check
    if i == 0:
        display(Image.fromarray(combined_image))

    image_out = str(output_images_path / image_path.stem) + '_aug.bmp'
    mask_out = str(output_masks_path / image_path.stem) + '_aug.png'

    # imwrite signals failure through its return value, not an exception
    if not cv2.imwrite(image_out, cv2.cvtColor(combined_image, cv2.COLOR_RGB2BGR)):
        raise OSError(f"Failed to write image '{image_out}'")
    if not cv2.imwrite(mask_out, mask_n * 255):
        raise OSError(f"Failed to write mask '{mask_out}'")