In [5]:
import cv2
import numpy as np

# Crop each dataset's bio-conservation plot to its drawn content and re-add a
# uniform white margin, writing the result next to the input as "cropped_*.png".

# List of datasets
datasets = ["forebrain", "pancreas", "gastrulation_erythroid", "dentategyrus_lamanno_P5"]

# Define the border size (adjusted for image resolution, 1 mm equivalent in pixels)
border_size = 10  # Adjust based on image resolution

# Directory holding both the input plots and the cropped outputs
plots_dir = "/mnt/data2/home/leonardo/git/imVelo/benchmark/scib_metrics/bio_conservation_plots_unfiltered"

for dataset in datasets:
    file_path = f"{plots_dir}/{dataset}_bio_conservation.png"
    new_path = f"{plots_dir}/cropped_{dataset}_bio_conservation.png"

    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    # Skip that dataset instead of crashing in cvtColor and losing the rest.
    image = cv2.imread(file_path)
    if image is None:
        print(f"Could not read image for dataset: {dataset} ({file_path})")
        continue

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply adaptive thresholding to better capture borders.
    # THRESH_BINARY_INV marks pixels darker than their local (11x11 Gaussian
    # weighted) mean minus 2 as foreground, so a flat background stays empty.
    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)

    # Find contours and get the bounding box of the main content
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Only proceed if contours were found
    if not contours:
        print(f"No content found in the image for dataset: {dataset}")
        continue

    # Find the bounding box covering all contours
    x, y, w, h = cv2.boundingRect(np.concatenate(contours))
    cropped_image = image[y:y+h, x:x+w]

    # Add a white border of specified size around the cropped image
    bordered_image = cv2.copyMakeBorder(cropped_image, border_size, border_size,
                                        border_size, border_size,
                                        cv2.BORDER_CONSTANT, value=[255, 255, 255])

    # Save the bordered image; imwrite reports failure through its return
    # value, so surface it rather than silently producing no output file.
    if not cv2.imwrite(new_path, bordered_image):
        print(f"Failed to write cropped image for dataset: {dataset} ({new_path})")
