In [1]:
import PIL
import os
import matplotlib.pyplot as plt
import numpy as np
import cv2
from rembg import remove

PIL.Image.MAX_IMAGE_PIXELS = None #Increase image size limit

In [2]:
def detect_leaves(input_image):
    """
    Detects and returns bounding boxes around specific areas (leaves) in the input image.
    
    Args:
    input_image (numpy.ndarray): Input image to detect leaves from.
    
    Returns:
    numpy.ndarray: Array containing bounding boxes around leaves.
    """
    # Convert input image to numpy array
    #img = np.array(input_image)
    img = input_image
    
    # Remove Background of input_image
    output_image = np.array(remove(img))

    # Threshold area to filter small contours
    THRESHOLD_AREA = 600000

    # Convert BGR image to RGB
    img = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)

    # Define lower and higher bounds for color thresholding
    lower = np.array([5, 10, 5])  # Lower bounds for BGR channels
    higher = np.array([250, 250, 250])

    # Create mask using color thresholding
    mask = cv2.inRange(img, lower, higher)

    # Find contours in the mask
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    # Initialize list to store detected bounding boxes
    bounding_boxes = []

    # Iterate through contours to filter and extract bounding boxes around leaves
    for contour in contours:
        area = cv2.contourArea(contour)
        if area > THRESHOLD_AREA:
            x, y, w, h = cv2.boundingRect(contour)
            # Filter bounding boxes based on width and height
            if w > 400 and h > 5000:
                bounding_boxes.append([x, y, x+w, y+h])

    # Convert list of bounding boxes to numpy array and return
    return np.array(bounding_boxes)

In [3]:
def is_image_unusable(image):
    """
    Checks if the image is unusable based on its dimensions.
    
    Args:
    image (numpy.ndarray): Input image to check for usability.
    
    Returns:
    bool: True if the image is unusable, False otherwise.
    """
    # Get the height and width of the image
    height = image.shape[0]
    
    # Define the threshold values for image dimensions as constants
    MIN_HEIGHT = 13000
    MAX_HEIGHT = 22500
    
    # Check if the height is outside the acceptable range
    if height < MIN_HEIGHT or height > MAX_HEIGHT:
        return True
    else:
        return False

In [4]:
def image_processing(entry_path, exit_path):
    """
    Process images in the specified directory.
    
    Args:
    entry_path (str): Path to the directory containing input images.
    exit_path (str): Path to the directory where processed images will be saved.
    """
    # Initialize counters
    num_files = 0
    count_files = 0
    count_unusable_files = 0
    
    # Count the number of JPEG files in the entry directory
    for filename in os.listdir(entry_path):
        if filename.lower().endswith('.jpg'):
            num_files += 1

    # Create the output directory 'new_results'
    new_results_path = os.path.join(exit_path, "new_results")
    exit_path = new_results_path
    os.makedirs(new_results_path, exist_ok=True)  # This line creates the directory if it doesn't exist
    
    # Create subdirectories 'File' and 'Unusable_File' inside 'new_results'
    file_path = os.path.join(new_results_path, "File")
    unusable_file_path = os.path.join(new_results_path, "Unusable_File")
    os.makedirs(file_path, exist_ok=True)
    os.makedirs(unusable_file_path, exist_ok=True)

    # Process each JPEG file in the entry directory
    for i in range(num_files):
        full_path = os.path.join(entry_path, str(i+1) + ".jpg")
            
        # Read the image
        img = cv2.imread(full_path)
        count_files += 1

        # Check if the image is unusable
        if is_image_unusable(img):
            count_unusable_files += 1
            cv2.imwrite(f"{exit_path}/Unusable_File/Unusable_File_{count_files}.jpg", img)
            continue
        else:
            # Detect leaves and save each leaf as a separate image
            bounding_boxes = detect_leaves(img)
            for j, box in enumerate(bounding_boxes):
                x1, y1, x2, y2 = box
                part = img[y1:y2, x1:x2]

                cv2.imwrite(f"{exit_path}/File/Sheet{count_files}_leaf{j + 1}.jpg", part)

    print(f"Usable Files: {count_files - count_unusable_files}, Unusable Files: {count_unusable_files}")

In [5]:
image_processing("/Users/titouanlegourrierec/Desktop/all_scan_data/all_090523", "/Users/titouanlegourrierec/Desktop/new_results")

Corrupt JPEG data: 4110 extraneous bytes before marker 0xd5
[0;93m2024-03-28 11:01:58.104875 [W:onnxruntime:, coreml_execution_provider.cc:81 GetCapability] CoreMLExecutionProvider::GetCapability, number of partitions supported by CoreML: 67 number of nodes in the graph: 371 number of nodes supported by CoreML: 300[m
Corrupt JPEG data: 4621 extraneous bytes before marker 0xd2
[0;93m2024-03-28 11:02:11.807418 [W:onnxruntime:, coreml_execution_provider.cc:81 GetCapability] CoreMLExecutionProvider::GetCapability, number of partitions supported by CoreML: 67 number of nodes in the graph: 371 number of nodes supported by CoreML: 300[m
Corrupt JPEG data: 4541 extraneous bytes before marker 0xd7
[0;93m2024-03-28 11:02:24.295763 [W:onnxruntime:, coreml_execution_provider.cc:81 GetCapability] CoreMLExecutionProvider::GetCapability, number of partitions supported by CoreML: 67 number of nodes in the graph: 371 number of nodes supported by CoreML: 300[m
Corrupt JPEG data: 4254 extraneous by

AttributeError: 'NoneType' object has no attribute 'shape'

---
# Brouillon

In [23]:
import os

def image_processing(entry_path, exit_path):
    """
    Process images in the specified directory.
    
    Args:
    entry_path (str): Path to the directory containing input images.
    exit_path (str): Path to the directory where processed images will be saved.
    """
    # Initialize counters
    num_files = 0
    count_unusable_files = 0
    
    # Count the number of JPEG files in the entry directory
    for filename in os.listdir(entry_path):
        if filename.lower().endswith('.jpg'):
            num_files += 1

    # Create the output directory 'new_results'
    new_results_path = os.path.join(exit_path, "new_results")
    file_path = os.path.join(new_results_path, "File")
    unusable_file_path = os.path.join(new_results_path, "Unusable_File")
    
    os.makedirs(new_results_path, exist_ok=True)  # This line creates the directory if it doesn't exist
    os.makedirs(file_path, exist_ok=True)
    os.makedirs(unusable_file_path, exist_ok=True)

    # Process each JPEG file in the entry directory
    for filename in os.listdir(entry_path):
        if filename.lower().endswith('.jpg'):
            full_path = os.path.join(entry_path, filename)
            
            # Read the image
            img = cv2.imread(full_path)

            # Check if the image is unusable
            if is_image_unusable(img):
                count_unusable_files += 1
                cv2.imwrite(f"{unusable_file_path}/Unusable_File_{filename}", img)
                continue
            else:
                # Detect leaves and save each leaf as a separate image
                bounding_boxes = detect_leaves(img)
                for j, box in enumerate(bounding_boxes):
                    x1, y1, x2, y2 = box
                    part = img[y1:y2, x1:x2]

                    # Use the input image filename for output file naming
                    output_filename = f"Sheet_{os.path.splitext(filename)[0]}_leaf{j + 1}.jpg"
                    cv2.imwrite(os.path.join(file_path, output_filename), part)

    print(f"Usable Files: {num_files - count_unusable_files}, Unusable Files: {count_unusable_files}")

image_processing("/Users/titouanlegourrierec/Desktop/Test ilastic", "/Users/titouanlegourrierec/Desktop/")

Corrupt JPEG data: 4149 extraneous bytes before marker 0xd3
[0;93m2024-03-24 18:32:28.960115 [W:onnxruntime:, coreml_execution_provider.cc:81 GetCapability] CoreMLExecutionProvider::GetCapability, number of partitions supported by CoreML: 67 number of nodes in the graph: 371 number of nodes supported by CoreML: 300[m
Context leak detected, msgtracer returned -1
Context leak detected, msgtracer returned -1
Context leak detected, msgtracer returned -1
Context leak detected, msgtracer returned -1
Context leak detected, msgtracer returned -1
Context leak detected, msgtracer returned -1
Context leak detected, msgtracer returned -1
Context leak detected, msgtracer returned -1
Context leak detected, msgtracer returned -1
Corrupt JPEG data: 4401 extraneous bytes before marker 0xd0
[0;93m2024-03-24 18:32:45.404231 [W:onnxruntime:, coreml_execution_provider.cc:81 GetCapability] CoreMLExecutionProvider::GetCapability, number of partitions supported by CoreML: 67 number of nodes in the graph: 3

Usable Files: 6, Unusable Files: 0
