In [1]:
from typing import List, Tuple, Dict
import cv2
import numpy as np
import os
from roboflow import Roboflow

# Function to convert normalized bounding box coordinates to pixel coordinates
def convert_to_pixels(box: List[float], image_width: int, image_height: int) -> Tuple[int, int, int, int]:
    """Turn a normalized centre/size box into integer pixel corner coordinates.

    Args:
        box: ``[x_center, y_center, width, height]``; each value is multiplied
            by the matching image dimension.
        image_width: Image width in pixels.
        image_height: Image height in pixels.

    Returns:
        ``(x1, y1, x2, y2)`` where ``(x1, y1)`` is the top-left corner and
        ``(x2, y2)`` the bottom-right corner. Values are truncated with
        ``int`` and are not clamped, so they can fall outside the image.
    """
    cx_norm, cy_norm, w_norm, h_norm = box

    # Scale the centre and the half-extent into pixel units.
    cx = cx_norm * image_width
    cy = cy_norm * image_height
    half_w = w_norm * image_width / 2
    half_h = h_norm * image_height / 2

    left, top = int(cx - half_w), int(cy - half_h)
    right, bottom = int(cx + half_w), int(cy + half_h)
    return left, top, right, bottom

# Function to calculate Intersection over Union (IoU)
def bb_intersection_over_union(boxA: Tuple[int, int, int, int], boxB: Tuple[int, int, int, int]) -> float:
    """Compute the Intersection over Union (IoU) of two corner-format boxes.

    Both boxes are ``(x1, y1, x2, y2)``. Widths and heights are measured
    inclusively (``x2 - x1 + 1``), so a box whose two corners coincide still
    has an area of 1.

    Args:
        boxA: First box.
        boxB: Second box.

    Returns:
        Intersection area divided by union area.
    """
    # Corners of the overlap rectangle.
    left = max(boxA[0], boxB[0])
    top = max(boxA[1], boxB[1])
    right = min(boxA[2], boxB[2])
    bottom = min(boxA[3], boxB[3])

    # A negative extent means the boxes do not overlap on that axis.
    overlap_w = max(0, right - left + 1)
    overlap_h = max(0, bottom - top + 1)
    intersection = overlap_w * overlap_h

    area_a = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    area_b = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    # Union = sum of both areas minus the part counted twice.
    union = float(area_a + area_b - intersection)
    return intersection / union

# Function to determine if boxes overlap significantly
def has_significant_overlap(boxA: Tuple[int, int, int, int], boxB: Tuple[int, int, int, int], threshold: float = 0.3) -> bool:
    """Tell whether two boxes overlap by at least ``threshold`` IoU.

    Args:
        boxA: First box as ``(x1, y1, x2, y2)``.
        boxB: Second box as ``(x1, y1, x2, y2)``.
        threshold: Minimum IoU (inclusive) that counts as significant.

    Returns:
        ``True`` when the IoU of the two boxes is greater than or equal to
        ``threshold``.
    """
    return bb_intersection_over_union(boxA, boxB) >= threshold

# Function to crop the image based on the bounding box
def crop_image(image: np.ndarray, box: Tuple[int, int, int, int]) -> np.ndarray:
    """Cut the region described by ``box`` out of ``image``.

    The box is clamped to the image bounds before slicing. Without the clamp a
    negative coordinate would be interpreted by NumPy as "count from the end",
    which returns an empty or unrelated region for boxes that extend past the
    top or left edge.

    Args:
        image: Array whose first two axes are (rows, columns).
        box: ``(x1, y1, x2, y2)`` pixel corners; ``x2``/``y2`` are exclusive.

    Returns:
        The slice ``image[y1:y2, x1:x2]`` after clamping. It may be empty when
        the box lies completely outside the image or has no extent.
    """
    x1, y1, x2, y2 = box
    image_height, image_width = image.shape[:2]

    # Clamp every coordinate into [0, dimension] so slicing never wraps around.
    x1 = max(0, min(x1, image_width))
    x2 = max(0, min(x2, image_width))
    y1 = max(0, min(y1, image_height))
    y2 = max(0, min(y2, image_height))

    return image[y1:y2, x1:x2]

# Function to parse label file and get bounding boxes with class prioritization
def parse_label_file(label_file_path: str, image_width: int, image_height: int) -> Dict[int, List[Tuple[int, int, int, int]]]:
    """Read a label file and group its boxes by class, resolving overlaps.

    Each non-blank line must hold five whitespace-separated numbers:
    ``class x_center y_center width height``. The four box values are passed
    to ``convert_to_pixels`` together with the image size.

    Priority rule: when a new box significantly overlaps (as judged by
    ``has_significant_overlap``) a box of a *different* class, the box with the
    larger class id wins. A new box that loses to any existing box is dropped;
    a new box that wins evicts every existing box it beat. Boxes of the same
    class never suppress each other.

    Args:
        label_file_path: Path of the label text file.
        image_width: Width in pixels of the image the labels belong to.
        image_height: Height in pixels of the image the labels belong to.

    Returns:
        Mapping from integer class id to the list of kept ``(x1, y1, x2, y2)``
        boxes. A class that appeared in the file keeps its key even if all of
        its boxes were dropped. Because ``1 == 1.0`` and both hash equally,
        lookups with float keys (e.g. ``1.0 in result``) keep working.

    Raises:
        ValueError: If a non-blank line does not contain exactly five numeric
            fields.
    """
    boxes: Dict[int, List[Tuple[int, int, int, int]]] = {}

    with open(label_file_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no annotation.
                continue
            if len(fields) != 5:
                raise ValueError(
                    f"{label_file_path}:{line_number}: expected 5 fields "
                    f"(class x_center y_center width height), got {len(fields)}"
                )

            raw_class, x_center, y_center, width, height = map(float, fields)
            class_label = int(raw_class)
            box = convert_to_pixels([x_center, y_center, width, height], image_width, image_height)

            # Register the class as soon as it is seen, even if the box is dropped below.
            boxes.setdefault(class_label, [])

            suppressed = False
            evicted: List[Tuple[int, Tuple[int, int, int, int]]] = []
            for other_class, other_boxes in boxes.items():
                if other_class == class_label:
                    continue
                for other_box in other_boxes:
                    if not has_significant_overlap(box, other_box):
                        continue
                    if other_class > class_label:
                        # A higher-priority box already covers this region.
                        suppressed = True
                        break
                    evicted.append((other_class, other_box))
                if suppressed:
                    break

            if suppressed:
                continue

            # Removal happens after the scan so no list is mutated while it is iterated.
            for other_class, other_box in evicted:
                boxes[other_class].remove(other_box)
            boxes[class_label].append(box)

    return boxes
# Updated function to process images with the new criteria: ignore class '0' if class '1' is present
def process_images_with_class_priority(directory_path: str, save_dir: str, start: int, end: int):
    """Crop labelled boxes out of a slice of the images in a dataset split.

    ``directory_path`` must contain an ``images`` and a ``labels`` folder. The
    image file names are sorted and sliced with ``[start:end]`` (normal Python
    slice semantics, so ``end=None`` means "to the end"). For every readable
    image with a label file, the boxes are parsed with ``parse_label_file``;
    if both class 1 and class 0 are present, class 0 is discarded. Each
    remaining box is cropped and written to
    ``save_dir/<class>/<image_file>_<class>_<index>.jpg``.

    Args:
        directory_path: Folder holding the ``images`` and ``labels`` subfolders.
        save_dir: Folder that receives one subfolder per class id.
        start: Slice start into the sorted image list.
        end: Slice end into the sorted image list.
    """
    images_dir = os.path.join(directory_path, 'images')
    labels_dir = os.path.join(directory_path, 'labels')
    image_files = sorted(os.listdir(images_dir))[start:end]  # Process a subset of images

    for image_file in image_files:
        image_path = os.path.join(images_dir, image_file)
        # Swap only the final extension; this also covers upper-case and other
        # image extensions and never touches text in the middle of the name.
        label_file = os.path.splitext(image_file)[0] + '.txt'
        label_path = os.path.join(labels_dir, label_file)

        image = cv2.imread(image_path)
        if image is None:  # Unreadable or missing image: nothing to crop
            continue
        image_height, image_width = image.shape[:2]

        if not os.path.exists(label_path):
            print(f"Label file {label_file} not found for image {image_file}")
            continue

        boxes_by_class = parse_label_file(label_path, image_width, image_height)

        # Class '1' takes precedence: drop class '0' when both are present.
        if 1.0 in boxes_by_class and 0.0 in boxes_by_class:
            boxes_by_class.pop(0.0)

        for class_label, class_boxes in boxes_by_class.items():
            class_dir = os.path.join(save_dir, str(int(class_label)))
            os.makedirs(class_dir, exist_ok=True)

            for i, box in enumerate(class_boxes):
                cropped_img = crop_image(image, box)
                if cropped_img.size == 0:
                    # cv2.imwrite raises on an empty image; skip degenerate or
                    # out-of-frame boxes. The index is kept so names stay stable.
                    continue
                cropped_img_path = os.path.join(class_dir, f'{image_file}_{int(class_label)}_{i}.jpg')
                cv2.imwrite(cropped_img_path, cropped_img)

# Function to process all images in a given directory with updated class prioritization
def process_all_images_class_priority(directory_path: str, save_dir: str):
    """Crop labelled boxes out of every image in a dataset split.

    This is ``process_images_with_class_priority`` applied to the full sorted
    image list; it delegates instead of repeating that function's body so the
    two cannot drift apart.

    Args:
        directory_path: Folder holding the ``images`` and ``labels`` subfolders.
        save_dir: Folder that receives one subfolder per class id.
    """
    # A slice of [0:None] selects the whole list.
    process_images_with_class_priority(directory_path, save_dir, 0, None)





# Read the Roboflow API key from the environment so that the credential is
# never stored in the notebook itself.
roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY")
if not roboflow_api_key:
    raise RuntimeError("Set the ROBOFLOW_API_KEY environment variable before running this cell.")

rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("nyu-figsb").project("basketballdetection-cki6r")
# Download version 18 of the project in the "yolov8" export format.
dataset = project.version(18).download("yolov8")



loading Roboflow workspace...
loading Roboflow project...
Dependency ultralytics==8.0.134 is required but found version=8.0.142, to fix: `pip install ultralytics==8.0.134`
Downloading Dataset Version Zip in basketballDetection-18 to yolov8: 100% [373098507 / 373098507] bytes


Extracting Dataset Version Zip to basketballDetection-18 in yolov8:: 100%|██████████| 4958/4958 [00:05<00:00, 990.65it/s] 


In [3]:

# Location of the downloaded dataset and the folder that receives the crops.
base_dataset_path = dataset.location
cropped_images_base_path = "{}/cropped_for_cls".format(base_dataset_path)

# Dataset splits, processed in this order.
subsets = ['train', 'valid', 'test']

for subset in subsets:
    print(f"processing {subset} set")
    subset_source = "{}/{}".format(base_dataset_path, subset)
    subset_target = "{}/{}".format(cropped_images_base_path, subset)
    process_all_images_class_priority(subset_source, subset_target)

processing train set
processing valid set
processing test set


In [33]:
import os
import shutil

# Root folder that holds the per-split crops
base_dir = cropped_images_base_path

# Per-split crop folders to merge, in this order
source_dirs = [os.path.join(base_dir, split_name) for split_name in ('train', 'test', 'valid')]
# Folder that receives the merged crops
destination_dir = os.path.join(base_dir, 'combined')

# Make sure the '0' and '1' class folders exist inside the destination
for class_name in ('0', '1'):
    os.makedirs(os.path.join(destination_dir, class_name), exist_ok=True)

# Function to copy contents from source to destination
def copy_contents(src, dst):
    """Copy every entry found directly inside ``src`` into ``dst``.

    Files are copied with ``shutil.copy2``; subdirectories are copied
    recursively with ``shutil.copytree`` and merged into an already existing
    target directory of the same name.

    Args:
        src: Directory whose entries are copied.
        dst: Existing directory that receives the copies.
    """
    for entry_name in os.listdir(src):
        source_path = os.path.join(src, entry_name)
        target_path = os.path.join(dst, entry_name)

        if not os.path.isdir(source_path):
            shutil.copy2(source_path, target_path)
            continue

        # dirs_exist_ok lets the copy merge into a directory that is already there.
        shutil.copytree(source_path, target_path, dirs_exist_ok=True)

# Copy the contents of '0' and '1' from each source directory
for dir_path in source_dirs:
    for class_dir in ['0', '1']:
        src_path = os.path.join(dir_path, class_dir)
        dst_path = os.path.join(destination_dir, class_dir)
        # os.listdir inside copy_contents raises FileNotFoundError for a missing
        # folder, so a split without a folder for this class is skipped.
        if not os.path.isdir(src_path):
            print(f"Skipping missing directory {src_path}")
            continue
        copy_contents(src_path, dst_path)
