In [2]:
from typing import List, Tuple, Dict
import cv2
import numpy as np
import os
from roboflow import Roboflow
from tqdm import tqdm
import random
import matplotlib.pyplot as plt 
from PIL import Image

def convert_to_pixels(box: List[float], image_width: int, image_height: int) -> Tuple[int, int, int, int]:
    """Turn a normalised centre-format box into pixel corner coordinates.

    Args:
        box: Exactly four values ``[x_center, y_center, width, height]``; each
            is multiplied by the matching image dimension, i.e. they are
            treated as fractions of the image size.
        image_width: Image width in pixels.
        image_height: Image height in pixels.

    Returns:
        ``(x1, y1, x2, y2)`` - the left/top and right/bottom corners. Each
        value is passed through ``int()``, which truncates toward zero.

    Raises:
        ValueError: If ``box`` does not hold exactly four values.
    """
    norm_cx, norm_cy, norm_w, norm_h = box

    # Scale the centre point and the half-extents into pixel units.
    center_x = norm_cx * image_width
    center_y = norm_cy * image_height
    half_w = norm_w * image_width / 2
    half_h = norm_h * image_height / 2

    left, right = int(center_x - half_w), int(center_x + half_w)
    top, bottom = int(center_y - half_h), int(center_y + half_h)

    return left, top, right, bottom

def crop_and_save(image_path, bbox_coords, output_dir, class_id):
    """
    Crop the image to the bounding box and save it to the specified output directory.

    The crop is written to ``<output_dir>/<class_id>/<basename of image_path>``,
    so it keeps the source file name and extension.

    Args:
        image_path: Path of the source image to open.
        bbox_coords: Box passed straight to ``Image.crop`` (the caller in this
            notebook supplies ``(x1, y1, x2, y2)`` pixel corners).
        output_dir: Root directory of the cropped dataset.
        class_id: Class label; used as the sub-directory name.
    """
    class_dir = os.path.join(output_dir, f"{class_id}")
    with Image.open(image_path) as img:
        cropped_img = img.crop(bbox_coords)
        # Create the class folder on demand so saving does not fail for a
        # class whose directory was not prepared in advance.
        os.makedirs(class_dir, exist_ok=True)
        cropped_img.save(os.path.join(class_dir, os.path.basename(image_path)))

# Input split (images + label files) and the root folder for the cropped output
base_dir = "basketballDetection-24/train"
images_dir, labels_dir = (os.path.join(base_dir, sub) for sub in ("images", "labels"))
output_dir = "classification_dataset"

# Make sure the output root and one sub-folder per class ("0" and "1") exist
os.makedirs(output_dir, exist_ok=True)
for i in (0, 1):
    class_dir = os.path.join(output_dir, str(i))
    os.makedirs(class_dir, exist_ok=True)

# Process each label file: pick one annotation per image and save its crop.
for label_file in tqdm(os.listdir(labels_dir)):
    # Only *.txt files are annotations; ignore anything else in the folder.
    stem, extension = os.path.splitext(label_file)
    if extension != '.txt':
        continue

    with open(os.path.join(labels_dir, label_file), 'r') as file:
        # Whitespace-split every row and keep only complete
        # "class x_center y_center width height" records, so blank or
        # truncated lines are skipped instead of aborting the whole run.
        records = [fields for fields in (line.split() for line in file) if len(fields) == 5]

    # Prefer the first class "1" record, otherwise fall back to the first class "0" one.
    selected = next((fields for fields in records if fields[0] == '1'), None)
    if selected is None:
        selected = next((fields for fields in records if fields[0] == '0'), None)
    if selected is None:
        continue  # Skip if no suitable class found

    class_id = selected[0]
    box = [float(value) for value in selected[1:]]

    # Swap only the extension; str.replace would also rewrite any ".txt"
    # occurring earlier in the file name.
    image_path = os.path.join(images_dir, stem + '.jpg')
    if not os.path.exists(image_path):
        continue

    # Only the dimensions are needed here, so release the file handle right away.
    with Image.open(image_path) as img:
        image_width, image_height = img.size

    bbox_coords = convert_to_pixels(box, image_width, image_height)
    crop_and_save(image_path, bbox_coords, output_dir, class_id)



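# One-time dataset download (kept commented out). Never hardcode the API key;
# read it from the environment, e.g. ROBOFLOW_API_KEY:
# rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
# project = rf.workspace("nyu-figsb").project("basketballdetection-cki6r")
# dataset = project.version(24).download("yolov8")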

100%|██████████| 2955/2955 [01:05<00:00, 45.03it/s]


In [2]:
def find_small_images(directory, min_width, min_height = None):
    """Recursively list images whose pixel area is below a threshold.

    An image is reported when ``width * height < min_width * min_height``;
    the comparison is on total area, not on each side separately.

    Args:
        directory: Root folder to walk; it is printed before scanning.
        min_width: Width component of the area threshold.
        min_height: Height component of the area threshold. Any falsy value
            (including the default ``None``) falls back to ``min_width``.

    Returns:
        A list of ``(file_path, (width, height))`` tuples, one per small image.
        Files that cannot be processed are reported via ``print`` and skipped.
    """
    print(directory)
    if not min_height:
        min_height = min_width

    undersized = []
    for root, _subdirs, names in os.walk(directory):
        # Dot-files (e.g. .DS_Store) are system files, not images.
        visible_names = [name for name in names if not name.startswith('.')]
        for name in visible_names:
            try:
                full_path = os.path.join(root, name)
                with Image.open(full_path) as picture:
                    img_w, img_h = picture.size
                    is_small = img_w * img_h < min_width * min_height
                    if is_small:
                        undersized.append((full_path, picture.size))
            except Exception as e:
                print(f"Error processing image {name}: {e}")
    return undersized

# Collect the class "0" crops whose pixel area is below 17 x 17.
class0_dir = f"{output_dir}/0"
small_images_info = find_small_images(class0_dir, 17)


classification_dataset/0


In [4]:
# Delete every crop flagged as too small. Each entry is (file_path, size).
for small_path, _size in small_images_info:
    # The list goes stale once this cell has run; skipping paths that are
    # already gone keeps the cell safe to re-run.
    if os.path.exists(small_path):
        os.remove(small_path)