In [1]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="9MoGKNjyxriFivoPOl2m")
project = rf.workspace("signaturedetection-im4xu").project("leaf_detection-fcyc2")
version = project.version(1)
dataset = version.download("yolov12")
                

Collecting roboflow
  Downloading roboflow-1.1.54-py3-none-any.whl (83 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.1/83.1 kB[0m [31m783.6 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting idna==3.7
  Downloading idna-3.7-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.8/66.8 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cycler
  Using cached cycler-0.12.1-py3-none-any.whl (8.3 kB)
Collecting kiwisolver>=1.3.1
  Using cached kiwisolver-1.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
Collecting matplotlib
  Using cached matplotlib-3.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.6 MB)
Collecting opencv-python-headless==4.10.0.84
  Downloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (49.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.9/49.9 MB[0m [31m3.3 MB/s[0m eta [36

Downloading Dataset Version Zip in leaf_detection-1 to yolov12:: 100%|██████████| 8518/8518 [00:03<00:00, 2222.10it/s]





Extracting Dataset Version Zip to leaf_detection-1 in yolov12:: 100%|██████████| 162/162 [00:00<00:00, 17780.38it/s]


In [8]:
import os
import cv2
import shutil
import albumentations as A

# Where the downloaded dataset lives and where the per-class crops are written
dataset_path = "/home/sourav/workplace/leaf_disease_detection/leaf_detection-1"  # Update this with your dataset path
output_path = "/home/sourav/workplace/leaf_disease_detection/cropped_data"  # Update this with your desired output path
dataset_types = ["train", "valid", "test"]

# Three independent augmentation pipelines. Every transform is configured with
# p=1.0, so each step of a pipeline is applied on every call.

# Pipeline 0: horizontal flip, brightness/contrast change, rotation, Gaussian noise
_flip_rotate_noise = A.Compose([
    A.HorizontalFlip(p=1.0),
    A.RandomBrightnessContrast(p=1.0),
    A.Rotate(limit=30, p=1.0),
    A.GaussNoise(p=1.0),
])

# Pipeline 1: vertical flip, shift/scale/rotate, blur
_shift_scale_blur = A.Compose([
    A.VerticalFlip(p=1.0),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=20, p=1.0),
    A.Blur(blur_limit=3, p=1.0),
])

# Pipeline 2: CLAHE, colour jitter, gamma change
_color_only = A.Compose([
    A.CLAHE(p=1.0),
    A.ColorJitter(p=1.0),
    A.RandomGamma(p=1.0),
])

# Order matters: the list index becomes the "_aug{N}" suffix of the saved files.
augmentations = [_flip_rotate_noise, _shift_scale_blur, _color_only]

# Function to create class-wise directories
def create_class_dirs(base_path, class_ids):
    """Ensure ``base_path/<split>/<class_id>`` exists for every split and class.

    Args:
        base_path: Root directory under which the split folders are created.
        class_ids: Iterable of class identifiers; each one is converted with
            ``str()`` to form the folder name.

    The splits come from the module-level ``dataset_types`` list. Directories
    that already exist are left untouched.
    """
    wanted_dirs = [
        os.path.join(base_path, split_name, str(cid))
        for split_name in dataset_types
        for cid in class_ids
    ]
    for wanted in wanted_dirs:
        os.makedirs(wanted, exist_ok=True)

# Helper: normalized YOLO box -> pixel corners clamped to the image
def _yolo_box_to_pixels(x_center, y_center, box_width, box_height, img_w, img_h):
    """Convert a normalized YOLO box to pixel corners inside the image.

    Args:
        x_center, y_center, box_width, box_height: Box in YOLO format, i.e.
            fractions of the image width/height.
        img_w, img_h: Image size in pixels.

    Returns:
        ``(x_min, y_min, x_max, y_max)`` as ints, each clamped to
        ``[0, img_w]`` / ``[0, img_h]``.
    """
    x_min = int((x_center - box_width / 2) * img_w)
    y_min = int((y_center - box_height / 2) * img_h)
    x_max = int((x_center + box_width / 2) * img_w)
    y_max = int((y_center + box_height / 2) * img_h)

    # Clamp: a negative start index would make numpy slice from the far edge
    # of the image and silently yield a wrong (or empty) crop.
    x_min = max(0, min(x_min, img_w))
    x_max = max(0, min(x_max, img_w))
    y_min = max(0, min(y_min, img_h))
    y_max = max(0, min(y_max, img_h))
    return x_min, y_min, x_max, y_max


# Function to extract and augment objects
def extract_objects():
    """Crop every labelled object out of the dataset and save it per class.

    For each split in ``dataset_types`` the function reads
    ``dataset_path/<split>/labels/*.txt``, loads the matching ``.jpg`` from
    ``dataset_path/<split>/images`` and writes one crop per label line to
    ``output_path/<split>/<class_id>/<label-stem>_<line-index>.jpg``.
    Crops of class 0 are additionally passed through every pipeline in
    ``augmentations`` and saved with an ``_aug<N>`` suffix.

    Splits without a ``labels`` directory, labels without a matching image,
    unreadable images, blank label lines and zero-area boxes are skipped.
    Finally ``create_class_dirs`` makes sure every split has a folder for every
    class id that was seen.

    Raises:
        ValueError: If a non-blank label line does not consist of an integer
            class id followed by exactly four floats.
    """
    label_suffix = ".txt"
    class_ids = set()

    for dataset_type in dataset_types:
        images_path = os.path.join(dataset_path, dataset_type, "images")
        labels_path = os.path.join(dataset_path, dataset_type, "labels")

        # An export may not contain every split; skip instead of crashing.
        if not os.path.isdir(labels_path):
            continue

        for label_file in os.listdir(labels_path):
            if not label_file.endswith(label_suffix):
                continue

            # Strip only the trailing extension. The full stem is kept for the
            # output names because file names may contain dots themselves;
            # cutting at the first dot lets different images overwrite each
            # other's crops.
            stem = label_file[:-len(label_suffix)]
            image_file = stem + ".jpg"  # Change to .png if needed
            image_path = os.path.join(images_path, image_file)
            label_path = os.path.join(labels_path, label_file)

            if not os.path.exists(image_path):
                continue

            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable/corrupt file by returning None.
                print(f"Skipping unreadable image: {image_path}")
                continue
            h, w = image.shape[:2]

            with open(label_path, "r") as file:
                lines = file.readlines()

            for idx, line in enumerate(lines):
                parts = line.strip().split()
                if not parts:
                    # Blank line (e.g. trailing newline at end of file).
                    continue

                class_id = int(parts[0])
                class_ids.add(class_id)
                x_center, y_center, box_width, box_height = map(float, parts[1:])

                x_min, y_min, x_max, y_max = _yolo_box_to_pixels(
                    x_center, y_center, box_width, box_height, w, h
                )
                cropped_object = image[y_min:y_max, x_min:x_max]

                if cropped_object.size == 0:
                    continue

                output_dir = os.path.join(output_path, dataset_type, str(class_id))
                os.makedirs(output_dir, exist_ok=True)

                output_filename = f"{stem}_{idx}.jpg"
                output_filepath = os.path.join(output_dir, output_filename)
                cv2.imwrite(output_filepath, cropped_object)

                # Apply multiple augmentations only for class_id 0
                # NOTE(review): this also augments the valid/test splits —
                # confirm that is intended before using them for evaluation.
                if class_id == 0:
                    for aug_idx, aug in enumerate(augmentations):
                        augmented = aug(image=cropped_object)
                        augmented_image = augmented["image"]

                        aug_output_filename = f"{stem}_{idx}_aug{aug_idx}.jpg"
                        aug_output_filepath = os.path.join(output_dir, aug_output_filename)
                        cv2.imwrite(aug_output_filepath, augmented_image)

    # Make sure every split has a folder for every class, then report success
    # only once all filesystem work is actually done.
    create_class_dirs(output_path, class_ids)
    print("Cropping and augmentation completed!")

# Run extraction and augmentation over every split listed in dataset_types.
# Crops are written below output_path; existing files with the same name are overwritten.
extract_objects()


  original_init(self, **validated_kwargs)


Cropping and augmentation completed!


In [10]:
# Sanity check: number of entries saved for class 0 in the train split.
# The count includes the original crops and their augmented "_aug<N>" copies.
len(os.listdir('/home/sourav/workplace/leaf_disease_detection/cropped_data/train/0'))

488