# YOLOv11 DataSet Generation

In this notebook, we will reorganise our image dataset into the directory layout expected by the YOLOv11 classification task, as described in https://docs.ultralytics.com/datasets/classify/
```text
ODIR-2019-Preprocessed-/
|-- train/
    |-- normal/
    |-- diabetes/
    |-- glaucoma/
    |-- cataract/
    |-- ageing/
    |-- hypertension/
    |-- myopia/
    |-- other/
|-- test/
    |-- normal/
    |-- diabetes/
    |-- glaucoma/
    |-- cataract/
    |-- ageing/
    |-- hypertension/
    |-- myopia/
    |-- other/
|-- val/ (optional)
    |-- normal/
    |-- diabetes/
    |-- glaucoma/
    |-- cataract/
    |-- ageing/
    |-- hypertension/
    |-- myopia/
    |-- other/
```
For YOLO, the validation split is optional, since the framework is capable of generating its own validation set from the training data.

In [None]:
import os
# Root directories of the two YOLO dataset variants (processed and augmented),
# each with its train / test / val split. Paths are relative to the working
# directory the notebook is run from.
folders = [
    'ODIR-2019/YOLO/processed_512/train',
    'ODIR-2019/YOLO/processed_512/test',
    'ODIR-2019/YOLO/processed_512/val',
    'ODIR-2019/YOLO/augmented_512/train',
    'ODIR-2019/YOLO/augmented_512/test',
    'ODIR-2019/YOLO/augmented_512/val',
]

# exist_ok=True keeps this cell safe to re-run once the folders already exist.
for folder in folders:
    os.makedirs(folder, exist_ok=True)


In [None]:
import pandas as pd

# Per-split manifests read from the results folder. populate_folder() below
# relies on their `filename` and `label_detail` columns.
eda_train_df = pd.read_csv('ODIR-2019/results/01_eda_train_df.csv')
eda_val_df = pd.read_csv('ODIR-2019/results/01_eda_val_df.csv')
eda_test_df = pd.read_csv('ODIR-2019/results/01_eda_test_df.csv')

# Manifest used for the augmented training split.
aug_train_df = pd.read_csv('ODIR-2019/results/03_aug_train_512_df.csv')



In [None]:
# we will use filename, label_detail to split our images
import os
import shutil
def populate_folder(source_folder, output_folder, df):
    """
    Copy images into one sub-folder per class label.

    Rows of ``df`` are grouped by ``label_detail``; every ``filename`` in a
    group is copied from ``source_folder`` to
    ``output_folder/<label_detail>/<filename>``.

    Args:
        source_folder: directory that holds the source image files.
        output_folder: split directory that receives the per-label folders
            (created on demand).
        df: DataFrame with at least the columns ``filename`` and
            ``label_detail``.

    Files already present at the destination are skipped, so the function can
    be re-run without copying everything again.
    """
    for label, members in df.groupby('label_detail'):
        label_dir = os.path.join(output_folder, label)
        os.makedirs(label_dir, exist_ok=True)

        for record in members.itertuples(index=False):
            source_file = os.path.join(source_folder, record.filename)
            dest_file = os.path.join(label_dir, record.filename)

            if os.path.exists(dest_file):
                continue  # already in place from an earlier run

            # NOTE: this makes a real copy, not a symbolic link; with
            # follow_symlinks=True a symlinked source is copied by content.
            print(f'copy {source_file} to {dest_file}')
            shutil.copyfile(source_file, dest_file, follow_symlinks=True)
# Fill both YOLO dataset variants.
# The destinations must be the *_512 roots listed in `folders`: those are the
# directories created earlier and inspected by the tree preview at the end of
# the notebook. Writing anywhere else leaves them empty.

# Processed variant: every split comes from the processed images.
populate_folder('ODIR-2019/processed_512_images', 'ODIR-2019/YOLO/processed_512/test', eda_test_df)
populate_folder('ODIR-2019/processed_512_images', 'ODIR-2019/YOLO/processed_512/train', eda_train_df)
populate_folder('ODIR-2019/processed_512_images', 'ODIR-2019/YOLO/processed_512/val', eda_val_df)

# Augmented variant: only the training split uses the augmented images; its
# test and val splits reuse the processed (non-augmented) images.
populate_folder('ODIR-2019/processed_512_images', 'ODIR-2019/YOLO/augmented_512/test', eda_test_df)
populate_folder('ODIR-2019/augmented_512_images', 'ODIR-2019/YOLO/augmented_512/train', aug_train_df)
populate_folder('ODIR-2019/processed_512_images', 'ODIR-2019/YOLO/augmented_512/val', eda_val_df)

In [None]:
from pathlib import Path

def print_tree(path, files_limit, prefix='', show_files=True):
    """
    Print the directory tree rooted at ``path``, one entry per line.

    Args:
        path: ``pathlib.Path`` to display. If it is not a directory, nothing
            is printed.
        files_limit: maximum number of file names listed per directory; any
            further files are summarised by a single ``...`` line. ``None``
            lists every file.
        prefix: text printed in front of each entry; one ``│   `` segment is
            appended per nesting level.
        show_files: when false, only directories are printed.
    """
    if not path.is_dir():
        return

    print(f'{prefix}├── {path.name}/')
    child_prefix = prefix + '│   '

    # Directories sort before files; each group is ordered by name.
    entries = sorted(path.iterdir(), key=lambda entry: (not entry.is_dir(), entry.name))

    files_shown = 0
    ellipsis_printed = False
    for entry in entries:
        if entry.is_dir():
            print_tree(entry, files_limit, child_prefix, show_files)
            continue

        # Files: nothing more to print once hidden or already summarised.
        if not show_files or ellipsis_printed:
            continue

        if files_limit is None or files_shown < files_limit:
            print(f'{child_prefix}├── {entry.name}')
            files_shown += 1
        else:
            # Limit reached: emit the placeholder once for this directory.
            print(f'{child_prefix}├── ...')
            ellipsis_printed = True
                    

def show_tree(path, files_limit):
    """
    Print a header line followed by the directory tree of ``path``.

    Args:
        path: directory to display, as a string or path-like object; it is
            converted to ``pathlib.Path`` before being passed to print_tree.
        files_limit: forwarded to print_tree as the per-directory cap on
            listed files (``None`` lists all of them).
    """
    directory = Path(path)
    print(f"Directory tree for: {directory}")
    print_tree(directory, files_limit)



Let's show our created directory structure but limit file display

In [None]:
# Preview every dataset root; listing a single file per directory keeps the
# output short while still showing that each class folder is populated.
for folder in folders:
    show_tree(folder, 1)
