### Imports

In [1]:
import os, shutil, pandas as pd, kagglehub

  from .autonotebook import tqdm as notebook_tqdm


### Downloading The Dataset

In [2]:
# Fetch the Kaggle dataset only when no local 'Aircrafts' folder exists yet,
# then move the downloaded copy to ./Aircrafts so later cells can use fixed paths.
dataset_missing = not os.path.exists('Aircrafts')
if dataset_missing:
    path = kagglehub.dataset_download('a2015003713/militaryaircraftdetectiondataset')
    shutil.move(path, 'Aircrafts')

### Removing The Unused Folders

In [3]:
# Delete the dataset sub-folders that the rest of this notebook does not reference.
# Each removal is guarded so the cell can be re-run after the folders are gone.
for unused_dir in ('Aircrafts/annotated_samples', 'Aircrafts/crop'):
    if os.path.exists(unused_dir):
        shutil.rmtree(unused_dir)

### Paths

In [4]:
# Root folder of the YOLO-formatted dataset that this notebook builds.
output_dir = 'Dataset'

# Inputs inside the downloaded 'Aircrafts' folder:
# the folder the source images are copied from ...
img_dir = 'Aircrafts/dataset'
# ... and the CSV holding the bounding boxes, classes and split assignment.
csv_file = 'Aircrafts/labels_with_split.csv'

### Creating The Necessary Folders

In [5]:
# Maps each value of the CSV's `split` column to its image folder under output_dir.
splits = {'train': 'train/images', 'validation': 'valid/images', 'test': 'test/images'}
for split_path in splits.values():
    # Every image folder gets a sibling `labels` folder. The labels path is built
    # outside the f-string because reusing the enclosing quote character inside an
    # f-string replacement field is a SyntaxError on Python versions before 3.12.
    labels_path = split_path.replace('images', 'labels')
    os.makedirs(f'{output_dir}/{split_path}', exist_ok=True)
    os.makedirs(f'{output_dir}/{labels_path}', exist_ok=True)

### Extracting The Data From The CSV File

In [6]:
df = pd.read_csv(csv_file)
pd.set_option('display.max_rows', 100)

# Classes with fewer annotations than this are merged into one catch-all class.
min_annotations_per_class = 340

# Count the annotations per class once (instead of once per row) and broadcast the
# counts back onto the rows, so the relabelling is a single vectorised pass.
class_counts = df['class'].value_counts()
is_frequent_class = df['class'].map(class_counts) >= min_annotations_per_class
df['class'] = df['class'].where(is_frequent_class, 'unsupported airplane')

# Class ids are the positions of the class names in alphabetical order.
class_names = sorted(df['class'].unique())
class_to_id = {name: i for i, name in enumerate(class_names)}

### Converting The Data To YOLO's Format

In [7]:
def convert_to_yolo_format(row, class_ids=None):
    """Format one bounding-box annotation as a YOLO label line.

    Args:
        row: Mapping-like annotation (for example a DataFrame row) providing
            'xmin', 'xmax', 'ymin', 'ymax', 'width', 'height' and 'class'.
            The box coordinates are divided by 'width' / 'height', which are
            presumably the image dimensions in pixels (confirm against the CSV).
        class_ids: Optional mapping from class name to integer class id. When
            omitted, the notebook-level `class_to_id` mapping is used.

    Returns:
        A string '<class id> <x_center> <y_center> <box_width> <box_height>',
        with the four box values normalised and printed with six decimals.

    Raises:
        KeyError: If the row's class is missing from the class-id mapping.
    """
    if class_ids is None:
        # Fall back to the mapping built in the CSV-extraction cell.
        class_ids = class_to_id

    img_width, img_height = row['width'], row['height']
    x_center = (row['xmin'] + row['xmax']) / 2 / img_width
    y_center = (row['ymin'] + row['ymax']) / 2 / img_height
    box_width = (row['xmax'] - row['xmin']) / img_width
    box_height = (row['ymax'] - row['ymin']) / img_height
    cls_id = class_ids[row['class']]
    return f'{cls_id} {x_center:.6f} {y_center:.6f} {box_width:.6f} {box_height:.6f}'

# Copy every image into its split folder and write one label file per image.
for split in ['train', 'validation', 'test']:
    split_df = df[df['split'] == split]
    images_out = os.path.join(output_dir, splits[split])
    labels_out = os.path.join(output_dir, splits[split].replace('images', 'labels'))

    # Group the annotations by image so each image is copied once and its label
    # file is written once, rather than once per bounding box.
    for filename, annotations in split_df.groupby('filename', sort=False):
        img_filename = filename + '.jpg'
        shutil.copy(os.path.join(img_dir, img_filename), os.path.join(images_out, img_filename))
        with open(os.path.join(labels_out, filename + '.txt'), 'w') as f:
            # One YOLO line per bounding box of this image, in CSV row order.
            for _, annotation in annotations.iterrows():
                f.write(convert_to_yolo_format(annotation) + '\n')

### Creating The YAML File

In [None]:
# Write the dataset description file: the three split folders, the number of
# classes, and the class names in id order.
with open(f'{output_dir}/data.yaml', 'w') as f:
    f.writelines([
        'train: train/images\n',
        'val: valid/images\n',
        'test: test/images\n\n',
        f'nc: {len(class_names)}\n',
        f'names: {class_names}\n',
    ])

### Erasing The Raw Downloaded Dataset

In [9]:
# Remove the raw 'Aircrafts' download folder. The existence check keeps this cell
# safe to re-run, matching the guarded removals earlier in the notebook.
if os.path.exists('Aircrafts'):
    shutil.rmtree('Aircrafts')