## Imports

In [15]:
import os, shutil, random, cv2, kaggle
import pandas as pd
from pathlib import Path

## Downloading database

In [16]:
if not os.path.exists("Aircrafts"):
    kaggle.api.dataset_download_files('a2015003713/militaryaircraftdetectiondataset', path='Aircrafts', unzip=True)

## Pathes definitions

In [17]:
base_dir = Path("Dataset")
dataset_dir = Path("Aircrafts") / "dataset"
train_dir = base_dir / "train"
valid_dir = base_dir / "valid"
test_dir = base_dir / "test"

## Loading all images

In [18]:
images = list(Path("Aircrafts/dataset").glob("*.jpg"))

## Generating annotations

In [None]:
correct = {'Mi8': }
for img in images:
    csv_path = img.with_suffix(".csv")
    if not csv_path.exists():
        os.remove(img)
        continue

    df, im = pd.read_csv(csv_path), cv2.imread(str(img))
    if im is None:
        continue

    h, w = im.shape[:2]
    for idx, row in df.iterrows():
        craft_type = row['class'].replace('-', '')
        if craft_type in ['Mi8', 'F18', 'Z10', 'KAAN', 'Il76', 'EMB314', 'V280', 'J36', 'J35', 'WZ9']:
            if craft_type == 'F18':
                df.loc[idx, 'class'] = 'F/A-18'

            elif craft_type == 'Il76':
                df.loc[idx, 'class'] = 'Il-76'
            
            else:
                pass

In [None]:
current_id = 0
class_mapping = {}
for img in images:
    csv_path = img.with_suffix(".csv")
    if not csv_path.exists():
        os.remove(img)
        continue

    df, im = pd.read_csv(csv_path), cv2.imread(str(img))
    if im is None:
        continue

    h, w = im.shape[:2]
    for idx, row in df.iterrows():
        craft_type = row['class'].replace('-', '')
        if craft_type in ['Mi8', 'F18', 'Z10', 'KAAN', 'Il76', 'EMB314', 'V280', 'J36', 'J35', 'WZ9']:
            if craft_type == 'F18':
                df.loc[idx, 'class'] = 'F/A-18'

            print(img.name, df.loc[idx, 'class'])

        if df.loc[idx, 'class'] not in class_mapping:
            class_mapping[df.loc[idx, 'class']] = current_id
            current_id += 1

    annotations = [
        f"{class_mapping[row['class']]} "
        f"{(row['xmin'] + row['xmax']) / (2 * w):.6f} {(row['ymin'] + row['ymax']) / (2 * h):.6f} "
        f"{(row['xmax'] - row['xmin']) / w:.6f} {(row['ymax'] - row['ymin']) / h:.6f}"
        for _, row in df.iterrows()
    ]

    with open(img.with_suffix(".txt"), "w") as f:
        f.write("\n".join(annotations))
    os.remove(csv_path)


00032844ab679240fc03ecd27d29a6aa.jpg F/A-18


KeyError: 'F18'

## Creating the new folders

In [None]:
(train_dir / "images").mkdir(parents=True, exist_ok=True)
(train_dir / "labels").mkdir(parents=True, exist_ok=True)

(valid_dir / "images").mkdir(parents=True, exist_ok=True)
(valid_dir / "labels").mkdir(parents=True, exist_ok=True)

(test_dir / "images").mkdir(parents=True, exist_ok=True)
(test_dir / "labels").mkdir(parents=True, exist_ok=True)


## Generating .yaml file for YOLO

In [None]:
with open(base_dir / "data.yaml", "w") as f:
    f.write(f"train: {'train/images'}\nval: {'valid/images'}\ntest: {'test/images'}\n")
    f.write(f"nc: {len(class_mapping)}\nnames: {sorted(class_mapping, key=class_mapping.get)}\n")

## Loading all images again

In [None]:
images = list(dataset_dir.glob("*.jpg"))

## Splitting the data into 80% train, 10% validation and 10% test

In [None]:
random.shuffle(images)
splits = {"train": 0.8, "valid": 0.1, "test": 0.1}
n = len(images)
split_map = {
    "train": images[:int(splits["train"] * n)],
    "valid": images[int(splits["train"] * n):int((splits["train"] + splits["valid"]) * n)],
    "test": images[int((splits["train"] + splits["valid"]) * n):]
}

## Removing the old database

In [None]:
shutil.rmtree("Aircrafts")

In [None]:
class_mapping