In [2]:
import json
import os
from pathlib import Path

import numpy as np
from PIL import Image
from tqdm import tqdm

from ultralytics.data.utils import autosplit
from ultralytics.utils.ops import xyxy2xywhn
from ultralytics.utils import downloads
from ultralytics.utils import yaml



In [3]:
def convert_labels(fname=Path('xView/xView_train.geojson')):
    """Convert xView geoJSON labels to YOLO-format label files.

    Reads the geoJSON annotation file and, for every feature whose image is
    present on disk, appends one line ``<class> <x> <y> <w> <h>`` (box values
    normalised by ``xyxy2xywhn`` and written with 6 decimals) to
    ``<fname.parent>/labels/train/<image stem>.txt``.

    Args:
        fname: Path (or path string) of the xView geoJSON file. Images are
            looked up in ``<fname.parent>/train_images``; if that directory
            does not exist, ``<fname.parent>/images/train`` is used instead so
            the conversion can be repeated after the images have been moved.

    Side effects:
        Deletes any existing ``<fname.parent>/labels/train`` directory and
        recreates it before writing. A label that cannot be converted is
        skipped and reported with a ``WARNING`` line on stdout.
    """
    import shutil  # local import: keeps this cell independent of the notebook's import cell

    fname = Path(fname)  # also accept a plain string
    path = fname.parent
    with open(fname) as f:
        print(f'Loading {fname}...')
        data = json.load(f)

    # Make dirs (start from an empty label directory, because labels are appended below)
    labels = path / 'labels' / 'train'
    if labels.is_dir():
        # shutil instead of a shell 'rm -rf': safe for paths with spaces and portable
        shutil.rmtree(labels)
    labels.mkdir(parents=True, exist_ok=True)

    # xView classes 11-94 to 0-59 (-1 marks type ids that have no YOLO class)
    xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
                        12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
                        29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
                        47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]

    # Locate the training images: original layout first, then the images/train layout
    image_dir = path / 'train_images'
    moved_dir = path / 'images' / 'train'
    if not image_dir.is_dir() and moved_dir.is_dir():
        image_dir = moved_dir

    shapes = {}  # image file name -> (width, height), so each image is opened only once
    for feature in tqdm(data['features'], desc=f'Converting {fname}'):
        p = feature['properties']
        if not p['bounds_imcoords']:
            continue
        image_id = p['image_id']
        file = image_dir / image_id
        if not file.exists():  # 1395.tif missing
            continue
        try:
            box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
            if box.shape[0] != 4:
                raise ValueError(f'incorrect box shape {box.shape[0]}')

            # xView type id -> class index 0-59; reject ids outside the table explicitly,
            # otherwise a negative id would silently wrap around to a wrong class
            type_id = int(p['type_id'])
            if not 0 <= type_id < len(xview_class2index):
                raise ValueError(f'incorrect class index {type_id}')
            cls = xview_class2index[type_id]
            if not 59 >= cls >= 0:
                raise ValueError(f'incorrect class index {cls}')

            # Write YOLO label
            if image_id not in shapes:
                with Image.open(file) as im:  # close the file handle once the size is read
                    shapes[image_id] = im.size
            width, height = shapes[image_id]
            # builtin float instead of np.float, which current NumPy no longer provides
            box = xyxy2xywhn(box[None].astype(float), w=width, h=height, clip=True)
            with open((labels / image_id).with_suffix('.txt'), 'a') as f:
                f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n")  # write label.txt
        except Exception as e:
            # best effort: one bad annotation must not abort the whole conversion
            print(f'WARNING: skipping one label for {file}: {e}')



In [5]:
# The xView archives have to be downloaded by hand from
# https://challenge.xviewdataset.org before the cells below can run.
# Dataset root dir, relative to the current working directory. The name `dir`
# (which shadows the builtin) is kept because the later cells refer to it.
dir = Path('datasets') / 'xView'

In [None]:
import numpy as np
# Compatibility shim: convert_labels() calls np.float, an alias that newer NumPy
# releases no longer provide, so this cell must run before convert_labels() is called.
np.float = float  # redefine np.float as the builtin float

In [10]:
# Optional automatic download, left commented out; uncomment to fetch the archives into `dir`.
# urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip',  # train labels
#         'https://d307kc0mrhucc3.cloudfront.net/train_images.zip',  # 15G, 847 train images
#         'https://d307kc0mrhucc3.cloudfront.net/val_images.zip']  # 5G, 282 val images (no labels)
# downloads.download(urls, dir=dir)

# Convert labels: writes YOLO label files under dir/labels/train.
# Any labels already in that directory are deleted first.
convert_labels(dir / 'xView_train.geojson')

Loading datasets/xView/xView_train.geojson...


Converting datasets/xView/xView_train.geojson:   4%|‚ñç         | 25452/601937 [00:01<00:40, 14278.23it/s]



Converting datasets/xView/xView_train.geojson:   5%|‚ñç         | 28379/601937 [00:02<00:39, 14453.02it/s]



Converting datasets/xView/xView_train.geojson:   5%|‚ñå         | 31267/601937 [00:02<00:39, 14323.42it/s]



Converting datasets/xView/xView_train.geojson:   7%|‚ñã         | 43465/601937 [00:03<00:37, 14720.41it/s]



Converting datasets/xView/xView_train.geojson:  10%|‚ñâ         | 59747/601937 [00:05<00:50, 10703.40it/s]



Converting datasets/xView/xView_train.geojson:  11%|‚ñà         | 66747/601937 [00:05<00:40, 13115.42it/s]



Converting datasets/xView/xView_train.geojson:  13%|‚ñà‚ñé        | 78832/601937 [00:06<00:34, 15098.42it/s]



Converting datasets/xView/xView_train.geojson:  19%|‚ñà‚ñä        | 112763/601937 [00:08<00:34, 14340.53it/s]



Converting datasets/xView/xView_train.geojson:  20%|‚ñà‚ñà        | 123261/601937 [00:09<00:30, 15555.08it/s]



Converting datasets/xView/xView_train.geojson:  21%|‚ñà‚ñà‚ñè       | 128158/601937 [00:09<00:29, 15847.33it/s]



Converting datasets/xView/xView_train.geojson:  22%|‚ñà‚ñà‚ñè       | 131465/601937 [00:09<00:29, 16201.42it/s]



Converting datasets/xView/xView_train.geojson:  25%|‚ñà‚ñà‚ñå       | 151848/601937 [00:12<00:37, 11858.59it/s]



Converting datasets/xView/xView_train.geojson:  27%|‚ñà‚ñà‚ñã       | 165212/601937 [00:13<00:31, 13806.48it/s]



Converting datasets/xView/xView_train.geojson:  30%|‚ñà‚ñà‚ñâ       | 178703/601937 [00:14<00:28, 14757.44it/s]



Converting datasets/xView/xView_train.geojson:  30%|‚ñà‚ñà‚ñà       | 181804/601937 [00:14<00:28, 14610.87it/s]



Converting datasets/xView/xView_train.geojson:  32%|‚ñà‚ñà‚ñà‚ñè      | 191261/601937 [00:14<00:27, 15066.02it/s]



Converting datasets/xView/xView_train.geojson:  35%|‚ñà‚ñà‚ñà‚ñç      | 210618/601937 [00:16<00:26, 14604.32it/s]



Converting datasets/xView/xView_train.geojson:  36%|‚ñà‚ñà‚ñà‚ñå      | 216421/601937 [00:16<00:31, 12143.13it/s]



Converting datasets/xView/xView_train.geojson:  36%|‚ñà‚ñà‚ñà‚ñã      | 219320/601937 [00:17<00:28, 13246.64it/s]



Converting datasets/xView/xView_train.geojson:  37%|‚ñà‚ñà‚ñà‚ñã      | 223529/601937 [00:17<00:27, 13609.68it/s]



Converting datasets/xView/xView_train.geojson:  38%|‚ñà‚ñà‚ñà‚ñä      | 228165/601937 [00:17<00:25, 14691.08it/s]



Converting datasets/xView/xView_train.geojson:  39%|‚ñà‚ñà‚ñà‚ñâ      | 235532/601937 [00:18<00:26, 14070.92it/s]



Converting datasets/xView/xView_train.geojson:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 303568/601937 [00:23<00:22, 13541.87it/s]



Converting datasets/xView/xView_train.geojson:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 368157/601937 [00:28<00:14, 15832.78it/s]



Converting datasets/xView/xView_train.geojson:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 429161/601937 [00:32<00:11, 15522.64it/s]



Converting datasets/xView/xView_train.geojson:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 434049/601937 [00:32<00:10, 15926.39it/s]



Converting datasets/xView/xView_train.geojson:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 438747/601937 [00:33<00:10, 14896.08it/s]



Converting datasets/xView/xView_train.geojson:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 497462/601937 [00:37<00:07, 14233.29it/s]



Converting datasets/xView/xView_train.geojson:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 501845/601937 [00:37<00:06, 14331.83it/s]



Converting datasets/xView/xView_train.geojson:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 589608/601937 [00:43<00:00, 14620.61it/s]



Converting datasets/xView/xView_train.geojson:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 594386/601937 [00:43<00:00, 15528.34it/s]



Converting datasets/xView/xView_train.geojson: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 601937/601937 [00:44<00:00, 13487.38it/s]


### 반복 실행 X — do not run the next cell more than once (it renames the image directories)

In [None]:
# Move images into the images/train and images/val layout.
# The rename is guarded so that re-running this cell after the directories have
# already been moved is a no-op instead of raising FileNotFoundError.
images = Path(dir / 'images')
images.mkdir(parents=True, exist_ok=True)
for split in ('train', 'val'):
    src = dir / f'{split}_images'
    dst = images / split
    if src.exists():
        src.rename(dst)
    elif not dst.exists():
        # neither the original nor the moved directory is there: the dataset is incomplete
        raise FileNotFoundError(f'neither {src} nor {dst} exists')

# Split
autosplit(dir / 'images' / 'train')

# Train

In [11]:
from ultralytics import YOLO

# Build the model object from the "yolo11n.pt" weights file; `model` is used by the training cell.
model = YOLO("yolo11n.pt")

In [12]:
# Test training with minimal epochs
# Smoke test only: one epoch at image size 640 with the dataset described in
# ./xView.yaml, to check that data loading and training run end to end.
# `results` holds whatever model.train() returns.
results = model.train(data="./xView.yaml", epochs=1, imgsz=640, verbose=True)

Ultralytics 8.3.49 üöÄ Python-3.9.6 torch-2.5.1 CPU (Apple M2 Pro)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=./xView.yaml, epochs=1, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_wid

[34m[1mtrain: [0mScanning /Users/san/Library/Mobile Documents/com~apple~CloudDocs/01 KoreaUniv/24-2/s76(Capstone)/dev/ml/01 OD/datasets/xView/labels/train... 760 images, 0 backgrounds, 0 corrupt: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 760/760 [00:06<00:00, 109.46it/s]

[34m[1mtrain: [0mNew cache created: /Users/san/Library/Mobile Documents/com~apple~CloudDocs/01 KoreaUniv/24-2/s76(Capstone)/dev/ml/01 OD/datasets/xView/labels/train.cache



[34m[1mval: [0mScanning /Users/san/Library/Mobile Documents/com~apple~CloudDocs/01 KoreaUniv/24-2/s76(Capstone)/dev/ml/01 OD/datasets/xView/labels/train... 86 images, 0 backgrounds, 0 corrupt: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 86/86 [00:00<00:00, 112.12it/s]

[34m[1mval: [0mNew cache created: /Users/san/Library/Mobile Documents/com~apple~CloudDocs/01 KoreaUniv/24-2/s76(Capstone)/dev/ml/01 OD/datasets/xView/labels/train.cache





Plotting labels to runs/detect/train3/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000156, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ‚úÖ
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train3[0m
Starting training for 1 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/1         0G       3.43      5.528      1.417       5405        640: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 48/48 [18:13<00:00, 22.78s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3/3 [05:15<00:00, 105.09s/it]

                   all         86      64690          0          0          0          0






1 epochs completed in 0.394 hours.
Optimizer stripped from runs/detect/train3/weights/last.pt, 5.5MB
Optimizer stripped from runs/detect/train3/weights/best.pt, 5.5MB

Validating runs/detect/train3/weights/best.pt...
Ultralytics 8.3.49 üöÄ Python-3.9.6 torch-2.5.1 CPU (Apple M2 Pro)
YOLO11n summary (fused): 238 layers, 2,593,852 parameters, 0 gradients, 6.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3/3 [00:24<00:00,  8.19s/it]

                   all         86      64690          0          0          0          0





Speed: 0.9ms preprocess, 165.2ms inference, 0.0ms loss, 1.0ms postprocess per image
Results saved to [1mruns/detect/train3[0m
