# Data Loading and Preprocessing


### Imports

In [1]:
!pip install ultralytics
from ultralytics import YOLO
from PIL import Image
import os
import kagglehub
import os
import pandas as pd
import cv2
import json

Collecting ultralytics
  Downloading ultralytics-8.3.162-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

### Get Kaggle dataset

In [2]:
path = kagglehub.dataset_download("ollypowell/fair1m-satellite-imagery-for-object-detection")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/fair1m-satellite-imagery-for-object-detection


### Organize dataset

In [3]:
labels_df = pd.read_parquet(os.path.join(path, "Dataset", "labels.parquet"))

base_dir = "fair1m"
base_label_dir = os.path.join(base_dir, "labels")
base_image_dir = os.path.join(base_dir, "images")

os.makedirs(base_dir, exist_ok=True)
os.makedirs(f"{base_label_dir}/train", exist_ok=True)
os.makedirs(f"{base_label_dir}/val", exist_ok=True)
os.makedirs(f"{base_image_dir}/train", exist_ok=True)
os.makedirs(f"{base_image_dir}/val", exist_ok=True)

class_names = sorted(labels_df["Category"].unique())
class_to_id = {name: idx for idx, name in enumerate(class_names)}

with open(os.path.join(base_dir, "classes.txt"), "w") as f:
    for name in class_names:
        f.write(name + "\n")

for _, row in labels_df.iterrows():
    filepath = row["FilePath"].replace("/home/olly/Desktop/Gaofen_Challenge/Dataset/Images/", os.path.join(path, "Dataset", "Images") + "/"
)

    category = row["Category"]
    split = row["Split"].lower()

    x_min, y_min = row["x_min"], row["y_min"]
    x_max, y_max = row["x_max"], row["y_max"]
    img_w, img_h = row["ImageWidth"], row["ImageHeight"]

    filename = os.path.basename(filepath)
    name_no_ext = os.path.splitext(filename)[0]

    x_center = ((x_min + x_max) / 2) / img_w
    y_center = ((y_min + y_max) / 2) / img_h
    width = (x_max - x_min) / img_w
    height = (y_max - y_min) / img_h

    label_path = os.path.join(base_label_dir, split, name_no_ext + ".txt")

    class_id = class_to_id[category]
    with open(label_path, "a") as f:
        f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

    dest_image_path = os.path.join(base_image_dir, split, filename)
    if not os.path.exists(dest_image_path):
        if os.path.exists(filepath):
            os.system(f'cp "{filepath}" "{dest_image_path}"')
        else:
            print(f"Missing image: {filepath}")


# LONG Training

Training on full dataset, for 50 epochs

### Create yaml file

In [29]:
dataset_root = "/content/fair1m"
train_path = os.path.join(dataset_root, "images/train")
val_path = os.path.join(dataset_root, "images/val")

if not os.path.exists(train_path):
    raise FileNotFoundError(f"Train path not found: {train_path}")
if not os.path.exists(val_path):
    raise FileNotFoundError(f"Val path not found: {val_path}")

with open(os.path.join(dataset_root, "classes.txt"), "r") as f:
    class_names = [line.strip() for line in f if line.strip()]

data_yaml_path = os.path.join(dataset_root, "data.yaml")
with open(data_yaml_path, "w") as f:
    f.write(f"path: {dataset_root}\n")
    f.write("train: images/train\n")
    f.write("val: images/val\n")
    f.write(f"nc: {len(class_names)}\n")
    f.write("names:\n")
    for name in class_names:
        f.write(f"  - {name}\n")

print(f"Saved data.yaml to {data_yaml_path}")


Saved data.yaml to /content/fair1m/data.yaml


### Train Model

In [None]:
dataset_root = "/content/fair1m"
train_path = os.path.join(dataset_root, "images/train")
val_path = os.path.join(dataset_root, "images/val")
label_train_path = os.path.join(dataset_root, "labels/train")
label_val_path = os.path.join(dataset_root, "labels/val")
class_file = os.path.join(dataset_root, "classes.txt")
data_yaml_path = os.path.join(dataset_root, "data.yaml")

with open(class_file, "r") as f:
    class_names = [line.strip() for line in f if line.strip()]

model = YOLO("yolov8s.pt")

# model.train(
#     data=data_yaml_path,
#     epochs=50,
#     imgsz=640,
#     batch=16,
#     name="fair1m_train"
# )

model.train(
    data=data_yaml_path,
    epochs=100,
    imgsz=640,
    batch=8,
    name="fair1m_finetune",
    lr0=0.001,
    weight_decay=0.0001,
    momentum=0.9,
    cos_lr=True,
    patience=20,
    val=True,
    freeze=10,
    mosaic=0.8,
    mixup=0.1,
    translate=0.1,
    scale=0.5,
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    erasing=0.2,
)

best_model_path = "runs/detect/fair1m_train/weights/best.pt"
if not os.path.exists(best_model_path):
    best_model_path = "runs/detect/fair1m_train/weights/last.pt"
    print(f"best.pt not found, using last.pt instead: {best_model_path}")

model = YOLO(best_model_path)

results = model.predict(source=val_path, stream=True, conf=0.25)

predictions = []
img_id = 0
for r in results:
    boxes = []
    labels = []
    if r.boxes is not None:
        for box in r.boxes:
            xyxy = box.xyxy[0].cpu().numpy().astype(int).tolist()
            cls_id = int(box.cls[0])
            label = class_names[cls_id] if cls_id < len(class_names) else f"class_{cls_id}"
            boxes.append(xyxy)
            labels.append(label)
    predictions.append({
        "id": img_id,
        "original_id": r.path,
        "label": labels,
        "boxes": boxes
    })
    img_id += 1

### Ground Truths JSON file

In [None]:
with open("ground_truths.json", "w") as f:
    json.dump(ground_truths, f, indent=2)
print("Saved ground_truths.json")

### Predictions JSON files

In [None]:
with open("predictions.json", "w") as f:
    json.dump(predictions, f, indent=2)
print("Saved predictions.json")

ground_truths = []
img_id = 0
for filename in sorted(os.listdir(val_path)):
    if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
        continue
    image_path = os.path.join(val_path, filename)
    label_path = os.path.join(label_val_path, os.path.splitext(filename)[0] + ".txt")
    if not os.path.exists(label_path):
        continue
    img = cv2.imread(image_path)
    h, w = img.shape[:2]
    boxes = []
    labels = []
    with open(label_path, "r") as f:
        for line in f:
            parts = line.strip().split()
            cls_id = int(parts[0])
            cx, cy, bw, bh = map(float, parts[1:])
            x1 = int((cx - bw / 2) * w)
            y1 = int((cy - bh / 2) * h)
            x2 = int((cx + bw / 2) * w)
            y2 = int((cy + bh / 2) * h)
            boxes.append([x1, y1, x2, y2])
            labels.append(class_names[cls_id] if cls_id < len(class_names) else f"class_{cls_id}")
    if boxes:
        ground_truths.append({
            "id": img_id,
            "original_id": image_path,
            "label": labels,
            "boxes": boxes
        })
        img_id += 1

# SHORT Training

Training on 100 randomly selected images, for only 3 epochs

### Create filtered train, val, and yaml file

In [4]:
dataset_root = "/content/fair1m"
train_dir = os.path.join(dataset_root, "images/train")
val_dir = os.path.join(dataset_root, "images/val")

def get_first_n_images(dir_path, n=100):
    files = sorted([os.path.join(dir_path, f) for f in os.listdir(dir_path) if f.lower().endswith((".jpg", ".jpeg", ".png"))])
    return files[:n]

train_images = get_first_n_images(train_dir, 100)
val_images = get_first_n_images(val_dir, 100)

train_txt = os.path.join(dataset_root, "train_subset.txt")
val_txt = os.path.join(dataset_root, "val_subset.txt")

with open(train_txt, "w") as f:
    f.write("\n".join(train_images) + "\n")

with open(val_txt, "w") as f:
    f.write("\n".join(val_images) + "\n")

with open(os.path.join(dataset_root, "data_filtered.yaml"), "w") as f:
    f.write(f"train: {train_txt}\n")
    f.write(f"val: {val_txt}\n")
    f.write(f"nc: {len(class_names)}\n")
    f.write("names:\n")
    for name in class_names:
        f.write(f"  - {name}\n")

print("Created train_subset.txt, val_subset.txt and data_filtered.yaml")


Created train_subset.txt, val_subset.txt and data_filtered.yaml


### Train

In [5]:
model = YOLO("yolov8s.pt")
model.train(data="/content/fair1m/data_filtered.yaml", epochs=50, imgsz=640)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt'...


100%|██████████| 21.5M/21.5M [00:00<00:00, 104MB/s] 


Ultralytics 8.3.162 🚀 Python-3.11.13 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/fair1m/data_filtered.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=3, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12

100%|██████████| 755k/755k [00:00<00:00, 14.7MB/s]

Overriding model.yaml nc=80 with nc=37

                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           
  7                  -1  1   1180672  ultralytic




 15                  -1  1    148224  ultralytics.nn.modules.block.C2f             [384, 128, 1]                 
 16                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
 17            [-1, 12]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 18                  -1  1    493056  ultralytics.nn.modules.block.C2f             [384, 256, 1]                 
 19                  -1  1    590336  ultralytics.nn.modules.conv.Conv             [256, 256, 3, 2]              
 20             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 21                  -1  1   1969152  ultralytics.nn.modules.block.C2f             [768, 512, 1]                 
 22        [15, 18, 21]  1   2130367  ultralytics.nn.modules.head.Detect           [37, [128, 256, 512]]         
Model summary: 129 layers, 11,149,919 parameters, 11,149,903 gradients, 28.7 GFLOPs

Tra

[34m[1mtrain: [0mScanning /content/fair1m/labels/train... 100 images, 0 backgrounds, 0 corrupt: 100%|██████████| 100/100 [00:00<00:00, 114.87it/s]

[34m[1mtrain: [0mNew cache created: /content/fair1m/labels/train.cache





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1590.0±428.1 MB/s, size: 250.7 KB)


[34m[1mval: [0mScanning /content/fair1m/labels/val... 100 images, 0 backgrounds, 0 corrupt: 100%|██████████| 100/100 [00:00<00:00, 1551.45it/s]

[34m[1mval: [0mNew cache created: /content/fair1m/labels/val.cache





Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000244, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 3 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/3         0G      2.786      7.367      1.572         48        640: 100%|██████████| 7/7 [04:32<00:00, 38.86s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [01:50<00:00, 27.62s/it]

                   all        100       2308    0.00079     0.0314    0.00409    0.00364






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/3         0G      2.754      6.165      1.589         31        640: 100%|██████████| 7/7 [04:08<00:00, 35.52s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/4 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  25%|██▌       | 1/4 [00:30<01:32, 30.82s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [01:43<00:00, 25.94s/it]

                   all        100       2308      0.411     0.0119    0.00624    0.00493






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/3         0G      2.734      5.761      1.574         63        640: 100%|██████████| 7/7 [03:52<00:00, 33.20s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/4 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  25%|██▌       | 1/4 [00:31<01:33, 31.10s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [01:44<00:00, 26.11s/it]

                   all        100       2308    0.00847     0.0192    0.00834    0.00518






3 epochs completed in 0.298 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 22.5MB
Optimizer stripped from runs/detect/train/weights/best.pt, 22.5MB

Validating runs/detect/train/weights/best.pt...
Ultralytics 8.3.162 🚀 Python-3.11.13 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
Model summary (fused): 72 layers, 11,139,903 parameters, 0 gradients, 28.5 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/4 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  25%|██▌       | 1/4 [00:24<01:13, 24.33s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [01:17<00:00, 19.39s/it]


                   all        100       2308    0.00795     0.0188    0.00807    0.00511
                  A220         13         31          0          0          0          0
                  A321          7         12          0          0          0          0
                  A330          7         10          0          0          0          0
                  A350          4          5   0.000121        0.2   0.000588   0.000411
                 ARJ21          1          1          0          0          0          0
        Baseball Field          2          2          0          0          0          0
      Basketball Court          1          1          0          0          0          0
             Boeing737          6         15       0.05     0.0667     0.0284     0.0114
             Boeing747          6         13   0.000577      0.154    0.00415     0.0023
             Boeing777          3          3          0          0          0          0
             Boeing78

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 31, 32, 33, 34, 35, 36])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x78c416f90b90>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0

### Ground Truths JSON file

In [6]:
image_dir = "/content/fair1m/images/val"
label_dir = "/content/fair1m/labels/val"
classes_path = "/content/fair1m/classes.txt"

with open(classes_path) as f:
    class_names = [line.strip() for line in f]

image_files = sorted([
    f for f in os.listdir(image_dir)
    if f.lower().endswith((".jpg", ".png"))
])[:100]

ground_truths = []

for idx, image_file in enumerate(image_files):
    image_path = os.path.join(image_dir, image_file)
    label_file = os.path.join(label_dir, os.path.splitext(image_file)[0] + ".txt")

    if not os.path.exists(label_file):
        continue

    with Image.open(image_path) as img:
        width, height = img.size

    labels = []
    boxes = []

    with open(label_file, "r") as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) != 5:
                continue

            class_id, x_center, y_center, w, h = map(float, parts)
            class_id = int(class_id)
            if not (0 <= class_id < len(class_names)):
                continue

            xc, yc, bw, bh = x_center * width, y_center * height, w * width, h * height
            xmin = int(xc - bw / 2)
            ymin = int(yc - bh / 2)
            xmax = int(xc + bw / 2)
            ymax = int(yc + bh / 2)

            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(class_names[class_id])

    if boxes:
        ground_truths.append({
            "id": idx,
            "original_id": image_path,
            "label": labels,
            "boxes": boxes
        })

with open("ground_truths.json", "w") as f:
    json.dump(ground_truths, f, indent=2)


### Predictions JSON file

In [7]:
model = YOLO("runs/detect/train/weights/best.pt")

with open("/content/fair1m/classes.txt") as f:
    class_names = [line.strip() for line in f]

val_dir = "/content/fair1m/images/val"
image_paths = sorted([
    os.path.join(val_dir, f)
    for f in os.listdir(val_dir)
    if f.lower().endswith((".jpg", ".png"))
])[:100]

predictions = []
for idx, img_path in enumerate(image_paths):
    result = model(img_path)[0]
    boxes = []
    labels = []

    for box in result.boxes:
        xyxy = box.xyxy[0].cpu().numpy().astype(int).tolist()
        cls_id = int(box.cls[0])
        if 0 <= cls_id < len(class_names):
            boxes.append(xyxy)
            labels.append(class_names[cls_id])

    predictions.append({
        "id": idx,
        "original_id": img_path,
        "label": labels,
        "boxes": boxes
    })

with open("predictions.json", "w") as f:
    json.dump(predictions, f, indent=2)



image 1/1 /content/fair1m/images/val/v_0.jpg: 640x640 (no detections), 540.1ms
Speed: 4.5ms preprocess, 540.1ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /content/fair1m/images/val/v_1.jpg: 640x640 (no detections), 545.0ms
Speed: 4.5ms preprocess, 545.0ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /content/fair1m/images/val/v_10.jpg: 640x480 (no detections), 417.0ms
Speed: 3.5ms preprocess, 417.0ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 /content/fair1m/images/val/v_100.jpg: 480x640 (no detections), 429.0ms
Speed: 3.5ms preprocess, 429.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 /content/fair1m/images/val/v_1000.jpg: 640x640 1 Baseball Field, 525.3ms
Speed: 4.4ms preprocess, 525.3ms inference, 11.2ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /content/fair1m/images/val/v_1001.jpg: 480x640 (no detections), 426.2ms
Speed: 3.5ms prepro