# Data Loading and Preprocessing


### Imports

In [None]:
!pip install ultralytics
from ultralytics import YOLO
from PIL import Image
import os
import kagglehub
import os
import pandas as pd
import cv2
import json



### Get Kaggle dataset

In [None]:
# Download the FAIR1M dataset through kagglehub. `path` is the local dataset
# root that the next cell reads the label table and images from.
dataset_handle = "ollypowell/fair1m-satellite-imagery-for-object-detection"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/fair1m-satellite-imagery-for-object-detection


### Organize dataset

In [None]:
import shutil  # cell-local import: portable replacement for shelling out to `cp`

# Convert the annotation table into a YOLO-style folder layout:
#   fair1m/images/<split>/<image file>        copied from the Kaggle download
#   fair1m/labels/<split>/<image name>.txt    one "class xc yc w h" line per box
labels_df = pd.read_parquet(os.path.join(path, "Dataset", "labels.parquet"))

# NOTE(review): base_dir is relative to the working directory, while later cells
# read from "/content/fair1m" -- presumably the notebook is run with /content as
# the working directory (e.g. on Colab); confirm before running elsewhere.
base_dir = "fair1m"
base_label_dir = os.path.join(base_dir, "labels")
base_image_dir = os.path.join(base_dir, "images")

os.makedirs(base_dir, exist_ok=True)
os.makedirs(f"{base_label_dir}/train", exist_ok=True)
os.makedirs(f"{base_label_dir}/val", exist_ok=True)
os.makedirs(f"{base_image_dir}/train", exist_ok=True)
os.makedirs(f"{base_image_dir}/val", exist_ok=True)

# Class ids are the positions of the category names in alphabetical order;
# classes.txt stores the same order so later cells can map ids back to names.
class_names = sorted(labels_df["Category"].unique())
class_to_id = {name: idx for idx, name in enumerate(class_names)}

with open(os.path.join(base_dir, "classes.txt"), "w") as f:
    for name in class_names:
        f.write(name + "\n")

# The FilePath column holds absolute paths from the dataset author's machine;
# rewrite that prefix so the paths point into the local download.
original_image_root = "/home/olly/Desktop/Gaofen_Challenge/Dataset/Images/"
local_image_root = os.path.join(path, "Dataset", "Images") + "/"

label_lines = {}   # label file path -> YOLO lines for that image, in row order
image_copies = {}  # destination image path -> source image path

for _, row in labels_df.iterrows():
    filepath = row["FilePath"].replace(original_image_root, local_image_root)

    category = row["Category"]
    split = row["Split"].lower()

    x_min, y_min = row["x_min"], row["y_min"]
    x_max, y_max = row["x_max"], row["y_max"]
    img_w, img_h = row["ImageWidth"], row["ImageHeight"]

    filename = os.path.basename(filepath)
    name_no_ext = os.path.splitext(filename)[0]

    # YOLO format: box centre and size, each normalised by the image dimensions.
    x_center = ((x_min + x_max) / 2) / img_w
    y_center = ((y_min + y_max) / 2) / img_h
    width = (x_max - x_min) / img_w
    height = (y_max - y_min) / img_h

    label_path = os.path.join(base_label_dir, split, name_no_ext + ".txt")
    class_id = class_to_id[category]
    label_lines.setdefault(label_path, []).append(
        f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
    )

    dest_image_path = os.path.join(base_image_dir, split, filename)
    image_copies.setdefault(dest_image_path, filepath)

# Write every label file exactly once, in "w" mode, so that re-running this
# cell overwrites the files instead of appending duplicate boxes to them.
for label_path, lines in label_lines.items():
    with open(label_path, "w") as f:
        f.writelines(lines)

# Copy each image once; images already present at the destination are skipped,
# and a missing source is reported once per image rather than once per box.
for dest_image_path, filepath in image_copies.items():
    if os.path.exists(dest_image_path):
        continue
    if os.path.exists(filepath):
        shutil.copy(filepath, dest_image_path)
    else:
        print(f"Missing image: {filepath}")


# LONG Training

Training on full dataset, for 50 epochs

### Create yaml file

In [None]:
# Write the dataset description file (data.yaml) for the full dataset.
dataset_root = "/content/fair1m"
train_path = os.path.join(dataset_root, "images/train")
val_path = os.path.join(dataset_root, "images/val")

# Stop early if either image folder is missing.
for split_label, split_dir in (("Train", train_path), ("Val", val_path)):
    if not os.path.exists(split_dir):
        raise FileNotFoundError(f"{split_label} path not found: {split_dir}")

# Class names are read back from classes.txt, ignoring empty lines.
with open(os.path.join(dataset_root, "classes.txt"), "r") as f:
    class_names = [stripped for stripped in (line.strip() for line in f) if stripped]

data_yaml_path = os.path.join(dataset_root, "data.yaml")
yaml_lines = [
    f"path: {dataset_root}\n",
    "train: images/train\n",
    "val: images/val\n",
    f"nc: {len(class_names)}\n",
    "names:\n",
]
yaml_lines.extend(f"  - {name}\n" for name in class_names)

with open(data_yaml_path, "w") as f:
    f.writelines(yaml_lines)

print(f"Saved data.yaml to {data_yaml_path}")


### Train Model

In [None]:
# Train on the full dataset, then run the trained model over the validation
# images and store its detections in predictions.json.
dataset_root = "/content/fair1m"
val_path = os.path.join(dataset_root, "images/val")
class_file = os.path.join(dataset_root, "classes.txt")

with open(class_file, "r") as f:
    class_names = [line.strip() for line in f if line.strip()]

model = YOLO("yolov8s.pt")

# Fine-tune on the dataset described by data.yaml (written by the previous
# cell) before predicting. Without this step the predictions come from the
# stock yolov8s.pt checkpoint, whose class indices do not correspond to
# classes.txt, so every label written below would be wrong.
# A dedicated run name keeps this run out of runs/detect/train, which the
# short-training section loads its weights from.
model.train(
    data=os.path.join(dataset_root, "data.yaml"),
    epochs=50,
    imgsz=640,
    name="long_train",
)

# stream=True yields results one image at a time instead of building a list.
results = model.predict(source=val_path, stream=True, conf=0.25)

predictions = []
img_id = 0
for r in results:
    boxes = []
    labels = []
    if r.boxes is not None:
        for box in r.boxes:
            # Corner coordinates (x1, y1, x2, y2) truncated to integers.
            xyxy = box.xyxy[0].cpu().numpy().astype(int).tolist()
            cls_id = int(box.cls[0])
            # Fall back to a synthetic name if the id is outside classes.txt.
            label = class_names[cls_id] if cls_id < len(class_names) else f"class_{cls_id}"
            boxes.append(xyxy)
            labels.append(label)
    # One record per image, including images without detections.
    predictions.append({
        "id": img_id,
        "original_id": r.path,
        "label": labels,
        "boxes": boxes
    })
    img_id += 1

with open("predictions.json", "w") as f:
    json.dump(predictions, f, indent=2)


### Ground Truths JSON file

In [None]:
with open("ground_truths.json", "w") as f:
    json.dump(ground_truths, f, indent=2)
print("Saved ground_truths.json")

### Predictions JSON files

In [None]:
with open("predictions.json", "w") as f:
    json.dump(predictions, f, indent=2)
print("Saved predictions.json")

ground_truths = []
img_id = 0
for filename in sorted(os.listdir(val_path)):
    if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
        continue
    image_path = os.path.join(val_path, filename)
    label_path = os.path.join(label_val_path, os.path.splitext(filename)[0] + ".txt")
    if not os.path.exists(label_path):
        continue
    img = cv2.imread(image_path)
    h, w = img.shape[:2]
    boxes = []
    labels = []
    with open(label_path, "r") as f:
        for line in f:
            parts = line.strip().split()
            cls_id = int(parts[0])
            cx, cy, bw, bh = map(float, parts[1:])
            x1 = int((cx - bw / 2) * w)
            y1 = int((cy - bh / 2) * h)
            x2 = int((cx + bw / 2) * w)
            y2 = int((cy + bh / 2) * h)
            boxes.append([x1, y1, x2, y2])
            labels.append(class_names[cls_id] if cls_id < len(class_names) else f"class_{cls_id}")
    if boxes:
        ground_truths.append({
            "id": img_id,
            "original_id": image_path,
            "label": labels,
            "boxes": boxes
        })
        img_id += 1

# SHORT Training

Training on the first 100 images of each split (in sorted filename order), for only 3 epochs

### Create filtered train, val, and yaml file

In [None]:
# Full-size train/val image folders that the 100-image subsets are taken from.
dataset_root = "/content/fair1m"
train_dir, val_dir = (
    os.path.join(dataset_root, f"images/{split_name}")
    for split_name in ("train", "val")
)

def get_first_n_images(dir_path, n=100):
    """Return the first ``n`` image paths found directly inside ``dir_path``.

    A file counts as an image when its name ends in .jpg, .jpeg or .png
    (case-insensitive). The joined paths are sorted as strings and the
    leading ``n`` entries are returned.
    """
    image_suffixes = (".jpg", ".jpeg", ".png")
    matches = []
    for entry in os.listdir(dir_path):
        if entry.lower().endswith(image_suffixes):
            matches.append(os.path.join(dir_path, entry))
    matches.sort()
    return matches[:n]

# Take the first 100 images of each split, write one image-list file per split,
# and write a dataset yaml that points at those lists.
# `class_names` is not defined in this cell; it is whatever an earlier cell left behind.
train_images, val_images = (
    get_first_n_images(split_dir, 100) for split_dir in (train_dir, val_dir)
)

train_txt = os.path.join(dataset_root, "train_subset.txt")
val_txt = os.path.join(dataset_root, "val_subset.txt")

# One image path per line, with a trailing newline.
for list_path, image_list in ((train_txt, train_images), (val_txt, val_images)):
    with open(list_path, "w") as f:
        f.write("\n".join(image_list) + "\n")

filtered_yaml_lines = [
    f"train: {train_txt}\n",
    f"val: {val_txt}\n",
    f"nc: {len(class_names)}\n",
    "names:\n",
]
filtered_yaml_lines += [f"  - {name}\n" for name in class_names]

with open(os.path.join(dataset_root, "data_filtered.yaml"), "w") as f:
    f.writelines(filtered_yaml_lines)

print("Created train_subset.txt, val_subset.txt and data_filtered.yaml")


### Train

In [None]:
# Short run: load the yolov8s.pt checkpoint and train it on the subset yaml
# for 3 epochs with imgsz=640.
short_train_args = {
    "data": "/content/fair1m/data_filtered.yaml",
    "epochs": 3,
    "imgsz": 640,
}
model = YOLO("yolov8s.pt")
model.train(**short_train_args)

### Ground Truths JSON file

In [None]:
# Convert the YOLO label files of the first 100 validation images into
# pixel-coordinate records and save them as ground_truths.json.
image_dir = "/content/fair1m/images/val"
label_dir = "/content/fair1m/labels/val"
classes_path = "/content/fair1m/classes.txt"

with open(classes_path) as f:
    class_names = [line.strip() for line in f]

# First 100 .jpg/.png file names in sorted order.
image_files = sorted(
    name for name in os.listdir(image_dir)
    if name.lower().endswith((".jpg", ".png"))
)[:100]

ground_truths = []

for idx, image_file in enumerate(image_files):
    image_path = os.path.join(image_dir, image_file)
    stem = os.path.splitext(image_file)[0]
    label_file = os.path.join(label_dir, stem + ".txt")

    # Images without a label file are skipped (their idx is simply unused).
    if not os.path.exists(label_file):
        continue

    with Image.open(image_path) as img:
        width, height = img.size

    with open(label_file, "r") as f:
        label_rows = [line.strip().split() for line in f]

    labels = []
    boxes = []
    for fields in label_rows:
        # A valid row is "class x_center y_center width height".
        if len(fields) != 5:
            continue

        values = [float(field) for field in fields]
        class_id = int(values[0])
        if class_id < 0 or class_id >= len(class_names):
            continue

        # Scale the normalised centre/size to pixels, then derive the corners.
        x_center, y_center, w, h = values[1:]
        centre_x = x_center * width
        centre_y = y_center * height
        box_w = w * width
        box_h = h * height

        boxes.append([
            int(centre_x - box_w / 2),
            int(centre_y - box_h / 2),
            int(centre_x + box_w / 2),
            int(centre_y + box_h / 2),
        ])
        labels.append(class_names[class_id])

    # Only images with at least one valid box produce a record.
    if boxes:
        ground_truths.append({
            "id": idx,
            "original_id": image_path,
            "label": labels,
            "boxes": boxes
        })

with open("ground_truths.json", "w") as f:
    json.dump(ground_truths, f, indent=2)


### Predictions JSON file

In [None]:
import glob  # cell-local import: used to locate the newest training run

# Run the trained model on the first 100 validation images and store the
# detections in predictions.json.
#
# Ultralytics writes each new training run to an incremented folder (train,
# train2, ...), so a hard-coded "runs/detect/train" would silently load stale
# weights after the training cell has been re-run. Use the most recently
# written best.pt instead, falling back to the original path if none is found.
weight_candidates = glob.glob("runs/detect/train*/weights/best.pt")
if weight_candidates:
    weights_path = max(weight_candidates, key=os.path.getmtime)
else:
    weights_path = "runs/detect/train/weights/best.pt"
print(f"Using weights: {weights_path}")
model = YOLO(weights_path)

with open("/content/fair1m/classes.txt") as f:
    class_names = [line.strip() for line in f]

val_dir = "/content/fair1m/images/val"
# Same selection rule as the ground-truth cell: first 100 .jpg/.png files in
# sorted order.
image_paths = sorted([
    os.path.join(val_dir, f)
    for f in os.listdir(val_dir)
    if f.lower().endswith((".jpg", ".png"))
])[:100]

predictions = []
for idx, img_path in enumerate(image_paths):
    # verbose=False suppresses the per-image log lines that otherwise flood
    # the cell output.
    result = model(img_path, verbose=False)[0]
    boxes = []
    labels = []

    for box in result.boxes:
        # Corner coordinates (x1, y1, x2, y2) truncated to integers.
        xyxy = box.xyxy[0].cpu().numpy().astype(int).tolist()
        cls_id = int(box.cls[0])
        # Detections whose class id is outside classes.txt are dropped.
        if 0 <= cls_id < len(class_names):
            boxes.append(xyxy)
            labels.append(class_names[cls_id])

    # One record per image, including images without detections.
    predictions.append({
        "id": idx,
        "original_id": img_path,
        "label": labels,
        "boxes": boxes
    })

with open("predictions.json", "w") as f:
    json.dump(predictions, f, indent=2)



image 1/1 /content/fair1m/images/val/v_0.jpg: 640x640 (no detections), 565.3ms
Speed: 6.9ms preprocess, 565.3ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /content/fair1m/images/val/v_1.jpg: 640x640 (no detections), 680.4ms
Speed: 4.6ms preprocess, 680.4ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /content/fair1m/images/val/v_10.jpg: 640x480 (no detections), 687.1ms
Speed: 5.8ms preprocess, 687.1ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 /content/fair1m/images/val/v_100.jpg: 480x640 (no detections), 664.3ms
Speed: 5.0ms preprocess, 664.3ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 /content/fair1m/images/val/v_1000.jpg: 640x640 1 Baseball Field, 842.9ms
Speed: 7.1ms preprocess, 842.9ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /content/fair1m/images/val/v_1001.jpg: 480x640 (no detections), 424.7ms
Speed: 3.6ms preproc