# Boat Object Detection with YOLO

In this exercise, you will fine-tune a YOLO model for boat detection in satellite imagery.

In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import requests
import pandas as pd

## Data Collection

Download the training and test datasets.

In [None]:
# URLs for the dataset files
# Each .pkl file is saved locally under its own name and later read back with pickle.load
data_train_features_url = 'https://www.raphaelcousin.com/modules/data-science-practice/module7/exercise/X_train.pkl'
data_train_labels_url = 'https://www.raphaelcousin.com/modules/data-science-practice/module7/exercise/y_train.pkl'
data_test_features_url = 'https://www.raphaelcousin.com/modules/data-science-practice/module7/exercise/X_test.pkl'

def download_file(url, file_name, timeout=60):
    """Download ``url`` and write the response body to ``file_name``.

    Args:
        url: HTTP(S) address of the file to fetch.
        file_name: Local path the downloaded bytes are written to.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    with open(file_name, 'wb') as file:
        file.write(response.content)
    print(f'Downloaded {file_name}')

# Fetch each dataset file into the working directory
for url, file_name in [
    (data_train_features_url, 'X_train.pkl'),
    (data_train_labels_url, 'y_train.pkl'),
    (data_test_features_url, 'X_test.pkl'),
]:
    download_file(url, file_name)

In [None]:
# Load the training data
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only unpickle downloads that come from a trusted source.
with open('X_train.pkl', 'rb') as f:
    X = pickle.load(f)

with open('y_train.pkl', 'rb') as f:
    y = pickle.load(f)

# Quick sanity check of what was loaded, using the first sample
print(f"Loaded {len(X)} training images")
print(f"Sample image shape: {X[0].shape}")
print(f"Sample labels shape: {y[0].shape}")
print(f"Sample labels (YOLO format - class_id, x_center, y_center, width, height):\n{y[0]}")

## Data Visualization

Display sample images with bounding boxes.

In [None]:
def plot_image_with_boxes(image, boxes, ax=None):
    """
    Plot an image with its bounding boxes drawn on top.

    Args:
        image: Array accepted by ``ax.imshow``; its first two dimensions are
            read as (height, width).
        boxes: Array-like that can be reshaped to (n, 5) with rows
            [class_id, x_center, y_center, width, height]. Box coordinates
            are normalized (0-1). ``None`` or an empty sequence draws no boxes.
        ax: Matplotlib axes to draw on. A new 8x8 inch figure is created when
            omitted.

    Returns:
        The axes the image was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(8, 8))

    ax.imshow(image)
    height, width = image.shape[:2]

    # Accept None, an empty sequence, or a single flat 5-value box in addition
    # to a regular (n, 5) array.
    if boxes is None or len(boxes) == 0:
        box_rows = []
    else:
        box_rows = np.asarray(boxes).reshape(-1, 5)

    # Draw each bounding box (the class id is not needed for drawing)
    for _class_id, x_center, y_center, box_width, box_height in box_rows:
        # Convert from normalized YOLO format to pixel coordinates
        x_center_px = x_center * width
        y_center_px = y_center * height
        box_width_px = box_width * width
        box_height_px = box_height * height

        # Calculate top-left corner
        x1 = x_center_px - box_width_px / 2
        y1 = y_center_px - box_height_px / 2

        # Draw rectangle
        rect = patches.Rectangle(
            (x1, y1), box_width_px, box_height_px,
            linewidth=2, edgecolor='red', facecolor='none'
        )
        ax.add_patch(rect)

        # Add label slightly above the box
        ax.text(x1, y1-5, 'boat', color='red', fontsize=10,
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))

    ax.axis('off')
    return ax

In [None]:
# Display up to 6 sample images (one per subplot) with bounding boxes
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

# Never index past the dataset if it holds fewer images than subplots
n_samples = min(len(axes), len(X))
for i in range(n_samples):
    plot_image_with_boxes(X[i], y[i], ax=axes[i])
    axes[i].set_title(f'Image {i}: {len(y[i])} boat(s)')

# Hide any subplot that did not receive an image
for unused_ax in axes[n_samples:]:
    unused_ax.axis('off')

plt.tight_layout()
plt.show()

## Model Setup

Load a pre-trained YOLO model and test it on a sample training image.

In [None]:
import subprocess
import sys

# Install/upgrade the required packages with the interpreter that runs this kernel
pip_command = [sys.executable, "-m", "pip", "install", "-U", "ultralytics", "opencv-python", "matplotlib"]
subprocess.check_call(pip_command)


In [None]:
from ultralytics import YOLO

# Load a pre-trained YOLOv8 model (nano version)
# `model` is reused by the sanity-check prediction that follows
model = YOLO('yolov8n.pt')

print("Model loaded successfully!")

In [None]:
# Test the pre-trained model on a sample image
# Save a sample image temporarily
from PIL import Image
# NOTE(review): Image.fromarray presumably needs a uint8 array here — confirm the dtype of X
sample_img = Image.fromarray(X[0])
sample_img.save('temp_sample.png')

# Run prediction on the saved file, passing conf=0.25 as the confidence setting
results = model.predict(source='temp_sample.png', conf=0.25)

# Report how many boxes the first (and only) result contains
print(f"Pre-trained model detected {len(results[0].boxes)} objects")
print("Note: The pre-trained model may not detect boats well - you need to fine-tune it!")

## Fine-tune your YOLO model

Your task is to fine-tune the YOLO model on the boat detection dataset to achieve a mAP50 score of at least 0.73 on the test set.

In [None]:
# Your code here
# Fine-tune the YOLO model
# =========================
# 1) Write the pkl data into the directory structure YOLO expects
# =========================
import os, pickle, math, random, shutil
from pathlib import Path
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split

# Seed Python's and NumPy's global RNGs
random.seed(42)
np.random.seed(42)

# Images and labels live in parallel train/val folders under boat_yolo/data
root = Path("boat_yolo")
img_train_dir = root/"data/images/train"
img_val_dir   = root/"data/images/val"
lbl_train_dir = root/"data/labels/train"
lbl_val_dir   = root/"data/labels/val"
# Create every folder up front; existing folders are left in place
for d in [img_train_dir, img_val_dir, lbl_train_dir, lbl_val_dir]:
    d.mkdir(parents=True, exist_ok=True)

# Stratified split: stratify on the number of objects per image so the
# validation set keeps a representative distribution.
counts = np.array([len(b) if isinstance(b, (list, np.ndarray)) else 0 for b in y])
# Bucket the counts into four bins (0 / 1 / 2 / 3+) so they can be used with stratify
strata = np.clip(counts, 0, 3)
idx = np.arange(len(X))
try:
    tr_idx, va_idx = train_test_split(idx, test_size=0.12, random_state=42, stratify=strata)
except ValueError:
    # Stratification is impossible when a bin holds a single image or the
    # validation split is smaller than the number of bins; fall back to a
    # plain random split instead of aborting the whole cell.
    print("Stratified split not possible; falling back to a plain random split")
    tr_idx, va_idx = train_test_split(idx, test_size=0.12, random_state=42)

def to_uint8_rgb(arr):
    """Convert an arbitrary numpy image into a uint8, 3-channel RGB array.

    Args:
        arr: Array-like image of shape (H, W), (H, W, 1), (H, W, 3) or
            (H, W, 4). Boolean masks map to 0/255. Float data whose maximum
            is <= 1.0 is treated as normalized and scaled by 255. All other
            data is rounded (floats) and clipped to the 0-255 range.

    Returns:
        np.ndarray of dtype uint8 and shape (H, W, 3).

    Raises:
        ValueError: If the array has none of the supported shapes.
    """
    arr = np.asarray(arr)

    # ---- Normalize the dtype to uint8 ----
    if arr.dtype == np.bool_:
        # A plain cast would give 0/1, i.e. an almost black image.
        arr = arr.astype(np.uint8) * np.uint8(255)
    elif arr.dtype != np.uint8:
        if arr.dtype.kind in "fc":  # floating point data
            if arr.max() <= 1.0:  # normalized to 0~1
                arr = (arr * 255.0).round().clip(0, 255).astype(np.uint8)
            else:
                arr = arr.round().clip(0, 255).astype(np.uint8)
        else:  # integer types etc.
            arr = arr.clip(0, 255).astype(np.uint8)

    # ---- Normalize the layout to 3 channels ----
    if arr.ndim == 3 and arr.shape[2] == 1:
        # Single-channel images with an explicit channel axis are grayscale too
        arr = arr[:, :, 0]

    if arr.ndim == 2:                            # grayscale -> 3 channels
        arr = np.stack([arr, arr, arr], axis=-1)
    elif arr.ndim == 3 and arr.shape[2] == 4:    # RGBA -> RGB (alpha dropped)
        arr = arr[:, :, :3]
    elif not (arr.ndim == 3 and arr.shape[2] == 3):
        raise ValueError(f"Unexpected image shape {arr.shape}")
    return arr

def _save_img_and_label(i, split):
    """Write sample ``i`` to disk as a JPEG image plus a YOLO-format label file.

    ``split`` equal to "train" selects the training folders; any other value
    selects the validation folders. The label file is always created and
    stays empty when the sample has no boxes.
    """
    from PIL import Image

    use_train = split == "train"
    image_dir = img_train_dir if use_train else img_val_dir
    label_dir = lbl_train_dir if use_train else lbl_val_dir
    stem = f"{i:06d}"
    ipath = image_dir / f"{stem}.jpg"   # images are always stored as JPG
    lpath = label_dir / f"{stem}.txt"

    # ---- Key step: force RGB before encoding as JPEG ----
    rgb = to_uint8_rgb(X[i])
    Image.fromarray(rgb).convert("RGB").save(ipath, quality=95)

    # One "class xc yc w h" line per box, each coordinate clipped to [0, 1]
    boxes = y[i]
    has_boxes = boxes is not None and len(boxes) > 0
    rows = np.asarray(boxes).reshape(-1, 5) if has_boxes else []

    lines = []
    for row in rows:
        cls, xc, yc, w, h = row.astype(float)
        xc, yc, w, h = (float(np.clip(v, 0, 1)) for v in (xc, yc, w, h))
        lines.append(f"{int(cls)} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}")

    # Joining an empty list yields "", which still creates the (empty) file
    lpath.write_text("\n".join(lines))

# Export every sample into the folders of its split
for i in tr_idx:
    _save_img_and_label(i, "train")
for i in va_idx:
    _save_img_and_label(i, "val")

# Write data.yaml
data_yaml = root/"data/data.yaml"
# Build the forward-slash dataset path outside the f-string: a backslash
# inside f-string braces is a SyntaxError before Python 3.12, and
# Path.as_posix() gives the same forward-slash path on every OS.
dataset_root = (root/"data").resolve().as_posix()
data_yaml.write_text(
f"""# generated
path: {dataset_root}
train: images/train
val: images/val
names: {{0: boat}}
""",
    encoding="utf-8",
)
print(f"Train images: {len(tr_idx)} | Val images: {len(va_idx)}")
print("Data yaml ->", data_yaml)

# =========================
# 2) Train (fine-tune) YOLOv8
#    Start from a small-object-friendly setup: v8s + imgsz=896
# =========================
from ultralytics import YOLO
import torch

# Use the first GPU when available, otherwise fall back to the CPU
device = 0 if torch.cuda.is_available() else 'cpu'
model_name = "yolov8s.pt"   # switch to 'yolov8n.pt' if GPU memory is tight
model = YOLO(model_name)

results = model.train(
    data=str(data_yaml),
    epochs=100,                # 80-120 all work for a small dataset
    imgsz=896,                 # friendly to small objects; drop to 768/640 if memory is short
    batch=16,                  # depends on GPU memory
    optimizer="AdamW",
    lr0=2e-3,
    weight_decay=5e-4,
    warmup_epochs=3,
    degrees=5.0,               # slight rotation
    scale=0.5,                 # random scaling
    shear=0.0,
    fliplr=0.5, flipud=0.0,
    mosaic=0.5, mixup=0.0,     # conservative mosaic
    hsv_h=0.015, hsv_s=0.7, hsv_v=0.4,
    patience=30,               # early-stopping patience (optional, but guards against overfitting)
    workers=8,
    device=device,
    project=str(root/"runs"),
    name="boats_v8s_896",
    # Reuse this run folder on re-runs. Otherwise ultralytics saves to an
    # incremented folder name and the fixed best.pt path read after training
    # would point at the weights of an older run.
    exist_ok=True,
    seed=42,
    verbose=True
)

# =========================
# 3) Evaluate on the validation set and print mAP50
# =========================
best_pt = root/"runs/boats_v8s_896/weights/best.pt"
best_model = YOLO(str(best_pt))
metrics = best_model.val(
    data=str(data_yaml),
    split="val",
    imgsz=896,
    conf=0.001,
    iou=0.7,
    device=device
)
try:
    print(f"mAP50: {metrics.box.map50:.4f}, mAP50-95: {metrics.box.map:.4f}")
except (AttributeError, TypeError):
    # Field names may differ in older versions; catch only that case so real
    # errors (including KeyboardInterrupt) are not swallowed
    print(metrics)

# NOTE(review): the exercise target is mAP50 >= 0.73 on the test set, while this
# trigger uses 0.70 on the validation split — confirm the intended threshold.
if hasattr(metrics, "box") and getattr(metrics.box, "map50", 0) < 0.70:
    print("mAP50 < 0.70 → 再进行一次冲刺训练（imgsz=1024, 额外 30 epoch）")
    results2 = best_model.train(
        data=str(data_yaml),
        epochs=30,
        imgsz=1024,
        batch=12,
        optimizer="AdamW",
        lr0=1.5e-3,
        weight_decay=4e-4,
        warmup_epochs=2,
        mosaic=0.4,
        fliplr=0.5,
        device=device,
        project=str(root/"runs"),
        name="boats_v8s_1024_ft",
        # Keep the run folder name stable on re-runs so the fixed best.pt
        # path below always refers to the weights that were just trained
        exist_ok=True,
        seed=42
    )
    best_pt = root/"runs/boats_v8s_1024_ft/weights/best.pt"
    best_model = YOLO(str(best_pt))
    metrics = best_model.val(data=str(data_yaml), split="val", imgsz=1024, conf=0.001, iou=0.7, device=device)
    try:
        print(f"[After FT] mAP50: {metrics.box.map50:.4f}, mAP50-95: {metrics.box.map:.4f}")
    except (AttributeError, TypeError):
        print(metrics)


# Expose the best weights under the name the prediction cells use
model = best_model
print("Best weights ->", best_pt)


## Generate Predictions for Test Set

Generate predictions on the test set and save them in the required CSV format.

In [None]:
# Load test data
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only unpickle downloads that come from a trusted source.
with open('X_test.pkl', 'rb') as f:
    X_test = pickle.load(f)

print(f"Loaded {len(X_test)} test images")

In [None]:
# Generate predictions on test set
# Your trained model should be used here
# Format: CSV with columns: image_id, box_idx, class_id, confidence, x_center, y_center, width, height

predictions = []

for img_idx, img in enumerate(X_test):
    # Save image temporarily, applying the same uint8/RGB conversion that was
    # used when the training images were exported
    temp_img = Image.fromarray(to_uint8_rgb(img))
    temp_img.save('temp_test.png')

    # Run prediction with your fine-tuned model
    # Replace 'model' with your fine-tuned model
    results = model.predict(source='temp_test.png', conf=0.25, verbose=False)

    # Extract predictions
    for box_idx, box in enumerate(results[0].boxes):
        # Get box data in YOLO format (normalized coordinates)
        x_center, y_center, width, height = box.xywhn[0].cpu().numpy()
        confidence = box.conf[0].cpu().numpy()

        predictions.append({
            'image_id': img_idx,
            'box_idx': box_idx,
            'class_id': 0,  # Always 0 for boat
            'confidence': float(confidence),
            'x_center': float(x_center),
            'y_center': float(y_center),
            'width': float(width),
            'height': float(height)
        })

    if (img_idx + 1) % 10 == 0:
        print(f"Processed {img_idx + 1}/{len(X_test)} images")

print(f"\nTotal predictions: {len(predictions)}")

In [None]:
# Save predictions to CSV
# Name the columns explicitly so the header row is written even when the model
# produced no boxes at all (an empty list would otherwise give an empty file)
prediction_columns = [
    'image_id', 'box_idx', 'class_id', 'confidence',
    'x_center', 'y_center', 'width', 'height',
]
df_predictions = pd.DataFrame(predictions, columns=prediction_columns)
df_predictions.to_csv('predictions.csv', index=False)

print("Predictions saved to predictions.csv")
print("\nFirst few predictions:")
print(df_predictions.head(10))

In [None]:
# Offer the CSV as a browser download when running inside Google Colab
try:
    from google.colab import files
except ImportError:
    # Outside Colab there is nothing to trigger; the file is already on disk
    print("google.colab is not available; predictions.csv is in the current working directory")
else:
    files.download('predictions.csv')