# Boat Object Detection with YOLO

In this exercise, you will fine-tune a YOLO model for boat detection in satellite imagery. The goal is to achieve a mAP50 score of at least 0.73 on the test set.

In [None]:
!pip install ultralytics

In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import requests
import pandas as pd
import os
import shutil
from PIL import Image
from ultralytics import YOLO

print("Libraries imported successfully.")

## Data Collection

Download the training and test datasets.

In [None]:
# Remote location of the pickled dataset files used in this exercise
DATASET_BASE_URL = 'https://www.raphaelcousin.com/modules/data-science-practice/module7/exercise'

data_train_features_url = DATASET_BASE_URL + '/X_train.pkl'
data_train_labels_url = DATASET_BASE_URL + '/y_train.pkl'
data_test_features_url = DATASET_BASE_URL + '/X_test.pkl'

def download_file(url, file_name, timeout=30):
    """
    Download `url` and write the response body to `file_name`.

    url: address of the file to fetch
    file_name: local path to write to (opened in 'wb' mode, so existing content is replaced)
    timeout: seconds to wait on the server before giving up; without a timeout
             a stalled connection would block the notebook indefinitely

    Raises whatever `response.raise_for_status()` raises for an error status,
    before anything is written to disk.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    with open(file_name, 'wb') as file:
        file.write(response.content)
    print(f'Downloaded {file_name}')

# Fetch each dataset file into the working directory
for source_url, local_name in (
    (data_train_features_url, 'X_train.pkl'),
    (data_train_labels_url, 'y_train.pkl'),
    (data_test_features_url, 'X_test.pkl'),
):
    download_file(source_url, local_name)

In [None]:
# Read the training images and their labels from the downloaded pickle files.
# NOTE(review): pickle.load can execute arbitrary code while loading; only use
# these files if the download source is trusted.
with open('X_train.pkl', 'rb') as features_file:
    X = pickle.load(features_file)

with open('y_train.pkl', 'rb') as labels_file:
    y = pickle.load(labels_file)

# Quick look at the dataset size and the first sample
print(f"Loaded {len(X)} training images")
print(f"Sample image shape: {X[0].shape}")
print(f"Sample labels shape: {y[0].shape}")
print(f"Sample labels (YOLO format - class_id, x_center, y_center, width, height):\n{y[0]}")

## Data Visualization

Display sample images with bounding boxes.

In [None]:
def plot_image_with_boxes(image, boxes, ax=None):
    """
    Draw an image and overlay its bounding boxes.

    image: array whose first two dimensions are (height, width)
    boxes: array-like with shape (n, 5) - [class_id, x_center, y_center, width, height].
           All box coordinates are normalized (0-1). A single box given as a
           flat 5-element sequence and an empty collection are also accepted.
    ax: matplotlib axes to draw on; when omitted a new 8x8 figure is created

    Returns the axes that were drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(8, 8))

    ax.imshow(image)
    height, width = image.shape[:2]

    # Normalize the input to a 2-D (n, 5) array so a lone flat box or an empty
    # label set iterates correctly instead of failing while unpacking a row
    boxes = np.asarray(boxes)
    if boxes.size == 0:
        boxes = boxes.reshape(0, 5)
    elif boxes.ndim == 1:
        boxes = boxes[np.newaxis, :]

    # Draw each bounding box (the class id is not needed: every label is 'boat')
    for _class_id, x_center, y_center, box_width, box_height in boxes:
        # Convert from normalized YOLO format to pixel coordinates
        box_width_px = box_width * width
        box_height_px = box_height * height

        # Top-left corner = center minus half the box size
        x1 = x_center * width - box_width_px / 2
        y1 = y_center * height - box_height_px / 2

        # Draw rectangle
        ax.add_patch(
            patches.Rectangle(
                (x1, y1), box_width_px, box_height_px,
                linewidth=2, edgecolor='red', facecolor='none'
            )
        )

        # Add label slightly above the top-left corner
        ax.text(x1, y1-5, 'boat', color='red', fontsize=10,
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))

    ax.axis('off')
    return ax

In [None]:
# Show the first six training images in a 2x3 grid, each with its boxes overlaid
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

for i, ax in enumerate(axes):
    plot_image_with_boxes(X[i], y[i], ax=ax)
    ax.set_title(f'Image {i}: {len(y[i])} boat(s)')

plt.tight_layout()
plt.show()

## Data Preparation for YOLO Training

Convert the loaded data (images and YOLO-format labels) into the directory structure required by the YOLO framework.

In [None]:
# Build the directory layout used for training
base_dir = 'boat_dataset'
train_images_dir = os.path.join(base_dir, 'images', 'train')
train_labels_dir = os.path.join(base_dir, 'labels', 'train')

# Remove any dataset directory left over from a previous run
if os.path.exists(base_dir):
    shutil.rmtree(base_dir)

for directory in (train_images_dir, train_labels_dir):
    os.makedirs(directory, exist_ok=True)

# 1. Save every image as a PNG named after its zero-padded index
for i, img_array in enumerate(X):
    png_path = os.path.join(train_images_dir, f'{i:04d}.png')
    # Cast to uint8 before handing the array to PIL
    Image.fromarray(img_array.astype(np.uint8)).save(png_path)

# 2. Save the matching YOLO label file (.txt), one object per line
# Line format: class_id x_center y_center width height (all values already normalized to 0-1)
for i, boxes in enumerate(y):
    # boxes already holds (class_id, x_center, y_center, width, height) rows
    label_lines = [
        f"{int(box[0])} {box[1]:.6f} {box[2]:.6f} {box[3]:.6f} {box[4]:.6f}"
        for box in boxes
    ]

    with open(os.path.join(train_labels_dir, f'{i:04d}.txt'), 'w') as label_file:
        label_file.write('\n'.join(label_lines))

print(f"Successfully prepared {len(X)} images and labels for training in '{base_dir}' directory.")

In [None]:
# Build the YOLO dataset configuration (YAML)
# For this exercise, we use the training set as the validation set.
yaml_content = f"""
# Dataset configuration for boat detection
path: {os.path.abspath(base_dir)}  # dataset root dir
train: images/train  # train images path
val: images/train  # validation images path

# Number of classes
nc: 1

# Class names
names: ['boat']
"""

# Write the configuration next to the images/ and labels/ folders
yaml_path = os.path.join(base_dir, 'data.yaml')
with open(yaml_path, 'w') as yaml_file:
    yaml_file.write(yaml_content)

print(f"YOLO configuration saved to {yaml_path}")

## Model Setup

Load a pre-trained YOLO model and test it on a sample training image.

In [None]:
# Start from the pre-trained YOLOv8 nano checkpoint
pretrained_weights = 'yolov8n.pt'
initial_model = YOLO(pretrained_weights)

print("Initial Model loaded successfully!")

In [None]:
# Test the pre-trained model on a sample image
# The sample is written to a temporary PNG that the model reads from disk
sample_path = 'temp_sample.png'

# Cast to uint8 first, as is done when the dataset images are exported
Image.fromarray(X[0].astype(np.uint8)).save(sample_path)

try:
    # Run prediction with the not-yet-fine-tuned model
    results = initial_model.predict(source=sample_path, conf=0.25, verbose=False)
finally:
    # Clean up the temporary file even if prediction fails
    os.remove(sample_path)

print(f"Pre-trained model detected {len(results[0].boxes)} objects")
print("Note: The pre-trained model may not detect boats well - you need to fine-tune it!")

## Fine-tune your YOLO model

Your task is to fine-tune the YOLO model on the boat detection dataset to achieve a mAP50 score of at least 0.73 on the test set.

In [None]:
# Fine-tune the YOLO model
# Adjust epochs and batch size as needed to reach the target mAP50 > 0.73

print("Starting model training...")

# All training options collected in one place so they are easy to tweak
train_settings = dict(
    # Core run settings
    data=yaml_path,                 # Dataset configuration file path
    epochs=100,                     # Upper bound on training epochs
    imgsz=640,                      # Input image size
    batch=16,                       # Batch size (adjust based on GPU memory)
    name='yolov8n_boat_finetuned',  # Run name
    project='runs/train',           # Project directory
    patience=50,                    # Early stopping: epochs to wait without validation improvement
    # Optimizer schedule
    lr0=0.01,                       # Initial learning rate
    lrf=0.001,                      # Final learning rate as a fraction of lr0 (final LR = lr0 * lrf)
    # Data augmentation
    hsv_h=0.015,                    # Hue jitter
    hsv_s=0.7,                      # Saturation jitter
    hsv_v=0.4,                      # Value (brightness) jitter
    degrees=0.0,                    # No rotation augmentation
    translate=0.1,                  # Translation augmentation
    scale=0.5,                      # Scaling augmentation
    flipud=0.5,                     # Probability of an up-down flip
    fliplr=0.5,                     # Probability of a left-right flip
    mosaic=1.0,                     # Probability of mosaic augmentation
)

fine_tuned_results = initial_model.train(**train_settings)

# Reload the best checkpoint saved during training; it is used for prediction below
best_model_path = fine_tuned_results.save_dir / 'weights/best.pt'
final_model = YOLO(best_model_path)

print(f"Training finished. Best model loaded from: {best_model_path}")

## Generate Predictions for Test Set

Generate predictions on the test set and save them in the required CSV format.

In [None]:
# Read the pickled test images from disk
with open('X_test.pkl', 'rb') as test_file:
    X_test = pickle.load(test_file)

print(f"Loaded {len(X_test)} test images")

In [None]:
# Generate predictions on test set
# Format: CSV with columns: image_id, box_idx, class_id, confidence, x_center, y_center, width, height

predictions = []

for img_idx, img in enumerate(X_test):
    # Convert to a uint8 PIL image and hand it to the model directly; this avoids
    # writing and deleting a temporary PNG per image (which also leaked the file
    # whenever prediction raised).
    test_image = Image.fromarray(img.astype(np.uint8))

    # Run prediction with the fine-tuned model
    # Use a confidence threshold (e.g., 0.25) to filter weak detections
    results = final_model.predict(source=test_image, conf=0.25, verbose=False)

    # Extract predictions
    if results and len(results[0].boxes) > 0:
        detected = results[0].boxes
        # .xywhn is normalized: one (x_center, y_center, width, height) row per box
        boxes_xywhn = detected.xywhn.cpu().numpy()
        confidences = detected.conf.cpu().numpy()

        for box_idx, (xywhn, confidence) in enumerate(zip(boxes_xywhn, confidences)):
            x_center, y_center, width, height = xywhn

            predictions.append({
                'image_id': img_idx,
                'box_idx': box_idx,
                'class_id': 0,  # Always 0 for boat
                'confidence': float(confidence),
                'x_center': float(x_center),
                'y_center': float(y_center),
                'width': float(width),
                'height': float(height)
            })

    if (img_idx + 1) % 50 == 0:
        print(f"Processed {img_idx + 1}/{len(X_test)} images")

print(f"\nTotal predictions: {len(predictions)}")

In [None]:
# Save predictions to CSV
# The columns are listed explicitly so the header row is still written when the
# model produced no detections (an empty list would otherwise give an empty file).
prediction_columns = [
    'image_id', 'box_idx', 'class_id', 'confidence',
    'x_center', 'y_center', 'width', 'height',
]
df_predictions = pd.DataFrame(predictions, columns=prediction_columns)
df_predictions.to_csv('predictions.csv', index=False)

print("Predictions saved to predictions.csv")
print("\nFirst few predictions:")
print(df_predictions.head(10))

## Clean Up (Optional)

Remove temporary files and directories.

In [None]:
# Remove downloaded pickle files; files that are already gone are skipped so
# this cell can be re-run without raising FileNotFoundError
for pickle_name in ('X_train.pkl', 'y_train.pkl', 'X_test.pkl'):
    if os.path.exists(pickle_name):
        os.remove(pickle_name)

# Remove created dataset directory (only if it is still present)
if os.path.isdir('boat_dataset'):
    shutil.rmtree('boat_dataset')

print("Clean up complete.")