In [None]:
# --- Tarek Djaker notebook profile ---
# Bootstrap cell: adds a local helper directory to the import path, then calls
# the two helpers imported from `tarek_profile`.
import sys, os
# NOTE(review): absolute, machine-specific Windows path. The import below only
# resolves on a machine that has this exact directory; consider a relative or
# configurable location so the notebook survives "Restart & Run All" elsewhere.
sys.path.append(r'C:\Users\pigio\OneDrive\Documents\OneDrive\Desktop\projets\data_science_practice_2025\Tarek Djaker\lib')
from tarek_profile import nb_init, profile_banner
# `nb_init` and `profile_banner` are defined in tarek_profile (not visible here);
# presumably notebook-wide setup and a header banner -- TODO confirm.
nb_init()
profile_banner(title=None)
# -------------------------------------

# Boat Object Detection with YOLO

In this exercise, you will fine-tune a YOLO model for boat detection in satellite imagery.

In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import requests
import pandas as pd

## Data Collection

Download the training and test datasets.

In [None]:
# Remote locations of the three pickled dataset files (all share one base URL)
DATASET_BASE_URL = 'https://www.raphaelcousin.com/modules/data-science-practice/module7/exercise'
data_train_features_url = DATASET_BASE_URL + '/X_train.pkl'
data_train_labels_url = DATASET_BASE_URL + '/y_train.pkl'
data_test_features_url = DATASET_BASE_URL + '/X_test.pkl'

def download_file(url, file_name, timeout=60):
    """
    Download `url` and write the response body to `file_name`.

    Parameters
    ----------
    url : str
        Address to fetch with an HTTP GET.
    file_name : str
        Local path the raw response bytes are written to (overwritten if present).
    timeout : float or tuple, optional
        Passed to ``requests.get``. Without a timeout requests waits
        indefinitely, so a stalled server would hang the notebook.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (via ``raise_for_status``).
    requests.Timeout
        If the server does not respond within `timeout`.
    """
    response = requests.get(url, timeout=timeout)
    # Fail before touching the disk so an error page is never saved as data
    response.raise_for_status()
    with open(file_name, 'wb') as file:
        file.write(response.content)
    print(f'Downloaded {file_name}')

# Fetch every dataset file into the working directory, in a fixed order
for source_url, target_name in (
    (data_train_features_url, 'X_train.pkl'),
    (data_train_labels_url, 'y_train.pkl'),
    (data_test_features_url, 'X_test.pkl'),
):
    download_file(source_url, target_name)

In [None]:
# Read the training images (X) and their box annotations (y) from disk.
# NOTE(review): pickle.load can run arbitrary code while deserialising; these
# files come from the download step, so only load them if that source is trusted.
with open('X_train.pkl', 'rb') as features_file:
    X = pickle.load(features_file)

with open('y_train.pkl', 'rb') as labels_file:
    y = pickle.load(labels_file)

# Quick summary of what was loaded, using the first sample as an example
print(f"Loaded {len(X)} training images")
print(f"Sample image shape: {X[0].shape}")
print(f"Sample labels shape: {y[0].shape}")
print(f"Sample labels (YOLO format - class_id, x_center, y_center, width, height):\n{y[0]}")

## Data Visualization

Display sample images with bounding boxes.

In [None]:
def plot_image_with_boxes(image, boxes, ax=None):
    """
    Show `image` and overlay one red rectangle (plus a 'boat' tag) per box.

    Parameters
    ----------
    image : array
        Passed to ``ax.imshow``; its first two dimensions are read as
        (height, width).
    boxes : iterable of 5-element rows
        ``[class_id, x_center, y_center, width, height]`` in YOLO format,
        with the four geometry values normalized to the 0-1 range.
    ax : matplotlib Axes, optional
        Axes to draw on. A new 8x8 inch figure is created when omitted.

    Returns
    -------
    The axes that was drawn on (axis decorations turned off).
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(8, 8))

    ax.imshow(image)
    img_h, img_w = image.shape[:2]

    for _class_id, cx, cy, w, h in boxes:
        # Scale the normalized centre/size values up to pixel units
        cx_px = cx * img_w
        cy_px = cy * img_h
        w_px = w * img_w
        h_px = h * img_h

        # Rectangle patches are anchored at their top-left corner
        left = cx_px - w_px / 2
        top = cy_px - h_px / 2

        outline = patches.Rectangle(
            (left, top), w_px, h_px,
            linewidth=2, edgecolor='red', facecolor='none'
        )
        ax.add_patch(outline)

        # Every box gets the same 'boat' tag; the class id is not consulted
        ax.text(left, top - 5, 'boat', color='red', fontsize=10,
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))

    ax.axis('off')
    return ax

In [None]:
# Display up to 6 sample images (2x3 grid) with their bounding boxes
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

# Never index past the dataset if it holds fewer images than grid panels
n_shown = min(len(axes), len(X))
for i in range(n_shown):
    plot_image_with_boxes(X[i], y[i], ax=axes[i])
    axes[i].set_title(f'Image {i}: {len(y[i])} boat(s)')

# Hide any panel that received no image so empty frames are not drawn
for unused_ax in axes[n_shown:]:
    unused_ax.axis('off')

plt.tight_layout()
plt.show()

## Model Setup

Load a pre-trained YOLO model and test it on a sample training image.

In [None]:
pip install ultralytics --quiet

In [None]:
from ultralytics import YOLO

# Instantiate YOLO from the pre-trained YOLOv8 nano weights file
PRETRAINED_WEIGHTS = 'yolov8n.pt'
model = YOLO(PRETRAINED_WEIGHTS)

print("Model loaded successfully!")

In [None]:
# Test the pre-trained model on a sample image
import os
from PIL import Image

# The first training image is written to a temporary PNG that is handed to
# the model as a file path.
sample_path = 'temp_sample.png'
sample_img = Image.fromarray(X[0])
sample_img.save(sample_path)

try:
    # Run prediction
    results = model.predict(source=sample_path, conf=0.25)
finally:
    # Remove the temporary file even if prediction fails, matching the
    # clean-up done for the test-set temp image later in the notebook.
    if os.path.exists(sample_path):
        os.remove(sample_path)

print(f"Pre-trained model detected {len(results[0].boxes)} objects")
print("Note: The pre-trained model may not detect boats well - you need to fine-tune it!")

## Fine-tune your YOLO model

Your task is to fine-tune the YOLO model on the boat detection dataset to achieve a mAP50 score of at least 0.73 on the test set.

In [None]:
import os
from PIL import Image
import numpy as np
from ultralytics import YOLO

# On-disk layout for the exported dataset: <base>/images/train and <base>/labels/train
base_dir = 'yolo_boat_data'
train_images_dir, train_labels_dir = (
    os.path.join(base_dir, kind, 'train') for kind in ('images', 'labels')
)

# Create both folders (and parents); already-existing folders are left alone
for dataset_dir in (train_images_dir, train_labels_dir):
    os.makedirs(dataset_dir, exist_ok=True)

def save_data_for_yolo(X_data, y_data, images_dir, labels_dir):
    """
    Export paired images and boxes as PNG files plus YOLO-style label files.

    For the i-th (image, boxes) pair this writes ``img_000i.png`` into
    `images_dir` and ``img_000i.txt`` into `labels_dir`. Each label line is
    ``<int class> <v1> <v2> <v3> <v4>`` with the four values formatted to six
    decimals, one line per box.

    Parameters
    ----------
    X_data : sequence of arrays accepted by ``Image.fromarray``.
    y_data : sequence of box collections; each box is indexable at 0..4.
    images_dir, labels_dir : str
        Existing output directories.
    """
    for i, (pixels, boxes) in enumerate(zip(X_data, y_data)):
        png_target = os.path.join(images_dir, f'img_{i:04d}.png')
        Image.fromarray(pixels).save(png_target)

        txt_target = os.path.join(labels_dir, f'img_{i:04d}.txt')
        with open(txt_target, 'w') as label_file:
            # One text line per box, written in the order the boxes are stored
            label_file.writelines(
                f"{int(box[0])} {box[1]:.6f} {box[2]:.6f} {box[3]:.6f} {box[4]:.6f}\n"
                for box in boxes
            )

# Export every training image and its boxes into the YOLO folder layout
save_data_for_yolo(X, y, train_images_dir, train_labels_dir)


# Dataset description file handed to model.train(): absolute dataset root,
# image sub-folders for the train/val splits, and a single class named 'boat'.
# NOTE(review): `val` points at the same folder as `train`, so validation
# metrics are computed on images the model was trained on and will look
# optimistic. Consider holding out part of X/y into a separate images/val folder.
data_yaml_content = f"""
path: {os.path.abspath(base_dir)}
train: images/train
val: images/train
nc: 1
names: ['boat']
"""

# Write the description next to the exported data
yaml_path = os.path.join(base_dir, 'data.yaml')
with open(yaml_path, 'w') as f:
    f.write(data_yaml_content)

# Start fine-tuning from the pre-trained YOLOv8 nano weights
model = YOLO('yolov8n.pt')

# Train on the dataset description written just above (same file as
# 'yolo_boat_data/data.yaml', derived from base_dir instead of re-typed).
data_yaml_path = yaml_path

# NOTE(review): the run name says "100_epochs" but epochs=50. The name is kept
# as-is because the prediction cell loads
# 'runs/detect/yolov8n_100_epochs/weights/best.pt'; rename both together.
model.train(
    data=data_yaml_path,
    epochs=50,
    imgsz=640,
    batch=16,
    lr0=0.001,
    optimizer='AdamW',
    name='yolov8n_100_epochs',
    project='runs/detect',
    # Reuse the same run directory on every re-run. Without this a second run
    # is saved under an auto-incremented name and the hard-coded weights path
    # used for prediction keeps pointing at the stale first run.
    exist_ok=True,
    pretrained=True
)

## Generate Predictions for Test Set

Generate predictions on the test set and save them in the required CSV format.

In [None]:
# Read the test images from disk.
# NOTE(review): pickle.load can run arbitrary code while deserialising; only
# load this file if the download source is trusted.
with open('X_test.pkl', 'rb') as test_file:
    X_test = pickle.load(test_file)

print(f"Loaded {len(X_test)} test images")

In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from ultralytics import YOLO

MODEL_PATH = 'runs/detect/yolov8n_100_epochs/weights/best.pt'

# Load the fine-tuned weights. If they are missing, report it and stop: carrying
# on would silently predict with whatever `model` is left in the kernel (or
# fail later with an unrelated NameError).
try:
    model = YOLO(MODEL_PATH)
except FileNotFoundError:
    print(f"ERREUR: Le modèle finetuné n'est pas trouvé à {MODEL_PATH}.")
    print("Veuillez vérifier le nom du dossier de votre session d'entraînement.")
    raise

# Fallback so the cell can be exercised without the test set loaded. The
# resulting predictions are meaningless, so say so loudly.
try:
    X_test
except NameError:
    print("WARNING: X_test is not defined - using 10 random placeholder images. "
          "Run the test-data loading cell before generating real predictions.")
    X_test = [np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8) for _ in range(10)]


predictions = []

# Defined before the loop so the clean-up below also works when X_test is empty
temp_img_path = 'temp_test.png'

try:
    for img_idx, img in enumerate(X_test):
        # Each image is written to a temporary PNG and passed to the model by path
        Image.fromarray(img).save(temp_img_path)

        results = model.predict(source=temp_img_path, conf=0.25, verbose=False)

        for box_idx, box in enumerate(results[0].boxes):
            # xywhn: box centre and size normalized by the image dimensions
            x_center, y_center, width, height = box.xywhn[0].cpu().numpy()
            confidence = box.conf[0].cpu().numpy()

            predictions.append({
                'image_id': img_idx,
                'box_idx': box_idx,
                # Always written as 0: the dataset config declares one class ('boat')
                'class_id': 0,
                'confidence': float(confidence),
                'x_center': float(x_center),
                'y_center': float(y_center),
                'width': float(width),
                'height': float(height)
            })

        if (img_idx + 1) % 10 == 0:
            print(f"Processed {img_idx + 1}/{len(X_test)} images")
finally:
    # Remove the temporary image even if prediction raised part-way through
    if os.path.exists(temp_img_path):
        os.remove(temp_img_path)

print(f"\nTotal predictions: {len(predictions)}")

df_predictions = pd.DataFrame(predictions)

In [None]:
# Save predictions to CSV
# Fix the column set explicitly so that a run with zero detections still
# produces a CSV with the expected header instead of an empty, header-less file.
PREDICTION_COLUMNS = [
    'image_id', 'box_idx', 'class_id', 'confidence',
    'x_center', 'y_center', 'width', 'height',
]
df_predictions = pd.DataFrame(predictions, columns=PREDICTION_COLUMNS)
df_predictions.to_csv('predictions.csv', index=False)

print("Predictions saved to predictions.csv")
print("\nFirst few predictions:")
print(df_predictions.head(10))