# Deep Learning homework
## **Team Deepsea**
## Members:
  - Márton Csáki (Neptun: R0OQD4)
  - Ogleznyev Pável (Neptun: GRKO04)
  - Marák Levente (Neptun: K2DE0K)

# Project
  The Airbus Ship Detection Challenge on Kaggle is a computer vision competition that tasks participants with developing models to automatically identify and localize ships in satellite imagery. The goal is to create an algorithm that can accurately draw bounding boxes or pixel-level masks around ships, thereby aiding maritime surveillance and efficiency. This project involves significant work in image segmentation and object detection using machine learning techniques.
  https://www.kaggle.com/competitions/airbus-ship-detection

## Key Aspects of the Project
  **Goal:** To automatically detect and localize ships in satellite images.

  **Data:** A large dataset of high-resolution satellite images provided by Airbus Defence and Space.

  **Task:** This is primarily an image segmentation challenge, where models must output masks that delineate the exact shape and location of each ship.

  **Application:** The resulting models have practical applications in maritime security, tracking, and logistics.

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, backend as K
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import cv2
import os
import matplotlib.pyplot as plt

# Mount Google Drive and set the data directory
Our data is too large to simply download from the competition's site, so we uploaded it to Google Drive and read it from there.

## 1. Step: Paths and Settings

In [None]:
# Colab-only step: mount Google Drive so the dataset paths under /content/drive are readable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Root folder of the competition data on the mounted Drive.
DRIVE_BASE_DIR = "/content/drive/MyDrive/DeepLearn/airbus-ship-detection"
# Folder with the training images; data_generator joins it with each ImageId.
TRAIN_IMG_DIR = os.path.join(DRIVE_BASE_DIR, "train_v2")
# CSV with the run-length-encoded masks (columns used below: ImageId, EncodedPixels).
CSV_PATH = os.path.join(DRIVE_BASE_DIR, "train_ship_segmentations_v2.csv")

In [None]:
# 768x768 is too large for initial training.
# Start with a smaller but still meaningful size. 224x224 is too small.
# IMG_SIZE is the resize target for images and masks and defines the tf.data tensor shapes.
IMG_SIZE = (256, 256)
# Number of (image, mask) pairs per batch in both the training and validation pipelines.
BATCH_SIZE = 16

## 2. Step: Load and Prepare Metadata (from CSV)

In [None]:
print("Loading metadata...")
# Columns used from the CSV: ImageId and EncodedPixels.
df = pd.read_csv(CSV_PATH)

# Handle missing values (NaN -> NO_SHIP)
# The 'NO_SHIP' string is the sentinel that check_has_ship and rle_to_mask look for.
df['EncodedPixels'] = df['EncodedPixels'].fillna('NO_SHIP')

# Group by ImageId (one image can have multiple ships/rows)
# Result: one row per ImageId, with EncodedPixels holding a Python list of that image's entries.
metadata = df.groupby('ImageId')['EncodedPixels'].apply(list).reset_index()

# Create 'has_ship' column for filtering
def check_has_ship(rle_list):
    """Return 1 if the image has at least one ship entry, otherwise 0.

    An image counts as empty only when its list holds exactly one element and
    that element is the 'NO_SHIP' placeholder; every other list yields 1.
    """
    placeholder_only = len(rle_list) == 1 and rle_list[0] == 'NO_SHIP'
    return 0 if placeholder_only else 1

# Per-image flag: 0 for the lone 'NO_SHIP' placeholder, 1 otherwise (see check_has_ship).
metadata['has_ship'] = metadata['EncodedPixels'].apply(check_has_ship)

# If no ship, replace the RLE list with an empty list
# The right-hand side maps each selected ImageId to a fresh [] purely to obtain a Series of
# empty lists on the same index as the selected rows, so the .loc assignment aligns row by row.
metadata.loc[metadata['has_ship'] == 0, 'EncodedPixels'] = metadata.loc[metadata['has_ship'] == 0, 'ImageId'].apply(lambda x: [])

print(f"Total number of images: {len(metadata)}")

Loading metadata...
Total number of images: 192556


# Split


## 3. Step: Split Data (Train/Validation IDs)
We split the metadata DataFrame (the image IDs), not the actual image files.

In [None]:
print("Splitting and balancing data...")

# Initial split (80% Train, 20% Validation).
# Stratifying on has_ship keeps the ship / no-ship ratio the same in both parts.
train_df_raw, val_df = train_test_split(
    metadata,
    test_size=0.2,
    random_state=42,
    stratify=metadata['has_ship']
)

# --- BALANCING THE DATA ---
# Keep all images with ships
train_ships = train_df_raw[train_df_raw['has_ship'] == 1]
train_empty_pool = train_df_raw[train_df_raw['has_ship'] == 0]

# Sample empty images to match the number of ship images (1:1 ratio)
# This forces the model to learn features, not just predict "water" everywhere.
# Sampling without replacement fails when more rows are requested than exist,
# so the request is capped at the number of empty images actually available.
n_empty = min(len(train_ships), len(train_empty_pool))
train_empty = train_empty_pool.sample(n_empty, random_state=42)

# Concatenate and shuffle
train_df_balanced = pd.concat([train_ships, train_empty]).sample(frac=1, random_state=42).reset_index(drop=True)

print(f"Original training size: {len(train_df_raw)}")
print(f"Balanced training size (used for training): {len(train_df_balanced)}")
print(f"Validation size (remains original): {len(val_df)}")

Splitting and balancing data...
Original training size: 154044
Balanced training size (used for training): 68090
Validation size (remains original): 38512


## 4. Step: Helper Functions (Mask Handling)
The helper below converts RLE-encoded strings into binary masks.

In [None]:
def rle_to_mask(rle_list, shape=(768, 768)):
    """Decode a list of run-length-encoded strings into a single binary mask.

    Each string is a whitespace-separated sequence of "start length" pairs,
    where starts are 1-based positions in the flattened image. Entries equal
    to 'NO_SHIP' or '' are ignored. All runs are painted into one flat buffer,
    which is reshaped to ``shape`` and then transposed.

    Returns:
        uint8 array of shape (shape[1], shape[0], 1) containing 0s and 1s.
    """
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    encoded_runs = (rle for rle in rle_list if rle != 'NO_SHIP' and rle != '')
    for rle in encoded_runs:
        tokens = rle.split()
        # Even positions are starts (converted to 0-based), odd positions are lengths.
        run_starts = np.asarray(tokens[0::2], dtype=int) - 1
        run_lengths = np.asarray(tokens[1::2], dtype=int)
        run_ends = run_starts + run_lengths

        for begin, end in zip(run_starts, run_ends):
            flat[begin:end] = 1

    # Transpose after the reshape, then append a trailing channel axis.
    return flat.reshape(shape).T[..., np.newaxis]

## 5. Step: Main Data Pipeline (Generator)

In [None]:
def data_generator(df):
    """Yield (image, mask) float32 pairs for every readable image listed in ``df``.

    Args:
        df: DataFrame with an 'ImageId' column (file name inside TRAIN_IMG_DIR)
            and an 'EncodedPixels' column (list of RLE strings, possibly empty).

    Yields:
        image: float32 RGB array resized to IMG_SIZE and divided by 255.
        mask: float32 array of shape (*IMG_SIZE, 1) decoded from the RLE list.

    Rows whose image cannot be read by cv2.imread are skipped (best effort),
    so the number of yielded pairs can be smaller than len(df).
    """
    # cv2.resize takes dsize as (width, height), whereas IMG_SIZE is used as
    # (height, width) in the dataset TensorSpecs. Swap explicitly so that
    # non-square sizes also produce arrays of shape (*IMG_SIZE, channels).
    target_h, target_w = IMG_SIZE
    dsize = (target_w, target_h)

    # Iterating the two columns directly avoids building a Series per row.
    for image_id, rle_list in zip(df['ImageId'], df['EncodedPixels']):
        # Load Image
        img = cv2.imread(os.path.join(TRAIN_IMG_DIR, image_id))
        if img is None:
            continue

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, dsize)
        img = img / 255.0  # Normalize to [0, 1]

        # Load Mask (decoded at rle_to_mask's default shape, then shrunk).
        # Nearest-neighbour interpolation keeps the mask strictly binary.
        mask = rle_to_mask(rle_list)
        mask = cv2.resize(mask, dsize, interpolation=cv2.INTER_NEAREST)
        # cv2.resize may drop the single channel axis; restore it if so.
        if mask.ndim == 2:
            mask = np.expand_dims(mask, axis=-1)

        yield img.astype(np.float32), mask.astype(np.float32)

## 6. Step: Create tf.data.Dataset

In [None]:
print("Building TensorFlow pipelines...")


def _make_dataset(frame, training):
    """Build a batched, prefetched tf.data pipeline over ``data_generator(frame)``.

    Args:
        frame: metadata DataFrame handed to data_generator.
        training: when True the stream is shuffled (buffer of 500) and repeated
            indefinitely; when False it is a single ordered pass.

    Returns:
        A tf.data.Dataset yielding (images, masks) batches of BATCH_SIZE with
        element shapes (*IMG_SIZE, 3) and (*IMG_SIZE, 1), both float32.
    """
    dataset = tf.data.Dataset.from_generator(
        lambda: data_generator(frame),
        output_signature=(
            tf.TensorSpec(shape=(*IMG_SIZE, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(*IMG_SIZE, 1), dtype=tf.float32)
        )
    )
    if training:
        # model.fit below is driven by steps_per_epoch and keeps consuming the
        # same iterator across epochs. One pass over the balanced set is far
        # shorter than epochs * steps_per_epoch, so without repeat() training
        # would stop early with an "input ran out of data" warning.
        dataset = dataset.shuffle(500).repeat()
    return dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


# Training Pipeline (Uses the BALANCED dataset; infinite, so always pass steps_per_epoch)
train_ds = _make_dataset(train_df_balanced, training=True)

# Validation Pipeline (Uses the ORIGINAL distribution; finite and unshuffled)
val_ds = _make_dataset(val_df, training=False)

print("Datasets ready.")

Building TensorFlow pipelines...
Datasets ready.


## 7. Step: Visualization (Verification)

In [None]:
print("Visualizing a batch...")
images, masks = next(iter(train_ds))

# Top row: input images; bottom row: the matching ground-truth masks.
fig, axes = plt.subplots(2, 5, figsize=(15, 6))
for col in range(5):
    image_ax = axes[0, col]
    image_ax.imshow(images[col])
    image_ax.set_title("Input Image")
    image_ax.axis('off')

    mask_ax = axes[1, col]
    mask_ax.imshow(masks[col], cmap='gray')
    mask_ax.set_title("Ground Truth Mask")
    mask_ax.axis('off')

fig.tight_layout()
plt.show()

## 8. Step: Define U-Net Architecture
Here we build the network.

In [None]:
def build_unet(input_shape):
    """Build a small U-Net that maps an image to a one-channel sigmoid mask.

    Layout: three encoder stages (16, 32, 64 filters), each two 3x3 ReLU
    convolutions followed by 2x2 max pooling; a 128-filter bottleneck; three
    decoder stages (64, 32, 16 filters), each a stride-2 transposed
    convolution, a concatenation with the matching encoder output, and two
    3x3 ReLU convolutions; finally a 1x1 sigmoid convolution.

    Args:
        input_shape: shape passed to layers.Input (without the batch axis).

    Returns:
        An uncompiled Keras Model.
    """
    def double_conv(tensor, filters):
        # Two stacked 3x3 'same' convolutions with ReLU.
        for _ in range(2):
            tensor = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    inputs = layers.Input(shape=input_shape)

    # --- Encoder (Downsampling) ---
    skip_tensors = []
    x = inputs
    for filters in (16, 32, 64):
        x = double_conv(x, filters)
        skip_tensors.append(x)  # kept for the skip connection
        x = layers.MaxPooling2D((2, 2))(x)

    # --- Bottleneck ---
    x = double_conv(x, 128)

    # --- Decoder (Upsampling) ---
    # Walk the skips deepest-first so each upsampled map meets its same-resolution partner.
    for filters, skip in zip((64, 32, 16), reversed(skip_tensors)):
        x = layers.Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = layers.concatenate([x, skip])  # Skip Connection
        x = double_conv(x, filters)

    # Output Layer: Sigmoid for binary classification (0 or 1)
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(x)

    return models.Model(inputs=[inputs], outputs=[outputs])

# Derive the input shape from IMG_SIZE so the model always matches the
# (*IMG_SIZE, 3) images produced by the data pipeline.
model = build_unet((*IMG_SIZE, 3))
model.summary()

## 9. Step: Compile and Train

In [None]:
# --- Dice Loss and IoU Metric ---
def dice_coef(y_true, y_pred, smooth=1):
    """Soft Dice coefficient computed over all elements of both tensors.

    Both inputs are flattened, so the score is a single value for the whole
    batch: (2 * sum(t * p) + smooth) / (sum(t) + sum(p) + smooth).
    ``smooth`` keeps the ratio defined when both tensors sum to zero.
    """
    truth_flat = K.flatten(y_true)
    pred_flat = K.flatten(y_pred)
    overlap = K.sum(truth_flat * pred_flat)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth_flat) + K.sum(pred_flat) + smooth
    return numerator / denominator

def dice_loss(y_true, y_pred):
    """Dice loss: one minus the soft Dice coefficient (lower is better)."""
    overlap_score = dice_coef(y_true, y_pred)
    return 1 - overlap_score

def iou_metric(y_true, y_pred):
    """Intersection-over-union of the thresholded prediction, over the whole batch.

    Predictions are binarised at 0.5 before the overlap is computed. The same
    small constant is added to numerator and denominator, so a batch where
    both the ground truth and the prediction are empty scores 1 (a correct
    "no ship" prediction) instead of 0, matching how dice_coef treats that case.

    Returns:
        Scalar float32 tensor in [0, 1].
    """
    eps = 1e-7
    y_true = tf.cast(y_true, tf.float32)  # guard against non-float mask dtypes
    y_pred = tf.cast(y_pred > 0.5, tf.float32)
    intersection = tf.reduce_sum(y_true * y_pred)
    union = tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) - intersection
    return (intersection + eps) / (union + eps)

# --- Compile ---
print("Compiling model...")
model.compile(
    optimizer='adam',
    loss=dice_loss,
    metrics=['accuracy', iou_metric],
)

# --- Callbacks ---
# All three callbacks watch val_loss.
# Save best model
checkpoint_cb = ModelCheckpoint(
    "best_ship_model.keras", monitor='val_loss', save_best_only=True, verbose=1
)
# Stop if no improvement
early_stopping_cb = EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True, verbose=1
)
# Reduce learning rate if stuck
reduce_lr_cb = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1
)
callbacks_list = [checkpoint_cb, early_stopping_cb, reduce_lr_cb]

# --- Training ---
# One full pass over the balanced set would be len(train_df_balanced) // BATCH_SIZE
# steps; a fixed, smaller number of steps per epoch is used here instead.
TRAIN_STEPS = 500
VAL_STEPS = 50  # Validate on a subset to save time
print(f"Starting training with {TRAIN_STEPS} steps per epoch...")

# NOTE(review): the recorded run shows iou_metric at 0 while accuracy is ~0.997,
# which looks like the model predicting background everywhere - TODO investigate.
history = model.fit(
    train_ds,
    epochs=50,
    steps_per_epoch=TRAIN_STEPS,
    validation_data=val_ds,
    validation_steps=VAL_STEPS,
    callbacks=callbacks_list,
)

Compiling model...
Starting training with 500 steps per epoch...
Epoch 1/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18s/step - accuracy: 0.7696 - iou_metric: 0.0061 - loss: 0.9948 
Epoch 1: val_loss improved from inf to 0.97468, saving model to best_ship_model.keras
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10167s[0m 20s/step - accuracy: 0.7699 - iou_metric: 0.0061 - loss: 0.9948 - val_accuracy: 0.9988 - val_iou_metric: 0.0000e+00 - val_loss: 0.9747 - learning_rate: 0.0010
Epoch 2/50
[1m 52/500[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m2:16:21[0m 18s/step - accuracy: 0.9972 - iou_metric: 0.0000e+00 - loss: 0.9993

## 10. Step: Evaluation & Visualization

In [None]:
# 1. Plot Training History
# One panel per tracked quantity, each showing the training and validation curves.
fig, (loss_ax, iou_ax) = plt.subplots(1, 2, figsize=(12, 5))

panels = [
    (loss_ax, 'loss', 'Loss', 'Dice Loss (Lower is better)'),
    (iou_ax, 'iou_metric', 'IoU', 'IoU Metric (Higher is better)'),
]
for ax, key, label, title in panels:
    ax.plot(history.history[key], label=f'Train {label}')
    ax.plot(history.history[f'val_{key}'], label=f'Val {label}')
    ax.set_title(title)
    ax.legend()

plt.show()

# 2. Visualize Predictions
print("Generating predictions on validation set...")
val_images, val_masks = next(iter(val_ds))
predictions = model.predict(val_images)

imgnum = 5
# One row per sample: original image | ground-truth mask | thresholded prediction.
fig, axes = plt.subplots(imgnum, 3, figsize=(15, imgnum * 4))

for i, (image_ax, truth_ax, pred_ax) in enumerate(axes):
    # Original Image
    image_ax.imshow(val_images[i])
    image_ax.set_title(f"Original Image #{i+1}")

    # Ground Truth Mask
    truth_ax.imshow(val_masks[i], cmap='gray')
    truth_ax.set_title("Ground Truth Mask")

    # Model Prediction (Thresholded)
    pred_mask = predictions[i] > 0.5
    pred_ax.imshow(pred_mask, cmap='gray')
    pred_ax.set_title("Model Prediction (>0.5)")

    for ax in (image_ax, truth_ax, pred_ax):
        ax.axis('off')

fig.tight_layout()
plt.show()