# Homework 2: Ensemble Model for Mars Terrain Segmentation

## Key Features

- **Architecture**: Ensemble of multiple U-Net models with custom enhancements.
- **Data Preprocessing**: Rescale pixels, add color channel.
- **Ensemble Models**: Averages the per-class predictions of several pre-trained models and takes the arg-max as the final label for each pixel.
- **Submission**: Generates a CSV file for Kaggle competition.

## Check the execution environment

In [2]:
import os

# Detect Google Colab: the `google.colab` package can only be imported there.
# Catch ImportError specifically so that unrelated failures (for example a
# KeyboardInterrupt) are not silently swallowed by a bare `except`.
try:
    import google.colab  # noqa: F401  (imported only to probe availability)
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

# Detect Kaggle: this environment variable is used as the marker of a Kaggle kernel.
IN_KAGGLE = 'KAGGLE_KERNEL_RUN_TYPE' in os.environ


In [3]:
# Colab only: mount Google Drive and move into the homework folder.
if IN_COLAB:
    # `google.colab` is imported inside the Colab-only branch, so this cell
    # does not attempt the import in other environments.
    from google.colab import drive

    drive.mount("/gdrive")
    # IPython magic: change the notebook's working directory to the homework
    # folder on the mounted drive (the rest of the line is taken as the path).
    %cd /gdrive/My Drive/[2024-2025] AN2DL/Homework 2

## ⚙️ Import Libraries

In [4]:
# Install albumentations
!pip install -q -U albumentations

In [5]:
# Not read anywhere in the visible part of this notebook; presumably toggles
# saving figures to disk — TODO confirm or remove.
SAVE_IMAGES = False

# Seed passed to the NumPy, TensorFlow and `random` generators further down in this cell.
seed = 42

import re, time
from datetime import datetime
import random

import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
import albumentations as A
import tqdm as notebook_tqdm

import keras_cv

import matplotlib.pyplot as plt
%matplotlib inline

# Seed NumPy, TensorFlow and Python's `random`, in that order, with the same value.
for _seed_rng in (np.random.seed, tf.random.set_seed, random.seed):
    _seed_rng(seed)

# Report the library versions and the number of GPUs TensorFlow can see.
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {tfk.__version__}")
print(f"GPU devices: {len(tf.config.list_physical_devices('GPU'))}")

2024-12-10 17:05:56.628130: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733846756.679815   55546 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733846756.695684   55546 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-10 17:05:56.821116: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


TensorFlow version: 2.18.0
Keras version: 3.7.0
GPU devices: 0


W0000 00:00:1733846762.055855   55546 gpu_device.cc:2344] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


## ⏳ Load the Data

In [6]:
# Pick the dataset location: the Kaggle input mount when running on Kaggle,
# otherwise a path relative to the working directory (adjust it to your folder layout).
data = np.load(
    '/kaggle/input/mars-homework-2/mars_for_students.npz'
    if IN_KAGGLE
    else "../mars_for_students.npz"
)

# Index 0 / 1 along the second axis of the training array give X / y.
training_set = data["training_set"]
X_train, y_train = training_set[:, 0], training_set[:, 1]

X_test = data["test_set"]

print(f"Training X shape: {X_train.shape}")
print(f"Training y shape: {y_train.shape}")
print(f"Test X shape: {X_test.shape}")

Training X shape: (2615, 64, 128)
Training y shape: (2615, 64, 128)
Test X shape: (10022, 64, 128)


In [None]:
# Number of segmentation classes. Passed to the Mean IoU metric when the
# ensemble models are compiled; the preprocessing cell separately derives
# `num_classes` from the labels (its recorded output is also 5).
NUM_CLASSES = 5

## 🏋🏻‍♂️ Data preprocessing

In [8]:
# Add a trailing color-channel axis and divide by 255 (rescaling pixels to
# [0, 1], assuming 8-bit intensities).
# The arrays are rebuilt from the raw `training_set` / `data` objects instead of
# from the current `X_train` / `X_test`, so re-running this cell does not add a
# second channel axis or divide by 255 a second time.
X_train = training_set[:, 0][..., np.newaxis] / 255.0
X_test = data["test_set"][..., np.newaxis] / 255.0

# Per-sample input shape (everything after the batch axis) and the number of
# distinct label values found in the training masks.
input_shape = X_train.shape[1:]
num_classes = len(np.unique(y_train))

print(f"Input shape: {input_shape}")
print(f"Number of classes: {num_classes}")

Input shape: (64, 128, 1)
Number of classes: 5


In [14]:
# Define custom Mean Intersection Over Union metric
class MeanIntersectionOverUnion(tf.keras.metrics.MeanIoU):
    """Mean IoU on per-class scores that ignores pixels with excluded labels.

    `update_state` converts `y_pred` to class indices with an arg-max over the
    last axis, flattens labels and predictions, drops every position whose
    true label is listed in `labels_to_exclude`, and forwards the remainder to
    `tf.keras.metrics.MeanIoU`.

    Args:
        num_classes: Number of classes, forwarded to `MeanIoU`.
        labels_to_exclude: Iterable of label values to ignore. Defaults to `[0]`.
        name: Metric name.
        dtype: Metric dtype, forwarded to `MeanIoU`.
    """

    def __init__(self, num_classes, labels_to_exclude=None, name="mean_iou", dtype=None):
        super().__init__(num_classes=num_classes, name=name, dtype=dtype)
        if labels_to_exclude is None:
            labels_to_exclude = [0]  # Default to excluding label 0
        # Store a private list so later mutation of the caller's object has no effect.
        self.labels_to_exclude = list(labels_to_exclude)

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate statistics for one batch, skipping excluded labels.

        Args:
            y_true: Ground-truth labels; flattened internally.
            y_pred: Per-class scores with classes on the last axis.
            sample_weight: Optional weights. A scalar is passed through
                unchanged; anything else is assumed to have one weight per
                element of `y_true` and is masked the same way.

        Returns:
            Whatever the parent `update_state` returns.
        """
        # Convert predictions to class labels
        y_pred = tf.math.argmax(y_pred, axis=-1)

        # Flatten the tensors
        y_true = tf.reshape(y_true, [-1])
        y_pred = tf.reshape(y_pred, [-1])

        # Build a single keep-mask: True where the true label is not excluded.
        keep = tf.ones_like(y_true, dtype=tf.bool)
        for label in self.labels_to_exclude:
            keep = tf.logical_and(keep, tf.not_equal(y_true, label))

        # Keep non-scalar weights aligned with the masked labels; otherwise the
        # parent receives weights and labels of different lengths.
        if sample_weight is not None:
            sample_weight = tf.convert_to_tensor(sample_weight)
            if sample_weight.shape.rank != 0:
                sample_weight = tf.boolean_mask(tf.reshape(sample_weight, [-1]), keep)

        y_true = tf.boolean_mask(y_true, keep)
        y_pred = tf.boolean_mask(y_pred, keep)

        # Update the state
        return super().update_state(y_true, y_pred, sample_weight)

    def get_config(self):
        """Return exactly the arguments `__init__` accepts.

        The inherited config does not contain `labels_to_exclude` and may
        contain keys this constructor does not take, so it is rebuilt here to
        let the metric be re-created from its config.
        """
        base_config = super().get_config()
        return {
            "num_classes": self.num_classes,
            "labels_to_exclude": list(self.labels_to_exclude),
            "name": base_config["name"],
            "dtype": base_config["dtype"],
        }

## 📊 Prepare Your Submission

In our Kaggle competition, submissions are made as `csv` files. To create a proper `csv` file, you need to flatten your predictions and include an `id` column as the first column of your dataframe. To maintain consistency between your results and our solution, please avoid shuffling the test set. The code below demonstrates how to prepare the `csv` file from your model predictions.




In [None]:
# Saved ensemble members. The note above each path records what distinguishes
# the model and the notebook that produced it.
model_filenames = [
    # 0.666 with AugMix
    # https://www.kaggle.com/code/samuelepozzani/homework2-classweights-augs
    "/kaggle/input/ensemble-model/model_241203_151340.keras",
    # dice + boundary + crossentropy
    # https://www.kaggle.com/code/samuelepozzani/homework2-classweights-augs-4
    "/kaggle/input/ensemble-model/model_241208_124742.keras",
    # residual fusion
    # https://www.kaggle.com/code/mmartini00/homework-2-aug-classw-residual-fusion-redlr
    "/kaggle/input/ensemble-model/model_241211_092055.keras",
]

models = []

for name in model_filenames:
    print(f"Loading model from {name}")
    # Load without the saved compile state, then compile with metrics only;
    # each model gets its own metric instance.
    model = tfk.models.load_model(name, compile=False)
    model.compile(
        metrics=[
            "accuracy",
            MeanIntersectionOverUnion(num_classes=NUM_CLASSES, labels_to_exclude=[0]),
        ]
    )
    models.append(model)
    print(f"Model loaded from {name}")


Loading model from model_241203_151340.keras
Model loaded from model_241203_151340.keras
Loading model from model_241208_124742.keras
Model loaded from model_241208_124742.keras


In [None]:
# Ensemble model predictions
if not models:
    raise ValueError("No models loaded: cannot compute ensemble predictions.")

# Keep a running sum instead of collecting every model's output in a list.
# Only one model's predictions plus the accumulator are alive at a time, rather
# than all of them plus the stacked copy that `np.mean` over a list creates.
preds = None
for model in models:
    model_preds = model.predict(X_test)
    if preds is None:
        # Accumulate in at least float32 (no copy when the dtype already matches).
        preds = model_preds.astype(np.result_type(model_preds.dtype, np.float32), copy=False)
    else:
        preds += model_preds
del model_preds  # drop the extra reference to the last prediction array

# Average the per-class scores, then pick the highest-scoring class per pixel.
preds /= len(models)
preds = np.argmax(preds, axis=-1)
print(f"Predictions shape: {preds.shape}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 401ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 470ms/step
Predictions shape: (10, 64, 128)


In [26]:
def y_to_df(y) -> pd.DataFrame:
    """Flatten per-sample predictions into a Kaggle-style table.

    Each sample becomes one row: an ``id`` column (0..n-1) followed by one
    integer-named column per flattened prediction value.
    """
    num_rows = len(y)
    table = pd.DataFrame(y.reshape(num_rows, -1))
    # Put the running sample index in front of the flattened values.
    table.insert(0, "id", np.arange(num_rows))
    return table

In [27]:
# Build a timestamped csv submission from the ensemble predictions and, on
# Colab, trigger a browser download of the file.
timestep_str = f"{datetime.now():%y%m%d_%H%M%S}"
submission_filename = f"submission_{timestep_str}.csv"

submission_df = y_to_df(preds)
submission_df.to_csv(submission_filename, index=False)

if IN_COLAB:
    # `google.colab` is imported inside the Colab-only branch.
    from google.colab import files
    files.download(submission_filename)