Here we want to sort the data in order to be able to use it more easily

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
from sklearn.model_selection import train_test_split


In [2]:
import os
import cv2
import numpy as np

# Define directories
vv_dir = '../Final_database/vv/'
vh_dir = '../Final_database/vh/'
water_body_dir = '../Final_database/water_body_label/'
flood_label_dir = '../Final_database/flood_label/'
preprocessed_dir = '../preprocessed_data/'

# Create the preprocessed_data folder if it doesn't exist
os.makedirs(preprocessed_dir, exist_ok=True)

def preprocess_and_save():
    """
    Preprocess the images from vv, vh, water body, and flood label directories.
    Save the preprocessed images as numpy arrays in the preprocessed_data folder.
    """
    # Get the list of files (assuming the same file names in all directories)
    vv_files = os.listdir(vv_dir)
    vh_files = os.listdir(vh_dir)
    water_body_files = os.listdir(water_body_dir)
    flood_label_files = os.listdir(flood_label_dir)
    n = len(vv_files)

    for k in range(n):
        # Load the corresponding images from each folder
        vv_image = cv2.imread(os.path.join(vv_dir, vv_files[k]), cv2.IMREAD_GRAYSCALE)
        #vh_image = cv2.imread(os.path.join(vh_dir, vh_files[k]), cv2.IMREAD_GRAYSCALE)
        water_body_image = cv2.imread(os.path.join(water_body_dir, water_body_files[k]), cv2.IMREAD_GRAYSCALE)
        flood_label_image = cv2.imread(os.path.join(flood_label_dir, flood_label_files[k]), cv2.IMREAD_GRAYSCALE)

        # Normalize the images (0-255 -> 0-1)
        vv_image = vv_image / 255.0
        #vh_image = vh_image / 255.0
        water_body_image = water_body_image / 255.0
        flood_label_image = flood_label_image / 255.0

        # Stack the VV, VH, and water body label into a 3-channel input array
        input_image = np.stack([vv_image, water_body_image], axis=-1)
        
        np.save(os.path.join(preprocessed_dir, f'input_{vv_files[k]}.npy'), input_image)
        np.save(os.path.join(preprocessed_dir, f'label_{vv_files[k]}.npy'), flood_label_image)


# Call the preprocessing function
preprocess_and_save()


In [8]:
def load_preprocessed_data():
    """
    Load preprocessed input and label images from the preprocessed_data folder.
    Returns: (inputs, labels) arrays.
    """
    input_files = sorted([f for f in os.listdir(preprocessed_dir) if f.startswith('input_')])
    label_files = sorted([f for f in os.listdir(preprocessed_dir) if f.startswith('label_')])

    inputs = []
    labels = []

    for input_file, label_file in zip(input_files, label_files):
        # Load input and label arrays
        input_image = np.load(os.path.join(preprocessed_dir, input_file))
        label_image = np.load(os.path.join(preprocessed_dir, label_file))
        
        # Append to lists
        inputs.append(input_image)
        labels.append(label_image)

    return np.array(inputs), np.array(labels)

# Load preprocessed data
X, y = load_preprocessed_data()

# Split into training and validation sets (80/20 split)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")

Training set size: 3408
Validation set size: 852


In [9]:
def unet_model_with_water_body(input_shape=(256, 256, 2)):
    """
    Build a U-Net model that takes in VV, VH polarization images and 
    the water body label as input to predict the flooded areas.
    
    Input: (VV, VH, Water Body Label) -> 3 channels
    Output: Flooded area segmentation -> 1 channel
    """
    
    inputs = layers.Input(shape=input_shape)

    # Encoder
    c1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    c1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(c1)
    p1 = layers.MaxPooling2D((2, 2))(c1)

    c2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(p1)
    c2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(c2)
    p2 = layers.MaxPooling2D((2, 2))(c2)

    # Decoder
    u1 = layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(p2)
    u1 = layers.concatenate([u1, c2])
    c3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(u1)
    c3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(c3)

    u2 = layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c3)
    u2 = layers.concatenate([u2, c1])
    c4 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(u2)
    c4 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(c4)

    # Output layer (flooded area prediction)
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(c4)

    # Compile the model
    model = models.Model(inputs=[inputs], outputs=[outputs])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Example usage:
# The input shape is (256, 256, 3) where we have 3 channels (VV, VH, Water Body Label)
model = unet_model_with_water_body()
model.summary()


I0000 00:00:1728395885.492407    5041 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1728395885.492680    5041 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1728395885.492910    5041 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1728395885.562755    5041 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

In [12]:
# Train the U-Net model
model.fit(X_train, y_train, epochs=5, batch_size=2, validation_data=(X_val, y_val))

2024-10-08 15:58:09.043969: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1786773504 exceeds 10% of free system memory.
2024-10-08 15:58:10.540805: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 893386752 exceeds 10% of free system memory.
2024-10-08 15:58:11.242136: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1786773504 exceeds 10% of free system memory.
2024-10-08 15:58:12.277715: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 893386752 exceeds 10% of free system memory.


Epoch 1/5


I0000 00:00:1728395895.003348    6114 service.cc:146] XLA service 0x7c570c007370 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1728395895.003377    6114 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 2070 with Max-Q Design, Compute Capability 7.5
2024-10-08 15:58:15.079234: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-10-08 15:58:15.656880: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8902
2024-10-08 15:58:19.435898: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 8.20GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-10-08 15:58:19.781104: W external/local_tsl/tsl/framework/bfc_al

[1m   2/1704[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:27[0m 51ms/step - accuracy: 0.9932 - loss: 0.6795  

I0000 00:00:1728395903.070828    6114 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1704/1704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.9699 - loss: 0.1495

2024-10-08 15:59:49.656432: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 446693376 exceeds 10% of free system memory.
2024-10-08 15:59:51.841034: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.11GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-10-08 15:59:52.203482: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 8.20GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.


[1m1704/1704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 58ms/step - accuracy: 0.9699 - loss: 0.1495 - val_accuracy: 0.9687 - val_loss: 0.1062
Epoch 2/5
[1m1704/1704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 65ms/step - accuracy: 0.9717 - loss: 0.1194 - val_accuracy: 0.9749 - val_loss: 0.1090
Epoch 3/5
[1m1704/1704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 65ms/step - accuracy: 0.9718 - loss: 0.1178 - val_accuracy: 0.9743 - val_loss: 0.1055
Epoch 4/5
[1m1704/1704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 65ms/step - accuracy: 0.9722 - loss: 0.1177 - val_accuracy: 0.9746 - val_loss: 0.1007
Epoch 5/5
[1m1704/1704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 65ms/step - accuracy: 0.9734 - loss: 0.1105 - val_accuracy: 0.9749 - val_loss: 0.0998


<keras.src.callbacks.history.History at 0x7c5a9a5153f0>

In [22]:
##### PREDICTION DU MODELE #####
# After training, make predictions on a subset of the validation set (3 samples)
y_pred = model.predict(X_val[:10])
print(y_pred.shape)

# Calculate the IoU metric for each prediction
iou_scores = []
for i in range(10):
    intersection = np.logical_and(y_val[i], y_pred[i])
    union = np.logical_or(y_val[i], y_pred[i])
    if np.sum(union) == 0:
        iou_score = 0.0
    else:
        iou_score = np.sum(intersection) / np.sum(union)
    iou_scores.append(iou_score)

print("Average IoU score for 3 samples:", np.mean(iou_scores))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8s/step
(10, 256, 256, 1)
Average IoU score for 3 samples: 0.05790557861328125


In [24]:
##### IMPRIME LES IMAGES EN PNG ####
import imageio

# Ensure the output directory exists
output_dir = '../predicted_images'
os.makedirs(output_dir, exist_ok=True)

# Transform the 3 predicted images into PNG files
for i in range(10):
    # Rescale the image to 0-255 and convert to uint8
    img = (y_pred[i, :, :, 0] * 255).astype(np.uint8)
    # Save the image as a PNG file
    imageio.imwrite(os.path.join(output_dir, f'predicted_image_{i}.png'), img)