Here we preprocess and organize the data so that it can be used more easily in the rest of the notebook.

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
from sklearn.model_selection import train_test_split


In [2]:
import os
import cv2
import numpy as np

# Define directories
# All paths are relative to the notebook's working directory.
# Source folders read by preprocess_and_save():
vv_dir = '../Final_database/vv/'
# NOTE: the VH folder is declared here, but the VH image loading in
# preprocess_and_save() is currently commented out.
vh_dir = '../Final_database/vh/'
water_body_dir = '../Final_database/water_body_label/'
flood_label_dir = '../Final_database/flood_label/'
# Destination folder for the generated input_*.npy / label_*.npy arrays
preprocessed_dir = '../preprocessed_data/'

# Create the preprocessed_data folder if it doesn't exist
# (exist_ok=True makes re-running this cell safe)
os.makedirs(preprocessed_dir, exist_ok=True)

def _read_grayscale_unit(path):
    """Read an image as grayscale and scale it from 0-255 to 0-1.

    Raises:
        OSError: if OpenCV cannot read the file (cv2.imread returns None
            instead of raising on a missing or unreadable image).
    """
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise OSError(f"Could not read image: {path}")
    return image / 255.0


def preprocess_and_save():
    """
    Preprocess the images from the vv, water body, and flood label directories
    and save them as numpy arrays in the preprocessed_data folder.

    For every VV image, the matching water body image is stacked with it into
    a 2-channel input array saved as ``input_<vv file name>.npy``; the flood
    label is saved as ``label_<vv file name>.npy``. All images are read as
    grayscale and scaled to the 0-1 range. The VH polarization is currently
    not used.

    Files are matched across directories by their position in the *sorted*
    directory listings (the directories are assumed to contain corresponding
    file names).

    Raises:
        ValueError: if the directories do not contain the same number of files.
        OSError: if an image cannot be read.
    """
    # os.listdir returns entries in arbitrary order, so every listing is sorted
    # to make sure index k refers to the same scene in each directory.
    vv_files = sorted(os.listdir(vv_dir))
    water_body_files = sorted(os.listdir(water_body_dir))
    flood_label_files = sorted(os.listdir(flood_label_dir))

    if not (len(vv_files) == len(water_body_files) == len(flood_label_files)):
        raise ValueError(
            "Directories contain different numbers of files: "
            f"vv={len(vv_files)}, water_body={len(water_body_files)}, "
            f"flood_label={len(flood_label_files)}"
        )

    for vv_name, water_body_name, flood_label_name in zip(
        vv_files, water_body_files, flood_label_files
    ):
        # Load and normalize (0-255 -> 0-1) the corresponding image from each folder
        vv_image = _read_grayscale_unit(os.path.join(vv_dir, vv_name))
        water_body_image = _read_grayscale_unit(os.path.join(water_body_dir, water_body_name))
        flood_label_image = _read_grayscale_unit(os.path.join(flood_label_dir, flood_label_name))

        # Stack VV and the water body label into a 2-channel input array (H, W, 2)
        input_image = np.stack([vv_image, water_body_image], axis=-1)

        np.save(os.path.join(preprocessed_dir, f'input_{vv_name}.npy'), input_image)
        np.save(os.path.join(preprocessed_dir, f'label_{vv_name}.npy'), flood_label_image)


# Call the preprocessing function
preprocess_and_save()


In [3]:
def load_preprocessed_data(data_dir=None):
    """
    Load preprocessed input and label arrays from a folder of .npy files.

    Each ``input_<name>`` file is paired with the ``label_<name>`` file that
    shares the same suffix, so a missing file can never silently shift the
    pairing of the remaining samples. Samples are returned in sorted order of
    the input file names.

    Arguments:
    data_dir -- Folder to read from. Defaults to the notebook-level
                ``preprocessed_dir`` when None.

    Returns: (inputs, labels) arrays.

    Raises:
    FileNotFoundError -- if an input file has no matching label file.
    """
    if data_dir is None:
        data_dir = preprocessed_dir

    input_prefix = 'input_'
    label_prefix = 'label_'
    input_files = sorted(f for f in os.listdir(data_dir) if f.startswith(input_prefix))

    inputs = []
    labels = []

    for input_file in input_files:
        # Derive the label name from the input name instead of relying on
        # two independently sorted listings lining up.
        label_file = label_prefix + input_file[len(input_prefix):]
        label_path = os.path.join(data_dir, label_file)
        if not os.path.isfile(label_path):
            raise FileNotFoundError(
                f"No label file '{label_file}' found for input '{input_file}' in {data_dir}"
            )

        inputs.append(np.load(os.path.join(data_dir, input_file)))
        labels.append(np.load(label_path))

    return np.array(inputs), np.array(labels)

# Read every preprocessed (input, label) pair back into memory
X, y = load_preprocessed_data()

# Hold out 20% of the samples for validation; the fixed random_state keeps
# the split reproducible across runs
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")

Training set size: 3408
Validation set size: 852


In [4]:
def unet_model_with_water_body(input_shape=(256, 256, 2)):
    """
    Build and compile a small U-Net that predicts the flooded areas.

    Input: tensor of shape ``input_shape``; by default (256, 256, 2), which
           matches the 2-channel arrays (VV, Water Body Label) produced by
           the preprocessing step of this notebook.
    Output: Flooded area segmentation -> 1 channel (sigmoid probabilities)

    The network has two encoder stages (64 and 128 filters, each followed by
    2x2 max pooling) and two decoder stages that upsample with transposed
    convolutions and concatenate the matching encoder features.

    Returns the model compiled with the Adam optimizer, binary cross-entropy
    loss and the accuracy metric.
    """

    def conv_pair(tensor, filters):
        """Apply two successive 3x3 same-padded ReLU convolutions."""
        for _ in range(2):
            tensor = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    net_input = layers.Input(shape=input_shape)

    # Encoder: convolve, then halve the spatial resolution
    enc_64 = conv_pair(net_input, 64)
    pooled_64 = layers.MaxPooling2D((2, 2))(enc_64)

    enc_128 = conv_pair(pooled_64, 128)
    pooled_128 = layers.MaxPooling2D((2, 2))(enc_128)

    # Decoder: upsample, merge with the skip connection, then convolve
    up_128 = layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(pooled_128)
    merged_128 = layers.concatenate([up_128, enc_128])
    dec_128 = conv_pair(merged_128, 128)

    up_64 = layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(dec_128)
    merged_64 = layers.concatenate([up_64, enc_64])
    dec_64 = conv_pair(merged_64, 64)

    # Per-pixel flood probability
    flood_prob = layers.Conv2D(1, (1, 1), activation='sigmoid')(dec_64)

    unet = models.Model(inputs=[net_input], outputs=[flood_prob])
    unet.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return unet

# Example usage:
# With the default input_shape the model takes (256, 256, 2) inputs,
# i.e. 2 channels (VV, Water Body Label)
model = unet_model_with_water_body()
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 256, 256, 64)         1792      ['input_1[0][0]']             
                                                                                                  
 conv2d_1 (Conv2D)           (None, 256, 256, 64)         36928     ['conv2d[0][0]']              
                                                                                                  
 max_pooling2d (MaxPooling2  (None, 128, 128, 64)         0         ['conv2d_1[0][0]']            
 D)                                                                                           

In [5]:
def iou_metric(y_true, y_pred, threshold=0.5):
    """
    Calculate Intersection over Union (IoU) between ground truth (y_true) and predicted (y_pred) masks.

    Arguments:
    y_true -- Ground truth mask (e.g., flood label); any non-zero value counts as positive.
    y_pred -- Predicted probabilities (output from the model).
    threshold -- Threshold to convert the predicted probabilities into binary output.

    Singleton axes are ignored when matching shapes, so a (H, W) label can be
    compared with a (H, W, 1) model output.

    Returns:
    IoU score. When both masks are empty (union of zero pixels) the masks
    agree perfectly and 1.0 is returned instead of NaN.

    Raises:
    ValueError -- if the masks do not have the same shape once singleton
                  axes are removed.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Without this, a (H, W) label and a (H, W, 1) prediction would be
    # broadcast to (H, W, H) by the logical operations below and the score
    # would be meaningless.
    if y_true.shape != y_pred.shape:
        y_true = np.squeeze(y_true)
        y_pred = np.squeeze(y_pred)
        if y_true.shape != y_pred.shape:
            raise ValueError(
                f"Mask shapes do not match: y_true {y_true.shape} vs y_pred {y_pred.shape}"
            )

    # Convert predicted probabilities to binary based on the threshold
    y_pred_binary = y_pred > threshold
    y_true_binary = y_true.astype(bool)

    # Compute intersection and union
    intersection_count = np.sum(np.logical_and(y_true_binary, y_pred_binary))
    union_count = np.sum(np.logical_or(y_true_binary, y_pred_binary))

    # Both masks empty: define IoU as a perfect match rather than dividing 0 by 0
    if union_count == 0:
        return 1.0

    return intersection_count / union_count

In [6]:
# Fit the U-Net on the training split and evaluate on the validation split
# after every epoch
model.fit(
    x=X_train,
    y=y_train,
    batch_size=2,
    epochs=5,
    validation_data=(X_val, y_val),
)

Epoch 1/5
  2/213 [..............................] - ETA: 43:31 - loss: 0.6136 - accuracy: 0.9678

KeyboardInterrupt: 

In [None]:
# After training, make predictions on a validation set
y_pred = model.predict(X_val)

# The model output carries a trailing channel axis of size 1 while the labels
# do not; drop it so each prediction is compared element-wise with its label
# instead of being broadcast against it.
if y_pred.ndim == y_val.ndim + 1 and y_pred.shape[-1] == 1:
    y_pred_masks = np.squeeze(y_pred, axis=-1)
else:
    y_pred_masks = y_pred

# Calculate the IoU metric for each prediction
iou_scores = [
    iou_metric(true_mask, pred_mask)
    for true_mask, pred_mask in zip(y_val, y_pred_masks)
]
print("Average IoU score:", np.mean(iou_scores))
