# 1. Sorting the database

In [9]:
import os
import numpy as np
from skimage import io, color


Le script suivant permet de réaliser un premier tri de notre base de données, afin d'éliminer les fichiers qui ne contiennent pas suffisamment de données, car la database originale contient une grande quantité d'images vides, ou bien avec une portion très faible d'image satellite. Il faut également par la suite vérifier que les images restantes sont utiles pour entraîner notre algorithme. En effet, une image qui ne contient aucun corps d'eau, ou aucune étendue inondable, ne sera pas utile dans la phase d'apprentissage.

In [19]:
def is_relevant_image(image_path, threshold=0.75):
    """
        This script checks that the number of pixels containing a grey value (0<x<1) is higher than the threshold. If this is not the case, the image
        is considered to contain too few useful data, and is not saved in the final database.

        Returns a mask
    """
    # Read the image
    img = io.imread(image_path)
    
    # Convert to grayscale if it's not already
    if len(img.shape) == 3:
        img = color.rgb2gray(img)

    # Create a mask for non-zero and non-one pixel values (gray pixels)
    gray_pixel_mask = (img > 0) & (img < 1)

    # Calculate the total number of pixels
    total_pixels = gray_pixel_mask.size
    
    # Count the number of gray pixels
    gray_pixel_count = np.sum(gray_pixel_mask)
    
    # Calculate the proportion of gray pixels
    proportion_gray = gray_pixel_count / total_pixels
    
    # Check if the proportion meets the threshold
    return proportion_gray >= threshold

def filter_images(input_folder, output_folder):
    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)
    os.makedirs(f'{output_folder}/vh', exist_ok=True)
    os.makedirs(f'{output_folder}/vv', exist_ok=True)
    os.makedirs(f'{output_folder}/water_body_label', exist_ok=True)
    os.makedirs(f'{output_folder}/flood_label', exist_ok=True)

    # Loop through each file in the input folder
    for filename in os.listdir(f'{input_folder}/vh'):   #We choose to check in the folder vh if the image is relevant
        file_path_vh = os.path.join(f'{input_folder}/vh', f'{filename[:-7]}_vh.png')    #On utilise les f-strings de python pour modifier les noms de fichier dans la db
        file_path_vv = os.path.join(f'{input_folder}/vv', f'{filename[:-7]}_vv.png')
        file_path_water = os.path.join(f'{input_folder}/water_body_label', f'{filename[:-7]}.png')
        file_path_flood = os.path.join(f'{input_folder}/flood_label', f'{filename[:-7]}.png')
        
        if os.path.isfile(file_path_vh):
            # Check if the image is relevant
            if is_relevant_image(file_path_vh):
                # If relevant, save it to the output folder
                img_vh = io.imread(file_path_vh)
                io.imsave(os.path.join(f'{output_folder}/vh', f'{filename[:-7]}_vh.png'), img_vh)
                #print(f'Saved: {filename}')

                img_vv = io.imread(file_path_vv)
                io.imsave(os.path.join(f'{output_folder}/vv', f'{filename[:-7]}_vv.png'), img_vv)

                img_water = io.imread(file_path_water)
                io.imsave(os.path.join(f'{output_folder}/water_body_label', f'{filename[:-7]}.png'), img_water)

                img_flood = io.imread(file_path_flood)
                io.imsave(os.path.join(f'{output_folder}/flood_label', f'{filename[:-7]}.png'), img_flood)

            else:
                continue
                #print(f'Rejected: {filename}')




In [15]:
folders = ['bangladesh_20170314t115609','bangladesh_20170606t115613','bangladesh_20170712t115615']

In [20]:
# Example usage
# Adapter les chemins
for folder in folders:
    input_folder = f'C:\\Users\\colot\\Documents\\Centrale_Supelec\\Option\\DAML\\Projet\\archive\\train\\train\\{folder}\\tiles'
    output_folder = f'C:\\Users\\colot\\Documents\\Centrale_Supelec\\Option\\DAML\\Projet\\Final_database'
    filter_images(input_folder, output_folder)

#input_folder = 'C:\\Users\\colot\\Documents\\Centrale_Supelec\\Option\\DAML\\Projet\\archive\\train\\train\\bangladesh_20170314t115609\\tiles'
#output_folder = 'C:\\Users\\colot\\Documents\\Centrale_Supelec\\Option\\DAML\\Projet\\Final_database'
#filter_images(input_folder, output_folder)

#temps de run : 4min30 pour 1 dossier sur pc benoit

  io.imsave(os.path.join(f'{output_folder}/water_body_label', f'{filename[:-7]}.png'), img_water)
  io.imsave(os.path.join(f'{output_folder}/water_body_label', f'{filename[:-7]}.png'), img_water)
  io.imsave(os.path.join(f'{output_folder}/water_body_label', f'{filename[:-7]}.png'), img_water)
  io.imsave(os.path.join(f'{output_folder}/water_body_label', f'{filename[:-7]}.png'), img_water)
  io.imsave(os.path.join(f'{output_folder}/water_body_label', f'{filename[:-7]}.png'), img_water)
  io.imsave(os.path.join(f'{output_folder}/water_body_label', f'{filename[:-7]}.png'), img_water)
  io.imsave(os.path.join(f'{output_folder}/flood_label', f'{filename[:-7]}.png'), img_flood)
  io.imsave(os.path.join(f'{output_folder}/water_body_label', f'{filename[:-7]}.png'), img_water)
  io.imsave(os.path.join(f'{output_folder}/flood_label', f'{filename[:-7]}.png'), img_flood)
  io.imsave(os.path.join(f'{output_folder}/water_body_label', f'{filename[:-7]}.png'), img_water)
  io.imsave(os.path.join(f'{ou