In [2]:
#| default_exp ndwi_class

# NDWI 

In [3]:
#|hide
from nbdev.showdoc import *

In [4]:
#|export
import rasterio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 

from flood_exercise import utils_func
from flood_exercise import const_vals as CONST

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [5]:
#|export

class ndwi():

  def __init__(self,
               path_to_s2_tiles : str , #path to the folder that contains the S2 images
               path_to_labeled_tiles : str , #path to the labled images (values -1,0,1)
               ):
    # get the list of tiles from each folder , get only files end with .tif
    self.tiles_s2 = utils_func.load_list_paths(path_to_s2_tiles, filter_file = True)
    self.labels = utils_func.load_list_paths(path_to_labeled_tiles , filter_file = True)


    # get only tiles from specific region (Bolivia for the task) 
    self.tiles_s2_for_evaluation = [x for x in self.tiles_s2 if CONST.REGION_STR_2 in x]

    #calculate threshold
    self.threshold = self._get_ndwi_threshold_()

    # iterate through sentinel-2 images , get % of water in label image and S2 image
    collector = {CONST.WATER_DF_TILE_ID_STR : [],
                 CONST.WATER_DF_WATER_PERC_STR : [],
                 CONST.WATER_DF_WATER_PERC_LABEL_STR : [],
                 CONST.WATER_DF_PATH_STR : []}
    for s2_path in self.tiles_s2:
        #find the matching label tile 
        path_id = s2_path.split(CONST.SPLIT_TILES_NAMES_STR1)[-1].split(CONST.SPLIT_TILES_NAMES_STR2)[1]
        match_label_tile_path = [x for x in self.labels if path_id in x][0]

        #calculate NDWI for S2 image 
        masked_ndwi,perc_water=self._water_prob_new_img_(path = s2_path)

        #calculate water percentage in label image
        label_water_perc = self._water_perc_label_img_(path = match_label_tile_path)

        collector[CONST.WATER_DF_TILE_ID_STR].append(path_id)
        collector[CONST.WATER_DF_WATER_PERC_STR].append(perc_water)
        collector[CONST.WATER_DF_WATER_PERC_LABEL_STR].append(label_water_perc)
        collector[CONST.WATER_DF_PATH_STR].append(s2_path)

    self.water_df = pd.DataFrame(collector)

  def _ndwi_s2_(self ,
                     path):
         """
         Recieves path of image, returns NDWI per image
         :
         path (str): The path to the TIFF file.

         Returns:
         np.array with NDWI values per pixel
         """                      
     
         with rasterio.open(path) as src:
            green = src.read(CONST.GREEN_BAND)
            nir = src.read(CONST.NIR_BAND)

            ndwi = (green - nir) / (green + nir)

         return ndwi 
  


  def _get_ndwi_threshold_(self,
                           ):
                           
       """
       This function takes the path of an image and calculates the Normalized Difference Water Index (NDWI) for each image.

       Parameters:
         paths (list): A list of file paths to the TIFF images.

       Returns:
        list: A list containing two values representing the minimum and maximum NDWI values calculated based on the provided images.
       """
       threshold = [None , None]

       #for each image , calculate NDWI ,get the match labled image
       for s2_path in self.tiles_s2_for_evaluation:
          
          #calculate NDWI, get the image id to match with the labels
          path_id = s2_path.split(CONST.SPLIT_TILES_NAMES_STR1)[-1].split(CONST.SPLIT_TILES_NAMES_STR2)[1]
          ndwi_img  = self._ndwi_s2_(s2_path)

          # match labeled image
          match_label_tile_path = [x for x in self.labels if path_id in x][0]
          match_label_tile = rasterio.open(match_label_tile_path).read(1)  

          #create water mask : change -1 to 0 
          water_mask = np.where(match_label_tile == -1, 0, match_label_tile)
          ndwi_img = np.where(ndwi_img == -1, np.NaN , ndwi_img)

          #multiply water_mask with NDWI :
          mask_ndwi = ndwi_img * water_mask

          # plt.imshow(mask_ndwi)
          # plt.title(path_id)
          # plt.show()
          
          #caluclate the min and max 
          min_val = np.nanmin(mask_ndwi)
          max_val = np.nanmax(mask_ndwi)


          if threshold[0] is None or min_val < threshold[0]:
              threshold[0] = min_val
          if threshold[1] is None or max_val > threshold[1]:
              threshold[1] = max_val

          # print(f' current threshold : {threshold} , path: {path_id}')


       return threshold



  def _water_prob_new_img_(self,
                           path : str , # path to raster image
                           ):

        """
        This function takes the path of S2 image, calculates the Normalized Difference Water Index (NDWI) for each image,
        and then calculate the percentage of water pixels in the image 
        Parameters:
          paths (str): A list of file paths to the LABELED TIFF images.

        Returns:
        float: the percentage of water pixels in the labeled image
        """      
     
        #calcualte NDWI
        ndwi = self._ndwi_s2_(path)

        # mask the image to get only water pixels based on threshold
        mask_with_threshold_condition = (ndwi > self.threshold[0]) & (ndwi < 0)

        # masked_ndwi = np.where(mask_with_threshold_condition, 0 , 1)
        masked_ndwi = np.where(mask_with_threshold_condition, 0 , ndwi)
        masked_ndwi = np.nan_to_num(masked_ndwi, nan=0)
        masked_ndwi = np.where(masked_ndwi==0, 0 , 1)
        
        # plt.imshow(masked_ndwi)
        # plt.title(s2_path)
        # plt.colorbar()
        # plt.show()

        #calculate precentage of water pixel out of all the pixels in the image
        # perc_water = round((np.sum(masked_ndwi) / (masked_ndwi.shape[0]*masked_ndwi.shape[1]))*100,2)
        perc_water = round(( np.count_nonzero(masked_ndwi == CONST.WATER_VALUE) / (masked_ndwi.shape[0]*masked_ndwi.shape[1]))*100,2)

        
        return masked_ndwi,perc_water
  

  def _water_perc_label_img_(self,
                             path : str , # path to labeled image
                             ):

      """
      This function takes the path of LABELED image and calculates the % of pixels labeles as water

      Parameters:
        paths (str): A list of file paths to the LABELED TIFF images.

      Returns:
      float: the percentage of water pixels in the labeled image
      """      
      
      with rasterio.open(path) as src:
          #read the image as numpy array
          arr = src.read(1)
          # printpercentage of pixels with water flag (value=1)
          perc_water = round(( np.count_nonzero(arr == CONST.WATER_VALUE) / (arr.shape[0]*arr.shape[1]))*100,2)

      return perc_water

     


In [7]:
instance = ndwi(path_to_s2_tiles = r"D:\git\flood_exercise\S2",
                path_to_labeled_tiles= r"D:\git\flood_exercise\S2_HANDLABELED")
instance.water_df
# instance.water_perc.to_csv(r'D:\git\flood_exercise\RESULTS\water_perc_Bolivia.csv')

  ndwi = (green - nir) / (green + nir)


Unnamed: 0,tile_id,water_perc,water_label_perc,path
0,417184,0.81,0.53,D:\git\flood_exercise\S2\Nigeria_417184_S2Hand...
1,1396181,57.02,55.32,D:\git\flood_exercise\S2\Mekong_1396181_S2Hand...
2,1191208,4.69,5.82,D:\git\flood_exercise\S2\Mekong_1191208_S2Hand...
3,1248200,75.63,78.04,D:\git\flood_exercise\S2\Mekong_1248200_S2Hand...
4,900498,58.69,47.28,D:\git\flood_exercise\S2\India_900498_S2Hand.tif
...,...,...,...,...
394,741178,2.67,0.55,D:\git\flood_exercise\S2\USA_741178_S2Hand.tif
395,761032,3.02,4.62,D:\git\flood_exercise\S2\USA_761032_S2Hand.tif
396,955053,5.62,13.67,D:\git\flood_exercise\S2\USA_955053_S2Hand.tif
397,986268,1.89,3.47,D:\git\flood_exercise\S2\USA_986268_S2Hand.tif
