# 03-prepare-image-stacks

`prepare_image_stacks()` creates a series of NumPy arrays and saves them to files in a Google Drive folder. Each array contains bands that are used as
predictors of flood extent (combinations of Sentinel-1-derived predictors, NDVI and NDWI from synthetic Sentinel-2-like images, and topographic variables). Flood and water masks are also generated. These data are used as inputs and labels for training and testing machine learning models that segment flooded extents.

In [None]:
!pip install rasterio

In [None]:
import os
import rasterio
import numpy as np
import datetime
from rasterio.windows import Window

In [None]:
# Mount Google Drive at /content/drive so that later cells can read the input
# GeoTIFFs from, and save the NumPy stacks to, a Drive folder.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
def _read_band(src, band, window, scale=None):
    """Read one band from an open rasterio dataset and zero out non-finite values.

    Args:
        src: An open rasterio dataset.
        band: 1-based index of the band to read.
        window: Window passed straight through to ``src.read``.
        scale: Optional divisor applied to the raw values before cleaning.

    Returns:
        The band as a NumPy array with NaN, +inf and -inf replaced by 0.
    """
    data = src.read(band, window=window)
    if scale is not None:
        data = data / scale
    return np.nan_to_num(data, posinf=0, neginf=0)


def _relative_change(post, reference):
    """Return the relative change from *reference* to *post*, percentile-clipped.

    Uses the same relative-difference formula as the (disabled) Sentinel-1
    predictors, which the original code attributes to ESRI change detection:
    https://pro.arcgis.com/en/pro-app/latest/help/analysis/image-analyst/pixel-value-change-detection.htm

    The result is clipped to its own 5th-95th percentile range.
    """
    # Pixels where the denominator is 0 produce NaN/inf; they are set to 0 on
    # the next line, so the NumPy warnings for them are only noise.
    with np.errstate(divide="ignore", invalid="ignore"):
        change = (post - reference) / np.maximum(post, reference)
    change = np.nan_to_num(change, posinf=0, neginf=0)
    lower_ptile, upper_ptile = np.percentile(change, [5, 95])
    return np.clip(change, lower_ptile, upper_ptile)


def _load_starfm_index(gdrive_folder, index_name, cday, window):
    """Load the two synthetic images of one index for one ``cday``.

    Reads ``starfm_synth_{index_name}_{idx}_cday_idx_{cday}.tif`` for
    ``idx`` 0 and 1, scales band 1 by 1/10000 and clips it to [-1, 1].

    Returns:
        A tuple ``(post, diff)`` where ``post`` is the idx-1 image and
        ``diff`` is its relative change with respect to the idx-0 image.
        NOTE(review): idx 0 is presumably the pre-event image - confirm.
    """
    images = []
    for idx in range(2):
        path = os.path.join(
            gdrive_folder, f"starfm_synth_{index_name}_{idx}_cday_idx_{cday}.tif")
        with rasterio.open(path, "r") as src:
            # Read with the same window as the topographic bands so that all
            # predictors share one shape when stacked.
            # NOTE(review): assumes these rasters share the topo raster's
            # grid - confirm.
            image = _read_band(src, 1, window, scale=10000)
        images.append(np.clip(image, -1, 1))

    reference, post = images
    return post, _relative_change(post, reference)


def _save_stack(bands, out_path):
    """Stack 2D bands into a (rows, cols, bands) array and save it as .npy."""
    stack = np.stack(bands, axis=0)
    stack = np.moveaxis(stack, 0, 2)
    stack = np.nan_to_num(stack, posinf=0, neginf=0)
    np.save(out_path, stack)


def prepare_image_stacks(gdrive_folder, event_name, window):
    """
    Prepares GeoTIFF files in a Google Drive folder as NumPy arrays
    that can be used for machine learning model development.

    For every ``cday`` index found in the folder, two arrays are saved back
    to ``gdrive_folder``:

    * ``starfm_preds_cday_{cday}.npy`` - dem, slope, aspect, NDVI, NDVI
      difference, NDWI, NDWI difference.
    * ``starfm_preds_post_flood_cday_{cday}.npy`` - dem, slope, aspect,
      NDVI, NDWI.

    The number of ``cday`` indices is the number of file names in the folder
    that start with ``starfm_synth_ndvi_0``.

    Note - S1 predictors are commented out.

    Args:
        gdrive_folder: Folder holding the input GeoTIFFs; outputs are written
            to the same folder.
        event_name: Event identifier used in the input file names
            (``topo_gt_{event_name}.tif``).
        window: ``rasterio.windows.Window`` applied to every raster read. It
            is passed to rasterio unchanged, so ``None`` reads the full band.

    Returns:
        None. The results are written to disk.
    """

    files = os.listdir(gdrive_folder)
    cday_check = sum(1 for f in files if f.startswith("starfm_synth_ndvi_0"))

    if cday_check == 0:
        # No synthetic images to process, so there is no need to open the
        # static rasters at all.
        return

    # S1 images (disabled). If re-enabled, also enable the jrc_water read below.
    # s1_path = os.path.join(gdrive_folder, f"s1_{event_name}.tif")

    # with rasterio.open(s1_path, "r") as src:
    #     vv_post = src.read(1, window=window) / 10000
    #     vv_post = np.nan_to_num(
    #         vv_post, posinf=0, neginf=0)

    #     vh_post = src.read(2, window=window) / 10000
    #     vh_post = np.nan_to_num(
    #         vh_post, posinf=0, neginf=0)

    #     vvvh_post = src.read(3, window=window) / 10000
    #     vvvh_post = np.nan_to_num(
    #         vvvh_post, posinf=0, neginf=0)
    #     upper_ptile = np.percentile(vvvh_post, 95)
    #     lower_ptile = np.percentile(vvvh_post, 5)
    #     vvvh_post = np.clip(
    #         vvvh_post, lower_ptile, upper_ptile)

    #     vv_pre = src.read(4, window=window) / 10000
    #     vv_pre = np.nan_to_num(vv_pre, posinf=0, neginf=0)

    #     vh_pre = src.read(5, window=window) / 10000
    #     vh_pre = np.nan_to_num(vh_pre, posinf=0, neginf=0)

    #     vvvh_pre = src.read(6, window=window) / 10000
    #     vvvh_pre = np.nan_to_num(
    #         vvvh_pre, posinf=0, neginf=0)
    #     upper_ptile = np.percentile(vvvh_pre, 95)
    #     lower_ptile = np.percentile(vvvh_pre, 5)
    #     vvvh_pre = np.clip(
    #         vvvh_pre, lower_ptile, upper_ptile)

    #     # make diff images - relative difference based on ESRI change detection
    #     # https://pro.arcgis.com/en/pro-app/latest/help/analysis/image-analyst/pixel-value-change-detection.htm
    #     vv_diff = (vv_post - vv_pre) / \
    #         np.maximum(vv_post, vv_pre)
    #     upper_ptile = np.percentile(vv_diff, 95)
    #     lower_ptile = np.percentile(vv_diff, 5)
    #     vv_diff = np.clip(
    #         vv_diff, lower_ptile, upper_ptile)

    #     vh_diff = (vh_post - vh_pre) / \
    #         np.maximum(vh_post, vh_pre)
    #     upper_ptile = np.percentile(vh_diff, 95)
    #     lower_ptile = np.percentile(vh_diff, 5)
    #     vh_diff = np.clip(
    #         vh_diff, lower_ptile, upper_ptile)

    #     vvvh_diff = (vvvh_post - vvvh_pre) / \
    #         np.maximum(vvvh_post, vvvh_pre)
    #     upper_ptile = np.percentile(vvvh_diff, 95)
    #     lower_ptile = np.percentile(vvvh_diff, 5)
    #     vvvh_diff = np.clip(
    #         vvvh_diff, lower_ptile, upper_ptile)

    # process static files - these do not depend on cday, so they are read
    # once, outside the loop (band 1 of the topo raster is not read here)
    gt_topo_path = os.path.join(gdrive_folder, f"topo_gt_{event_name}.tif")

    with rasterio.open(gt_topo_path, "r") as src:
        dem = _read_band(src, 2, window, scale=10000)
        slope = _read_band(src, 3, window, scale=10000)
        aspect = _read_band(src, 4, window, scale=10000)
        # Only used by the disabled "starfm, s1, and JRC water" stack below.
        # jrc_water = _read_band(src, 5, window)

    for cday in range(cday_check):
        # starfm ndvi and ndwi
        ndvi_post_1, ndvi_diff_post_1 = _load_starfm_index(
            gdrive_folder, "ndvi", cday, window)
        ndwi_post_1, ndwi_diff_post_1 = _load_starfm_index(
            gdrive_folder, "ndwi", cday, window)

        # starfm, s1, and JRC water
        # starfm_s1_preds_water = np.stack(
        #     [vv_post, vh_post, vvvh_post, vv_diff, vh_diff, vvvh_diff, dem, slope, aspect, ndvi_post_1, ndvi_diff_post_1, ndwi_post_1, ndwi_diff_post_1, jrc_water], axis=0)
        # starfm_s1_preds_water = np.moveaxis(starfm_s1_preds_water, 0, 2)
        # starfm_s1_preds_water = np.nan_to_num(
        #     starfm_s1_preds_water, posinf=0, neginf=0)
        # np.save(os.path.join(gdrive_folder, f"starfm_s1_preds_jrc_water_cday_{cday}.npy"), starfm_s1_preds_water)

        # # starfm s1
        # starfm_s1_preds = np.stack(
        #     [vv_post, vh_post, vvvh_post, vv_diff, vh_diff, vvvh_diff, dem, slope, aspect, ndvi_post_1, ndvi_diff_post_1, ndwi_post_1, ndwi_diff_post_1], axis=0)
        # starfm_s1_preds = np.moveaxis(starfm_s1_preds, 0, 2)
        # starfm_s1_preds = np.nan_to_num(
        #     starfm_s1_preds, posinf=0, neginf=0)
        # np.save(os.path.join(
        #     gdrive_folder, f"starfm_s1_preds_cday_{cday}.npy"), starfm_s1_preds)

        # starfm and no s1
        _save_stack(
            [dem, slope, aspect, ndvi_post_1, ndvi_diff_post_1, ndwi_post_1, ndwi_diff_post_1],
            os.path.join(gdrive_folder, f"starfm_preds_cday_{cday}.npy"))

        # # post flood only
        # starfm_s1_preds = np.stack(
        #     [vv_post, vh_post, vvvh_post, dem, slope, aspect, ndvi_post_1, ndwi_post_1], axis=0)
        # starfm_s1_preds = np.moveaxis(starfm_s1_preds, 0, 2)
        # starfm_s1_preds = np.nan_to_num(
        #     starfm_s1_preds, posinf=0, neginf=0)
        # np.save(os.path.join(
        #     gdrive_folder, f"starfm_s1_preds_post_flood_cday_{cday}.npy"), starfm_s1_preds)

        # starfm and no s1, post flood only
        _save_stack(
            [dem, slope, aspect, ndvi_post_1, ndwi_post_1],
            os.path.join(gdrive_folder, f"starfm_preds_post_flood_cday_{cday}.npy"))


In [None]:
# TC Yasa: build the predictor stacks for the "tc-yasa-aoi2" Drive folder.
# The folder is resolved relative to the current working directory.
event_name = "fiji_yasa"
window = Window(0, 0, 512, 512)
gdrive_folder = os.path.join(
    os.getcwd(), "drive", "MyDrive", "tc-yasa-aoi2"
)
prepare_image_stacks(
    gdrive_folder=gdrive_folder,
    event_name=event_name,
    window=window,
)