# 01-prepare-images-for-fusion

For a given flood event, a series of Sentinel-2 and MODIS images with green, red, and near-infrared bands has been downloaded from GEE. These images should be stored in a directory that is accessible from Google Drive.

This notebook demonstrates how these images are processed to i) compute NDVI and NDWI bands, and ii) find the closest cloud-free pair of Sentinel-2 and MODIS images to the flood event. This closest cloud-free pair is used in the STARFM data fusion algorithm to generate synthetic, cloud-free, Sentinel-2-like pre- and post-flood event images.

The main function `prepare_images_for_fustion()` writes GeoTIFF files ready for data fusion, and a CSV file recording the image pairs, to a Google Drive folder.

In [None]:
# rasterio is needed for working with raster geospatial data;
# it is used below to read the downloaded GeoTIFFs and to write the NDVI/NDWI chips
!pip install rasterio

In [None]:
import datetime
import os
import pandas as pd
import rasterio
from rasterio.windows import Window
import numpy as np

def _clamp_reflectance(band):
    """Limit band values to the range [0, 10000]."""
    band = np.where(band > 0, band, 0)
    return np.where(band <= 10000, band, 10000)


def _scaled_normalised_difference(band_a, band_b, dtype):
    """Return (band_a - band_b) / (band_a + band_b) * 10000 cast to dtype."""
    # Work in float64 so that unsigned inputs cannot wrap around on the
    # subtraction and the division is a true division.
    a = np.asarray(band_a, dtype="float64")
    b = np.asarray(band_b, dtype="float64")
    with np.errstate(divide="ignore", invalid="ignore"):
        index = (a - b) / (a + b) * 10000
    # Replace 0/0 and x/0 results *before* casting: once NaN has been cast to
    # an integer dtype the value is undefined and can no longer be detected.
    index = np.nan_to_num(index, nan=0, posinf=0, neginf=0)
    return index.astype(dtype)


def _read_clamped_bands(path, band_indexes, window):
    """Read bands of one raster within window; return (bands, meta, transform)."""
    with rasterio.open(path, "r") as src:
        meta = src.meta
        # captured while the dataset is open rather than read after closing
        transform = src.transform
        bands = [
            _clamp_reflectance(src.read(band, window=window))
            for band in band_indexes
        ]
    return bands, meta, transform


def _chip_profile(meta, transform, window):
    """Build a single-band write profile for a chip covering window."""
    profile = dict(meta)  # copy so the source metadata is left untouched
    profile.update({
        "count": 1,
        "height": window.height,
        "width": window.width,
        "transform": rasterio.windows.transform(window, transform)})
    return profile


def _write_chip(path, data, profile):
    """Write a 2-D array as band 1 of a new GeoTIFF."""
    with rasterio.open(path, "w", **profile) as dst:
        dst.write(data, 1)


def _pair_date(filename, event):
    """Parse the YYYY_MM_DD date that follows '<event>_' in a pair filename."""
    date_tmp = filename.split(event + "_")[1]
    date_tmp = date_tmp.split("_clearpct")[0]
    date_tmp = date_tmp.split("_")
    return datetime.datetime(
        int(date_tmp[0]), int(date_tmp[1]), int(date_tmp[2]))


def _image_date_id(filename):
    """Return the trailing '_<id>' token of a coarse image filename."""
    return filename.split(".tif")[0].split("_")[-1]


def prepare_images_for_fustion(
    event,
    gdrive_folder,
    event_date,
    window):
    """
    Convert Sentinel-2 and MODIS images to NDVI and NDWI bands.

    For every coarse prediction image, find the closest (in days) fine-coarse
    image pair, write NDVI and NDWI chips for both, and write a CSV file
    recording which chips belong together. Nothing is written if the folder
    contains no fine-coarse pair.

    Input files are recognised by name inside ``gdrive_folder``:

    * ``low_high_pair*<event>_YYYY_MM_DD_clearpct*.tif`` - fine-coarse pair;
      bands 1-3 are read as fine green, red, NIR and bands 4-6 as coarse
      green, red, NIR.
    * ``coarse_feature*_<k>.tif`` - coarse prediction image with bands 1-3
      read as green, red, NIR; ``k`` indexes the list of day offsets from the
      event ``[-6, 6, 18, 30, 42]``.

    Band values are clamped to [0, 10000]. NDVI is (NIR - red) / (NIR + red)
    and NDWI is (green - NIR) / (green + NIR); both are scaled by 10000 and
    stored in the dtype of the source raster. Pixels where the denominator is
    zero are written as 0.

    Args:
        event (str): event name as it appears in the fine-coarse pair
            filenames; the image date is parsed from the text following
            ``<event>_``
        gdrive_folder (str): folder containing the downloaded images; all
            outputs are written to this folder
        event_date (str): date of flood event in ISO format (e.g. 2020-12-17)
        window (rasterio.windows.Window): window for the area of interest

    Returns:
        None. GeoTIFF chips (``chip_*.tif``) and CSV files
        (``cday_idx_<i>_stif_params_<k>.csv``) are written to
        ``gdrive_folder``.
    """

    # list of days from the event
    # roughly centred on an 8-day MODIS composite
    days_from_event = [-6, 6, 18, 30, 42]

    event_datetime = datetime.datetime.fromisoformat(event_date)

    # os.listdir returns entries in arbitrary order; sort so that the
    # cday_idx numbering of tied pairs is reproducible between runs
    tif_files = sorted(
        f for f in os.listdir(gdrive_folder) if f.endswith(".tif"))

    fine_coarse_pairs = [
        f for f in tif_files if f.startswith("low_high_pair")]

    # only process if there is a low-high pair
    if not fine_coarse_pairs:
        return

    coarse_images = [f for f in tif_files if f.startswith("coarse_feature")]

    # parse all names up front so a malformed filename fails before any
    # output is written
    fine_coarse_dates = [_pair_date(f, event) for f in fine_coarse_pairs]
    coarse_date_ids = [_image_date_id(f) for f in coarse_images]
    coarse_dates = [
        event_datetime + datetime.timedelta(days_from_event[int(date_id)])
        for date_id in coarse_date_ids
    ]

    out_path = os.path.join(gdrive_folder)

    for img, image_date_id, img_date in zip(
            coarse_images, coarse_date_ids, coarse_dates):
        print(img)

        # find closest fine-coarse pair image(s) to the coarse prediction image
        day_gaps = [abs((img_date - d).days) for d in fine_coarse_dates]
        closest_days = min(day_gaps)
        closest_pairs = [
            k for k, gap in enumerate(day_gaps) if gap == closest_days]

        # date in days from event date
        coarse_date = int((img_date - event_datetime).days)

        # the prediction image does not depend on the pair, so read it once
        pred_bands, pred_meta, pred_transform = _read_clamped_bands(
            os.path.join(gdrive_folder, img), (1, 2, 3), window)
        pred_green, pred_red, pred_nir = pred_bands
        pred_profile = _chip_profile(pred_meta, pred_transform, window)
        coarse_pred_ndvi = _scaled_normalised_difference(
            pred_nir, pred_red, pred_meta["dtype"])
        coarse_pred_ndwi = _scaled_normalised_difference(
            pred_green, pred_nir, pred_meta["dtype"])

        # loop here in case there are two closest images on the same date
        # from different S2 tiles; i is the index among those ties
        for i, pair_index in enumerate(closest_pairs):

            # date in days from event date
            fine_coarse_date = int(
                (fine_coarse_dates[pair_index] - event_datetime).days)

            pair_bands, pair_meta, pair_transform = _read_clamped_bands(
                os.path.join(gdrive_folder, fine_coarse_pairs[pair_index]),
                (1, 2, 3, 4, 5, 6),
                window)
            (fine_green, fine_red, fine_nir,
             coarse_green, coarse_red, coarse_nir) = pair_bands
            pair_profile = _chip_profile(pair_meta, pair_transform, window)
            pair_dtype = pair_meta["dtype"]

            pair_tag = f"{fine_coarse_date}_cday_idx_{i}.tif"
            pred_tag = f"{image_date_id}_cday_idx_{i}.tif"

            # output filename -> (array, write profile)
            chips = {
                "chip_fine_ndvi_" + pair_tag: (
                    _scaled_normalised_difference(
                        fine_nir, fine_red, pair_dtype),
                    pair_profile),
                "chip_fine_ndwi_" + pair_tag: (
                    _scaled_normalised_difference(
                        fine_green, fine_nir, pair_dtype),
                    pair_profile),
                "chip_coarse_ndvi_" + pair_tag: (
                    _scaled_normalised_difference(
                        coarse_nir, coarse_red, pair_dtype),
                    pair_profile),
                "chip_coarse_ndwi_" + pair_tag: (
                    _scaled_normalised_difference(
                        coarse_green, coarse_nir, pair_dtype),
                    pair_profile),
                "chip_coarse_pred_ndvi_" + pred_tag: (
                    coarse_pred_ndvi, pred_profile),
                "chip_coarse_pred_ndwi_" + pred_tag: (
                    coarse_pred_ndwi, pred_profile),
            }
            for chip_name, (chip_data, chip_profile) in chips.items():
                _write_chip(
                    os.path.join(out_path, chip_name), chip_data, chip_profile)

            params = [
                "date_fine_coarse",
                "date_coarse_pred",
                "fine_ndvi",
                "fine_ndwi",
                "coarse_ndvi",
                "coarse_ndwi",
                "coarse_pred_ndvi",
                "coarse_pred_ndwi",
                "starfm_synth_ndvi",
                "starfm_synth_ndwi",
                "fitfc_synth_ndvi",
                "fitfc_synth_ndwi"
            ]

            # the synthetic image names are only recorded here; this function
            # does not create those files
            values = [
                fine_coarse_date,
                coarse_date,
                *chips,
                f"starfm_synth_ndvi_{pred_tag}",
                f"starfm_synth_ndwi_{pred_tag}",
                f"fitfc_synth_ndvi_{pred_tag}",
                f"fitfc_synth_ndwi_{pred_tag}"
            ]

            tmp_df = pd.DataFrame([values], columns=params)
            tmp_df.to_csv(
                os.path.join(
                    gdrive_folder,
                    f"cday_idx_{i}_stif_params_{image_date_id}.csv"),
                index=False)


# Correctly spelled alias; the original name is kept for existing callers.
prepare_images_for_fusion = prepare_images_for_fustion


In [None]:
# Mount Google Drive at /content/drive so that the folder holding the
# downloaded images can be read from, and written to, in this Colab session.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# TC Yasa
# The folder path is built relative to the current working directory.
# NOTE(review): this assumes the working directory is the one in which Drive
# was mounted (i.e. /content) - confirm before running.
gdrive_folder = os.path.join(os.getcwd(), "drive", "MyDrive", "tc-yasa-aoi2")
# "fiji_yasa" must match the event name embedded in the low_high_pair
# filenames, because the image dates are parsed from the text that follows it.
# The window restricts processing to a 512 x 512 pixel chip at offset (0, 0).
prepare_images_for_fustion(
    event="fiji_yasa",
    gdrive_folder=gdrive_folder,
    event_date="2020-12-17",
    window=Window(0, 0, 512, 512))