In [3]:
from dask_gateway import GatewayCluster
import dask.distributed
import dask.utils
import dask.array
import dask
import planetary_computer
from pystac_client import Client
import odc.stac
import numpy
import xarray
import rasterio
import rasterio.enums
import gc
import math
import os
import json
from azure.storage.blob import BlobClient

In [6]:
def get_img_metadata(img_file):
    img_data_obj = rasterio.open(img_file)
    img_bounds = img_data_obj.bounds
    img_bbox = [img_bounds.left, img_bounds.bottom, img_bounds.right, img_bounds.top]
    img_x_res, img_y_res  = img_data_obj.res
    if img_y_res > 0:
        img_y_res = img_y_res * (-1)
    img_data_obj = None
    return img_bbox, img_x_res, img_y_res

def get_img_band_array(img_file, band=1):
    img_data_obj = rasterio.open(img_file)
    img_arr = img_data_obj.read(band)
    img_data_obj = None
    return img_arr

In [7]:
def expand_ls_qa_pixel_msks(scn_xa, qa_pxl_msk="qa_pixel"):
    scn_lcl_xa = scn_xa.copy()
    unq_img_vals = numpy.unique(numpy.squeeze(scn_xa[qa_pxl_msk].values))

    fill_da = scn_xa[qa_pxl_msk].copy()
    fill_da[...] = 0
    fill_da = fill_da.astype(numpy.uint8)

    dilated_clouds_da = scn_xa[qa_pxl_msk].copy()
    dilated_clouds_da[...] = 0
    dilated_clouds_da = dilated_clouds_da.astype(numpy.uint8)

    cirrus_da = scn_xa[qa_pxl_msk].copy()
    cirrus_da[...] = 0
    cirrus_da = cirrus_da.astype(numpy.uint8)

    clouds_da = scn_xa[qa_pxl_msk].copy()
    clouds_da[...] = 0
    clouds_da = clouds_da.astype(numpy.uint8)

    cloud_shadows_da = scn_xa[qa_pxl_msk].copy()
    cloud_shadows_da[...] = 0
    cloud_shadows_da = cloud_shadows_da.astype(numpy.uint8)

    snow_da = scn_xa[qa_pxl_msk].copy()
    snow_da[...] = 0
    snow_da = snow_da.astype(numpy.uint8)

    clear_da = scn_xa[qa_pxl_msk].copy()
    clear_da[...] = 0
    clear_da = clear_da.astype(numpy.uint8)

    water_da = scn_xa[qa_pxl_msk].copy()
    water_da[...] = 0
    water_da = water_da.astype(numpy.uint8)

    all_clouds_da = scn_xa[qa_pxl_msk].copy()
    all_clouds_da[...] = 0
    all_clouds_da = all_clouds_da.astype(numpy.uint8)

    for val in unq_img_vals:
        val_bin = numpy.flip(
            numpy.unpackbits(numpy.flip(numpy.array([val]).view(numpy.uint8)))
        )

        if val_bin[0] == 1:
            fill_da.values[scn_xa[qa_pxl_msk].values == val] = 1
        if val_bin[1] == 1:
            dilated_clouds_da.values[scn_xa[qa_pxl_msk].values == val] = 1
        if val_bin[2] == 1:
            cirrus_da.values[scn_xa[qa_pxl_msk].values == val] = 1
        if val_bin[3] == 1:
            clouds_da.values[scn_xa[qa_pxl_msk].values == val] = 1
        if val_bin[4] == 1:
            cloud_shadows_da.values[scn_xa[qa_pxl_msk].values == val] = 1
        if val_bin[5] == 1:
            snow_da.values[scn_xa[qa_pxl_msk].values == val] = 1
        if val_bin[6] == 1:
            clear_da.values[scn_xa[qa_pxl_msk].values == val] = 1
        if val_bin[7] == 1:
            water_da.values[scn_xa[qa_pxl_msk].values == val] = 1
        if (
            (val_bin[1] == 1)
            or (val_bin[2] == 1)
            or (val_bin[3] == 1)
            or (val_bin[4] == 1)
        ):
            all_clouds_da.values[scn_xa[qa_pxl_msk].values == val] = 1

    scn_lcl_xa["FILL"] = fill_da
    scn_lcl_xa["DILATED_CLOUDS"] = dilated_clouds_da
    scn_lcl_xa["CIRRUS"] = cirrus_da
    scn_lcl_xa["CLOUDS"] = clouds_da
    scn_lcl_xa["CLOUD_SHADOWS"] = cloud_shadows_da
    scn_lcl_xa["SNOW"] = snow_da
    scn_lcl_xa["CLEAR"] = clear_da
    scn_lcl_xa["WATER"] = water_da
    scn_lcl_xa["ALL_CLOUDS"] = all_clouds_da
    return scn_lcl_xa

In [32]:
def apply_cloud_msk(scns_xa, bands, clouds_var="ALL_CLOUDS", fill_var="FILL"):
    scns_lcl_xa = scns_xa.copy()
    for band in bands:
        scns_lcl_xa[band].values[scns_lcl_xa[clouds_var].values == 1] = 0.0
        scns_lcl_xa[band].values[scns_lcl_xa[fill_var].values == 1] = 0.0
    return scns_lcl_xa

In [2]:
def read_json_to_dict(input_file: str):
    """
    Read a JSON file. Will return a list or dict.

    :param input_file: input JSON file path.

    """
    with open(input_file) as f:
        data = json.load(f)
    return data

In [None]:
cluster = GatewayCluster()  # Creates the Dask Scheduler. Might take a minute.
cluster.adapt(minimum=4, maximum=24)
print(cluster.dashboard_link)

client = dask.distributed.Client(cluster, timeout=10)
odc.stac.configure_rio(cloud_defaults=True, client=client)

In [9]:
catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")

In [10]:
# Date range of the ROI
time_range = "2020-01-01/2020-02-28"
date_str = "2020"
# Bands to be read
bands = ["red", "nir08", "swir16", "qa_pixel"]

In [13]:
tiles_lut_file = "/home/jovyan/gmw_v4_change_layers/04_create_base_chng_rngs/06_create_2020_baseline/03_get_tile_lst/vld_tile_lst.json"
tile_img_dir = "/home/jovyan/gmw_v4_change_layers/99_plantary_comp_dev/99_combined_coastal_msk_roi/combined_coastal_msk_roi/"

tiles_lut = read_json_to_dict(tiles_lut_file)
tiles_lut = {"GMW_N00E117": tiles_lut["GMW_N00E117"]}
tiles_name_lst = list(tiles_lut.keys())

n_tiles = len(tiles_name_lst)
print(f"Number of tiles: {n_tiles}")

Number of tiles: 1


In [14]:
gmw_tile = tiles_name_lst[0]
print(f"Processing {gmw_tile}")
gmw_tile_img = os.path.join(tile_img_dir, f"{gmw_tile}_comb_coastal_roi_msk.kea")
tilename = gmw_tile.replace("GMW_", "")



Processing GMW_N00E117


In [15]:
# Get the bbox and image resolution of the input image.
bbox, img_x_res, img_y_res = get_img_metadata(gmw_tile_img)

# Read the GMW Coastal extent into a numpy array
gmw_msk_arr = get_img_band_array(gmw_tile_img)

In [16]:
gmw_msk_arr.shape

(4500, 4500)

In [17]:
search = catalog.search(collections=["landsat-c2-l2"], bbox=bbox, datetime=time_range, query={"eo:cloud_cover": {"lt": 50}},)
items = search.get_all_items()
n_items = len(items)
print(f"\tN Scenes: {n_items}")

	N Scenes: 6




In [18]:
signed_items = [planetary_computer.sign(item) for item in items]

In [19]:
# Read the data into dask xarray structure
ls_scn_xa = odc.stac.stac_load(
    signed_items,
    bands=bands,
    groupby="solar_day",
    #dtype=numpy.uint16,
    #chunks={"time":24, "latitude": 1024, "longitude": 1024},
    bbox=bbox,
    crs="EPSG:4326",
    resolution=img_x_res
)

In [20]:
ls_scn_xa

In [26]:
ls_scn_xa = ls_scn_xa.assign({"gmw_msk": (("latitude", "longitude"),gmw_msk_arr)})

In [27]:
ls_scn_xa

In [28]:
ls_scn_qa_xa = ls_scn_xa.map_blocks(expand_ls_qa_pixel_msks)
ls_scn_qa_xa.coords["spatial_ref"] = ls_scn_xa.coords["spatial_ref"]



In [29]:
ls_scn_qa_xa

In [33]:
ls_scn_qa_mskd_xa = ls_scn_qa_xa.map_blocks(apply_cloud_msk, kwargs={"bands": ["red", "nir08", "swir16"]})
ls_scn_qa_mskd_xa.coords["spatial_ref"] = ls_scn_xa.coords["spatial_ref"]

In [34]:
ls_scn_qa_mskd_xa

In [35]:
ls_scn_qa_mskd_xa = ls_scn_qa_mskd_xa.drop_vars(["qa_pixel", "FILL", "DILATED_CLOUDS", "CIRRUS", "CLOUDS", "CLOUD_SHADOWS", "SNOW", "CLEAR", "WATER", "ALL_CLOUDS"])


In [36]:
ls_scn_qa_mskd_xa

In [37]:
scns_lcl_tmp_df = ls_scn_qa_mskd_xa.to_dataframe().reset_index()

In [39]:
scns_lcl_tmp_df

Unnamed: 0,latitude,longitude,time,spatial_ref,red,nir08,swir16,gmw_msk
0,-0.000111,117.000110,2020-01-22 02:07:26.125786,4326,8081,17078,11146,0
1,-0.000111,117.000110,2020-01-23 02:21:33.101792,4326,8023,17685,11318,0
2,-0.000111,117.000110,2020-02-23 02:05:47.847947,4326,0,0,0,0
3,-0.000111,117.000110,2020-02-24 02:21:24.397864,4326,8283,17848,11692,0
4,-0.000111,117.000332,2020-01-22 02:07:26.125786,4326,8154,17385,11146,0
...,...,...,...,...,...,...,...,...
80999995,-0.999889,117.999665,2020-02-24 02:21:24.397864,4326,7538,7780,8259,0
80999996,-0.999889,117.999888,2020-01-22 02:07:26.125786,4326,0,0,0,0
80999997,-0.999889,117.999888,2020-01-23 02:21:33.101792,4326,5091,6489,7974,0
80999998,-0.999889,117.999888,2020-02-23 02:05:47.847947,4326,0,0,0,0


In [40]:
scns_lcl_vld_tmp_df = scns_lcl_tmp_df.drop(scns_lcl_tmp_df[scns_lcl_tmp_df["gmw_msk"]==0].index)

In [41]:
scns_lcl_vld_tmp_df

Unnamed: 0,latitude,longitude,time,spatial_ref,red,nir08,swir16,gmw_msk
8660,-0.000111,117.481221,2020-01-22 02:07:26.125786,4326,0,0,0,1
8661,-0.000111,117.481221,2020-01-23 02:21:33.101792,4326,0,0,0,1
8662,-0.000111,117.481221,2020-02-23 02:05:47.847947,4326,0,0,0,1
8663,-0.000111,117.481221,2020-02-24 02:21:24.397864,4326,10690,23076,17197,1
8664,-0.000111,117.481443,2020-01-22 02:07:26.125786,4326,0,0,0,1
...,...,...,...,...,...,...,...,...
80984635,-0.999889,117.146332,2020-02-24 02:21:24.397864,4326,0,0,0,1
80984636,-0.999889,117.146554,2020-01-22 02:07:26.125786,4326,0,0,0,1
80984637,-0.999889,117.146554,2020-01-23 02:21:33.101792,4326,9092,7194,7591,1
80984638,-0.999889,117.146554,2020-02-23 02:05:47.847947,4326,0,0,0,1


In [44]:
scns_lcl_vld_tmp_df["NDVI"] = scns_lcl_vld_tmp_df["nir08"] / scns_lcl_vld_tmp_df["red"]

In [46]:
scns_lcl_vld_tmp_df

Unnamed: 0,latitude,longitude,time,spatial_ref,red,nir08,swir16,gmw_msk,NDVI
8660,-0.000111,117.481221,2020-01-22 02:07:26.125786,4326,0,0,0,1,
8661,-0.000111,117.481221,2020-01-23 02:21:33.101792,4326,0,0,0,1,
8662,-0.000111,117.481221,2020-02-23 02:05:47.847947,4326,0,0,0,1,
8663,-0.000111,117.481221,2020-02-24 02:21:24.397864,4326,10690,23076,17197,1,2.158653
8664,-0.000111,117.481443,2020-01-22 02:07:26.125786,4326,0,0,0,1,
...,...,...,...,...,...,...,...,...,...
80984635,-0.999889,117.146332,2020-02-24 02:21:24.397864,4326,0,0,0,1,
80984636,-0.999889,117.146554,2020-01-22 02:07:26.125786,4326,0,0,0,1,
80984637,-0.999889,117.146554,2020-01-23 02:21:33.101792,4326,9092,7194,7591,1,0.791245
80984638,-0.999889,117.146554,2020-02-23 02:05:47.847947,4326,0,0,0,1,


In [45]:
scns_lcl_tmp_df["NDVI"] = scns_lcl_vld_tmp_df["NDVI"]

In [47]:
scns_lcl_tmp_df

Unnamed: 0,latitude,longitude,time,spatial_ref,red,nir08,swir16,gmw_msk,NDVI
0,-0.000111,117.000110,2020-01-22 02:07:26.125786,4326,8081,17078,11146,0,
1,-0.000111,117.000110,2020-01-23 02:21:33.101792,4326,8023,17685,11318,0,
2,-0.000111,117.000110,2020-02-23 02:05:47.847947,4326,0,0,0,0,
3,-0.000111,117.000110,2020-02-24 02:21:24.397864,4326,8283,17848,11692,0,
4,-0.000111,117.000332,2020-01-22 02:07:26.125786,4326,8154,17385,11146,0,
...,...,...,...,...,...,...,...,...,...
80999995,-0.999889,117.999665,2020-02-24 02:21:24.397864,4326,7538,7780,8259,0,
80999996,-0.999889,117.999888,2020-01-22 02:07:26.125786,4326,0,0,0,0,
80999997,-0.999889,117.999888,2020-01-23 02:21:33.101792,4326,5091,6489,7974,0,
80999998,-0.999889,117.999888,2020-02-23 02:05:47.847947,4326,0,0,0,0,


In [49]:
scns_out_tmp_xa = xarray.Dataset.from_dataframe(scns_lcl_tmp_df.set_index(['latitude', 'longitude', 'time']))

In [50]:
scns_out_tmp_xa

In [51]:
def apply_flatten(scns_xa, bin_msk_var, out_var, **kwargs):
    scns_lcl_df = scns_xa.to_dataframe().reset_index()

    scns_lcl_vld_df = scns_lcl_df.drop(scns_lcl_df[scns_lcl_df[bin_msk_var]==0].index)

    scns_lcl_vld_df[out_var] = (scns_lcl_vld_df["nir08"] - scns_lcl_vld_df["red"])/(scns_lcl_vld_df["nir08"] + scns_lcl_vld_df["red"])
    scns_lcl_df[out_var] = scns_lcl_vld_df[out_var]
    
    scns_out_xa = xarray.Dataset.from_dataframe(scns_lcl_df.set_index(['latitude', 'longitude', 'time']))
    return scns_out_xa

In [52]:
ls_scn_ndvi_xa = ls_scn_qa_mskd_xa.map_blocks(apply_flatten, kwargs={"bin_msk_var": "gmw_msk", "out_var":"NDVI_calc"})


In [53]:
ls_scn_ndvi_xa

In [None]:











ls_scn_qa_mskd_xa = ls_scn_qa_mskd_xa.where(ls_scn_qa_mskd_xa > 1)
ls_scn_qa_mskd_xa = ls_scn_qa_mskd_xa.where(gmw_msk_arr == 1)

ls_scn_med_xa = ls_scn_qa_mskd_xa.median(dim="time", skipna=True).compute()

ls_scn_med_xa = ls_scn_med_xa.fillna(0)

img_shp = gmw_msk_arr.shape
out_img_transform = rasterio.transform.Affine(img_x_res, 0.0, bbox[0], 0.0, img_y_res, bbox[3])

"""
# Create the output image file.
with rasterio.open(out_lcl_img_file,
                    'w',
                    driver='COG',
                    height=img_shp[0],
                    width=img_shp[1],
                    count=4,
                    nodata=0,
                    dtype=numpy.uint16,
                    crs='epsg:4326',
                    transform=out_img_transform,
                ) as out_img_dataset:
    img_profile = out_img_dataset.profile

    # Write output array to the image file
    out_img_dataset.write(ls_scn_med_xa["green"].values, 1)
    out_img_dataset.set_band_description(1, "green")

    out_img_dataset.write(ls_scn_med_xa["red"].values, 2)
    out_img_dataset.set_band_description(2, "red")

    out_img_dataset.write(ls_scn_med_xa["nir08"].values, 3)
    out_img_dataset.set_band_description(3, "nir08")

    out_img_dataset.write(ls_scn_med_xa["swir16"].values, 4)
    out_img_dataset.set_band_description(4, "swir16")
"""
