In [2]:
# -*- coding: utf-8 -*-
"""
@author: Etienne Kras
"""

# generic imports
import sys
import os
import numpy as np
import geopandas as gpd
import time
import geemap
import geojson
import ee
import rioxarray as rxr
import matplotlib.pyplot as plt
ee.Initialize()

# specific imports
from typing import Any, Dict, List, Optional
from geojson import Feature, FeatureCollection, dump
from shapely.geometry import Polygon, MultiPolygon, shape
from dateutil.relativedelta import *
from google.cloud import storage
from logging import Logger, getLogger
from googleapiclient.discovery import build
from re import sub
from ctypes import ArgumentError
from functools import partial
from dateutil.parser import parse

# custom functionality import without requirement to pip install package
# NOTE: machine-specific location; point this to your own local clone of ee-packages-py
local_path = r"C:\Users\kras\Documents\GitHub\ee-packages-py"  # path to local GitHub clone
if local_path not in sys.path:  # keep sys.path free of duplicates when this cell is re-run
    sys.path.append(local_path)
from eepackages.applications.bathymetry import Bathymetry
from eepackages import tiler

# module-level logger for this notebook
logger: Logger = getLogger(__name__)


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [3]:
# TODO: validate the output on in-situ data

# Project specific toggles

In [4]:
# acknowledgements & code references:
# https://github.com/openearth/eo-bathymetry/
# https://github.com/openearth/eo-bathymetry-functions/
# https://github.com/gee-community/ee-packages-py

In [5]:
# Workflow visualization of the API access scheme:
# https://github.com/openearth/eo-bathymetry/blob/master/notebooks/rws-bathymetry/acces_api.pdf

# --- project toggles ---
# main local folder
main_fol = r"p:\11209821-cmems-global-sdb"
# Google Cloud Storage bucket used to store files in the cloud
bucket = "cmems-sdb"
# Cloud Storage credential key (leave as "" to skip setting the credentials below)
credential_file = r"p:\11209821-cmems-global-sdb\00_miscellaneous\KEYS\bathymetry-543b622ddce7.json"
# output folder of the overall project, relative to main_fol
output_fol = r"01_intertidal\02_data\01_proxy"
# name of the project AoI
project_name = "AOI_GER_WaddenSea"
# 1: draw the AoI, 0: load it
draw_AoI = 0

# --- composite image toggles ---
# processing mode, either "intertidal" or "subtidal"
mode = "intertidal"
# first and last date of the composites
start_date = "2021-01-01"
stop_date = "2022-01-01"
# composite interval and composite length [months]
compo_int = 12
compo_len = 12
# output resolution of the image from download proxies [m]
scale = 30
# upscaled resolution for the image [m]
upscale = 100
# output projection of the image
crs = "EPSG:4326"

# --- tiling options ---
# zoom levels to be inspected
zoomed_list = [9, 10, 11]
# index into zoomed_list of the chosen tile level (inspect the map to choose it accordingly)
# original notes: "z9 too big for in memory computations" and
# "see https://www.openearth.nl/rws-bathymetry/2019.html; Z9 is optimal size.."
sel_tile = 0

# expose the Google credentials to the client libraries, if a key file is specified
if credential_file != "":
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(credential_file)

# Post-processing the API proxies

In [6]:
# set scale factor and image paths
scale_factor = upscale / scale # new resolution / scale of download_proxies
proxy_fol = os.path.join(main_fol, output_fol, mode) # folder holding the downloaded proxy GeoTIFFs
rsp_rpj_fol = os.path.join(proxy_fol, "rsp_rpj") # resampled & reprojected GeoTIFFs
netcdf_fol = os.path.join(rsp_rpj_fol, "netcdf") # NetCDF copies of the resampled GeoTIFFs
res_tag = f"_{upscale:g}m" # file name suffix carrying the upscaled resolution ("_100m" for upscale = 100)

# make dedicated directories for the output (creating netcdf_fol also creates rsp_rpj_fol)
os.makedirs(netcdf_fol, exist_ok=True)

# remove all files present in the folder before running the below (else deleting errors)
# for file in os.listdir(rsp_rpj_fol):
#     if file.endswith(".tif"):
#         os.remove(os.path.join(rsp_rpj_fol, file))
# for file in os.listdir(netcdf_fol):
#     if file.endswith(".nc"):
#         os.remove(os.path.join(netcdf_fol, file))

# loop over the files and reproject, resample & export to NetCDF (the latter cant be done in GEE)
# TODO: mind the relative to MSL!
for file in os.listdir(proxy_fol):
    if file.endswith(".tif"):
        tif_out = os.path.join(rsp_rpj_fol, file.replace(".tif", res_tag + ".tif"))
        nc_out = os.path.join(netcdf_fol, file.replace(".tif", res_tag + ".nc"))

        # open the raster file (note CRS is EPSG:4326 by default, standard S2 & L8 crs);
        # the context manager closes the file handle again so the file is not kept locked
        with rxr.open_rasterio(os.path.join(proxy_fol, file), masked=True) as xds:
            # target grid: original number of rows / columns divided by the scale factor
            new_shape = (int(xds.rio.height / scale_factor), int(xds.rio.width / scale_factor))
            rsp_rpj = xds.rio.reproject(crs, shape=new_shape) # reproject to WGS84 and resample to scale factor
            #rsp_rpj.rio.resolution()
            rsp_rpj.rio.to_raster(tif_out, driver="GTiff", compress="LZW") # save the raster file
            #rsp_rpj.plot()

        # re-open the GeoTIFF that was just written and store the same data as NetCDF
        with rxr.open_rasterio(tif_out, masked=True) as xds_rsp_rpj:
            xds_rsp_rpj.to_netcdf(nc_out, mode="w", format='NetCDF4') # save the raster file

        # Open the GeoTIFF and NetCDF files
        #data_tif = rxr.open_rasterio(os.path.join(main_fol, output_fol, mode, "rsp_rpj", file.replace(".tif", "_100m.tif")))
        #data_nc = rxr.open_rasterio(os.path.join(main_fol, output_fol, mode, "rsp_rpj", "netcdf", file.replace(".tif", "_100m.nc")))

        # print the datasets
        #data_tif
        #data_nc

        # replace the fill value with NaN
        #data_nc_masked = data_nc.where(data_nc.values != data_nc.attrs["_FillValue"])  

        # test if equal data arrays (where matching nan's are true)
        #np.array_equal(data_nc_masked, data_tif, equal_nan=True)

        # plot the data
        #data_nc.plot()
        #data_nc_masked.plot()
        #data_tif.plot()


# Statistics

In [365]:
# to derive statistics (calculate areas (km2) and percentage coverage (%) for the test cases - 10 in total)
#V 1 use the global optical feasibility area (to be improved / replaced by intertidal feasibility area)
#V - map this on the AoI to have 10 cases
#V 2 running the automated processing and cleaning procedures on the 10 sites (masking, etc)
#V - calculate the number of non-nan pixels of the tiled AoI output (xds_cm, after clipping and masking) in the optical feasibility area of the AoI
#V - determine the percentage by dividing by the total number of pixels in the optical feasibility area of the AoI (out_image)
#V - calculate the km2 in the 10 cases studies and the total km2 in the case studies
#V - scale this to the globe, i.e. globe will be a similar percentage.. i.e. 34.7 % of the optical feasibility area!
#V - say something about the file sizes.. (export NC files)

#X (3.	Calculating the difference between 1 & 2 and give an estimation of the total global sqkm based on the 11 sites.)

# look into the worst site and the best one.. (Best; Wadden Sea, Tean, Okha, Worst processed; King William Island, Worst output; Cruz Bay, SaoPaulo, IleDePins, Mayotte, PrinceRupertIsland, WoodyCape)

In [375]:
# Clip every resampled tile to the optical feasibility area inside each AoI and mask it to the
# accepted value range. Results are collected per feasibility shapefile in image_cm (clipped &
# masked rasters) and image_mask (masked arrays of the clipped area), and exported as GeoTIFF / NetCDF.
# TODO: clip tiles covering the AOI to pixels in the global coastal mask
# TODO: statistics on the data (area, perc, etc..)
from shapely.geometry import box
import rasterio
import rasterio.mask # imported explicitly; "import rasterio" by itself does not promise this submodule is loaded
from rioxarray.exceptions import NoDataInBounds

limit = [-0.04, 0.14] # between -0.05 & 0.15


def _geometry_parts(geometry):
    """Return the member geometries of a multi-part geometry, or [geometry] for a single part."""
    return list(getattr(geometry, "geoms", [geometry]))


# folders (joined component-wise to avoid backslash escape sequences in string literals)
misc_fol = os.path.join(main_fol, "00_miscellaneous")
feas_fol = os.path.join(misc_fol, "AOI_feasOPT") # optical feasibility layers, one sub-folder per site
aoi_fol = os.path.join(misc_fol, "AOIs") # AoI geojson files
tile_fol = os.path.join(main_fol, output_fol, mode, "rsp_rpj") # resampled & reprojected tiles
cm_fol = os.path.join(tile_fol, "clipped_masked") # clipped & masked GeoTIFF output
cm_nc_fol = os.path.join(cm_fol, "netcdf") # clipped & masked NetCDF output
os.makedirs(cm_nc_fol, exist_ok=True) # also creates cm_fol

# directory listings do not change while looping (output goes to sub-folders), so list them once
aoi_files = [f for f in os.listdir(aoi_fol) if f.endswith(".geojson")]
tile_files = [f for f in os.listdir(tile_fol) if f.endswith(".tif")]

# compute which file intersects with the AOI feasOPT geometry and clip & mask it
image_cm = []
image_mask = []
for file in os.listdir(feas_fol):
    for shp in os.listdir(os.path.join(feas_fol, file, "AOI_Id_0")):
        if not shp.endswith(".shp"):
            continue
        print(file)
        image_cms = []
        image_masks = []
        gdf = gpd.read_file(os.path.join(feas_fol, file, "AOI_Id_0", shp)) # load the optical feasibility layer shapefile

        for fileAOI in aoi_files:
            gdfAOI = gpd.read_file(os.path.join(aoi_fol, fileAOI)) # load the AOI geojson
            gdf_mask = gdf.intersection(gdfAOI) # calculate intersection between AoIs
            if gdf_mask.is_empty[0]: # feasibility layer and AoI do not overlap
                continue

            for filetif in tile_files:
                tile_path = os.path.join(tile_fol, filetif)
                with rxr.open_rasterio(tile_path, masked=True) as xds:
                    gdf_tile = gdf_mask.to_crs(xds.rio.crs) # match crs's
                    mask_union = gdf_tile.geometry.unary_union
                    boxed_geom_AOIs = box(*mask_union.bounds) # get bounding box of the AOI
                    boxed_geom_tile = box(*xds.rio.bounds()) # get bounding box of the tile
                    if not boxed_geom_AOIs.intersects(boxed_geom_tile): # intersects() also covers full containment
                        continue
                    print(filetif)

                    # a list of parts works for both Polygon and MultiPolygon intersections
                    mask_geoms = _geometry_parts(mask_union)
                    try:
                        xds_clipped = xds.rio.clip(mask_geoms, xds.rio.crs, drop=True) # clip the raster to the geometry
                        xds_cm = xds_clipped.where((xds_clipped > limit[0]) & (xds_clipped < limit[1])) # mask the raster to the limits
                        with rasterio.open(tile_path) as src: # get raster file for total pixels
                            out_image, _ = rasterio.mask.mask(src, mask_geoms, crop=True, filled=False)
                    except (NoDataInBounds, ValueError):
                        # bounding boxes overlap but the AoI geometry itself holds no data in this tile
                        continue

                    # append only once both results exist, so the two lists stay aligned
                    image_cms.append(xds_cm)
                    image_masks.append(out_image)

                    xds_cm.rio.to_raster(os.path.join(cm_fol, filetif.replace(".tif", "_cm.tif")), driver="GTiff", compress="LZW") # export tif
                    xds_cm.to_netcdf(os.path.join(cm_nc_fol, filetif.replace(".tif", "_cm.nc")), mode="w", format='NetCDF4') # save the raster file as nc

        image_cm.append(image_cms)
        image_mask.append(image_masks)

CruzBay
z9_x163_y229_t2021-01-01_2022-01-01_100m.tif
z9_x164_y229_t2021-01-01_2022-01-01_100m.tif
IleDePins
z9_x493_y288_t2021-01-01_2022-01-01_100m.tif
z9_x493_y289_t2021-01-01_2022-01-01_100m.tif
z9_x494_y288_t2021-01-01_2022-01-01_100m.tif
z9_x494_y289_t2021-01-01_2022-01-01_100m.tif
KingWilliamIsland
z9_x114_y117_t2021-01-01_2022-01-01_100m.tif
z9_x114_y118_t2021-01-01_2022-01-01_100m.tif
z9_x115_y117_t2021-01-01_2022-01-01_100m.tif
z9_x115_y118_t2021-01-01_2022-01-01_100m.tif
Mayotte
z9_x319_y273_t2021-01-01_2022-01-01_100m.tif
z9_x319_y274_t2021-01-01_2022-01-01_100m.tif
z9_x320_y273_t2021-01-01_2022-01-01_100m.tif
z9_x320_y274_t2021-01-01_2022-01-01_100m.tif
Okha
z9_x354_y222_t2021-01-01_2022-01-01_100m.tif
z9_x354_y223_t2021-01-01_2022-01-01_100m.tif
PrinceRupertIsland
z9_x69_y162_t2021-01-01_2022-01-01_100m.tif
z9_x69_y163_t2021-01-01_2022-01-01_100m.tif
z9_x69_y164_t2021-01-01_2022-01-01_100m.tif
z9_x70_y162_t2021-01-01_2022-01-01_100m.tif
z9_x70_y163_t2021-01-01_2022-01-01_1

In [376]:
# actual statistics: valid (non-nan) pixels versus all pixels of the clipped feasibility area,
# printed per site and summed over all sites (pixel counts, percentage and km2)
# NOTE(review): the km2 conversion assumes every pixel covers upscale x upscale metres; the tiles
# are written with crs = "EPSG:4326", so the true pixel area presumably varies with latitude - TODO confirm
site_names = os.listdir(os.path.join(main_fol, "00_miscellaneous", "AOI_feasOPT"))
if not (len(site_names) == len(image_cm) == len(image_mask)):
    # results are paired with the site folders by position; a mismatch would mislabel the statistics
    raise ValueError(
        f"{len(site_names)} site folders but {len(image_cm)} clipped and {len(image_mask)} mask result lists"
    )


def _percentage(part, total):
    """Return part / total in percent rounded to one decimal, or nan when total is zero."""
    if total == 0:
        return float("nan")
    return round(part / total * 100, 1)


def _pixels_to_km2(n_pixels):
    """Convert a pixel count to km2 for square pixels with an edge of `upscale` metres."""
    return (n_pixels * upscale * upscale) / 1000000


iat = [] # valid (intertidal) pixel count per site
tat = [] # total pixel count of the clipped feasibility area per site
for idx, i in enumerate(site_names):
    print(idx, i)
    intertidal_area = []
    total_area = []
    for j, k in zip(image_cm[idx], image_mask[idx]):
        intertidal_area.append(np.count_nonzero(~np.isnan(j))) # number of non-nan pixels
        total_area.append(k.count()) # counts the total number of falses (i.e. non masked elements)
    site_intertidal = sum(intertidal_area)
    site_total = sum(total_area)
    print(site_intertidal, site_total, _percentage(site_intertidal, site_total)) # pixels
    #print(intertidal_area, total_area)
    print(_pixels_to_km2(site_intertidal), _pixels_to_km2(site_total)) # km2
    iat.append(site_intertidal)
    tat.append(site_total)

print("")
print(f"Summed total for {len(site_names)} cases:")
print(sum(iat), "intertidal pixels,", sum(tat), "total pixels,", _percentage(sum(iat), sum(tat)), "%")
# tat counts all pixels of the feasibility area, so the second figure is labelled as the total area
print(_pixels_to_km2(sum(iat)), "intertidal km2,", _pixels_to_km2(sum(tat)), "total km2")

0 CruzBay
510 29894 1.7
5.1 298.94
1 IleDePins
1371 80100 1.7
13.71 801.0
2 KingWilliamIsland
181647 181705 100.0
1816.47 1817.05
3 Mayotte
1814 67194 2.7
18.14 671.94
4 Okha
20076 114724 17.5
200.76 1147.24
5 PrinceRupertIsland
8204 70554 11.6
82.04 705.54
6 SaoPaolo
143 35260 0.4
1.43 352.6
7 Tean
2778 29792 9.3
27.78 297.92
8 WaddenSea
29948 84562 35.4
299.48 845.62
9 WoodyCape
1764 22125 8.0
17.64 221.25

Summed total for 10 cases:
248255 intertidal pixels, 715910 total pixels, 34.7 %
2482.55 intertidal km2, 7159.1 total intertidal km2


In [325]:
# single file test

# dum_file = r"z9_x266_y165_t2021-01-01_2022-01-01_100m.tif"
# dum_file_mask = r"AOI_GER_WaddenSea.geojson"
# dum_file_AOI = r"WaddenSea"

# gdf = gpd.read_file(os.path.join(main_fol, "00_miscellaneous\AOI_feasOPT", dum_file_AOI, "AOI_Id_0", "AOI_0_Dissolved_-30_100.shp")) # load the optical feasibility layer shapefile
# gdfAOI = gpd.read_file(os.path.join(main_fol, "00_miscellaneous\AOIs", dum_file_mask))
# gdf_mask = gdf.intersection(gdfAOI) # calculate intersection between AoIs
# xds = rxr.open_rasterio(os.path.join(main_fol, output_fol, mode, "rsp_rpj", dum_file), masked=True)
# test = xds.rio.clip(gdf_mask.geometry.unary_union.geoms, xds.rio.crs, drop=True) # clip the raster to the geometry
# xds_cm = test.where((test > limit[0]) & (test < limit[1])) # mask the raster to the limits
# with rasterio.open(os.path.join(main_fol, output_fol, mode, "rsp_rpj", dum_file)) as src:
#      out_image, transformed = rasterio.mask.mask(src, gdf_mask.geometry.unary_union.geoms, crop=True, filled=False)
# #test.plot()

# print(np.count_nonzero(~np.isnan(xds_cm))) # number of non-nan pixels
# print(out_image.count()) # counts the falses (i.e. non masked elements)
# xds_cm.plot()
# # print(test)

In [387]:
# # Open the GeoTIFF and NetCDF files to check
# file = "z9_x266_y165_t2021-01-01_2022-01-01_100m.tif"
# data_tif = rxr.open_rasterio(os.path.join(main_fol, output_fol, mode, "rsp_rpj/clipped_masked", file.replace(".tif", "_cm.tif")))
# data_nc = rxr.open_rasterio(os.path.join(main_fol, output_fol, mode, "rsp_rpj/clipped_masked", "netcdf", file.replace(".tif", "_cm.nc")))

# # print the datasets
# #data_tif
# #data_nc

# # replace the fill value with NaN
# data_nc_masked = data_nc.where(data_nc.values != data_nc.attrs["_FillValue"])  

# # test if equal data arrays (where matching nan's are true)
# np.array_equal(data_nc_masked, data_tif, equal_nan=True)

# # plot the data
# #data_nc.plot()
# data_nc_masked.plot()
# #data_tif.plot()