## Coastal Mask

Notebook to convert GeoTIFF (TIF) files into CF-compliant Cloud Optimized GeoTIFFs (COGs).

In [50]:
# Optional: code formatter for JupyterLab (lab_black extension); uncomment the next line to use it
#%load_ext lab_black
# Optional: code formatter for the classic Jupyter Notebook (nb_black extension);
# comment the next line out if nb_black is not installed
%load_ext nb_black

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

### Configure OS-independent paths

In [1]:
# Import standard packages
import json
import math
import os
import pathlib
import sys
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rioxarray as rio
import xarray as xr
from dotenv import load_dotenv
#load_dotenv()

# Import custom functionality
from coclicodata.drive_config import p_drive
from coclicodata.etl.cf_compliancy_checker import check_compliancy, save_compliancy
from coastmonitor.io.utils import name_block

# Define (local and) remote drives
coclico_data_dir = p_drive.joinpath("11207608-coclico", "FULLTRACK_DATA")

# Workaround to the Windows OS (10) udunits error after installation of cfchecker:
# https://github.com/SciTools/iris/issues/404
# NOTE: change the relative path below to the udunits2.xml file dir in your own Python installation.
udunits2_xml = pathlib.Path.home().joinpath(
    r"Anaconda3\pkgs\udunits2-2.2.28-h892ecd3_0\Library\share\udunits\udunits2.xml"
)
# Only override the environment variable when the file is really there, so that an
# already-correct UDUNITS2_XML_PATH is not replaced by a path that does not exist
# (e.g. on another OS or with another Python installation).
if udunits2_xml.exists():
    os.environ["UDUNITS2_XML_PATH"] = str(udunits2_xml)

# use local or remote data dir
use_local_data = False
ds_dirname = "WP4"

if use_local_data:
    ds_dir = pathlib.Path.home().joinpath("data", "tmp", ds_dirname)
else:
    ds_dir = coclico_data_dir.joinpath(ds_dirname)

# Fail early: every later cell reads from this directory
if not ds_dir.exists():
    raise FileNotFoundError("Directory with data does not exist.")

# directory to export result (make if not exists)
cog_dir = ds_dir.joinpath("cog")  # for checking CF compliancy
cog_dirs = ds_dir.joinpath("cogs_final")  # for making all files CF compliant (sub-folders are created on export)
cog_dir.mkdir(parents=True, exist_ok=True)


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [2]:
# Status message only: this cell performs no conversion or compliancy check itself.
print('Data is already CF_compliant')

Data is already CF_compliant


In [3]:
# Set up file structure for coastal flooding hazard maps

import glob
import rioxarray
import rasterio
from datacube.utils.cog import write_cog

def generate_slices(num_chunks: int, chunk_size: int) -> Iterator[slice]:
    """Yield consecutive, equally sized slices for chunk-based iteration.

    Args:
        num_chunks: Number of slices to produce.
        chunk_size: Length of each slice.

    Yields:
        ``slice(i * chunk_size, (i + 1) * chunk_size)`` for ``i`` in
        ``range(num_chunks)``. The stop of the last slice is not clipped to any
        array length; callers rely on out-of-range slicing being truncated by
        the indexed object.
    """
    for i in range(num_chunks):
        yield slice(i * chunk_size, (i + 1) * chunk_size)

def get_paths(folder_structure, base_dir=''):
    """Flatten a nested folder description into a list of relative paths.

    Each key of ``folder_structure`` is a folder name. Its value decides what is
    emitted:

    * ``dict``  -> descend into it, with the key appended to ``base_dir``;
    * non-empty ``list`` -> one path ``base_dir/key/item`` per non-empty item;
    * empty ``list`` -> the single path ``base_dir/key``;
    * anything else -> ignored.

    Paths are returned in the iteration order of the dict(s).
    """
    collected = []
    for name, content in folder_structure.items():
        if isinstance(content, dict):
            # Sub-tree: recurse one level deeper
            collected += get_paths(content, os.path.join(base_dir, name))
        elif isinstance(content, list):
            if not content:
                # Folder without sub-folders
                collected.append(os.path.join(base_dir, name))
            else:
                # Leaf folders; empty strings are placeholders and are skipped
                collected += [
                    os.path.join(base_dir, name, leaf)
                    for leaf in content
                    if leaf != ""
                ]
    return collected

# List the different types of map folders
map_types = [
    'HIGH_DEFENDED_MAPS',
    'LOW_DEFENDED_MAPS',
    'UNDEFENDED_MAPS',
]

# List all tif files present in first folder (note: it is assumed that the same files are present in all folders).
# glob returns files in arbitrary order; sort them so that the positional pairing
# with path_list below is deterministic on every OS / file system.
tif_list = sorted(glob.glob(str(ds_dir.joinpath("data", map_types[0], "*.tif"))))

# List the desired folder structure as a dict
# NOTE: make sure the resulting path_list (based on folder structure) matches the tif_list,
# i.e. the i-th path must belong to the i-th tif in sorted file-name order.
folder_structure = {
    "Mean_spring_tide": [],
    "RP": ["1000", "100", "1"],
    "SLR": {
        "High_end": ["2100", "2150"],
        "SSP126": ["2100"],
        "SSP245": ["2050", "2100"],
        "SSP585": ["2030", "2050", "2100"]
    }
}

# Get list of paths for the folder structure
path_list = get_paths(folder_structure)

# zip() silently drops unmatched entries, so a length mismatch would pair tifs with
# the wrong output folder without any error; stop here instead.
if len(path_list) != len(tif_list):
    raise ValueError(
        f"Folder structure defines {len(path_list)} paths but {len(tif_list)} tif files "
        f"were found for {map_types[0]}."
    )



In [4]:
# NOTE: Not all meta_data.json files were correct; a broken file will break the conversion loop.
# CHECK here that the metadata json belonging to every tif can be opened and parsed.

for map_type in map_types:

    # Get list of original tif's per map_type (sorted: glob order is arbitrary)
    tif_list = sorted(glob.glob(str(ds_dir.joinpath("data", map_type, "*.tif"))))

    for cur_path, cur_tif in zip(path_list, tif_list):

        # The metadata lives in <tif folder>/Metadata/<tif name>.json. Swap only the
        # file suffix; a plain str.replace('tif', 'json') would also rewrite any other
        # "tif" occurring in the file name.
        tif_path = pathlib.Path(cur_tif)
        meta_path = tif_path.parent.joinpath("Metadata", tif_path.with_suffix(".json").name)

        print('trying to open: ' + meta_path.name)

        # Load meta data; the context manager closes the file handle again
        with open(meta_path) as cur_meta_data:
            cur_meta = json.load(cur_meta_data)

        # Reaching the last file of the last map type means nothing raised on the way
        if map_type == map_types[-1] and cur_tif == tif_list[-1]:
            print('All .json files are working')

trying to open: Mean_spring_tide_HD.json
trying to open: RP1000_HD.json
trying to open: RP100_HD.json
trying to open: RP1_HD.json
trying to open: SLR_High-End_2100_subs_2050_HD.json
trying to open: SLR_High-End_2150_subs_2050_HD.json
trying to open: SLR_SSP126_2100_subs_2050_HD.json
trying to open: SLR_SSP245_2050_subs_HD.json
trying to open: SLR_SSP245_2100_subs_2050_HD.json
trying to open: SLR_SSP585_2030_subs_HD.json
trying to open: SLR_SSP585_2050_subs_HD.json
trying to open: SLR_SSP585_2100_subs_2050_HD.json
trying to open: Mean_spring_tide_LD.json
trying to open: RP1000_LD.json
trying to open: RP100_LD.json
trying to open: RP1_LD.json
trying to open: SLR_High-End_2100_subs_2050_LD.json
trying to open: SLR_High-End_2150_subs_2050_LD.json
trying to open: SLR_SSP126_2100_subs_2050_LD.json
trying to open: SLR_SSP245_2050_subs_LD.json
trying to open: SLR_SSP245_2100_subs_2050_LD.json
trying to open: SLR_SSP585_2030_subs_LD.json
trying to open: SLR_SSP585_2050_subs_LD.json
trying to op

In [32]:
# DO THE WORK
# Split the selected GeoTIFF(s) into square tiles and export every tile as a COG
# that carries the attributes from the accompanying metadata json.

# NOTE: only the first map type and the tif/path pair at index 3 are converted here.
# Widen these two selections to convert more of the dataset.
selected_map_types = [map_types[0]]
selected_index = 3

# Tile edge length in pixels: 2**15 = 32768. This is large, but was considered OK for the
# int8 source data.
# NOTE(review): tiles are cast to float32 before export, so a full 32768 x 32768 tile
# takes about 4 GiB in memory — confirm this is acceptable.
chunk_size = 2**15

# Value written in place of the source fill value, also registered as the nodata value
nodata_value = -9999

# Fixed time coordinate attached to every tile
tile_time = pd.Timestamp(2024, 3, 18).isoformat()

# Iterate over the original tif files
for map_type in selected_map_types:

    # Get list of original tif's per map_type (sorted: glob order is arbitrary, while
    # the files are paired with path_list by position)
    tif_list = sorted(glob.glob(str(ds_dir.joinpath("data", map_type, "*.tif"))))

    for cur_path, cur_tif in zip([path_list[selected_index]], [tif_list[selected_index]]):

        print('currently working on: '+str(cur_path)+' '+str(cur_tif))

        # Output folder: <cogs_final>/<map type>/<folder-structure path>
        cur_dir = pathlib.Path(os.path.join(cog_dirs, map_type, cur_path))
        cur_dir.mkdir(parents=True, exist_ok=True)

        fm = rioxarray.open_rasterio(
            cur_tif, mask_and_scale=False
        )  # .isel({"x":slice(0, 40000), "y":slice(0, 40000)})

        # Name the bands B01, B02, ... (derived from the file instead of hard coded to 1)
        # and turn every band into its own data variable
        fm = fm.assign_coords(
            band=("band", [f"B{k+1:02}" for k in range(fm.sizes["band"])])
        )
        fm = fm.to_dataset("band")

        fm_chunked = fm.chunk({"x": chunk_size, "y": chunk_size})

        # Use .sizes: indexing Dataset.dims for a length is deprecated and emits a warning
        num_x_chunks = math.ceil(fm_chunked.sizes["x"] / chunk_size)
        num_y_chunks = math.ceil(fm_chunked.sizes["y"] / chunk_size)

        # Load meta data from <tif folder>/Metadata/<tif name>.json. Swap only the file
        # suffix (str.replace('tif', 'json') would touch every "tif" in the name) and
        # close the file handle again after reading.
        tif_path = pathlib.Path(cur_tif)
        meta_path = tif_path.parent.joinpath("Metadata", tif_path.with_suffix(".json").name)
        with open(meta_path) as cur_meta_data:
            cur_meta = json.load(cur_meta_data)

        for x_slice in generate_slices(num_x_chunks, chunk_size):
            for y_slice in generate_slices(num_y_chunks, chunk_size):
                chunk = fm_chunked.isel(x=x_slice, y=y_slice)

                chunk = chunk.assign_coords(time=tile_time)

                for var in chunk:

                    da = chunk[var]

                    # Replace the source fill value by the nodata value, then store as float32
                    da = (
                        da.where(da != chunk.attrs['_FillValue'], nodata_value)
                        .astype("float32")
                        .rio.write_nodata(nodata_value)
                        .rio.set_spatial_dims(x_dim="x", y_dim="y")
                    )

                    item_name = name_block(
                        da,
                        storage_prefix="",
                        name_prefix=da.name,
                        include_band=None,
                        time_dim=False,
                        x_dim="x",
                        y_dim="y",
                    )

                    # hacky fix to get rid of the epsg=None string
                    if "None" in item_name:
                        item_name = item_name.replace("None", str(rasterio.crs.CRS(da.rio.crs).to_epsg()))

                    print(item_name)

                    # convert to dataset
                    dad = da.to_dataset()

                    # add all attributes (again)
                    for attr_name, attr_val in cur_meta.items():
                        if attr_name == 'PROVIDERS':
                            # serialised to a JSON string before being stored as attribute
                            attr_val = json.dumps(attr_val)
                        if attr_name == "MEDIA_TYPE":  # change media type to tiff, leave the rest as is
                            attr_val = "IMAGE/TIFF"
                        dad.attrs[attr_name] = attr_val

                    dad.attrs['Conventions'] = "CF-1.8"

                    # make parent dir if not exists
                    outpath = cur_dir.joinpath(item_name)
                    outpath.parent.mkdir(parents=True, exist_ok=True)

                    # export file
                    dad.rio.to_raster(outpath, compress="DEFLATE", driver="COG")


currently working on: RP\1 P:\11207608-coclico\FULLTRACK_DATA\WP4\data\HIGH_DEFENDED_MAPS\RP1_HD.tif


  num_x_chunks = math.ceil(fm_chunked.dims["x"] / chunk_size)
  num_y_chunks = math.ceil(fm_chunked.dims["y"] / chunk_size)


B01_epsg=3035_x=4023387_y=4596462.tif
B01_epsg=3035_x=4023387_y=3777262.tif
B01_epsg=3035_x=4023387_y=2958062.tif
B01_epsg=3035_x=4023387_y=2138862.tif
B01_epsg=3035_x=4023387_y=1425012.tif
B01_epsg=3035_x=4842587_y=4596462.tif
B01_epsg=3035_x=4842587_y=3777262.tif
B01_epsg=3035_x=4842587_y=2958062.tif
B01_epsg=3035_x=4842587_y=2138862.tif
B01_epsg=3035_x=4842587_y=1425012.tif
B01_epsg=3035_x=5661787_y=4596462.tif
B01_epsg=3035_x=5661787_y=3777262.tif
B01_epsg=3035_x=5661787_y=2958062.tif
B01_epsg=3035_x=5661787_y=2138862.tif
B01_epsg=3035_x=5661787_y=1425012.tif
B01_epsg=3035_x=6480987_y=4596462.tif
B01_epsg=3035_x=6480987_y=3777262.tif
B01_epsg=3035_x=6480987_y=2958062.tif
B01_epsg=3035_x=6480987_y=2138862.tif
B01_epsg=3035_x=6480987_y=1425012.tif


In [31]:
# Inspect the '_FillValue' attribute of `chunk`. `chunk` is only assigned inside the
# tiling loop of the conversion cell, so that cell has to be run before this one.
chunk.attrs['_FillValue']


3.0