In [1]:
import logging
import os

import click
import datacube
import fsspec

import deafrica_waterbodies.io
import deafrica_waterbodies.make_polygons
from deafrica_waterbodies.cli.logs import logging_setup

In [2]:
import os

# Default AWS settings that the Analysis Sandbox exports as environment
# variables. Only the keys matter below; the values are placeholders.
aws_default_config = {
    # "AWS_NO_SIGN_REQUEST": "YES",
    "AWS_SECRET_ACCESS_KEY": "fake",
    "AWS_ACCESS_KEY_ID": "fake",
}

# A public bucket cannot be read while these credentials are present in the
# environment; the request fails with:
#   PermissionError: The AWS Access Key Id you provided does not exist in our records.
# Remove each variable if it is set (a missing variable is simply skipped).
for env_var in aws_default_config:
    os.environ.pop(env_var, None)

In [3]:
# --- Run parameters ---------------------------------------------------------

# Where results are written, and the text file listing the dataset ids to process.
output_directory = "s3://deafrica-waterbodies-dev/test_out_dir"
dataset_ids_text_file = "s3://deafrica-waterbodies-dev/test_out_dir/dataset_ids.txt"

# Wetness thresholds; both are handed to the polygon generation step below.
primary_threshold: float = 0.1
secondary_threshold: float = 0.05

# Passed as `min_valid_observations` when generating polygons.
minimum_valid_observations: int = 128

# When True, polygons are regenerated even if output files already exist.
overwrite = True

# Verbosity level passed to `logging_setup`.
verbose = 1

In [4]:
# Initialise logging with the requested verbosity, then obtain the logger
# used by the remaining cells of this notebook.
logging_setup(verbose=verbose)
_log = logging.getLogger(name=__name__)

In [5]:
# Coerce both locations to plain strings so that pathlib paths are accepted too.
output_directory, dataset_ids_text_file = map(
    str, (output_directory, dataset_ids_text_file)
)

In [7]:
# Settings forwarded to the polygon generation step when it loads each dataset.
output_crs = "EPSG:6933"
resolution = (-30, 30)
dask_chunks = dict(x=3200, y=3200, time=1)

In [8]:
# Read the dataset ids from the text file (one id per line).
# Fail early, with the problem logged, if the file is missing.
if not deafrica_waterbodies.io.check_file_exists(dataset_ids_text_file):
    _log.error(f"Could not find text file {dataset_ids_text_file}!")
    raise FileNotFoundError(f"Could not find text file {dataset_ids_text_file}!")

# Choose the fsspec backend that matches where the text file lives.
if deafrica_waterbodies.io.check_if_s3_uri(dataset_ids_text_file):
    fs = fsspec.filesystem("s3")
else:
    fs = fsspec.filesystem("file")

with fs.open(dataset_ids_text_file, "r") as file:
    lines = file.readlines()

# Strip surrounding whitespace and drop blank lines, so that an empty or
# whitespace-only line in the file does not become an empty-string dataset id
# that is later passed to the polygon generation step.
dataset_ids = [stripped for stripped in (line.strip() for line in lines) if stripped]

[2023-10-04 07:32:22,301] {credentials.py:620} INFO - Found credentials in shared credentials file: ~/.aws/credentials


In [9]:
# The generated waterbody polygons go into a dedicated sub-directory of the
# output directory.
polygons_subdir_name = "polygons_from_thresholds"
polygons_from_thresholds_dir = os.path.join(output_directory, polygons_subdir_name)

In [10]:
# Select the fsspec filesystem matching the polygons directory:
# "s3" for S3 URIs, the local "file" filesystem otherwise.
fs_protocol = (
    "s3"
    if deafrica_waterbodies.io.check_if_s3_uri(polygons_from_thresholds_dir)
    else "file"
)
fs = fsspec.filesystem(fs_protocol)

In [11]:
# Make sure the polygons directory is present, creating it (and logging the
# fact) only when it is missing.
polygons_dir_exists = deafrica_waterbodies.io.check_dir_exists(polygons_from_thresholds_dir)
if not polygons_dir_exists:
    fs.mkdirs(polygons_from_thresholds_dir, exist_ok=True)
    _log.info(f"Created directory {polygons_from_thresholds_dir}")

In [12]:
# Validate the wetness thresholds (ordered secondary, then primary) and log
# the message returned by the check.
minimum_wet_thresholds = [secondary_threshold, primary_threshold]
wetness_threshold_message = deafrica_waterbodies.make_polygons.check_wetness_thresholds(
    minimum_wet_thresholds
)
_log.info(wetness_threshold_message)

[2023-10-04 07:33:00,791] {2036669620.py:3} INFO - We will be running a hybrid wetness threshold. 
**You have set 0.1 as the primary threshold, which will define the location of the waterbody polygons 
 with 0.05 set as the supplementary threshold, which will define the extent/shape of the waterbody polygons.**


In [13]:
# Open a datacube connection, identifying this notebook by its app name.
datacube_app_name = "GenerateWaterbodyPolygons"
dc = datacube.Datacube(app=datacube_app_name)

In [14]:
# Threshold every dataset in turn to produce its primary and secondary
# threshold waterbody polygons, and store each set as a parquet file.
for dataset_id in dataset_ids:
    # Output locations for this dataset.
    primary_threshold_polygons_fp = os.path.join(
        polygons_from_thresholds_dir, f"{dataset_id}_primary_threshold_polygons.parquet"
    )
    secondary_threshold_polygons_fp = os.path.join(
        polygons_from_thresholds_dir, f"{dataset_id}_secondary_threshold_polygons.parquet"
    )

    # Without overwrite, a dataset is skipped only when BOTH outputs are
    # already present; the existence check is not made when overwriting.
    if not overwrite:
        _log.info(
            f"Checking existence of {primary_threshold_polygons_fp} and {secondary_threshold_polygons_fp}"
        )
        exists = deafrica_waterbodies.io.check_file_exists(
            primary_threshold_polygons_fp
        ) and deafrica_waterbodies.io.check_file_exists(secondary_threshold_polygons_fp)
        if exists:
            continue

    # Generate both polygon sets for this dataset.
    polygons_pair = deafrica_waterbodies.make_polygons.get_polygons_using_thresholds(
        dataset_id=dataset_id,
        dask_chunks=dask_chunks,
        resolution=resolution,
        output_crs=output_crs,
        min_valid_observations=minimum_valid_observations,
        primary_threshold=primary_threshold,
        secondary_threshold=secondary_threshold,
        dc=dc,
    )
    primary_threshold_polygons, secondary_threshold_polygons = polygons_pair

    # Write the polygons to parquet files.
    primary_threshold_polygons.to_parquet(primary_threshold_polygons_fp)
    secondary_threshold_polygons.to_parquet(secondary_threshold_polygons_fp)

[2023-10-04 07:33:45,642] {make_polygons.py:280} INFO - Generating water body polygons for dataset cd198bae-43a1-566a-8e8e-01b110bfbaf5
[2023-10-04 07:33:48,114] {make_polygons.py:280} INFO - Generating water body polygons for dataset fc10a5ae-00d0-5998-bbc0-b7d29f5807fb
[2023-10-04 07:33:49,911] {make_polygons.py:280} INFO - Generating water body polygons for dataset bb6c330e-f7c9-5164-85a6-a10e5ed36ce8
[2023-10-04 07:33:52,260] {make_polygons.py:280} INFO - Generating water body polygons for dataset 3180edab-0678-59a6-9cce-70437f6d8e8b
[2023-10-04 07:33:54,326] {make_polygons.py:280} INFO - Generating water body polygons for dataset f6d24d9a-4399-5d5e-9a0b-b4edfcea710f
[2023-10-04 07:33:56,238] {make_polygons.py:280} INFO - Generating water body polygons for dataset 68180140-e074-5c12-a5e7-8ed3d0dee5a9
[2023-10-04 07:33:58,396] {make_polygons.py:280} INFO - Generating water body polygons for dataset 7e5d3cf6-2dd0-5830-b6dc-39c1ecc69713
[2023-10-04 07:34:00,189] {make_polygons.py:280}