In [1]:
import logging
import os

import click
import geopandas as gpd

from deafrica_waterbodies.attributes import (
    add_area_and_perimeter_attributes,
    add_timeseries_attribute,
    assign_unique_ids,
)
from deafrica_waterbodies.cli.logs import logging_setup
from deafrica_waterbodies.io import write_waterbodies_to_file

In [2]:
# Make the repository's top-level directory importable so that the
# `deafrica_waterbodies` package can be resolved from this notebook.
import sys

# sys.path entries must be directories (or zip archives). The previous value,
# '../deafrica_waterbodies/cli/logs.py', pointed at a single module file, which
# the import system cannot use as a search location, so it had no effect.
# NOTE(review): assumes the notebook's working directory sits one level below
# the directory that contains the `deafrica_waterbodies` package -- confirm.
# NOTE(review): this only influences imports executed after this cell, so it
# needs to run before the package imports to be useful.
REPO_ROOT = ".."

# caution: path[0] is reserved for script path (or '' in REPL)
# The membership check keeps the cell idempotent when it is re-run.
if REPO_ROOT not in sys.path:
    sys.path.insert(1, REPO_ROOT)

In [3]:
import os

# Default AWS settings that the Analysis Sandbox exports as environment
# variables.
aws_default_config = {
    # "AWS_NO_SIGN_REQUEST": "YES",
    "AWS_SECRET_ACCESS_KEY": "fake",
    "AWS_ACCESS_KEY_ID": "fake",
}

# Reading from a public bucket only works once these credentials are removed
# from the environment; if they are left in place the request fails with:
# PermissionError: The AWS Access Key Id you provided does not exist in our records.
for env_var in aws_default_config:
    # pop() with a default is a no-op when the variable is not set.
    os.environ.pop(env_var, None)

In [4]:
# --- Run parameters -------------------------------------------------------

# Product version, forwarded to the timeseries-attribute and file-writing steps.
product_version = "0.0.1"

# Bucket name forwarded to the timeseries-attribute step.
timeseries_bucket = "deafrica-waterbodies-dev"

# Location the filtered polygons are read from and the results are written to.
# NOTE(review): the "0-0-1" path segment looks like it mirrors product_version;
# confirm whether the two must be kept in sync.
output_directory = "s3://deafrica-waterbodies-dev/test_out_dir/0-0-1/shapefile2"

# Verbosity level handed to logging_setup().
verbose = 1

In [5]:
# Set up logger.
# Logging is configured through the project's `logging_setup` helper using the
# `verbose` level, then a logger named after this module is created for the
# remaining cells.
logging_setup(verbose=verbose)
_log = logging.getLogger(__name__)

In [6]:
# Support pathlib paths: normalise to a plain string so the value can be used
# with os.path.join and passed to the project helpers regardless of input type.
# NOTE(review): str() on a pathlib path built from an "s3://" URI collapses the
# double slash ("s3:/..."), so only local directories should be given as Path.
output_directory = str(output_directory)

In [7]:
# Read the filtered waterbody polygons stored under the output directory.
_log.info("Loading filtered waterbody polygons...")

filtered_polygons_filename = "filtered_polygons.parquet"
filtered_polygons_fp = os.path.join(output_directory, filtered_polygons_filename)
filtered_polygons = gpd.read_parquet(filtered_polygons_fp)

# Report how many polygons were loaded.
filtered_polygons_count = len(filtered_polygons)
_log.info(f"Waterbody polygons count {filtered_polygons_count}.")

[2023-10-19 23:19:06,808] {1292984903.py:1} INFO - Loading filtered waterbody polygons...
[2023-10-19 23:19:23,202] {1292984903.py:4} INFO - Waterbody polygons count 6543.


In [None]:
# Build the final waterbodies table in three explicit stages so that each
# intermediate result can be inspected on its own.

# Stage 1: give every filtered polygon a unique identifier.
polygons_with_ids = assign_unique_ids(polygons=filtered_polygons)

# Stage 2: attach the area and perimeter attributes.
polygons_with_geometry_attrs = add_area_and_perimeter_attributes(
    polygons=polygons_with_ids
)

# Stage 3: attach the timeseries attribute for this product version / bucket.
waterbodies_gdf = add_timeseries_attribute(
    polygons=polygons_with_geometry_attrs,
    product_version=product_version,
    timeseries_bucket=timeseries_bucket,
)

In [None]:
# Reproject to EPSG:4326
# The result is bound to a new name, so `waterbodies_gdf` keeps its original
# CRS and this cell can be re-run safely.
waterbodies_gdf_4326 = waterbodies_gdf.to_crs("EPSG:4326")

In [None]:
# Write the reprojected waterbodies to `output_directory` via the project's
# `write_waterbodies_to_file` helper.
# Note: with the parameters configured in this notebook the destination is an
# S3 URI rather than a local disk path.
write_waterbodies_to_file(
    waterbodies_gdf=waterbodies_gdf_4326,
    product_version=product_version,
    output_directory=output_directory,
)