In [None]:
import os
import pathlib
import sys
import json
from posixpath import join as urljoin

# make modules importable when running this file as script
sys.path.append(r"C:\Users\rowe\OneDrive - Stichting Deltares\Documents\GitHub\coclicodata\src")

import pystac
from coclicodata.drive_config import p_drive
from coclicodata.etl.cloud_utils import dataset_from_google_cloud
from coclicodata.etl.extract import get_mapbox_url, zero_terminated_bytes_as_str
from pystac import Catalog, CatalogType, Collection, Summaries
from coclicodata.coclico_stac.io import CoCliCoStacIO
from coclicodata.coclico_stac.layouts import CoCliCoZarrLayout
from coclicodata.coclico_stac.templates import (
    extend_links,
    gen_default_collection_props,
    gen_default_item,
    gen_default_item_props,
    gen_default_summaries,
    gen_mapbox_asset,
    gen_zarr_asset,
    get_template_collection,
)
from coclicodata.coclico_stac.extension import CoclicoExtension
from coclicodata.coclico_stac.datacube import add_datacube
from coclicodata.coclico_stac.utils import (
    get_dimension_dot_product,
    get_dimension_values,
    get_mapbox_item_id,
    rm_special_characters,
)

if __name__ == "__main__":
    # hard-coded input params at project level
    BUCKET_NAME = "dgds-data-public"
    BUCKET_PROJ = "gca/SOTC"
    MAPBOX_PROJ = "global-data-viewer"

    # hard-coded location of the dataset directory below the project drive
    gca_data_dir = pathlib.Path(
        p_drive,
        r"11209197-018-global-coastal-atlas",
        r"MSc_students\ClenmarRowe\Data\All_Datasets",
        r"Orig_Datasets",
    )
    dataset_dir = gca_data_dir.joinpath(r"02_Exposure\Population\worldpop")

    # Load the dataset metadata. The encoding is passed explicitly because
    # open() otherwise falls back to the locale encoding, which is usually not
    # UTF-8 on Windows and can garble or reject non-ASCII text in the JSON.
    metadata_fp = dataset_dir.joinpath("World_Pop_100m_metadata.json")
    with open(metadata_fp, "r", encoding="utf-8") as f:
        metadata = json.load(f)

    # STAC configs
    STAC_DIR = "current"
    TEMPLATE_COLLECTION = "template"  # stac template for dataset collection
    COLLECTION_TITLE = metadata["TITLE"]
    COLLECTION_ID = metadata["TITLE_ABBREVIATION"]  # name of stac collection
    DATASET_DESCRIPTION = metadata["DESCRIPTION"]

In [None]:
def list_tiff_files(bucket_name, prefix=''):
    """Return the names of all TIFF blobs in a bucket.

    A new ``storage.Client()`` is created on every call and asked to list the
    blobs of ``bucket_name`` restricted to ``prefix``. Only blobs whose name
    ends in ``.tif`` or ``.tiff`` (case-sensitive) are kept.

    Args:
        bucket_name: Name of the bucket to list.
        prefix: Blob-name prefix forwarded to ``list_blobs``; the default
            empty string applies no restriction.

    Returns:
        list: Full blob names (prefix included), in listing order.
    """
    # NOTE(review): `storage` is not imported in the visible cells --
    # presumably `from google.cloud import storage`; confirm and add it to
    # the import cell so the notebook survives a fresh kernel.
    client = storage.Client()
    tiff_suffixes = ('.tif', '.tiff')
    return [
        blob.name
        for blob in client.list_blobs(bucket_name, prefix=prefix)
        if blob.name.endswith(tiff_suffixes)
    ]

bucket_name = 'dgds-data-public'
prefix = 'gca/SOTC/Haz-Land_Sub_2010_COGs/'  # Prefix for the folder
tiff_files = list_tiff_files(bucket_name, prefix)

# Strip the folder prefix from every blob name; the bare file names are
# collected in `items` for the collection-building step further down.
items = [tiff_file.replace(prefix, "") for tiff_file in tiff_files]

# Print the names of all TIFF files
for item_name in items:
    print(item_name)
# %%
stac_io = CoCliCoStacIO()
# NOTE(review): `CoCliCoCOGLayout` is not imported in the visible cells (only
# `CoCliCoZarrLayout` is) -- confirm where it lives and add the import.
layout = CoCliCoCOGLayout()

# NOTE(review): `create_collection` is not defined in the visible cells --
# presumably it builds the pystac Collection from the metadata constants;
# confirm it is defined before this cell runs.
collection = create_collection()

# NOTE(review): `items` holds plain file-name strings, while pystac's
# `add_item` is documented to take `pystac.Item` objects -- this loop likely
# needs Items built from the COGs first; confirm.
for i in items:
    collection.add_item(i)

collection.update_extent_from_items()

# STAC_DIR is configured as a plain string, which does not support the `/`
# operator; wrap it in a Path so the joins below work (a Path is accepted too).
stac_dir = pathlib.Path(STAC_DIR)

catalog = pystac.Catalog.from_file(str(stac_dir / "catalog.json"))

# Replace a previously registered collection with the same id, if any.
if catalog.get_child(collection.id):
    catalog.remove_child(collection.id)
    print(f"Removed child: {collection.id}.")

catalog.add_child(collection)

collection.normalize_hrefs(str(stac_dir / collection.id), strategy=layout)

catalog.save(
    catalog_type=pystac.CatalogType.SELF_CONTAINED,
    dest_href=str(stac_dir),
    stac_io=stac_io,
)

# %%
# TODO: # check coastal_mask_stacs.py validate funcs with coclico_new..
collection.validate_all()

# %%
catalog.validate_all()
