In [0]:
import os
import sys

# Ask the Databricks notebook context for this notebook's workspace path.
notebook_path = (
    dbutils.notebook.entry_point.getDbutils()
    .notebook()
    .getContext()
    .notebookPath()
    .get()
)
# Plain concatenation is deliberate: os.path.join would discard the
# "/Workspace" prefix if the notebook path is absolute.
notebook_dir = f"/Workspace{os.path.dirname(notebook_path)}"

# The "src" folder is expected one level up from the notebook's folder.
src_path = os.path.abspath(os.path.join(notebook_dir, os.pardir, "src"))
print(src_path)

# Register the folder only once so re-running the cell does not add duplicates.
if src_path not in sys.path:
    sys.path.append(src_path)

In [0]:
catalog_name = "geospatial"
schema_names = ["greenspaces", "lookups", "networks"]

# Reset every schema: drop it (CASCADE also removes the objects inside it),
# then create it again with a descriptive comment.
for schema_name in schema_names:
    spark.sql(
        f"""
        DROP SCHEMA IF EXISTS {catalog_name}.{schema_name} CASCADE;
        """
    )
    spark.sql(
        f"""
        CREATE SCHEMA IF NOT EXISTS {catalog_name}.{schema_name}
        COMMENT 'This schema contains {schema_name} data of the UK';
        """
    )

In [0]:
# The target cloud is supplied through the "cloud_provider" notebook widget.
# Normalise it so values such as "Azure" or " aws " are accepted as well.
cloud_provider = dbutils.widgets.get("cloud_provider").strip().lower()
print(f"Cloud Provider: {cloud_provider}")

# Import from your package
# (presumably resolved through the "src" directory appended to sys.path in
# the first cell, so that cell has to run before this one).
from geo_ingest.geopackage_unzipper import GeoPackageUnzipper

if cloud_provider == "azure":
    dataset_storage_account_name = "melikadatabricksstorage"
    dataset_container_name = "geospatial-dataset"
    dataset_input_dir = "vector/uk"
    dataset_output_dir = "vector/uk"

    # For Azure: the container goes into `container_or_bucket` and the
    # storage account into `storage_account_name` (these were swapped before).
    unzipper = GeoPackageUnzipper(
        dbutils=dbutils,
        cloud_provider="azure",
        container_or_bucket=dataset_container_name,
        storage_account_name=dataset_storage_account_name,
        input_dir=dataset_input_dir,
        output_dir=dataset_output_dir,
    )
elif cloud_provider == "aws":
    dataset_bucket_name = "revodata-databricks-geospatial"
    dataset_input_dir = "geospatial-dataset/vector/uk"
    dataset_output_dir = "geospatial-dataset/vector/uk"

    # For AWS: previously no unzipper was built in this branch, so the call
    # below failed with NameError.
    # NOTE(review): assumes `storage_account_name` is optional for AWS —
    # confirm against the GeoPackageUnzipper constructor.
    unzipper = GeoPackageUnzipper(
        dbutils=dbutils,
        cloud_provider="aws",
        container_or_bucket=dataset_bucket_name,
        input_dir=dataset_input_dir,
        output_dir=dataset_output_dir,
    )
else:
    # Fail early with a clear message instead of a NameError on `unzipper`.
    raise ValueError(
        f"Unsupported cloud_provider {cloud_provider!r}; expected 'azure' or 'aws'."
    )

# Archives to extract from the input directory and upload to the output directory.
zip_files = ["bdline_gpkg_gb.zip", "opgrsp_gpkg_gb.zip", "oproad_gpkg_gb.zip"]
unzipper.unzip_selected_and_upload(zip_files)