In [0]:
import os
import sys

# Locate this notebook inside the workspace so the project sources can be
# imported without hard-coding a user- or repo-specific path.
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
notebook_path = notebook_context.notebookPath().get()

# The context path is prefixed with "/Workspace" to form the directory used below.
workspace_root = "/Workspace"
notebook_dir = workspace_root + os.path.dirname(notebook_path)

# The package sources are assumed to live in a "src" folder that is a sibling
# of the folder containing this notebook.
src_path = os.path.abspath(os.path.join(notebook_dir, os.pardir, "src"))
print(src_path)

# Guarded append: re-running the cell must not add the same entry twice.
if src_path not in sys.path:
    sys.path.append(src_path)

In [0]:
# --- Dataset storage: where the zipped inputs are read and the outputs written ---
dataset_storage_account_name = "melikadatabricksstorage"
dataset_container_name = "geospatial-dataset"
# Input and output currently point at the same directory.
dataset_input_dir = "vector/uk"
dataset_output_dir = "vector/uk"

# --- Catalog: target catalog, its schemas, and the storage backing them ---
catalog_name = "geospatial"
schema_names = [
    "greenspaces",
    "heights",
    "lookups",
    "networks",
]
catalog_storage_account_name = "melikadatabricksstorage"
catalog_container_name = "geospatial-catalog"

In [0]:
# Session-level settings for the dataset storage account: select the auth type
# and supply the OAuth2 client id used with it.
# NOTE(review): the id is presumably that of a user-assigned managed identity - confirm.
# NOTE(review): confirm that "ManagedIdentity" is an auth type honoured by this
# runtime and that "spark.hadoop."-prefixed keys take effect when set on a live
# session; verify against the ABFS / Databricks documentation.
_dataset_account_host = f"{dataset_storage_account_name}.dfs.core.windows.net"
_dataset_auth_settings = {
    f"spark.hadoop.fs.azure.account.auth.type.{_dataset_account_host}": "ManagedIdentity",
    f"spark.hadoop.fs.azure.account.oauth2.client.id.{_dataset_account_host}": "a6d7d9b6-d9a1-4711-a74b-3cdfc43f3dd8",
}
# Dicts keep insertion order, so the settings are applied in the order listed.
for _setting_key, _setting_value in _dataset_auth_settings.items():
    spark.conf.set(_setting_key, _setting_value)


In [0]:
# Project-local helper; the import requires the geo_ingest package to be on sys.path.
from geo_ingest.geopackage_unzipper import GeoPackageUnzipper

# Archives handed to the unzipper, one per line so additions and removals
# show up as single-line diffs.
zip_files = [
    "bdline_gpkg_gb.zip",
    "opgrsp_gpkg_gb.zip",
    "opname_gpkg_gb.zip",
    "oproad_gpkg_gb.zip",
    "oprvrs_gpkg_gb.zip",
    "terr50_gpkg_gb.zip",
]

# The unzipper is pointed at the dataset container and given the notebook's
# dbutils handle.
# NOTE(review): presumably it extracts each listed archive from input_dir and
# uploads the contents to output_dir - confirm in geo_ingest.geopackage_unzipper.
unzipper = GeoPackageUnzipper(
    storage_account_name=dataset_storage_account_name,
    container_name=dataset_container_name,
    input_dir=dataset_input_dir,
    output_dir=dataset_output_dir,
    dbutils=dbutils,
)
unzipper.unzip_selected_and_upload(zip_files)

In [0]:
# Rebuild every configured schema inside the catalog.
# WARNING: DROP ... CASCADE removes the schema together with the objects it
# contains, so each run of this cell discards whatever was stored there before.
for schema_name in schema_names:
    drop_stmt = f"""
        DROP SCHEMA IF EXISTS {catalog_name}.{schema_name} CASCADE;
        """
    create_stmt = f"""
        CREATE SCHEMA IF NOT EXISTS {catalog_name}.{schema_name}
        COMMENT 'This schema contains {schema_name} data of the UK'
        MANAGED LOCATION 'abfss://{catalog_container_name}@{catalog_storage_account_name}.dfs.core.windows.net/';
        """

    # Order matters: the drop is issued before the matching create.
    for statement in (drop_stmt, create_stmt):
        spark.sql(statement)