In [0]:
from src.data.data_utils import write_to_sdf, write_to_delta_table
from src.data.log_utils import check_for_new_gdbs

In [0]:
# --- Notebook configuration -------------------------------------------------
# Volume path that main() hands to check_for_new_gdbs.
gcs_landing_zone = "/Volumes/land_auto-gen-kart_dev/external_dev/landing_zone"

# The catalog name contains hyphens, so it is kept backtick-quoted for use
# inside the SQL statements below.
catalog_dev = "`land_auto-gen-kart_dev`"
schema_dev = "dl_bildesegmentering"

# Table names used by the CREATE TABLE cells and by main().
bronze_table = "hospitals_bronze"
log_table = "logs_processed_hospital_gdbs"

# Layer name and CRS value that main() passes to write_to_sdf.
layer = "BygningsPunkt_Sykehus_med_akuttmottak"
# NOTE(review): presumably an EPSG code (25833 = ETRS89 / UTM zone 33N) — confirm.
layer_crs = 25833

# Select the catalog, make sure the schema exists, then make it the default
# so the unqualified table names above resolve inside it.
spark.sql(f"USE CATALOG {catalog_dev}")
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_dev}")
spark.sql(f"USE SCHEMA {schema_dev}")

In [0]:
# DDL for the bronze table. IF NOT EXISTS means re-running this cell does not
# fail when the table is already present.
# "row_hash" is also the column name main() passes to write_to_delta_table.
bronze_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {bronze_table} (
    row_hash STRING,
    source_file STRING,
    source_layer STRING,
    geometry BINARY,
    ingest_time TIMESTAMP
) USING DELTA
"""
spark.sql(bronze_table_ddl)

In [0]:
# DDL for the log table whose name main() passes to check_for_new_gdbs and
# write_to_delta_table. IF NOT EXISTS means re-running this cell does not fail
# when the table is already present.
log_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {log_table} (
  gdb_name STRING,
  processed_time TIMESTAMP,
  num_inserted INT,
  num_updated INT,
  num_deleted INT
) USING DELTA
"""
spark.sql(log_table_ddl)

In [0]:
def main():
    """
    Find new geodatabases and write them to the Delta table.

    Iterates over the paths returned by ``check_for_new_gdbs`` and, for each
    one, passes the result of ``write_to_sdf`` on to ``write_to_delta_table``.
    Reads the notebook-level settings ``gcs_landing_zone``, ``log_table``,
    ``layer``, ``bronze_table`` and ``layer_crs``.
    """
    for gdb_uri in check_for_new_gdbs(gcs_landing_zone, log_table, "Sykehus"):
        # Final path component, ignoring any trailing slashes.
        name = gdb_uri.rstrip("/").rsplit("/", 1)[-1]
        # Drop a leading "dbfs:" prefix when present; the rest is unchanged.
        local_path = gdb_uri.removeprefix("dbfs:")

        frame = write_to_sdf(local_path, name, layer, bronze_table, layer_crs)
        write_to_delta_table(frame, name, bronze_table, log_table, "row_hash")

In [0]:
# Entry point: executing this cell runs main() once.
main()