In [0]:
%pip install python-dotenv

In [0]:
from pyspark.sql.functions import *

from src.data.data_utils import write_delta_table
from src.data.geometry_utils import (
    random_adjusted_bbox_centered,
    make_envelope,
    make_bbox,
    add_bbox_columns,
)
from src.data.token_utils import get_token
from src.data.image_utils import enrich_output

In [0]:
# --- Target catalog / schema -------------------------------------------------
# The catalog name is stored already wrapped in backticks so it can be dropped
# straight into the SQL statements below (the name contains hyphens).
catalog_dev = "`land_auto-gen-kart_dev`"
schema_dev = "dl_bildesegmentering"

# --- Table names (resolved against the catalog/schema selected below) --------
bronze_table = "polygons_bronze"
silver_table = "polygons_silver"

# --- Output layout -----------------------------------------------------------
# Passed to enrich_output; presumably the sub-directory used for each output
# kind (image / DOM / mask) -- confirm in src.data.image_utils.
SUBDIR = {
    "image": "storreimage",
    "dom": "storredom",
    "mask": "storrelabel",
}

# --- Geometry / image parameters ---------------------------------------------
# bbox_length and max_offset are handed to the geometry helpers; their units
# are not visible in this notebook -- TODO confirm (map units vs. pixels).
bbox_length = 128
max_offset = 30
image_width = 512
image_height = 512

# --- Select the working catalog and schema for the rest of the notebook ------
# Order matters: the schema is created inside the catalog chosen first.
spark.sql(f"USE CATALOG {catalog_dev}")
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_dev}")
spark.sql(f"USE SCHEMA {schema_dev}")

In [0]:
# Create the silver Delta table if it does not exist yet, so its schema is in
# place before the pipeline cell writes to it.
# NOTE(review): `Adjusted_bbox` is the only capitalised column name in this
# schema -- confirm that is intentional before relying on case-sensitive access.
# NOTE(review): `adjusted_struct` carries `bbox` / `bbox_str` fields that look
# like they mirror the top-level `Adjusted_bbox` / `bbox_str` columns -- verify
# whether both representations are needed.
create_silver_table_sql = f"""
CREATE TABLE IF NOT EXISTS {silver_table} (
    row_hash STRING,
    geometry BINARY,
    source_file STRING,
    source_layer STRING,
    bbox ARRAY<DOUBLE>,
    adjusted_struct STRUCT<bbox: ARRAY<DOUBLE>, bbox_str: STRING>,
    Adjusted_bbox ARRAY<DOUBLE>,
    bbox_str STRING,
    image_path STRING,
    dom_path STRING,
    mask_path STRING,
    dom_wms STRING,
    image_wms STRING,
    dom_status STRING,
    image_status STRING,
    mask_status STRING,
    ingest_time TIMESTAMP,
    photo_time DATE
) USING DELTA
"""
spark.sql(create_silver_table_sql)

In [0]:
def read_table_to_wkt():
    """
    Load the bronze polygon table with its geometry column parsed from WKT.

    Reads ``bronze_table`` through the notebook's Spark session and replaces
    the ``geometry`` column with ``ST_GeomFromWKT(geometry)``. Despite the
    function name, the returned column holds the parsed geometry value, not
    WKT text.

    Returns:
        The Spark DataFrame read from ``bronze_table`` with ``geometry``
        replaced; no other column is touched here.
    """
    bronze_polygons = spark.read.table(bronze_table)
    parsed_geometry = expr("ST_GeomFromWKT(geometry)")
    return bronze_polygons.withColumn("geometry", parsed_geometry)

In [0]:
# Credential passed to enrich_output below.
token = get_token()

# Stage 1: bronze polygons with `geometry` parsed from WKT.
polygons = read_table_to_wkt()

# Stage 2: geometry helpers from src.data.geometry_utils, applied in this fixed
# order; each takes the previous frame and returns the next one.
enveloped = make_envelope(polygons, bbox_length)
boxed = make_bbox(enveloped, bbox_length)
with_adjusted_bbox = add_bbox_columns(boxed, bbox_length, max_offset)

# Stage 3: enrichment keyed on `row_hash`.
# Presumably resolves image/DOM/mask outputs per row using the token -- confirm
# in src.data.image_utils.
# NOTE(review): de-duplication happens only after this step; if enrich_output
# does per-row remote work, duplicate rows are processed too -- verify whether
# dropping duplicates earlier would be safe.
enriched = enrich_output(
    with_adjusted_bbox, token, "row_hash", SUBDIR, image_width, image_height
)

# Stage 4: serialise the geometry to binary (the silver table declares
# `geometry BINARY`) and keep a single row per `row_hash`.
# The result stays bound to `df`, the name the notebook has used so far.
df = enriched.withColumn("geometry", expr("ST_AsBinary(geometry)")).dropDuplicates(
    ["row_hash"]
)

# Persist to the silver table; "row_hash" is presumably the merge/upsert key --
# confirm in src.data.data_utils.write_delta_table.
write_delta_table(df, silver_table, "row_hash")