In [0]:
%pip install python-dotenv

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import StringType

from src.data.data_utils import write_delta_table
from src.data.token_utils import get_token
from src.data.image_utils import enrich_output

In [0]:
# Unity Catalog coordinates. The catalog name is wrapped in backticks inside the
# string itself so it can be interpolated directly into the SQL statements below.
catalog_dev = "`land_auto-gen-kart_dev`"
schema_dev = "dl_bildesegmentering"

# Point the session at the working catalog/schema, creating the schema on first run.
for _statement in (
    f"USE CATALOG {catalog_dev}",
    f"CREATE SCHEMA IF NOT EXISTS {schema_dev}",
    f"USE SCHEMA {schema_dev}",
):
    spark.sql(_statement)

# Table read by read_table_to_wkt and table written by the final cell.
helipads_table = "helipads_silver"
noHelipads_table = "nohelipads"

# Volume root; not referenced by the cells visible in this notebook section.
BASE_PATH = "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/DL_bildesegmentering"
# Sub-directory names handed to enrich_output for images and masks.
SUBDIR = dict(image="noHelipad_images", mask="noHelipad_labels")

# Offset added to every extent coordinate in make_bbox.
bbox_length = 128
# Image dimensions forwarded to enrich_output.
image_width = image_height = 512

In [0]:
# Create the target Delta table if it does not exist yet; the statement is a
# no-op when the table is already present.
# geometry is declared BINARY: the pipeline cell converts the geometry column
# with ST_AsBinary before writing to this table.
q = f"""
CREATE TABLE IF NOT EXISTS {noHelipads_table} (
    row_hash STRING,
    geometry BINARY,
    bbox ARRAY<DOUBLE>,
    bbox_str STRING,
    image_path STRING,
    mask_path STRING,
    image_wms STRING,
    image_status STRING,
    mask_status STRING,
    ingest_time TIMESTAMP,
    photo_time TIMESTAMP
) USING DELTA
"""
spark.sql(q)

In [0]:
def read_table_to_wkt():
    """
    Load the helipad rows as a Spark DataFrame with a decoded geometry column.

    Reads the table named by the module-level ``helipads_table``, replaces the
    ``geometry`` column with ``ST_GeomFromWKB(geometry)`` and keeps only the
    ``row_hash`` and ``geometry`` columns.

    NOTE(review): despite the function name, no WKT conversion is performed
    here; the column holds whatever ``ST_GeomFromWKB`` returns.
    """
    helipads = spark.read.table(helipads_table)
    # Decode the stored geometry so the ST_* functions downstream can use it.
    decoded = helipads.withColumn("geometry", expr("ST_GeomFromWKB(geometry)"))
    return decoded.select("row_hash", "geometry")

In [0]:
def make_bbox(df: DataFrame, bbox_length: int) -> DataFrame:
    """
    Add ``bbox`` and ``bbox_str`` columns derived from the ``geometry`` column.

    ``bbox`` is the array ``[xmin, ymin, xmax, ymax]`` of the geometry's extent
    with ``bbox_length`` added to each of the four values. ``bbox_str`` is the
    same four values formatted with six decimals and joined by commas.

    NOTE(review): because the same offset is added to the min and the max
    values, the extent is translated by ``bbox_length`` along both axes rather
    than enlarged around the polygon. Presumably this is intentional so the
    resulting tile does not cover the helipad itself; confirm.

    Args:
        df: DataFrame with a ``geometry`` column usable by the ``ST_*`` functions.
        bbox_length: Offset added to every extent coordinate.

    Returns:
        The input DataFrame with the two extra columns.
    """
    # SQL expression for the shifted extent; the offset is interpolated as a literal.
    shifted_extent_sql = f"""
        array(
            ST_XMin(geometry) + {bbox_length},
            ST_YMin(geometry) + {bbox_length},
            ST_XMax(geometry) + {bbox_length},
            ST_YMax(geometry) + {bbox_length}
        )
        """
    # One fixed-precision string per bbox element, in xmin, ymin, xmax, ymax order.
    corner_strings = [format_string("%.6f", col("bbox")[i]) for i in range(4)]

    with_bbox = df.withColumn("bbox", expr(shifted_extent_sql))
    return with_bbox.withColumn("bbox_str", concat_ws(",", *corner_strings))

In [0]:
# Token forwarded to enrich_output.
token = get_token()

# Reverse every row_hash before building the boxes; presumably this gives the
# rows keys that differ from the ones in the helipad table (TODO confirm).
df = make_bbox(
    read_table_to_wkt().withColumn("row_hash", reverse(col("row_hash"))),
    bbox_length,
)

# Enrich the rows via enrich_output, then convert geometry with ST_AsBinary to
# match the BINARY column declared for the target table.
df = enrich_output(
    df, token, "row_hash", SUBDIR, image_width, image_height
).withColumn("geometry", expr("ST_AsBinary(geometry)"))

write_delta_table(df, noHelipads_table, "row_hash")