In [0]:
%pip install scikit-image==0.20.0

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, DateType

from src.data.data_utils import write_delta_table
from src.data.predict_utils import make_masks_grouped_udf

In [0]:
# --- Configuration: storage location, catalog/schema and table names ---------

# Volume directory that is handed to the mask UDF as `predicted_masks_path`.
predicted_masks = "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/DL_bildesegmentering/predicted_helipads"

# The catalog name contains hyphens, so it is kept wrapped in backticks for SQL.
catalog_dev = "`land_auto-gen-kart_dev`"
schema_dev = "dl_bildesegmentering"

# Select the catalog, make sure the schema exists, then make it the default so
# the unqualified table names below resolve inside it. Order matters here.
for statement in (
    f"USE CATALOG {catalog_dev}",
    f"CREATE SCHEMA IF NOT EXISTS {schema_dev}",
    f"USE SCHEMA {schema_dev}",
):
    spark.sql(statement)

# Unqualified table names (resolved against the catalog/schema selected above).
log_table = "logs_predicted_helipads"
bronze_table = "predicted_helipads_bronze"
silver_table = "predicted_helipads_silver"
hospitals_table = "hospitals_gold"

In [0]:
# Create the silver Delta table on first run; a no-op when it already exists.
# Columns: a row identifier hash, the centroid coordinates, the photo
# timestamp, the originating file and the ingestion timestamp.
create_silver_table_sql = f"""
CREATE TABLE IF NOT EXISTS {silver_table} (
    row_hash STRING,
    centroid_x DOUBLE,
    centroid_y DOUBLE,
    photo_time TIMESTAMP,
    source_file STRING,
    ingest_time TIMESTAMP
) USING DELTA
"""
spark.sql(create_silver_table_sql)

In [0]:
# Keep only the bronze rows whose boolean `helipad` column is true.
helipad_rows = spark.read.table(bronze_table).filter(col("helipad"))

# Fetch the photo time for each helipad.
# The join key is cut out of `source_file`: the substring starts at character 12
# and spans length - 15 characters, i.e. it skips the first 11 and the last 4
# characters of the value.
# NOTE(review): presumably this strips a fixed prefix and a file extension so
# the remainder matches `row_hash` in the hospitals table -- confirm against
# the bronze file naming.
helipads_keyed = helipad_rows.withColumn(
    "row_hash", expr("substring(source_file, 12, length(source_file)-15)")
)
hospital_photo_times = spark.read.table(hospitals_table).select(
    "row_hash", "photo_time"
)
# Left join: helipad rows without a matching hospital keep a null photo_time.
helipads_with_photo_time = helipads_keyed.join(
    hospital_photo_times, on="row_hash", how="left"
)

# Schema of the rows produced by the mask function passed to mapInPandas.
# NOTE(review): photo_time is declared as DateType here while the silver table
# DDL declares it TIMESTAMP -- confirm the write casts as intended.
mask_output_schema = (
    StructType()
    .add("source_file", StringType(), False)
    .add("photo_time", DateType(), True)
    .add("geometry_wkt", StringType(), False)
    .add("centroid_x", DoubleType(), False)
    .add("centroid_y", DoubleType(), False)
)

mask_function = make_masks_grouped_udf(predicted_masks_path=predicted_masks)
helipad_masks = helipads_with_photo_time.mapInPandas(
    mask_function, schema=mask_output_schema
)

# The WKT geometry is not stored in the silver table.
helipad_centroids = helipad_masks.drop("geometry_wkt")

# Hash only the data columns (captured before ingest_time is added), so the id
# does not change merely because the notebook is re-run at a later time.
hashed_columns = helipad_centroids.columns
with_ingest_time = helipad_centroids.withColumn("ingest_time", current_timestamp())
silver_rows = with_ingest_time.withColumn(
    "row_hash", sha2(concat_ws("||", *hashed_columns), 256)
)

write_delta_table(silver_rows, silver_table, id_col="row_hash")