In [0]:
from pyspark.sql.functions import *

from src.data.data_utils import write_delta_table

In [0]:
# Unity Catalog location for this notebook. The catalog name contains hyphens,
# so it carries its own backticks to stay a valid identifier in Spark SQL.
catalog_dev = "`land_auto-gen-kart_dev`"
schema_dev = "dl_bildesegmentering"

# Source (silver) and target (gold) tables; unqualified, so they resolve
# against the catalog/schema selected below.
silver_table = "predicted_snuplasser_silver"
gold_table = "predicted_snuplasser_gold"

# Order matters: select the catalog, create the schema inside it if it is
# missing, then make that schema the session default.
for setup_statement in (
    f"USE CATALOG {catalog_dev}",
    f"CREATE SCHEMA IF NOT EXISTS {schema_dev}",
    f"USE SCHEMA {schema_dev}",
):
    spark.sql(setup_statement)

In [0]:
# Ensure the gold Delta table exists before it is written to.
# IF NOT EXISTS keeps this cell safe to re-run: an already existing table is
# not recreated. Geometry is declared as BINARY.
create_gold_table_sql = f"""
CREATE TABLE IF NOT EXISTS {gold_table} (
    row_hash STRING,
    category STRING,
    geometry BINARY,
    diameter DOUBLE,
    source_file STRING,
    ingest_time TIMESTAMP,
    photo_time DATE
) USING DELTA
"""
spark.sql(create_gold_table_sql)

In [0]:
# Read the silver-layer rows that feed the gold table.
df_silver = spark.read.table(silver_table)

# Diameter size class. Branches are tested top-down, so each lower threshold
# only sees values that fell through the higher ones; this yields the
# half-open intervals [20, 25), [25, 30), [30, 35) and [35, inf).
# There is deliberately no otherwise(): a diameter below 20 (or NULL) gets a
# NULL category and is removed by the filter further down.
diameter_col = col("diameter")
size_category = (
    when(diameter_col >= 35, "35+")
    .when(diameter_col >= 30, "30-35")
    .when(diameter_col >= 25, "25-30")
    .when(diameter_col >= 20, "20-25")
)

df = (
    df_silver
    # Build a point geometry from the centroid coordinates ...
    .withColumn("geometry", expr("ST_Point(centroid_x, centroid_y)"))
    # ... after which the raw coordinate columns are no longer needed.
    .drop("centroid_x", "centroid_y")
    # Categorize based on diameter.
    .withColumn("category", size_category)
    # Keep only rows that landed in one of the size classes.
    .filter(col("category").isNotNull())
    # Serialize the geometry to binary to match the BINARY geometry column
    # declared for the gold table.
    .withColumn("geometry", expr("ST_AsBinary(geometry)"))
)

In [0]:
# Hand the transformed DataFrame to the project helper, targeting the gold table.
# NOTE(review): id_col="row_hash" is presumably the key the helper uses to match
# incoming rows against existing ones (merge/upsert) — confirm in
# src.data.data_utils.write_delta_table.
write_delta_table(df, gold_table, id_col="row_hash")