In [0]:
from sedona.spark import *
from sedona.maps.SedonaPyDeck import SedonaPyDeck
from sedona.maps.SedonaKepler import SedonaKepler
from pyspark.sql import functions as F
from sedona.sql import st_functions as st
from sedona.sql.types import GeometryType
from pyspark.sql.functions import expr


In [0]:
# Build (or reuse) a Spark session that requests the Sedona and GeoTools
# wrapper artifacts through spark.jars.packages.
config = (
    SedonaContext.builder()
    .config(
        "spark.jars.packages",
        "org.apache.sedona:sedona-spark-shaded-3.3_2.12:1.7.1,"
        "org.datasyslab:geotools-wrapper:1.7.1-28.5",
    )
    .getOrCreate()
)

# Wrap the session in a Sedona context; later cells rely on Sedona's ST_*
# SQL functions being available.
sedona = SedonaContext.create(config)

In [0]:
def transform_geometry_sql(df, geometry_col="geometry", temp_view_name="input_geometries"):
    """
    Serialise a geometry column to EWKB and append its bounding-box columns.

    The input DataFrame is registered as a temporary view and queried with
    Sedona SQL. The result keeps every non-geometry column, in its original
    order, followed by:

    - geometry: ST_AsEWKB of the input geometry column. The output column is
      always named "geometry", whatever ``geometry_col`` is called.
    - xmin, xmax, ymin, ymax: ST_XMin / ST_XMax / ST_YMin / ST_YMax of the
      input geometry column.

    Parameters:
    - df (DataFrame): Input Spark DataFrame with a geometry column of GeometryType
    - geometry_col (str): Name of the geometry column (default: "geometry")
    - temp_view_name (str): Temporary view name to use in SQL (default: "input_geometries").
      Any existing temporary view with this name is replaced.

    Returns:
    - DataFrame: Transformed DataFrame ready for write
    """
    def quote(identifier):
        # Backtick-quote a column name (doubling embedded backticks) so names
        # containing spaces, dots or reserved words are read as one identifier.
        return "`" + identifier.replace("`", "``") + "`"

    # Register temporary view so the DataFrame can be addressed from SQL.
    df.createOrReplaceTempView(temp_view_name)

    geometry_ref = quote(geometry_col)

    # Pass-through columns: everything except the geometry column itself.
    passthrough = [quote(name) for name in df.columns if name != geometry_col]
    derived = [
        f"ST_AsEWKB({geometry_ref}) AS geometry",
        f"ST_XMin({geometry_ref}) AS xmin",
        f"ST_XMax({geometry_ref}) AS xmax",
        f"ST_YMin({geometry_ref}) AS ymin",
        f"ST_YMax({geometry_ref}) AS ymax",
    ]

    # Joining one combined list keeps the SELECT valid even when the input has
    # no columns besides the geometry (no dangling leading comma).
    select_list = ",\n        ".join(passthrough + derived)

    query = f"""
    SELECT
        {select_list}
    FROM {temp_view_name}
    """

    # Run on the session that owns `df` rather than a notebook-global `spark`,
    # so the query always sees the temp view registered above.
    return df.sparkSession.sql(query)

In [0]:
# Build the silver road-node set: one row per node that is the start or end of
# at least one road link, with
#   - degree:   number of distinct links touching the node,
#   - geometry: the node geometry decoded from EWKB,
#   - geohash:  precision-5 geohash of the geometry after transforming it from
#               EPSG:27700 to EPSG:4326.
#
# The link table is unpivoted into (node_id, link_id) pairs so the join is a
# plain equi-join; joining on "start_node OR end_node" leaves Spark without an
# equi-join key. COUNT(DISTINCT ...) keeps a link whose start and end are the
# same node counted once, as before.
#
# No ORDER BY: repartitionByRange reshuffles the rows, so any ordering applied
# inside the query would be lost before the result is used.
#
# The query runs on `sedona`, the session the notebook creates with
# SedonaContext.create, because it uses Sedona's ST_* functions.
road_nodes_silver = sedona.sql("""
WITH link_endpoints AS (
    SELECT start_node AS node_id, id AS link_id FROM geospatial.networks.road_link
    UNION ALL
    SELECT end_node AS node_id, id AS link_id FROM geospatial.networks.road_link
)
SELECT
    a.fid,
    a.id,
    a.form_of_road_node,
    COUNT(DISTINCT e.link_id) AS degree,
    ST_GeomFromEWKB(a.geometry) AS geometry,
    ST_Geohash(ST_Transform(ST_GeomFromEWKB(a.geometry), 'epsg:27700', 'epsg:4326'), 5) AS geohash
FROM geospatial.networks.road_node a
JOIN link_endpoints e
    ON a.id = e.node_id
GROUP BY a.fid, a.id, a.form_of_road_node, a.geometry
""").repartitionByRange(2, "geohash")

# Expose the result to SQL cells, then show the row count as the cell output.
road_nodes_silver.createOrReplaceTempView("road_nodes_silver_vw")
road_nodes_silver.count()

In [0]:
# Convert the geometry column to EWKB plus bounding-box columns, then persist
# the result, replacing any existing contents of the target table.
df_road_nodes_ewkb = transform_geometry_sql(road_nodes_silver)
(
    df_road_nodes_ewkb.write
    .mode("overwrite")
    .saveAsTable("geospatial.networks.road_node_silver")
)