In [0]:
%run ../get_user

In [0]:
# Resolve the identity of the user running this notebook. `username` is
# interpolated into the per-user table names used by the cells below.
# `get_username_from_email` is presumably provided by the `../get_user`
# notebook run above -- confirm.
current_user_row = spark.sql("SELECT current_user()").first()
user_email = current_user_row[0]
username = get_username_from_email(user_email)
print(username)

In [0]:
from sedona.spark import *
from sedona.maps.SedonaPyDeck import SedonaPyDeck
from sedona.maps.SedonaKepler import SedonaKepler
from pyspark.sql import functions as F
from sedona.sql import st_functions as st
from sedona.sql.types import GeometryType
from pyspark.sql.functions import expr


In [0]:
# Build (or reuse) a Spark session that requests the Sedona and GeoTools
# wrapper jars through `spark.jars.packages`, then hand it to
# SedonaContext.create to obtain the Sedona-enabled session.
sedona_packages = ",".join(
    (
        "org.apache.sedona:sedona-spark-shaded-3.3_2.12:1.7.1",
        "org.datasyslab:geotools-wrapper:1.7.1-28.5",
    )
)
config = (
    SedonaContext.builder()
    .config("spark.jars.packages", sedona_packages)
    .getOrCreate()
)
sedona = SedonaContext.create(config)

In [0]:
# Load the per-user ceremonial county boundaries with their area and a
# precision-5 geohash. The geohash is computed on the geometry transformed
# from EPSG:27700 to EPSG:4326 and is used as the range-partitioning key.
administrative_boundaries_sql = f"""
  SELECT b.fid, b.name, ST_Area(b.geometry) AS area, geometry, ST_Geohash(ST_Transform(geometry,'epsg:27700','epsg:4326'), 5) AS geohash
  FROM geospatial.lookups.boundary_line_ceremonial_counties_{username} b 
"""
administrative_boundaries = spark.sql(administrative_boundaries_sql).repartitionByRange(
    2, "geohash"
)

In [0]:
# Number of locations to distribute across all boundaries, proportionally
# to each boundary's share of the summed area.
total_locations = 1000

# Sum of all boundary areas: the denominator of each boundary's area share.
total_boundary_area = (
    administrative_boundaries
    .selectExpr("SUM(area) AS total_area")
    .first()["total_area"]
)

# area / total * total_locations, rounded to the nearest whole location.
location_share = F.col("area") / total_boundary_area * F.lit(total_locations)
administrative_boundaries = (
    administrative_boundaries
    .withColumn("number_of_locations", F.round(location_share).cast("integer"))
    .orderBy("number_of_locations", ascending=True)
)

# Persist the enriched boundaries as the per-user silver table.
silver_boundaries_table = (
    f"geospatial.lookups.boundary_line_ceremonial_counties_silver_{username}"
)
boundaries_writer = (
    administrative_boundaries.write
    .mode("overwrite")
    .option("mergeSchema", "true")
)
boundaries_writer.saveAsTable(silver_boundaries_table)

In [0]:
# Register the enriched boundaries under a view name so SQL cells can join
# against them, then preview the DataFrame.
boundaries_view_name = "administrative_boundaries_vw"
administrative_boundaries.createOrReplaceTempView(boundaries_view_name)
display(administrative_boundaries)

In [0]:
# Bronze greenspace sites, restricted to the three site functions of
# interest, exposed as a temp view for the SQL cells that follow.
greenspace_site_table = f"geospatial.greenspaces.greenspace_site_{username}"
function_filter = "function IN ('Play Space', 'Playing Field', 'Public Park Or Garden')"

df_greenspaces_bronze = spark.table(greenspace_site_table).filter(function_filter)
df_greenspaces_bronze.createOrReplaceTempView("greenspace_site_bronze_vw")


In [0]:
# Self-join the bronze sites to find every pair where site g1 is covered by
# a different site g2 (ST_CoveredBy). Each output row carries both ids and
# functions, plus the name, geometry and geohash of the covering site g2.
covered_pairs_sql = """
  SELECT 
    g1.id AS g1_id,
    g2.id AS g2_id,
    g1.function AS g1_function,
    g2.function AS g2_function,
    g2.distinctive_name_1 AS g2_name,
    g2.geometry AS geometry,
    ST_Geohash(ST_Transform(g2.geometry ,'epsg:27700','epsg:4326'), 5) AS geohash
  FROM greenspace_site_bronze_vw g1
  INNER JOIN greenspace_site_bronze_vw g2
    ON ST_CoveredBy(g1.geometry, g2.geometry)
   AND g1.id != g2.id
"""
greenspace_site_covered = spark.sql(covered_pairs_sql).repartitionByRange(10, "geohash")

greenspace_site_covered.createOrReplaceTempView("greenspace_site_covered_vw")

In [0]:
# Collapse the covered pairs to one row per covering site (g2): its own
# function followed by the distinct functions of the sites it covers.
# NOTE(review): `num_functions` is count(*) + 1, i.e. the number of covered
# sites plus one, whereas `functions` de-duplicates via collect_set -- the two
# can disagree when several covered sites share a function. Confirm intent.
aggregated_sites_sql = """
  SELECT 
    g2_id AS id,
    concat_ws(', ', any_value(g2_function), collect_set(g1_function)) AS functions,
    count(*) + 1 AS num_functions,
    g2_name AS name,
    ST_Area(geometry) AS area,
    geometry,
    ST_Geohash(ST_Transform(geometry,'epsg:27700','epsg:4326'), 5) AS geohash
  FROM greenspace_site_covered_vw
  GROUP BY g2_id, g2_name, geometry
"""
greenspace_site_aggregated = spark.sql(aggregated_sites_sql).repartitionByRange(
    10, "geohash"
)
greenspace_site_aggregated.createOrReplaceTempView("greenspace_site_aggregated_vw")


In [0]:
# Sites that take no part in any covering relationship: neither covered by
# another site (g1_id) nor covering one (g2_id). The column order mirrors the
# aggregated view (id, function(s), num_functions, name, area, geometry,
# geohash).
non_covered_sites_sql = """
  SELECT id, function, 1 AS num_functions, distinctive_name_1 AS name, ST_Area(geometry) AS area, geometry,
  ST_Geohash(ST_Transform(geometry,'epsg:27700','epsg:4326'), 5) AS geohash
  FROM greenspace_site_bronze_vw
  WHERE id NOT IN (SELECT g1_id FROM greenspace_site_covered_vw)
    AND id NOT IN (SELECT g2_id FROM greenspace_site_covered_vw)
"""
greenspace_site_non_covered = spark.sql(non_covered_sites_sql).repartitionByRange(
    10, "geohash"
)
greenspace_site_non_covered.createOrReplaceTempView("greenspace_site_non_covered_vw")



In [0]:
# Combine covering (aggregated) and stand-alone (non-covered) sites into one
# DataFrame; UNION matches the two views by column position.
all_sites_sql = """
SELECT * FROM greenspace_site_aggregated_vw
UNION
SELECT * FROM greenspace_site_non_covered_vw"""
greenspace_site_all = spark.sql(all_sites_sql).repartitionByRange(10, "geohash")

# Approximate 0%, 20%, 40%, 60%, 80%, 100% quantiles of the site area
# (relative error 0.001).
quantile_probabilities = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
quantiles = greenspace_site_all.approxQuantile("area", quantile_probabilities, 0.001)
print("Quintile breakpoints:", quantiles)

q0, q20, q40, q60, q80, q100 = quantiles

# Label each site with the upper percentile of its area quintile. The ladder
# is built inside-out so the smallest threshold (q20) is tested first and
# anything above q80 (or with a null area) falls through to 100.
area_category = F.lit(100)
for upper_bound, label in ((q80, 80), (q60, 60), (q40, 40), (q20, 20)):
    area_category = F.when(F.col("area") <= upper_bound, label).otherwise(area_category)

greenspace_site_all = greenspace_site_all.withColumn("area_category", area_category)

category_counts = (
    greenspace_site_all
    .groupBy("area_category")
    .count()
    .orderBy("area_category")
)
display(category_counts)
greenspace_site_all.createOrReplaceTempView("greenspace_site_all_vw")


In [0]:
# Assign every greenspace site to exactly one administrative boundary: the
# one it overlaps most (largest ST_Area of the ST_Intersection).
#
# ROW_NUMBER() is used instead of RANK(): RANK() gives rank 1 to *every*
# boundary tied on intersection area, which would emit the same site id more
# than once in the silver table. `b.fid` is a secondary sort key so the
# choice between tied boundaries is deterministic across runs
# (assumes fid is unique per boundary -- confirm).
greenspace_site_silver = spark.sql("""
WITH tmp AS (
SELECT a.id, a.functions, a.num_functions, a.name, a.area, a.area_category, a.geometry, a.geohash,
ROW_NUMBER() OVER(
  PARTITION BY a.id
  ORDER BY ST_Area(ST_Intersection(a.geometry, b.geometry)) DESC, b.fid
) AS administrative_rank,
b.fid as administrative_fid
FROM greenspace_site_all_vw a
INNER JOIN administrative_boundaries_vw b
ON ST_Intersects(a.geometry, b.geometry))
SELECT tmp.id, tmp.functions, tmp.num_functions, tmp.name, tmp.area, tmp.area_category, tmp.administrative_fid, tmp.geometry, tmp.geohash
FROM tmp
WHERE  administrative_rank = 1
""").repartitionByRange(10, "geohash")

# Expose the result to later SQL cells and persist it as the per-user silver table.
greenspace_site_silver.createOrReplaceTempView("greenspace_site_silver_vw")
greenspace_site_silver.write.mode("overwrite").option("mergeSchema", "true").saveAsTable(f"geospatial.greenspaces.greenspace_site_silver_{username}")

In [0]:
# Compute the degree of every road node: the number of distinct road links
# that start or end at it. Nodes without any link are dropped by the inner
# join, as before.
#
# The links are first unpivoted into one (link_id, node_id) row per endpoint
# so the node join is a plain equi-join. The previous
# `ON a.id = b.start_node OR a.id = b.end_node` condition cannot be executed
# as a hash/sort-merge join and degrades to a nested-loop comparison of
# nodes x links. COUNT(DISTINCT ...) keeps the result identical: a link whose
# start and end are the same node is still counted once.
#
# The former ORDER BY was removed because repartitionByRange reshuffles the
# rows and discards that ordering anyway.
road_nodes_silver = spark.sql(f"""
WITH link_endpoints AS (
  SELECT id AS link_id, start_node AS node_id FROM geospatial.networks.road_link_{username}
  UNION ALL
  SELECT id AS link_id, end_node AS node_id FROM geospatial.networks.road_link_{username}
)
SELECT a.fid, a.id, a.form_of_road_node, COUNT(DISTINCT e.link_id) AS degree, a.geometry, ST_Geohash(ST_Transform(a.geometry,'epsg:27700','epsg:4326'), 5) AS geohash
FROM geospatial.networks.road_node_{username} a
JOIN link_endpoints e
ON a.id = e.node_id
GROUP BY a.fid, a.id, a.form_of_road_node, a.geometry
""").repartitionByRange(10, "geohash")

road_nodes_silver.write.mode("overwrite").saveAsTable(f"geospatial.networks.road_node_silver_{username}")

In [0]:
# Access points that belong to a silver greenspace site and allow pedestrian
# access, tagged with a precision-5 geohash for range partitioning.
#
# The site filter reads the persisted silver table rather than the
# `greenspace_site_silver_vw` temp view: a temp view is only a named query
# plan, so referencing it here would re-run the whole silver lineage (both
# spatial joins and the window) just to obtain the list of ids. This assumes
# the cell that writes greenspace_site_silver_{username} has been run first.
greenspace_entries = spark.sql(f"""
SELECT a.fid, a.id, a.access_type, a.ref_to_greenspace_site, a.geometry, ST_Geohash(ST_Transform(a.geometry,'epsg:27700','epsg:4326'), 5) AS geohash
FROM geospatial.greenspaces.access_point_{username} a
WHERE a.ref_to_greenspace_site IN (SELECT id FROM geospatial.greenspaces.greenspace_site_silver_{username})
  AND a.access_type IN ('Pedestrian', 'Motor Vehicle And Pedestrian')""").repartitionByRange(10, "geohash")

greenspace_entries.createOrReplaceTempView("greenspace_entries_vw")


In [0]:
# For every greenspace entry, attach its single nearest road node (ST_kNN with
# k = 1 and the fourth argument FALSE) together with the ST_Distance between
# the two geometries, then persist the result as the per-user access-point
# silver table.
nearest_road_node_sql = f"""
SELECT
    a.fid,
    a.id,
    a.access_type,
    a.ref_to_greenspace_site,
    b.fid AS nearest_road_node_fid,
    ST_Distance(a.geometry, b.geometry) AS distance_to_road_node,
    a.geometry, 
    a.geohash
FROM greenspace_entries_vw a 
INNER JOIN geospatial.networks.road_node_silver_{username} b 
ON ST_kNN(a.geometry, b.geometry, 1, FALSE)"""
entry_road_1nn = spark.sql(nearest_road_node_sql).repartitionByRange(10, "geohash")

access_point_silver_table = f"geospatial.greenspaces.access_point_silver_{username}"
entry_road_1nn.write.mode("overwrite").saveAsTable(access_point_silver_table)