## Goal

Identify and rank the top public parks in each ceremonial county based on area and functional diversity, and determine the most accessible entrance for each top-ranked park using geospatial and network data.

In [0]:
%run ../get_user

In [0]:
# Resolve the identity running this notebook and derive the short username
# that is interpolated into the per-user table names in the cells below.
# `get_username_from_email` is not defined in this notebook; it is expected
# to come from the `../get_user` notebook pulled in by the earlier %run cell.
user_email = spark.sql("SELECT current_user()").first()[0]
username = get_username_from_email(user_email)

# Echo the resolved suffix so it is visible in the cell output.
print(username)

In [0]:
# This cell initializes Apache Sedona for geospatial processing in Spark.
# It configures the required Sedona and GeoTools packages, then calls SedonaContext.create, which returns the Spark session with Sedona's geometry types and ST_* SQL functions registered (not a separate "SedonaContext" object).

from sedona.spark import *
from pyspark.sql.functions import expr

# Comma-separated Maven coordinates handed to `spark.jars.packages`:
# the shaded Sedona build and the matching GeoTools wrapper.
sedona_jar_packages = (
    'org.apache.sedona:sedona-spark-shaded-3.3_2.12:1.8.0,'
    'org.datasyslab:geotools-wrapper:1.8.0-33.1'
)

# Build (or reuse) the session through Sedona's builder with the packages set.
config = (
    SedonaContext.builder()
    .config('spark.jars.packages', sedona_jar_packages)
    .getOrCreate()
)

# Hand the session to Sedona; the returned handle is kept as `sedona`.
sedona = SedonaContext.create(config)

In [0]:
# Build the gold table of top-ranked public parks per ceremonial county.
#
# Inputs (per-user silver tables, suffixed with `username`):
#   - greenspace_site_silver  (gs): one row per greenspace site
#   - access_point_silver     (ga): entrances, linked via ref_to_greenspace_site
#   - road_node_silver        (rn): road nodes, linked via nearest_road_node_fid
#   - boundary_line_ceremonial_counties_silver (cc): counties, linked via
#     gs.administrative_fid
#
# Ranking (ROW_NUMBER within each county name):
#   1. area_category, descending
#      NOTE(review): assumes a larger area sorts higher under DESC; if
#      area_category is a text label this ordering is alphabetical - confirm.
#   2. num_functions, descending
#   3. accessibility_degree = SUM(rn.degree) over the park's access points,
#      descending, parks without any matched road node last
#      NOTE(review): a road node shared by several access points of the same
#      park contributes its degree once per access point - confirm intended.
#   4. gs.id, ascending, purely as a deterministic tie-breaker so repeated
#      runs select the same parks when the first three keys tie.
#
# Only the first `number_of_locations` parks of each county are kept. Parks
# with no matching county have a NULL number_of_locations and are therefore
# dropped by the final filter.
#
# Output columns: id, park_name, functions, num_functions, area_category,
# accessibility_degree, county, number_of_locations, park_rank, geometry (EWKB).
top_parks_gold = spark.sql(
    f"""
    WITH ranked_parks AS (
        SELECT
            gs.id,
            gs.name AS park_name,
            gs.functions,
            gs.num_functions,
            gs.area_category,
            SUM(rn.degree) AS accessibility_degree,
            cc.name AS county,
            cc.number_of_locations,
            ROW_NUMBER() OVER (
                PARTITION BY cc.name
                ORDER BY
                    gs.area_category DESC,
                    gs.num_functions DESC,
                    SUM(rn.degree) DESC NULLS LAST,
                    gs.id ASC
            ) AS park_rank,
            ST_AsEWKB(gs.geometry) AS geometry
        FROM geospatial.greenspaces.greenspace_site_silver_{username} gs
        LEFT JOIN geospatial.greenspaces.access_point_silver_{username} ga
            ON gs.id = ga.ref_to_greenspace_site
        LEFT JOIN geospatial.networks.road_node_silver_{username} rn
            ON ga.nearest_road_node_fid = rn.fid
        LEFT JOIN geospatial.lookups.boundary_line_ceremonial_counties_silver_{username} cc
            ON gs.administrative_fid = cc.fid
        GROUP BY
            gs.id, gs.name, gs.functions, gs.num_functions, gs.area_category,
            gs.geometry, cc.name, cc.number_of_locations
    )
    SELECT * FROM ranked_parks WHERE park_rank <= number_of_locations
    """
)

# Persist the result as the per-user gold table, replacing any previous contents.
top_parks_gold.write.mode("overwrite").option("mergeSchema", "true").saveAsTable(
    f"geospatial.greenspaces.top_parks_gold_{username}"
)

In [0]:
# Select the single most accessible entrance for each top-ranked park and
# save the result as a per-user gold table.
#
# Inputs:
#   - top_parks_gold_{username} (gs): written by the previous cell
#   - access_point_silver       (ga): entrances, linked via ref_to_greenspace_site
#   - road_node_silver          (rn): road nodes, linked via nearest_road_node_fid
#
# Entrance ranking within each park:
#   1. degree of the nearest road node, highest first (unknown degree last)
#   2. distance to that road node, shortest first (unknown distance last, so
#      an unmeasured entrance never outranks a measured one)
#   3. access point id, as a deterministic tie-breaker
#
# `park_id` is taken from the park side of the LEFT JOIN so it stays populated
# even when a park has no access points; in that case the entrance columns
# (id, access_type, degree, distance, geometry, longitude, latitude) are NULL.
#
# The geometry is exported as EWKB, and longitude/latitude are obtained by
# transforming the access point from EPSG:27700 to EPSG:4326. The conversion
# lives in the outer SELECT so it applies only to the selected entrance rows.
#
# Output columns: id, park_id, park_rank, access_type, degree,
# distance_to_road_node, entry_rank, geometry, longitude, latitude.
top_entrances_gold = spark.sql(
    f"""
    WITH ranked_entrances AS (
        SELECT
            ga.id,
            gs.id AS park_id,
            gs.park_rank,
            ga.access_type,
            rn.degree,
            ga.distance_to_road_node,
            ROW_NUMBER() OVER (
                PARTITION BY gs.id
                ORDER BY
                    rn.degree DESC NULLS LAST,
                    ga.distance_to_road_node ASC NULLS LAST,
                    ga.id ASC
            ) AS entry_rank,
            ga.geometry AS access_geometry
        FROM geospatial.greenspaces.top_parks_gold_{username} gs
        LEFT JOIN geospatial.greenspaces.access_point_silver_{username} ga
            ON gs.id = ga.ref_to_greenspace_site
        LEFT JOIN geospatial.networks.road_node_silver_{username} rn
            ON ga.nearest_road_node_fid = rn.fid
    )
    SELECT
        id,
        park_id,
        park_rank,
        access_type,
        degree,
        distance_to_road_node,
        entry_rank,
        ST_AsEWKB(access_geometry) AS geometry,
        ST_X(ST_Transform(access_geometry,'epsg:27700','epsg:4326')) AS longitude,
        ST_Y(ST_Transform(access_geometry,'epsg:27700','epsg:4326')) AS latitude
    FROM ranked_entrances
    WHERE entry_rank = 1
    """
)

# Persist the result as the per-user gold table, replacing any previous contents.
top_entrances_gold.write.mode("overwrite").option("mergeSchema", "true").saveAsTable(
    f"geospatial.greenspaces.top_entrances_gold_{username}"
)