This notebook is for calculating distances, elevations and timings between Dovetrek checkpoints. It then produces a routecard for a set route.

### Contents:
* [Imports](#imports)
* [Start SparkSession](#start-sparksession)
* [Set Parameters](#set-params)
* [Get Secrets](#get-secrets)
* [Fetch Checkpoint Information](#fetch-cp-info)
* [Convert BNG to NGR](#bng-to-ngr)
* [Convert NGR to Latitude/Longitude](#NGR-to-LatLong)
* [Create Matrix of Checkpoint combinations](#cp-combos)
* [Fetch Distances & Elevations from Google Maps API](#D&E-Google)
* [Fetch Distances & Elevations from OpenRouteService API](#D&E-ORS)
* [Fetch Distances & Elevations from Bing Maps API](#D&E-bing)
* [Calculate leg timings for each Checkpoint Combination](#cp-combo-times)
* [Pick a route](#pick-route)
* [Calculate route timings](#route-timings)
* [Calculate checkpoint arrival and departure times for route](#route-times)

### Imports <a class="anchor" id="imports"></a>

In [None]:
from pyspark.sql.functions import col, expr, first
from pyspark.sql import functions as F, DataFrame, Window, SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, FloatType, ArrayType, TimestampType
from datetime import datetime, time, timedelta
import requests
import time
import ipywidgets
import base64
import getpass
import builtins
import json
import itertools
from functools import reduce
import numpy as np

### Start SparkSession <a class="anchor" id="start-sparksession"></a>

In [None]:
# Reuse the session the runtime already provides and only build a local one
# when none is active. The handle is always bound to `spark`, because the smoke
# test below and every later cell refer to the session by that name.
spark = SparkSession.getActiveSession()
if spark is None:
    spark = (
        SparkSession.builder
        .appName("BinderPySpark")
        .config("spark.sql.execution.arrow.pyspark.enabled", "true")
        .getOrCreate()
    )

# Test if Spark is working
df = spark.createDataFrame([(1, "Alice"), (2, "Bob")], ["id", "name"])
df.show()
df.unpersist()

+---+-----+
| id| name|
+---+-----+
|  1|Alice|
|  2|  Bob|
+---+-----+



### Set Parameters <a class="anchor" id="set-params"></a>

In [None]:
Year_List = ["2025", "2024", "2019", "2018", "2017"]
API_Service_List = ["Bing Maps", "Google Maps", "OpenRouteService"]

# `dbutils` is only defined on Databricks. Probe for it on its own so that a
# genuine input error (e.g. a malformed StartTime) is reported instead of being
# swallowed and silently redirected to the ipywidgets fallback.
try:
    _widgets = dbutils.widgets
except NameError:
    _widgets = None

if _widgets is not None:
    # Databricks: notebook widgets. `widgets.get` returns strings, so numeric
    # parameters are converted here to match the types the ipywidgets branch yields.
    _widgets.dropdown("Competition_Year", "2025", Year_List)
    Competition_Year = _widgets.get("Competition_Year")

    _widgets.dropdown("API_Service", "Google Maps", API_Service_List)
    API_Service = _widgets.get("API_Service")

    _widgets.text("Dwell", "7")
    # Whole minutes: this value is later interpolated into an INTERVAL ... MINUTE literal.
    StopTimeAtCheckPoints = int(_widgets.get("Dwell"))

    _widgets.text("Speed", "5.30")
    Speed = float(_widgets.get("Speed"))

    _widgets.text("StartTime", "10:00:00")
    StartTime = datetime.strptime(_widgets.get("StartTime"), '%H:%M:%S')

    _widgets.text("Naismith", "10")
    AddMinutesPer100mHeight = float(_widgets.get("Naismith"))

else:
    # Plain Jupyter: interactive ipywidgets, read once the user confirms.
    Competition_Year_Picker = ipywidgets.Dropdown(
        options=Year_List,
        value='2024',
        description='Pick a competition year:',
    )
    API_Service_Picker = ipywidgets.Dropdown(
        options=API_Service_List,
        value="Google Maps",
        description='Pick an API Service for distance & elevations:',
    )
    StopTimePicker = ipywidgets.IntSlider(
        value=7,
        min=0,
        max=20,
        description='Pick a time in minutes to stop at checkpoints',
        readout=True,
        step=1,
    )
    NaismithPicker = ipywidgets.IntSlider(
        value=10,
        min=0,
        max=20,
        description='Add minutes per 100m elevation gain',
        readout=True,
        step=1,
    )
    SpeedPicker = ipywidgets.FloatSlider(
        value=5.3,
        min=0,
        max=10.0,
        step=0.1,
        description='Hiking speed:',
        disabled=False,
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format='.1f',
    )
    StartTimePicker = ipywidgets.Text(
        description='Type a StartTime in the format HH:MM:SS',
        value='10:00:00',
        disabled=False,
    )
    display(Competition_Year_Picker)
    display(API_Service_Picker)
    display(StopTimePicker)
    display(NaismithPicker)
    display(StartTimePicker)
    display(SpeedPicker)

    # Block until the user has adjusted the widgets; values are read afterwards.
    input("Fill and ctrl + enter when all parameters are set to the desired state")

    Competition_Year = Competition_Year_Picker.value
    API_Service = API_Service_Picker.value
    StopTimeAtCheckPoints = StopTimePicker.value
    AddMinutesPer100mHeight = NaismithPicker.value
    Speed = SpeedPicker.value
    StartTime = datetime.strptime(StartTimePicker.value, '%H:%M:%S')

# Computed only after a branch has succeeded, so a failure above surfaces as
# itself rather than as a NameError on StartTime.
FinishTime = StartTime + timedelta(hours=7)


Dropdown(description='Pick a competition year:', index=1, options=('2025', '2024', '2019', '2018', '2017'), va…

Dropdown(description='Pick an API Service for distance & elevations:', index=1, options=('Bing Maps', 'Google …

IntSlider(value=7, description='Pick a time in minutes to stop at checkpoints', max=20)

IntSlider(value=10, description='Add minutes per 100m elevation gain', max=20)

Text(value='10:00:00', description='Type a StartTime in the format HH:MM:SS')

FloatSlider(value=5.3, continuous_update=False, description='Hiking speed:', max=10.0, readout_format='.1f')

Fill and ctrl + enter when all parameters are set to the desired state 

### Get Secrets <a class="anchor" id="get-secrets"></a>

In [None]:
# Always prompt for the PAT that can read the private Secrets repo (input is not echoed).
GitHubPAT = getpass.getpass("Enter your GitHub Secrets Repo PAT: ")

# 🔹 GitHub API URL to get the file
url = "https://api.github.com/repos/liamj-f/Secrets/contents/Dovetrek_Secrets.json"

# 🔹 GitHub API headers
headers = {"Authorization": f"token {GitHubPAT}"}

# 🔹 Fetch the secrets file (timeout so a stalled connection cannot hang the notebook)
response = requests.get(url, headers=headers, timeout=30)

if response.status_code != 200:
    print(f"❌ Error: {response.status_code} - {response.text}")
    # Stop here: every later cell needs at least one of these keys, and carrying
    # on would only fail further down with an unrelated-looking NameError.
    raise RuntimeError(f"Could not load secrets from GitHub (HTTP {response.status_code})")

# The contents API returns the file body Base64-encoded under "content".
content = response.json()["content"]
decoded_content = base64.b64decode(content).decode("utf-8")

# Load JSON into a dictionary
secrets = json.loads(decoded_content)

# 🔹 Assign values to variables (a missing key yields an empty string)
BingMapsAPIKey = secrets.get("BingMApsAPIKey", "")
AzureMapsAPIKey = secrets.get("AzureMapsAPIKey", "")
OrdnanceSurveyAPIKey = secrets.get("OrdnanceSurveyAPIKey", "")
OpenRouteServiceAPIKey = secrets.get("OpenRouteServiceAPIKey", "")
GoogleMapsAPIKey = secrets.get("GoogleMapsAPIKey", "")
GitHubRepo_Dovetrek_LJF_PAT = secrets.get("GitHubRepo-Dovetrek-LJF-PAT", "")

print("✅ Secrets loaded successfully!")


Enter your GitHub Secrets Repo PAT:  [REDACTED]

✅ Secrets loaded successfully!


### Fetch Checkpoint information <a class="anchor" id="fetch-cp-info"></a>

In [None]:
# GitHub API URL for the selected year's checkpoint opening-times CSV
url = f"https://api.github.com/repos/liamj-f/Dovetrek/contents/Openings_{Competition_Year}.csv"

# Headers with authentication
headers = {"Authorization": f"token {GitHubRepo_Dovetrek_LJF_PAT}"}

# Fetch file content; fail loudly on an HTTP error (bad token, unknown year)
# instead of hitting a KeyError on the missing "content" field below.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
content = response.json()["content"]
decoded_content = base64.b64decode(content).decode("utf-8")

# splitlines() copes with both LF and CRLF files; blank lines (such as the
# trailing one after the final newline) are dropped before parsing.
csv_lines = [line for line in decoded_content.splitlines() if line.strip()]

# Create an RDD from the list
rdd = spark.sparkContext.parallelize(csv_lines)
# Convert RDD to DataFrame
openings_df = spark.read.csv(rdd, header=True, inferSchema=True)

### Convert BNG to NGR <a class="anchor" id="bng-to-ngr"></a>
The grid square identifier letters are missing from the datasets because Dovetrek always takes place in the same area, so no letter-to-number conversion is necessary: the leading easting digit (4) and northing digit (3) are simply prepended to each reference.

In [None]:
# The BNG column holds "EEE NNN" (three easting digits, a space, three northing
# digits). Expand each half to a full numeric reference by prefixing the fixed
# leading digit for the event area and padding with "00".
bng_ref = F.col("BNG")
easting_text = F.concat(F.lit("4"), F.substring(bng_ref, 1, 3), F.lit("00"))
northing_text = F.concat(F.lit("3"), F.substring(bng_ref, 5, 3), F.lit("00"))

openings_df = openings_df.withColumn("NGR_Easting", easting_text.cast("double"))
openings_df = openings_df.withColumn("NGR_Northing", northing_text.cast("double"))
display(openings_df)

CP,BNG,1000,1030,1100,1130,1200,1230,1300,1330,1400,1430,1500,1530,1600,1630,1700,NGR_Easting,NGR_Northing
Start,258 779,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,425800.0,377900.0
CP1,251 801,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,425100.0,380100.0
CP2,265 801,1,1,1,1,1,1,0,0,0,1,1,0,0,0,0,426500.0,380100.0
CP3,268 789,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,426800.0,378900.0
CP4,262 747,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,426200.0,374700.0
CP5,254 775,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,425400.0,377500.0
CP6,251 786,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,425100.0,378600.0
CP7,247 801,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,424700.0,380100.0
E1,251 807,0,1,1,1,0,0,1,1,1,0,0,1,1,1,0,425100.0,380700.0
E2R,274 814,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,427400.0,381400.0


### Convert NGR to Latitude/Longitude <a class="anchor" id="NGR-to-LatLong"></a>

In [None]:
# Pull the grid references to the driver: each checkpoint needs one call to the
# BGS coordinate-conversion web service.
gridrefs_list = openings_df.select(
    openings_df.CP,
    openings_df.NGR_Easting,
    openings_df.NGR_Northing
).collect()

latlong_rows = []       # (CP, latitude, longitude) for every successful conversion
unconverted_cps = []    # checkpoints whose response lacked coordinates

for row in gridrefs_list:
    resp = requests.get(
        "http://webapps.bgs.ac.uk/data/webservices/CoordConvert_LL_BNG.cfc",
        params={
            "method": "BNGtoLatLng",
            "easting": str(row.NGR_Easting),
            "northing": str(row.NGR_Northing),
        },
        timeout=30,
    )

    # Parse the JSON response
    json_resp = json.loads(resp.text)

    # Only keep responses that actually carry both coordinates
    if 'LATITUDE' in json_resp and 'LONGITUDE' in json_resp:
        latlong_rows.append(
            (row["CP"], float(json_resp['LATITUDE']), float(json_resp['LONGITUDE']))
        )
    else:
        unconverted_cps.append(row["CP"])

# The inner join below drops any checkpoint without coordinates, so say so
# rather than letting it vanish from the results unnoticed.
if unconverted_cps:
    print(f"Warning: no latitude/longitude returned for: {', '.join(str(cp) for cp in unconverted_cps)}")

# Build the lookup table once instead of unioning one single-row DataFrame per checkpoint.
LatLong_Df = spark.createDataFrame(
    latlong_rows,
    'CP string, Latitude float, Longitude float'
)

openings_df = openings_df.join(LatLong_Df, openings_df.CP == LatLong_Df.CP).drop(LatLong_Df.CP)

LatLong_Df.unpersist()
# Show DataFrame
display(openings_df)

CP,BNG,1000,1030,1100,1130,1200,1230,1300,1330,1400,1430,1500,1530,1600,1630,1700,NGR_Easting,NGR_Northing,Latitude,Longitude
Start,258 779,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,425800.0,377900.0,53.297432,-1.6143645
CP1,251 801,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,425100.0,380100.0,53.31724,-1.6246936
CP2,265 801,1,1,1,1,1,1,0,0,0,1,1,0,0,0,0,426500.0,380100.0,53.317173,-1.6036792
CP3,268 789,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,426800.0,378900.0,53.30637,-1.5992771
CP4,262 747,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,426200.0,374700.0,53.26865,-1.6086259
CP5,254 775,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,425400.0,377500.0,53.293858,-1.6203977
CP6,251 786,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,425100.0,378600.0,53.303757,-1.6248118
CP7,247 801,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,424700.0,380100.0,53.31726,-1.6306977
E1,251 807,0,1,1,1,0,0,1,1,1,0,0,1,1,1,0,425100.0,380700.0,53.32263,-1.6246463
E2R,274 814,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,427400.0,381400.0,53.32881,-1.5900581


### Create Matrix of Checkpoint combinations <a class="anchor" id="cp-combos"></a>

In [None]:
# This deliberately builds every ordered (start, finish) pair rather than only the
# legs of one route: it is reused from another part of the project, is quick at
# this scale, and lets any route be priced later without calling the APIs again.

# Two renamed views of the checkpoint coordinates, one per end of a leg. Carrying
# the coordinates through the cross join avoids joining back onto openings_df twice.
start_points = openings_df.select(
    F.col("CP").alias("StartCP"),
    F.col("Latitude").alias("StartLatitude"),
    F.col("Longitude").alias("StartLongitude"),
)
finish_points = openings_df.select(
    F.col("CP").alias("FinishCP"),
    F.col("Latitude").alias("FinishLatitude"),
    F.col("Longitude").alias("FinishLongitude"),
)

CP_Combinations_DF = (
    start_points.crossJoin(finish_points)
    # A leg from a checkpoint to itself is meaningless.
    .filter(F.col("StartCP") != F.col("FinishCP"))
    .select(
        "StartCP",
        "FinishCP",
        "StartLatitude",
        "StartLongitude",
        "FinishLatitude",
        "FinishLongitude",
    )
)
display(CP_Combinations_DF)

StartCP,FinishCP,StartLatitude,StartLongitude,FinishLatitude,FinishLongitude
CP2,CP1,53.317173,-1.6036792,53.31724,-1.6246936
CP3,CP1,53.30637,-1.5992771,53.31724,-1.6246936
CP4,CP1,53.26865,-1.6086259,53.31724,-1.6246936
CP5,CP1,53.293858,-1.6203977,53.31724,-1.6246936
CP6,CP1,53.303757,-1.6248118,53.31724,-1.6246936
CP7,CP1,53.31726,-1.6306977,53.31724,-1.6246936
E1,CP1,53.32263,-1.6246463,53.31724,-1.6246936
E10U,CP1,53.331615,-1.623066,53.31724,-1.6246936
E2R,CP1,53.32881,-1.5900581,53.31724,-1.6246936
E3,CP1,53.322712,-1.6501671,53.31724,-1.6246936


### Fetch Distances & Elevations from Google Maps API <a class="anchor" id="D&E-Google"></a>

In [None]:
if API_Service == "Google Maps":

    CP_Combinations_List = CP_Combinations_DF.collect()

    def make_request(method, url, max_retries=5, **kwargs):
        """Send an HTTP request, waiting and retrying while the server answers 429.

        Args:
            method: HTTP verb passed to ``requests.request``.
            url: Target URL.
            max_retries: Maximum number of attempts before giving up.
            **kwargs: Forwarded to ``requests.request``; a 30 second timeout is
                applied unless the caller supplies one.

        Returns:
            The first response whose status is not 429.

        Raises:
            RuntimeError: If every attempt was rate limited.
        """
        kwargs.setdefault("timeout", 30)
        for attempt in range(1, max_retries + 1):
            response = requests.request(method, url, **kwargs)
            if response.status_code != 429:
                return response
            if attempt < max_retries:
                print("Rate limit exceeded (429). Waiting 60 seconds before retrying...")
                time.sleep(60)
        raise RuntimeError(f"Still rate limited after {max_retries} attempts: {url}")

    route_url = "https://routes.googleapis.com/directions/v2:computeRoutes"
    route_headers = {
        "X-Goog-Api-Key": GoogleMapsAPIKey,
        "X-Goog-FieldMask": "routes.duration,routes.distanceMeters,routes.polyline,routes.legs"
    }
    elevations_url = "https://maps.googleapis.com/maps/api/elevation/json"

    distance_rows = []  # (StartCP, FinishCP, distance in km, height gain)

    for row in CP_Combinations_List:
        StartCP, FinishCP = row["StartCP"], row["FinishCP"]

        # Walking route between the two checkpoints
        route_params = {
            "origin": {
                "location": {
                    "latLng": {
                        "latitude": row.StartLatitude,
                        "longitude": row.StartLongitude
                    }
                }
            },
            "destination": {
                "location": {
                    "latLng": {
                        "latitude": row.FinishLatitude,
                        "longitude": row.FinishLongitude
                    }
                }
            },
            "travelMode": "WALK",
            "languageCode": "en-GB",
            "units": "METRIC"
        }
        route_resp = make_request("POST", route_url, headers=route_headers, json=route_params)
        route_json_resp = route_resp.json()

        # An error payload or an unroutable pair has no "routes"; report which leg
        # failed instead of raising a bare KeyError.
        routes = route_json_resp.get("routes")
        if not routes:
            raise RuntimeError(f"No walking route returned for {StartCP} -> {FinishCP}: {route_json_resp}")

        travel_distance = float(routes[0]["legs"][0]["distanceMeters"]) / 1000  # metres -> km

        # Sample elevations along the encoded route polyline
        height_polyline = routes[0]["polyline"]["encodedPolyline"]
        elevations_params = {
            "path": "enc:" + height_polyline,
            "samples": 100,
            "key": GoogleMapsAPIKey
        }
        elevations_resp = make_request("GET", elevations_url, params=elevations_params)
        elevation_json_resp = elevations_resp.json()
        elevations = [point["elevation"] for point in elevation_json_resp["results"]]

        # Height gain counts only the uphill steps between consecutive samples.
        differences = [builtins.max(0, elevations[i+1] - elevations[i]) for i in range(len(elevations)-1)]
        height_gain = float(sum(differences))

        distance_rows.append((StartCP, FinishCP, travel_distance, height_gain))

    # One DataFrame for all legs, rather than a union per leg.
    Distances_DF = spark.createDataFrame(
        distance_rows,
        'StartCP string, FinishCP string, Distance float, Height_Gain float'
    )

    display(Distances_DF)

else:
    print(f"Skipping Google Maps Cell, selected service: {API_Service}")

Skipping Google Maps Cell, selected service: Bing Maps


### Fetch Distances & Elevations from Azure Maps and OpenTopoData APIs

In [None]:
# Takes ~17 minutes due to OpenTopoData API limits 1 per second
# NOTE(review): "Azure Maps & OpenTopoData" is not one of the entries in
# API_Service_List, so this branch cannot currently be selected from the widgets.
if API_Service == "Azure Maps & OpenTopoData":

    CP_Combinations_List = CP_Combinations_DF.collect()

    MAX_LOCATIONS = 100  # OpenTopoData API limit
    route_url = "https://atlas.microsoft.com/route/directions/json"
    opentopo_url = "https://api.opentopodata.org/v1/eudem25m"  # Example dataset

    distance_rows = []  # (StartCP, FinishCP, distance in km, height gain)

    for row in CP_Combinations_List:
        wp1 = f"{row.StartLatitude},{row.StartLongitude}"
        wp2 = f"{row.FinishLatitude},{row.FinishLongitude}"

        # Azure Maps Routing API
        route_params = {
            "subscription-key": AzureMapsAPIKey,
            "api-version": "1.0",
            "query": f"{wp1}:{wp2}",
            "travelMode": "pedestrian",
            "routeType": "shortest",
            "traffic": "false",
            "computeBestOrder": "false",
            "computeTravelTimeFor": "all",
        }
        route_resp = requests.get(route_url, params=route_params, timeout=30)
        route_json_resp = json.loads(route_resp.text)

        travel_distance = float(route_json_resp['routes'][0]['summary']['lengthInMeters']) / 1000  # Convert meters to km
        StartCP = row["StartCP"]
        FinishCP = row["FinishCP"]

        # Extract route coordinates
        height_points = route_json_resp['routes'][0]['legs'][0]['points']
        coordinates = [f"{pt['latitude']},{pt['longitude']}" for pt in height_points]

        # Query elevations in chunks of at most MAX_LOCATIONS points
        elevation_results = []
        for i in range(0, len(coordinates), MAX_LOCATIONS):
            chunk = "|".join(coordinates[i:i + MAX_LOCATIONS])  # Format chunk

            # OpenTopoData API call
            opentopo_params = {"locations": chunk}
            elevations_resp = requests.get(opentopo_url, params=opentopo_params, timeout=30)
            elevations_json_resp = json.loads(elevations_resp.text)

            # Keep only points that carry an elevation value, so the gain
            # arithmetic below never has to subtract None.
            if 'results' in elevations_json_resp:
                elevation_results.extend(
                    result['elevation']
                    for result in elevations_json_resp['results']
                    if result.get('elevation') is not None
                )

            time.sleep(1)  # stay within the one-request-per-second limit

        # Height gain counts only the uphill steps between consecutive points.
        # builtins.max is used explicitly, as in the other API cells.
        differences = [builtins.max(0, elevation_results[i+1] - elevation_results[i]) for i in range(len(elevation_results)-1)]
        height_gain = float(sum(differences))

        distance_rows.append((StartCP, FinishCP, travel_distance, height_gain))

    # One DataFrame for all legs, rather than a union per leg.
    Distances_DF = spark.createDataFrame(
        distance_rows,
        'StartCP string, FinishCP string, Distance float, Height_Gain float'
    )

    display(Distances_DF)

else:
    print(f"Skipping Azure Maps Cell, selected service: {API_Service}")


### Fetch Distances & Elevations from OpenRouteService API <a class="anchor" id="D&E-ORS"></a>

In [None]:
if API_Service == "OpenRouteService":

    CP_Combinations_List = CP_Combinations_DF.collect()

    def make_request(method, url, max_retries=5, **kwargs):
        """Send an HTTP request, waiting and retrying while the server answers 429.

        Args:
            method: HTTP verb passed to ``requests.request``.
            url: Target URL.
            max_retries: Maximum number of attempts before giving up.
            **kwargs: Forwarded to ``requests.request``; a 30 second timeout is
                applied unless the caller supplies one.

        Returns:
            The first response whose status is not 429.

        Raises:
            RuntimeError: If every attempt was rate limited.
        """
        kwargs.setdefault("timeout", 30)
        for attempt in range(1, max_retries + 1):
            response = requests.request(method, url, **kwargs)
            if response.status_code != 429:
                return response
            if attempt < max_retries:
                print("Rate limit exceeded (429). Waiting 60 seconds before retrying...")
                time.sleep(60)
        raise RuntimeError(f"Still rate limited after {max_retries} attempts: {url}")

    route_url = "https://api.openrouteservice.org/v2/directions/foot-hiking"
    elevations_url = "https://api.openrouteservice.org/elevation/line"
    elevations_headers = {
        "Authorization": OpenRouteServiceAPIKey,
        "Content-Type": "application/json"
    }

    distance_rows = []  # (StartCP, FinishCP, distance in km, height gain)

    for row in CP_Combinations_List:
        # This API takes coordinates as "longitude,latitude".
        wp1 = f"{row.StartLongitude},{row.StartLatitude}"
        wp2 = f"{row.FinishLongitude},{row.FinishLatitude}"

        # Route API Call with Retry Handling
        route_params = {
            "start": wp1,
            "end": wp2,
            "api_key": OpenRouteServiceAPIKey
        }
        route_resp = make_request("GET", route_url, params=route_params)
        route_json_resp = route_resp.json()

        travel_distance = float(route_json_resp["features"][0]["properties"]["summary"]["distance"]) / 1000  # metres -> km
        StartCP, FinishCP = row["StartCP"], row["FinishCP"]

        # The route geometry (a list of coordinate pairs) is sent as-is to the elevation endpoint
        height_points = route_json_resp["features"][0]["geometry"]["coordinates"]

        # Elevation API Call with Retry Handling
        elevations_params = {
            "format_in": "polyline",
            "format_out": "polyline",
            "dataset": "srtm",
            "geometry": height_points
        }
        elevations_resp = make_request("POST", elevations_url, headers=elevations_headers, json=elevations_params)
        elevation_json_resp = elevations_resp.json()
        elevation_coordinates = elevation_json_resp["geometry"]
        elevations = [elev for _, _, elev in elevation_coordinates]

        # Height gain counts only the uphill steps between consecutive points.
        differences = [builtins.max(0, elevations[i+1] - elevations[i]) for i in range(len(elevations)-1)]
        # float(): the column is declared float, and schema verification rejects a
        # Python int, which is what sum() returns for integer elevations or an empty list.
        height_gain = float(sum(differences))

        distance_rows.append((StartCP, FinishCP, travel_distance, height_gain))

        time.sleep(2.0)  # Keep a delay between requests to reduce rate limits

    # One DataFrame for all legs, rather than a union per leg.
    Distances_DF = spark.createDataFrame(
        distance_rows,
        'StartCP string, FinishCP string, Distance float, Height_Gain float'
    )

    display(Distances_DF)

else:
    print(f"Skipping OpenRouteService Cell, selected service: {API_Service}")

Skipping OpenRouteService Cell, selected service: Bing Maps


### Fetch Distances & Elevations from Bing Maps API <a class="anchor" id="D&E-bing"></a>

In [None]:
%python
if API_Service == "Bing Maps":

    CP_Combinations_List = CP_Combinations_DF.collect()

    # HTTPS, so the API key in the query string is not sent in clear text.
    route_url = "https://dev.virtualearth.net/REST/v1/Routes/walking"
    elevations_url = "https://dev.virtualearth.net/REST/v1/Elevation/List"

    distance_rows = []  # (StartCP, FinishCP, distance in km, height gain)

    for row in CP_Combinations_List:
        # This API takes coordinates as "latitude,longitude".
        wp1 = str(row.StartLatitude) + ',' + str(row.StartLongitude)
        wp2 = str(row.FinishLatitude) + ',' + str(row.FinishLongitude)

        route_params = {
            "wayPoint.1": wp1,
            "waypoint.2": wp2,
            "optimize": "distance",
            "avoid": "ferry",
            "routeAttributes": "routePath,excludeItinerary",
            "distanceUnit": "km",
            "key": BingMapsAPIKey
        }
        route_resp = requests.get(route_url, params=route_params, timeout=30)
        route_json_resp = json.loads(route_resp.text)
        travel_distance = float(route_json_resp['resourceSets'][0]['resources'][0]['travelDistance'])
        StartCP = row["StartCP"]
        FinishCP = row["FinishCP"]

        # Flatten the route path into "lat,lon,lat,lon,..." for the elevation request
        height_points = route_json_resp['resourceSets'][0]['resources'][0]['routePath']['line']['coordinates']
        flattened_height_points = ",".join([f"{lat},{lon}" for lat, lon in height_points])

        # NOTE(review): every path point goes into one GET query string; a very
        # long route may exceed URL or point-count limits — confirm against the API.
        elevations_params = {
            "points": flattened_height_points,
            "heights": "ellipsoid",
            "key": BingMapsAPIKey
        }
        elevations_resp = requests.get(elevations_url, params=elevations_params, timeout=30)
        elevations_json_resp = json.loads(elevations_resp.text)
        elevations = elevations_json_resp['resourceSets'][0]['resources'][0]['elevations']

        # Height gain counts only the uphill steps between consecutive points.
        differences = [builtins.max(0, elevations[i+1] - elevations[i]) for i in range(len(elevations)-1)]
        # int(): the Height_Gain column is declared int in the schema below.
        height_gain = int(sum(differences))

        distance_rows.append((StartCP, FinishCP, travel_distance, height_gain))

    # One DataFrame for all legs, rather than a union per leg.
    Distances_DF = spark.createDataFrame(
        distance_rows,
        'StartCP string, FinishCP string, Distance float, Height_Gain int'
    )

    display(Distances_DF)

else:
    print(f"Skipping BingMaps Cell, selected service: {API_Service}")

StartCP,FinishCP,Distance,Height_Gain
CP2,CP1,1.755,39
CP3,CP1,2.729,48
CP4,CP1,6.986,197
CP5,CP1,3.443,132
CP6,CP1,2.036,123
CP7,CP1,0.765,33
E1,CP1,0.747,3
E10U,CP1,1.894,35
E2R,CP1,3.42,18
E3,CP1,2.678,212


### Calculate leg timings for each Checkpoint Combination<a class="anchor" id="cp-combo-times"></a>

In [None]:
# Leg time in minutes = walking time on the flat + climbing allowance.
# float() accepts both the numeric slider values and the string widget values.
walking_minutes = (col("Distance") / float(Speed)) * 60

# AddMinutesPer100mHeight is "minutes added per 100 m of ascent", so the
# allowance is gain * minutes / 100. Dividing the gain by the parameter is only
# right when it happens to be 10, and divides by zero at 0.
climb_minutes = col("Height_Gain") * float(AddMinutesPer100mHeight) / 100

Distances_DF = Distances_DF.withColumn("TimeInMinutes", walking_minutes + climb_minutes)
display(Distances_DF)

StartCP,FinishCP,Distance,Height_Gain,TimeInMinutes
CP2,CP1,1.755,39,23.76792447432032
CP3,CP1,2.729,48,35.69434065908756
CP4,CP1,6.986,197,98.78679314379424
CP5,CP1,3.443,132,52.1773593758637
CP6,CP1,2.036,123,35.34905675492197
CP7,CP1,0.765,33,11.96037719654587
E1,CP1,0.747,3,8.75660353606602
E10U,CP1,1.894,35,24.941510038555798
E2R,CP1,3.42,18,40.51698199578051
E3,CP1,2.678,212,51.5169808297787


### Pick a Route <a class="anchor" id="pick-route"></a>

In [None]:
# Checkpoints in the order they will be visited; each consecutive pair is one leg.
route = [
    "Start",
    "CP3", "E7U", "CP4", "E8", "E6", "CP5",
    "E5", "E4R", "CP6", "E9U", "CP2", "E2R",
    "E10U", "E1", "CP1", "CP7", "E3",
    "Finish",
]

### Calculate route timings <a class="anchor" id="route-timings"></a>

In [None]:
# Pair each checkpoint with its successor: one row per leg, numbered in walking order.
legs = [
    (leg_start, leg_finish, position)
    for position, (leg_start, leg_finish) in enumerate(zip(route, route[1:]))
]
route_df = spark.createDataFrame(legs, ["StartCP", "FinishCP", "RouteOrder"])

# Look up each leg's time. The left join keeps legs that have no entry in
# Distances_DF; their TimeInMinutes is null.
legs_with_times = route_df.join(Distances_DF, ["StartCP", "FinishCP"], "left")
timing_results_df = legs_with_times.orderBy("RouteOrder")

# Running total of leg times from the first leg up to and including the current one.
window_spec = Window.orderBy("RouteOrder").rowsBetween(Window.unboundedPreceding, Window.currentRow)
running_total = F.sum("TimeInMinutes").over(window_spec)
timing_results_df = timing_results_df.withColumn("CumulativeTime", running_total)

# Show results
display(timing_results_df)

StartCP,FinishCP,RouteOrder,Distance,Height_Gain,TimeInMinutes,CumulativeTime
Start,CP3,0,1.706,60,25.31320721248411,25.31320721248411
CP3,E7U,1,3.647,61,47.386793294942606,72.70000050742672
E7U,CP4,2,1.492,3,17.19056585419853,89.89056636162525
CP4,E8,3,1.828,2,20.894339050436923,110.78490541206216
E8,E6,4,2.045,57,28.850944259931456,139.63584967199364
E6,CP5,5,1.829,122,32.905660334173234,172.54151000616687
CP5,E5,6,1.631,13,19.764151418434,192.30566142460088
E5,E4R,7,1.725,131,32.62830215670028,224.9339635813012
E4R,CP6,8,0.925,37,14.17169824816146,239.1056618294626
CP6,E9U,9,1.241,96,23.64905724075605,262.75471907021864


### Calculate checkpoint arrival and departure times for route <a class="anchor" id="route-times"></a>

In [None]:
# Window ordered by RouteOrder. The calculations below do not use it; the
# assignment is retained so the notebook-level name keeps the same value.
window_spec = Window.orderBy("RouteOrder")

# Whole minutes, whether the parameter arrived as an int (slider) or a string (text widget).
dwell_minutes = int(StopTimeAtCheckPoints)

# Each row is the leg StartCP -> FinishCP. By the time the walker LEAVES FinishCP
# they have stopped at RouteOrder + 1 checkpoints (RouteOrder is 0-based), so that
# many dwell periods are added to the walking time. Multiplying by RouteOrder
# alone omitted the stop at FinishCP, which made LeavingTime equal the true
# arrival time and put ArrivalTime one dwell period too early.
timing_results_df = timing_results_df.withColumn(
    "CumulativeTimeWithDwell",
    F.col("CumulativeTime") + ((F.col("RouteOrder") + F.lit(1)) * F.lit(dwell_minutes))
)

# LeavingTime: StartTime plus the elapsed minutes (truncated to whole minutes)
timing_results_df = timing_results_df.withColumn(
    "LeavingTime",
    F.to_timestamp(F.lit(StartTime)) + F.col("CumulativeTimeWithDwell").cast("int").cast("interval minute")
)

# ArrivalTime: one dwell period before leaving the same checkpoint
timing_results_df = timing_results_df.withColumn(
    "ArrivalTime",
    F.col("LeavingTime") - F.expr(f"INTERVAL {dwell_minutes} MINUTE")
)

# Show the results
display(timing_results_df)


StartCP,FinishCP,RouteOrder,Distance,Height_Gain,TimeInMinutes,CumulativeTime,CumulativeTimeWithDwell,LeavingTime,ArrivalTime
Start,CP3,0,1.706,60,25.31320721248411,25.31320721248411,25.31320721248411,1900-01-01T10:25:00Z,1900-01-01T10:18:00Z
CP3,E7U,1,3.647,61,47.386793294942606,72.70000050742672,79.70000050742672,1900-01-01T11:19:00Z,1900-01-01T11:12:00Z
E7U,CP4,2,1.492,3,17.19056585419853,89.89056636162525,103.89056636162525,1900-01-01T11:43:00Z,1900-01-01T11:36:00Z
CP4,E8,3,1.828,2,20.894339050436923,110.78490541206216,131.7849054120622,1900-01-01T12:11:00Z,1900-01-01T12:04:00Z
E8,E6,4,2.045,57,28.850944259931456,139.63584967199364,167.63584967199364,1900-01-01T12:47:00Z,1900-01-01T12:40:00Z
E6,CP5,5,1.829,122,32.905660334173234,172.54151000616687,207.54151000616687,1900-01-01T13:27:00Z,1900-01-01T13:20:00Z
CP5,E5,6,1.631,13,19.764151418434,192.30566142460088,234.30566142460088,1900-01-01T13:54:00Z,1900-01-01T13:47:00Z
E5,E4R,7,1.725,131,32.62830215670028,224.9339635813012,273.9339635813012,1900-01-01T14:33:00Z,1900-01-01T14:26:00Z
E4R,CP6,8,0.925,37,14.17169824816146,239.1056618294626,295.10566182946263,1900-01-01T14:55:00Z,1900-01-01T14:48:00Z
CP6,E9U,9,1.241,96,23.64905724075605,262.75471907021864,325.75471907021864,1900-01-01T15:25:00Z,1900-01-01T15:18:00Z
