In this section, we experiment with OSRM and several routing techniques in order to build an efficient routing mechanism.

Please make sure the OSRM engine is running in the background with the desired region's map.

The most common use of OSRM here is to read location coordinates from a CSV file such as locations.csv and to compute routes between these locations.

Below is a sample locations.csv for Ethiopia:

name,region,country,latitude,longitude,location_type,conflict_date,population<br>
Addis Ababa,Addis Ababa,Ethiopia,8.978098143949728,38.75857450155794,town,0,0<br>
Mekele,Mekele,Ethiopia,13.495486756898567,39.46589089154361,town,0,0<br>
Dire Dawa,Dire Dawa,Ethiopia,9.602835645348511,41.85443640082678,town,0,0<br>
Adama,Adama,Ethiopia,8.526471189559459,39.25963003366467,town,0,0

Install packages that will be used. 

In [None]:
import sys
!{sys.executable} -m pip install numpy pandas matplotlib geopy folium polyline tsp_solver scikit-learn scipy

In [None]:
import os
import csv
import numpy as np
import pandas as pd
import requests
import folium
import polyline
from sklearn.cluster import DBSCAN
from scipy.spatial import distance_matrix
from scipy.optimize import minimize
from sklearn.neighbors import KDTree
from tsp_solver.greedy import solve_tsp
import matplotlib.pyplot as plt
from itertools import combinations
from geopy.distance import geodesic

In [None]:
# Define the region (map) to work on. The value is used as the prefix of the
# input file ('<region>-locations.csv') and of the HTML maps saved below.

region = "mali"

Please see the inline comments in the code for more information.

In [None]:
# Experiment 1: request an OSRM driving route for every pair of locations and
# draw all of the routes on one map.

# Set the file name of the locations.csv file
locations_csv = f'{region}-locations.csv'

# Set the path to the directory where the locations CSV file is stored
locations_csv_path = '.'

# Check if the CSV file exists
locations_csv_file = os.path.join(locations_csv_path, locations_csv)
if not os.path.exists(locations_csv_file):
    print(f"Error: {locations_csv} file not found.")
    # Raise SystemExit directly: exit() is the interactive-shell helper and is
    # not guaranteed to stop a notebook cell before the read below.
    raise SystemExit(1)

# Read the locations from CSV
df = pd.read_csv(locations_csv_file)

# Create a map centered around the first location
start_lat = df.loc[0, 'latitude']
start_lon = df.loc[0, 'longitude']
m = folium.Map(location=[start_lat, start_lon], zoom_start=6)

# One (step name, cumulative distance) tuple per step of every route
route_instructions = []

# Loop-invariant request settings
osrm_endpoint = 'http://localhost:5000/route/v1/driving'
request_timeout = 30  # seconds; keeps a stalled OSRM server from hanging the cell

# Row positions that already have a marker, so each location is drawn once
marked = set()

# (name, latitude, longitude) of every row, in file order
locations = list(zip(df['name'], df['latitude'], df['longitude']))

# Iterate through each pair of locations
for i, j in combinations(range(len(locations)), 2):
    name1, lat1, lon1 = locations[i]
    name2, lat2, lon2 = locations[j]

    # Coordinates are sent in longitude,latitude order
    request_url = f"{osrm_endpoint}/{lon1},{lat1};{lon2},{lat2}?steps=true&geometries=polyline"

    try:
        response = requests.get(request_url, timeout=request_timeout)
        response.raise_for_status()
        route_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error occurred during OSRM API request: {e}")
        continue

    routes = route_data.get('routes') or []
    if routes:
        # Record every step together with the distance travelled so far
        distance_sum = 0
        for step in routes[0]['legs'][0]['steps']:
            distance_sum += step['distance']
            route_instructions.append((step['name'], distance_sum))

        # Decode the encoded polyline and draw the route
        coordinates = polyline.decode(routes[0]['geometry'])
        folium.PolyLine(
            locations=coordinates,
            color='blue',
            weight=2,
            opacity=0.9
        ).add_to(m)

    # Add markers for the start and end locations (only once per location)
    for idx, place, lat, lon in ((i, name1, lat1, lon1), (j, name2, lat2, lon2)):
        if idx in marked:
            continue
        marked.add(idx)
        folium.CircleMarker(
            location=(lat, lon),
            radius=6,
            color='red',
            fill=True,
            fill_color='blue',
            popup=place
        ).add_to(m)

# Save the map as an HTML file (create the output directory when missing)
os.makedirs('images', exist_ok=True)
map_file = os.path.join('images', f'{region}-route-map-simple.html')
m.save(map_file)
print(f"Map saved as {map_file}")

# Display the map
m

In [None]:
# Experiment 2: same pairwise routing, but a pair is skipped when its reverse
# (name2, name1) has already been processed.

# Set the file name of the locations.csv file
locations_csv = f'{region}-locations.csv'

# Set the path to the directory where the locations CSV file is stored
locations_csv_path = '.'

# Check if the CSV file exists
locations_csv_file = os.path.join(locations_csv_path, locations_csv)
if not os.path.exists(locations_csv_file):
    print(f"Error: {locations_csv} file not found.")
    # Raise SystemExit directly: exit() is the interactive-shell helper and is
    # not guaranteed to stop a notebook cell before the read below.
    raise SystemExit(1)

# Read the locations from CSV
df = pd.read_csv(locations_csv_file)

# Create a map centered around the first location
start_lat = df.loc[0, 'latitude']
start_lon = df.loc[0, 'longitude']
m = folium.Map(location=[start_lat, start_lon], zoom_start=6)

# One (name of final step, total distance) tuple per route. Initialised here
# so that the cell does not depend on a list left over from another cell.
route_instructions = []

# Create an empty set to store processed route pairs
processed_pairs = set()

# Loop-invariant request settings
osrm_endpoint = 'http://localhost:5000/route/v1/driving'
request_timeout = 30  # seconds; keeps a stalled OSRM server from hanging the cell

# Row positions that already have a marker, so each location is drawn once
marked = set()

# (name, latitude, longitude) of every row, in file order
locations = list(zip(df['name'], df['latitude'], df['longitude']))

# Iterate through each pair of locations
for i, j in combinations(range(len(locations)), 2):
    name1, lat1, lon1 = locations[i]
    name2, lat2, lon2 = locations[j]

    # Skip the pair when its reverse was already handled
    if (name2, name1) in processed_pairs:
        continue
    processed_pairs.add((name1, name2))

    # Coordinates are sent in longitude,latitude order
    request_url = f"{osrm_endpoint}/{lon1},{lat1};{lon2},{lat2}?steps=true&geometries=polyline"

    try:
        response = requests.get(request_url, timeout=request_timeout)
        response.raise_for_status()
        route_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error occurred during OSRM API request: {e}")
        continue

    routes = route_data.get('routes') or []
    if routes:
        steps = routes[0]['legs'][0]['steps']
        distance_sum = sum(step['distance'] for step in steps)

        # Guarded so that a route without steps cannot reference a missing name
        if steps:
            route_instructions.append((steps[-1]['name'], distance_sum))

        # Decode the encoded polyline and draw the route
        coordinates = polyline.decode(routes[0]['geometry'])
        folium.PolyLine(
            locations=coordinates,
            color='blue',
            weight=2,
            opacity=0.9
        ).add_to(m)

    # Add markers for the start and end locations (only once per location)
    for idx, place, lat, lon in ((i, name1, lat1, lon1), (j, name2, lat2, lon2)):
        if idx in marked:
            continue
        marked.add(idx)
        folium.CircleMarker(
            location=(lat, lon),
            radius=6,
            color='red',
            fill=True,
            fill_color='blue',
            popup=place
        ).add_to(m)

# Save the map as an HTML file (create the output directory when missing)
os.makedirs('images', exist_ok=True)
map_file = os.path.join('images', f'{region}-route-map-remove-duplicates.html')
m.save(map_file)
print(f"Map saved as {map_file}")

# Display the map
m

In [None]:
# Experiment 3: pairwise routing where each pair of names is stored as a
# frozenset, so (A, B) and (B, A) count as the same pair.

# Set the file name of the locations.csv file
locations_csv = f'{region}-locations.csv'

# Set the path to the directory where the locations CSV file is stored
locations_csv_path = '.'

# Check if the CSV file exists
locations_csv_file = os.path.join(locations_csv_path, locations_csv)
if not os.path.exists(locations_csv_file):
    print(f"Error: {locations_csv} file not found.")
    # Raise SystemExit directly: exit() is the interactive-shell helper and is
    # not guaranteed to stop a notebook cell before the read below.
    raise SystemExit(1)

# Read the locations from CSV
df = pd.read_csv(locations_csv_file)

# Create a map centered around the first location
start_lat = df.loc[0, 'latitude']
start_lon = df.loc[0, 'longitude']
m = folium.Map(location=[start_lat, start_lon], zoom_start=6)

# One (name of final step, total distance) tuple per route. Initialised here
# so that the cell does not depend on a list left over from another cell.
route_instructions = []

# Create an empty set to store processed route pairs
processed_pairs = set()

# Loop-invariant request settings
osrm_endpoint = 'http://localhost:5000/route/v1/driving'
request_timeout = 30  # seconds; keeps a stalled OSRM server from hanging the cell

# Row positions that already have a marker, so each location is drawn once
marked = set()

# (name, latitude, longitude) of every row, in file order
locations = list(zip(df['name'], df['latitude'], df['longitude']))

# Iterate through each pair of locations
for i, j in combinations(range(len(locations)), 2):
    name1, lat1, lon1 = locations[i]
    name2, lat2, lon2 = locations[j]

    # An order-independent key for the pair of locations
    location_pair = frozenset([name1, name2])
    if location_pair in processed_pairs:
        continue
    processed_pairs.add(location_pair)

    # Coordinates are sent in longitude,latitude order
    request_url = f"{osrm_endpoint}/{lon1},{lat1};{lon2},{lat2}?steps=true&geometries=polyline"

    try:
        response = requests.get(request_url, timeout=request_timeout)
        response.raise_for_status()
        route_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error occurred during OSRM API request: {e}")
        continue

    routes = route_data.get('routes') or []
    if routes:
        steps = routes[0]['legs'][0]['steps']
        distance_sum = sum(step['distance'] for step in steps)

        # Guarded so that a route without steps cannot reference a missing name
        if steps:
            route_instructions.append((steps[-1]['name'], distance_sum))

        # Decode the encoded polyline and draw the route
        coordinates = polyline.decode(routes[0]['geometry'])
        folium.PolyLine(
            locations=coordinates,
            color='blue',
            weight=2,
            opacity=0.9
        ).add_to(m)

    # Add markers for the start and end locations (only once per location)
    for idx, place, lat, lon in ((i, name1, lat1, lon1), (j, name2, lat2, lon2)):
        if idx in marked:
            continue
        marked.add(idx)
        folium.CircleMarker(
            location=(lat, lon),
            radius=6,
            color='red',
            fill=True,
            fill_color='blue',
            popup=place
        ).add_to(m)

# Save the map as an HTML file (create the output directory when missing)
os.makedirs('images', exist_ok=True)
map_file = os.path.join('images', f'{region}-route-map-distance-based.html')
m.save(map_file)
print(f"Map saved as {map_file}")

# Display the map
m

In [None]:
import os
import pandas as pd
import requests
import folium

# Experiment 4: pairwise routing where a pair (and its reverse) is recorded as
# visited only after a route for it has actually been drawn.

# Set the file name of the locations.csv file
locations_csv = f'{region}-locations.csv'

# Set the path to the directory where the locations CSV file is stored
locations_csv_path = '.'

# Check if the CSV file exists
locations_csv_file = os.path.join(locations_csv_path, locations_csv)
if not os.path.exists(locations_csv_file):
    print(f"Error: {locations_csv} file not found.")
    # Raise SystemExit directly: exit() is the interactive-shell helper and is
    # not guaranteed to stop a notebook cell before the read below.
    raise SystemExit(1)

# Read the locations from CSV
df = pd.read_csv(locations_csv_file)

# Create a map centered around the first location
start_lat = df.loc[0, 'latitude']
start_lon = df.loc[0, 'longitude']
m = folium.Map(location=[start_lat, start_lon], zoom_start=6)

# One (name of final step, total distance) tuple per route
route_instructions = []

# Create a set to store visited location pairs
visited_pairs = set()

# Loop-invariant request settings
osrm_endpoint = 'http://localhost:5000/route/v1/driving'
request_timeout = 30  # seconds; keeps a stalled OSRM server from hanging the cell

# Row positions that already have a marker, so each location is drawn once
marked = set()

# (name, latitude, longitude) of every row, in file order
locations = list(zip(df['name'], df['latitude'], df['longitude']))

# Iterate through each pair of locations
for i, j in combinations(range(len(locations)), 2):
    name1, lat1, lon1 = locations[i]
    name2, lat2, lon2 = locations[j]

    # Check if the pair has been visited before (or its reverse pair)
    if (name1, name2) in visited_pairs or (name2, name1) in visited_pairs:
        continue

    # Coordinates are sent in longitude,latitude order
    request_url = f"{osrm_endpoint}/{lon1},{lat1};{lon2},{lat2}?steps=true&geometries=polyline"

    try:
        response = requests.get(request_url, timeout=request_timeout)
        response.raise_for_status()
        route_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error occurred during OSRM API request: {e}")
        continue

    routes = route_data.get('routes') or []
    if routes:
        steps = routes[0]['legs'][0]['steps']
        distance_sum = sum(step['distance'] for step in steps)

        # Guarded so that a route without steps cannot reference a missing name
        if steps:
            route_instructions.append((steps[-1]['name'], distance_sum))

        # Decode the encoded polyline and draw the route
        coordinates = polyline.decode(routes[0]['geometry'])
        folium.PolyLine(
            locations=coordinates,
            color='blue',
            weight=2,
            opacity=0.9
        ).add_to(m)

        # Add the pair to the visited set and its reverse pair
        visited_pairs.add((name1, name2))
        visited_pairs.add((name2, name1))

    # Add markers for the start and end locations (only once per location)
    for idx, place, lat, lon in ((i, name1, lat1, lon1), (j, name2, lat2, lon2)):
        if idx in marked:
            continue
        marked.add(idx)
        folium.CircleMarker(
            location=(lat, lon),
            radius=6,
            color='red',
            fill=True,
            fill_color='blue',
            popup=place
        ).add_to(m)

# Save the map as an HTML file (create the output directory when missing)
os.makedirs('images', exist_ok=True)
map_file = os.path.join('images', f'{region}-route-map-clustering.html')
m.save(map_file)
print(f"Map saved as {map_file}")

# Display the map
m

In [None]:
# Experiment 5: route each location to the next one in file order, and the
# last location back to the first, forming a closed loop.

# Set the file name of the locations.csv file
locations_csv = f'{region}-locations.csv'

# Set the path to the directory where the locations CSV file is stored
locations_csv_path = '.'

# Check if the CSV file exists
locations_csv_file = os.path.join(locations_csv_path, locations_csv)
if not os.path.exists(locations_csv_file):
    print(f"Error: {locations_csv} file not found.")
    # Raise SystemExit directly: exit() is the interactive-shell helper and is
    # not guaranteed to stop a notebook cell before the read below.
    raise SystemExit(1)

# Read the locations from CSV
df = pd.read_csv(locations_csv_file)

# Create a map centered around the first location
start_lat = df.loc[0, 'latitude']
start_lon = df.loc[0, 'longitude']
m = folium.Map(location=[start_lat, start_lon], zoom_start=6)

# One (name of final step, total distance) tuple per route
route_instructions = []

# Loop-invariant request settings
osrm_endpoint = 'http://localhost:5000/route/v1/driving'
request_timeout = 30  # seconds; keeps a stalled OSRM server from hanging the cell

# Row positions that already have a marker, so each location is drawn once
marked = set()

# (name, latitude, longitude) of every row, in file order
locations = list(zip(df['name'], df['latitude'], df['longitude']))
location_count = len(locations)

# Iterate through the sequential pairs of locations
for i in range(location_count):
    # The modulo makes the last location link back to the first one
    j = (i + 1) % location_count
    name1, lat1, lon1 = locations[i]
    name2, lat2, lon2 = locations[j]

    # Coordinates are sent in longitude,latitude order
    request_url = f"{osrm_endpoint}/{lon1},{lat1};{lon2},{lat2}?steps=true&geometries=polyline"

    try:
        response = requests.get(request_url, timeout=request_timeout)
        response.raise_for_status()
        route_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error occurred during OSRM API request: {e}")
        continue

    routes = route_data.get('routes') or []
    if routes:
        steps = routes[0]['legs'][0]['steps']
        distance_sum = sum(step['distance'] for step in steps)

        # Guarded so that a route without steps cannot reference a missing name
        if steps:
            route_instructions.append((steps[-1]['name'], distance_sum))

        # Decode the encoded polyline and draw the route
        coordinates = polyline.decode(routes[0]['geometry'])
        folium.PolyLine(
            locations=coordinates,
            color='blue',
            weight=2,
            opacity=0.9
        ).add_to(m)

    # Add markers for the start and end locations (only once per location)
    for idx, place, lat, lon in ((i, name1, lat1, lon1), (j, name2, lat2, lon2)):
        if idx in marked:
            continue
        marked.add(idx)
        folium.CircleMarker(
            location=(lat, lon),
            radius=6,
            color='red',
            fill=True,
            fill_color='blue',
            popup=place
        ).add_to(m)

# Save the map as an HTML file (create the output directory when missing)
os.makedirs('images', exist_ok=True)
map_file = os.path.join('images', f'{region}-route-map-distance-based-clustering.html')
m.save(map_file)
print(f"Map saved as {map_file}")

# Display the map
m

In [None]:
# Experiment 6: collect the geometry of every pairwise route, cluster the
# points with DBSCAN, order the points of each cluster with a greedy TSP
# heuristic and draw one polyline per cluster.

# Set the file name of the locations.csv file
locations_csv = f'{region}-locations.csv'

# Set the path to the directory where the locations CSV file is stored
locations_csv_path = '.'

# Check if the CSV file exists
locations_csv_file = os.path.join(locations_csv_path, locations_csv)
if not os.path.exists(locations_csv_file):
    print(f"Error: {locations_csv} file not found.")
    # Raise SystemExit directly: exit() is the interactive-shell helper and is
    # not guaranteed to stop a notebook cell before the read below.
    raise SystemExit(1)

# Read the locations from CSV
df = pd.read_csv(locations_csv_file)

# Create a map centered around the first location
start_lat = df.loc[0, 'latitude']
start_lon = df.loc[0, 'longitude']
m = folium.Map(location=[start_lat, start_lon], zoom_start=6)

# One (name of final step, total distance) tuple per route
route_instructions = []

# Create an empty list to store coordinates for clustering
all_coordinates = []

# Loop-invariant request settings
osrm_endpoint = 'http://localhost:5000/route/v1/driving'
request_timeout = 30  # seconds; keeps a stalled OSRM server from hanging the cell

# Row positions that already have a marker, so each location is drawn once
marked = set()

# (name, latitude, longitude) of every row, in file order
locations = list(zip(df['name'], df['latitude'], df['longitude']))

# Iterate through each pair of locations
for i, j in combinations(range(len(locations)), 2):
    name1, lat1, lon1 = locations[i]
    name2, lat2, lon2 = locations[j]

    # Coordinates are sent in longitude,latitude order
    request_url = f"{osrm_endpoint}/{lon1},{lat1};{lon2},{lat2}?steps=true&geometries=polyline"

    try:
        response = requests.get(request_url, timeout=request_timeout)
        response.raise_for_status()
        route_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error occurred during OSRM API request: {e}")
        continue

    routes = route_data.get('routes') or []
    if not routes:
        continue

    steps = routes[0]['legs'][0]['steps']
    distance_sum = sum(step['distance'] for step in steps)

    # Guarded so that a route without steps cannot reference a missing name
    if steps:
        route_instructions.append((steps[-1]['name'], distance_sum))

    # Decode the encoded polyline and keep its points for clustering
    coordinates = polyline.decode(routes[0]['geometry'])
    all_coordinates.extend(coordinates)

    # Mark the endpoints of routed pairs only (once per location)
    for idx, place, lat, lon in ((i, name1, lat1, lon1), (j, name2, lat2, lon2)):
        if idx in marked:
            continue
        marked.add(idx)
        folium.CircleMarker(
            location=(lat, lon),
            radius=6,
            color='red',
            fill=True,
            fill_color='blue',
            popup=place
        ).add_to(m)

# Perform DBSCAN (Density-Based Spatial Clustering of Applications with Noise) for coordinates
eps = 0.001  # distance threshold for clustering, in the units of the decoded coordinates
min_samples = 2  # minimum number of points in a cluster
dbscan = DBSCAN(eps=eps, min_samples=min_samples)
# fit_predict rejects an empty input, so fall back to "no clusters" instead
cluster_labels = dbscan.fit_predict(all_coordinates) if all_coordinates else []

# Group the route points by cluster label, dropping noise points (label -1)
clusters = {}
for coord, cluster_label in zip(all_coordinates, cluster_labels):
    if cluster_label != -1:
        clusters.setdefault(cluster_label, []).append(coord)

# Create a dictionary to store the ordered polyline of each cluster
cluster_polylines = {}

# Iterate over the clusters and generate polylines for each cluster
for cluster_label, cluster_coords in clusters.items():
    # Routes share road segments, so drop repeated points (first occurrence
    # wins) before building the quadratic-size distance matrix.
    cluster_coords = list(dict.fromkeys(cluster_coords))
    if len(cluster_coords) < 2:
        # A single point cannot form a line
        continue

    # Calculate the distance matrix for the cluster coordinates
    distance_matrix_cluster = distance_matrix(cluster_coords, cluster_coords)

    # Order the points with the greedy TSP heuristic. This replaces the former
    # SLSQP attempt: a continuous optimiser over integer-cast indices, with a
    # sum constraint, does not produce a permutation of the points.
    optimal_order = solve_tsp(distance_matrix_cluster)

    # Reorder the cluster coordinates based on the computed order
    reordered_coords = [cluster_coords[k] for k in optimal_order]

    # Create a list of polyline coordinates
    polyline_coords = [(coord[0], coord[1]) for coord in reordered_coords]
    cluster_polylines[cluster_label] = polyline_coords

    # Create a Folium PolyLine object for the current cluster and add it to the map
    folium.PolyLine(
        locations=polyline_coords,
        color='blue',
        weight=2,
        opacity=0.9
    ).add_to(m)

# Save the map as an HTML file (create the output directory when missing)
os.makedirs('images', exist_ok=True)
map_file = os.path.join('images', f'{region}-route-map-clustering-tsp.html')
m.save(map_file)
print(f"Map saved as {map_file}")

# Display the map
m

In [None]:
# Experiment 7: collect the geometry of every pairwise route, cluster the
# points with DBSCAN and draw one polyline per cluster.

# Set the file name of the locations.csv file
locations_csv = f'{region}-locations.csv'

# Set the path to the directory where the locations CSV file is stored
locations_csv_path = '.'

# Check if the CSV file exists
locations_csv_file = os.path.join(locations_csv_path, locations_csv)
if not os.path.exists(locations_csv_file):
    print(f"Error: {locations_csv} file not found.")
    # Raise SystemExit directly: exit() is the interactive-shell helper and is
    # not guaranteed to stop a notebook cell before the read below.
    raise SystemExit(1)

# Read the locations from CSV
df = pd.read_csv(locations_csv_file)

# Create a map centered around the first location
start_lat = df.loc[0, 'latitude']
start_lon = df.loc[0, 'longitude']
m = folium.Map(location=[start_lat, start_lon], zoom_start=6)

# One (name of final step, total distance) tuple per route
route_instructions = []

# Create an empty list to store coordinates for clustering
all_coordinates = []

# Loop-invariant request settings
osrm_endpoint = 'http://localhost:5000/route/v1/driving'
request_timeout = 30  # seconds; keeps a stalled OSRM server from hanging the cell

# Row positions that already have a marker, so each location is drawn once
marked = set()

# (name, latitude, longitude) of every row, in file order
locations = list(zip(df['name'], df['latitude'], df['longitude']))

# Iterate through each pair of locations
for i, j in combinations(range(len(locations)), 2):
    name1, lat1, lon1 = locations[i]
    name2, lat2, lon2 = locations[j]

    # Coordinates are sent in longitude,latitude order
    request_url = f"{osrm_endpoint}/{lon1},{lat1};{lon2},{lat2}?steps=true&geometries=polyline"

    try:
        response = requests.get(request_url, timeout=request_timeout)
        response.raise_for_status()
        route_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error occurred during OSRM API request: {e}")
        continue

    routes = route_data.get('routes') or []
    if not routes:
        continue

    steps = routes[0]['legs'][0]['steps']
    distance_sum = sum(step['distance'] for step in steps)

    # Guarded so that a route without steps cannot reference a missing name
    if steps:
        route_instructions.append((steps[-1]['name'], distance_sum))

    # Decode the encoded polyline and keep its points for clustering
    coordinates = polyline.decode(routes[0]['geometry'])
    all_coordinates.extend(coordinates)

    # Mark the endpoints of routed pairs only (once per location)
    for idx, place, lat, lon in ((i, name1, lat1, lon1), (j, name2, lat2, lon2)):
        if idx in marked:
            continue
        marked.add(idx)
        folium.CircleMarker(
            location=[lat, lon],
            radius=6,
            color='red',
            fill=True,
            fill_color='blue',
            popup=place
        ).add_to(m)

# Perform DBSCAN (Density-Based Spatial Clustering of Applications with Noise) for coordinates
eps = 0.01  # distance threshold for clustering, in the units of the decoded coordinates
min_samples = 2  # minimum number of points in a cluster
dbscan = DBSCAN(eps=eps, min_samples=min_samples)
# fit_predict rejects an empty input, so fall back to "no clusters" instead
cluster_labels = dbscan.fit_predict(all_coordinates) if all_coordinates else []

# Group the route points by cluster label, dropping noise points (label -1)
clusters = {}
for coord, cluster_label in zip(all_coordinates, cluster_labels):
    if cluster_label != -1:
        clusters.setdefault(cluster_label, []).append(coord)

# Create a dictionary to store the polyline drawn for each cluster
cluster_polylines = {}

# Create a list to store the final cluster coordinates
final_cluster_coords = []

# Iterate over the clusters and generate polylines for each cluster
for cluster_label, cluster_coords in clusters.items():
    # Append the cluster_coords to the final_cluster_coords
    final_cluster_coords.append(cluster_coords)

    # Drop repeated points but keep first-seen order: a plain set() would hand
    # the points to PolyLine in arbitrary order and scramble the drawn line.
    polyline_coords = list(dict.fromkeys((coord[0], coord[1]) for coord in cluster_coords))

    # Clusters with fewer than three distinct points are not drawn
    if len(polyline_coords) < 3:
        continue

    cluster_polylines[cluster_label] = polyline_coords

    # Create a Folium PolyLine object for the current cluster and add it to the map
    folium.PolyLine(
        locations=polyline_coords,
        color='blue',
        weight=2,
        opacity=0.9
    ).add_to(m)

# Save the map as an HTML file (create the output directory when missing)
os.makedirs('images', exist_ok=True)
map_file = os.path.join('images', f'{region}-route-map-density-based.html')
m.save(map_file)
print(f"Map saved as {map_file}")

# Display the map
m

In [None]:
# Set the file name of the locations.csv file
locations_csv = f'{region}-locations.csv'

# Set the path to the directory where the locations CSV file is stored
locations_csv_path = '.'

# Stop with a clear error if the CSV file is missing. Raising works the same
# in a notebook and in a script, unlike exit(), which tears down the session.
locations_csv_file = os.path.join(locations_csv_path, locations_csv)
if not os.path.exists(locations_csv_file):
    raise FileNotFoundError(f"Error: {locations_csv} file not found.")

# Read the locations from CSV
df = pd.read_csv(locations_csv_file)

# Create a map centered around the first location
start_lat = df.loc[0, 'latitude']
start_lon = df.loc[0, 'longitude']
m = folium.Map(location=[start_lat, start_lon], zoom_start=6)

# Create an empty list to store route instructions
route_instructions = []

# Create an empty list to store coordinates for clustering
all_coordinates = []

# The OSRM endpoint does not change between requests
osrm_endpoint = 'http://localhost:5000/route/v1/driving'

# Row labels that already have a marker, so each location is drawn only once
marked_locations = set()

# Iterate through each unordered pair of locations in the DataFrame
for i, j in combinations(range(len(df)), 2):
    # Extract location details
    name1 = df.loc[i, 'name']
    lat1 = df.loc[i, 'latitude']
    lon1 = df.loc[i, 'longitude']
    name2 = df.loc[j, 'name']
    lat2 = df.loc[j, 'latitude']
    lon2 = df.loc[j, 'longitude']

    # Prepare API request URL (OSRM expects longitude,latitude order)
    request_url = f"{osrm_endpoint}/{lon1},{lat1};{lon2},{lat2}?steps=true&geometries=polyline"

    try:
        # Send request to OSRM; the timeout keeps a stalled server from
        # hanging the whole cell
        response = requests.get(request_url, timeout=30)
        response.raise_for_status()

        # Extract route data from the response
        route_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on older versions of requests
        print(f"Error occurred during OSRM API request: {e}")
        continue

    # Skip pairs for which OSRM returned no route
    if 'routes' not in route_data or len(route_data['routes']) == 0:
        continue

    # Extract route steps
    steps = route_data['routes'][0]['legs'][0]['steps']

    # Total route distance, paired with the name of the last step. Resetting
    # `instruction` per pair avoids reusing a value left over from an earlier
    # pair when a route has no steps.
    distance_sum = 0
    instruction = None
    for step in steps:
        instruction = step['name']
        distance_sum += step['distance']

    # Append the summary for this pair to the route_instructions list
    route_instructions.append((instruction, distance_sum))

    # Extract route geometry
    geometry = route_data['routes'][0]['geometry']

    # Decode polyline string to get coordinates
    coordinates = polyline.decode(geometry)

    # Add the coordinates to the list for clustering
    all_coordinates.extend(coordinates)

    # Add markers for the start and end locations (once per location)
    for row_label, marker_name, marker_lat, marker_lon in (
        (i, name1, lat1, lon1),
        (j, name2, lat2, lon2),
    ):
        if row_label in marked_locations:
            continue
        marked_locations.add(row_label)
        folium.CircleMarker(
            location=(marker_lat, marker_lon),
            radius=6,
            color='red',
            fill=True,
            fill_color='blue',
            popup=marker_name
        ).add_to(m)

# Perform KD-Tree clustering for coordinates
eps = 0.01  # maximum distance between two points to be considered in the same cluster

# Convert all_coordinates to a NumPy array of (latitude, longitude) rows.
# The reshape keeps the array two-dimensional even when no route was found.
all_coordinates = np.array(all_coordinates, dtype=float).reshape(-1, 2)

if len(all_coordinates) > 0:
    # For every point, collect the indices of all points within `eps` of it
    kdtree = KDTree(all_coordinates)
    clusters = kdtree.query_radius(all_coordinates, eps)
else:
    # KDTree cannot be built from an empty array; there is nothing to cluster
    print("No route coordinates available for clustering.")
    kdtree = None
    clusters = []

# Create a list to store the final cluster coordinates
final_cluster_coords = []

# Iterate over the clusters and generate polylines for each cluster
for cluster in clusters:
    # Look up the coordinates of the points belonging to the current cluster
    cluster_coords = all_coordinates[cluster]

    if len(cluster_coords) < 2:
        # A single point cannot be connected to anything
        continue

    # Use the greedy TSP solver to order the locations in the cluster
    distance_matrix_cluster = distance_matrix(cluster_coords, cluster_coords)
    tsp_route = solve_tsp(distance_matrix_cluster)

    # Reorder the cluster coordinates based on the TSP route
    reordered_coords = [cluster_coords[i] for i in tsp_route]

    # Connect consecutive points with straight lines to create the route
    for start_point, end_point in zip(reordered_coords, reordered_coords[1:]):
        # Add a polyline between the start and end points
        polyline_obj = folium.PolyLine(
            locations=[(start_point[0], start_point[1]), (end_point[0], end_point[1])],
            color='blue',
            weight=2,
            opacity=0.9
        )

        # Add the polyline to the map
        polyline_obj.add_to(m)

    # Add the cluster coordinates to the list
    final_cluster_coords.extend(cluster_coords)

# Create a list of polyline coordinates
polyline_coords = [(coord[0], coord[1]) for coord in final_cluster_coords]

# Draw the combined polyline only when there is something to draw:
# folium.PolyLine raises ValueError for an empty list of locations
if polyline_coords:
    polyline_obj = folium.PolyLine(
        locations=polyline_coords,
        color='blue',
        weight=2,
        opacity=0.9
    )

    # Add the polyline to the map
    polyline_obj.add_to(m)

# Save the map as an HTML file (create the output directory if needed)
os.makedirs('images', exist_ok=True)
map_file = os.path.join('images', f'{region}-route-map-clustering-kdtree.html')
m.save(map_file)
print(f"Map saved as {map_file}")

# Display map
m

In [None]:
def euclidean_distance(a, b):
    """Return the straight-line (L2) distance between points ``a`` and ``b``."""
    offset = np.array(a) - np.array(b)
    return np.linalg.norm(offset)

def nearest_neighbor(locations):
    """Order locations with the greedy nearest-neighbor heuristic.

    Starting from a randomly chosen location, repeatedly move to the closest
    location that has not been visited yet (Euclidean distance).

    Args:
        locations: Sequence of equally sized coordinate tuples, e.g. (x, y).

    Returns:
        List of indices into ``locations`` in visiting order. Every index
        appears exactly once; the list is empty for empty input.
    """
    num_locations = len(locations)
    if num_locations == 0:
        # np.random.choice(0) raises ValueError; an empty tour is the answer
        return []

    # Convert once so distances do not rebuild arrays on every comparison
    points = np.asarray(locations, dtype=float)

    def distance_from(origin, target):
        return np.linalg.norm(points[origin] - points[target])

    unvisited = set(range(num_locations))

    # Choose a random starting location (plain int, like the later entries)
    current_location = int(np.random.choice(num_locations))
    route = [current_location]
    unvisited.remove(current_location)

    while unvisited:
        nearest_dist = float('inf')
        nearest_location = None

        for loc in unvisited:
            dist = distance_from(current_location, loc)
            if dist < nearest_dist:
                nearest_dist = dist
                nearest_location = loc

        current_location = nearest_location
        route.append(current_location)
        unvisited.remove(current_location)

    return route

def visualise_route(locations, route):
    """Plot the locations and the closed tour described by ``route``.

    Args:
        locations: Sequence of points; element 0 of each point is used as the
            x value and element 1 as the y value.
        route: Sequence of indices into ``locations`` in visiting order.
    """
    xs = [point[0] for point in locations]
    ys = [point[1] for point in locations]

    plt.figure(figsize=(8, 6))
    plt.scatter(xs, ys, c='blue', s=100, zorder=2)

    # One red segment per consecutive pair of stops
    for leg in range(1, len(route)):
        src, dst = route[leg - 1], route[leg]
        plt.plot([xs[src], xs[dst]], [ys[src], ys[dst]], 'r-', zorder=1)

    # Close the loop: last stop back to the first one
    last_stop, first_stop = route[-1], route[0]
    plt.plot([xs[last_stop], xs[first_stop]], [ys[last_stop], ys[first_stop]], 'r-', zorder=1)

    plt.xlabel('X-coordinate')
    plt.ylabel('Y-coordinate')
    plt.title('Nearest Neighbor Algorithm - Shortest Route')
    plt.grid(True)
    plt.show()

# Demo on a toy data set: replace these coordinates with your actual
# locations A, B, C, D, etc. Each entry is an (x, y) pair.
locations = [(0, 0), (1, 2), (4, 3), (2, 5)]
# Visiting order as a list of indices into `locations`. The start is chosen at
# random inside nearest_neighbor, so the result can differ between runs.
route = nearest_neighbor(locations)
# NOTE(review): the label says "Optimal", but nearest neighbor is a greedy
# heuristic and does not guarantee the shortest tour.
print("Optimal route:", route)
# Scatter plot of the points with the tour drawn as a closed loop
visualise_route(locations, route)


In [None]:
# Set the file name of the locations.csv file
locations_csv = f'{region}-locations.csv'

# Set the path to the directory where the locations CSV file is stored
locations_csv_path = '.'

# Stop with a clear error if the CSV file is missing. Raising works the same
# in a notebook and in a script, unlike exit(), which tears down the session.
locations_csv_file = os.path.join(locations_csv_path, locations_csv)
if not os.path.exists(locations_csv_file):
    raise FileNotFoundError(f"Error: {locations_csv} file not found.")

# Read the locations from CSV
df = pd.read_csv(locations_csv_file)

# Create a new DataFrame with only the required columns
locations = df[['name', 'latitude', 'longitude']]

def euclidean_distance(point1, point2):
    """Return the planar Euclidean distance between two 2-D points.

    Args:
        point1: Two coordinates as a tuple, list, array or pandas Series.
        point2: Same as ``point1``.

    Returns:
        The distance as a float, in the same units as the inputs.
    """
    # Read the coordinates by position through NumPy. Indexing a
    # label-indexed pandas Series with [0] / [1] relies on a deprecated
    # positional fallback.
    x1, y1 = np.asarray(point1, dtype=float)[:2]
    x2, y2 = np.asarray(point2, dtype=float)[:2]
    return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)

def nearest_neighbor_route(locations):
    """Build a round trip through all locations with the nearest-neighbor heuristic.

    The tour starts at row position 0, always moves to the closest location
    not yet visited (planar distance on latitude/longitude), and finally
    returns to position 0.

    Args:
        locations: DataFrame with 'latitude' and 'longitude' columns.

    Returns:
        List of row positions in visiting order, beginning and ending with 0.
        A single location yields ``[0, 0]``; an empty frame yields ``[]``.
    """
    num_locations = len(locations)
    if num_locations == 0:
        return []

    # Pull the coordinates out once instead of slicing the frame per pair
    coords = locations[['latitude', 'longitude']].to_numpy(dtype=float)

    # Start from the first location (index 0). It is excluded from the
    # unvisited pool up front; otherwise the tour can jump back to the start
    # early and leave other locations unvisited.
    current_location = 0
    route = [current_location]
    unvisited = list(range(1, num_locations))

    while unvisited:
        offsets = coords[unvisited] - coords[current_location]
        dists = np.sqrt(offsets[:, 0] ** 2 + offsets[:, 1] ** 2)

        # argmin returns the first minimum, so ties go to the lowest index
        nearest_location = unvisited[int(np.argmin(dists))]

        # Move to the nearest location and remove it from the unvisited list
        unvisited.remove(nearest_location)
        route.append(nearest_location)
        current_location = nearest_location

    # Add the first location (index 0) at the end to complete the route
    route.append(0)

    return route


# Reset the index of the DataFrame so row labels match row positions
df = df.reset_index(drop=True)

# Get the visiting order from the nearest neighbor heuristic
route = nearest_neighbor_route(df)

# Create a map centered around the first location
start_lat = df.loc[route[0], 'latitude']
start_lon = df.loc[route[0], 'longitude']
m = folium.Map(location=[start_lat, start_lon], zoom_start=6)

# Create an empty list to store route instructions
route_instructions = []

# Create an empty list to store all routes
routes = []

# The OSRM endpoint does not change between requests
osrm_endpoint = 'http://localhost:5000/route/v1/driving'

# Iterate through consecutive stops of the route
for leg_start, leg_end in zip(route, route[1:]):
    # Extract location details
    name1 = df.loc[leg_start, 'name']
    lat1 = df.loc[leg_start, 'latitude']
    lon1 = df.loc[leg_start, 'longitude']
    name2 = df.loc[leg_end, 'name']
    lat2 = df.loc[leg_end, 'latitude']
    lon2 = df.loc[leg_end, 'longitude']

    # Prepare API request URL (OSRM expects longitude,latitude order)
    request_url = f"{osrm_endpoint}/{lon1},{lat1};{lon2},{lat2}?steps=true&geometries=polyline"

    try:
        # Send request to OSRM; the timeout keeps a stalled server from
        # hanging the whole cell
        response = requests.get(request_url, timeout=30)
        response.raise_for_status()

        # Extract route data from the response
        route_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on older versions of requests
        print(f"Error occurred during OSRM API request: {e}")
        continue

    # Skip legs for which OSRM returned no route
    if 'routes' not in route_data or len(route_data['routes']) == 0:
        continue

    # Extract route steps
    steps = route_data['routes'][0]['legs'][0]['steps']
    distance_sum = 0
    for step in steps:
        # Extract step instructions
        instruction = step['name']

        # Extract step distance and add to the distance sum
        distance = step['distance']
        distance_sum += distance

        # Append (step name, cumulative distance of this leg so far)
        route_instructions.append((instruction, distance_sum))

    # Extract route geometry
    geometry = route_data['routes'][0]['geometry']

    # Decode polyline string to get coordinates
    coordinates = polyline.decode(geometry)

    routes.append(coordinates)

    # Create a Folium PolyLine object
    polyline_obj = folium.PolyLine(
        locations=coordinates,
        color='blue',
        weight=2,
        opacity=0.9
    )

    # Add the PolyLine to the map
    polyline_obj.add_to(m)

# Add one marker per visited location. This is done after the legs so the
# markers sit on top of the lines, and independently of the OSRM requests so
# a failed leg does not remove the markers of its end points.
for stop in dict.fromkeys(route):
    folium.CircleMarker(
        location=[df.loc[stop, 'latitude'], df.loc[stop, 'longitude']],
        radius=6,
        color='red',
        fill=True,
        fill_color='blue',
        popup=df.loc[stop, 'name']
    ).add_to(m)


# CSV file path
csv_file = f"{region}-route-coords.csv"

# Write the nested list to the CSV file
with open(csv_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["latitude", "longitude"])  # Write the header
    # A separate loop name keeps `route` (the visiting order) intact
    for leg_coordinates in routes:
        writer.writerows(leg_coordinates)  # Write each sublist of coordinates to the CSV file

# Save the map as an HTML file (create the output directory if needed)
os.makedirs('images', exist_ok=True)
map_file = os.path.join('images', f'{region}-route-map.html')
m.save(map_file)
print(f"Map saved as {map_file}")

# Display the map
m


#### Creating Distance Matrix

In [None]:
# Set the file name of the locations.csv file
locations_csv = f'{region}-locations.csv'

# Set the path to the directory where the locations CSV file is stored
locations_csv_path = '.'

# Stop with a clear error if the CSV file is missing. Raising works the same
# in a notebook and in a script, unlike exit(), which tears down the session.
locations_csv_file = os.path.join(locations_csv_path, locations_csv)
if not os.path.exists(locations_csv_file):
    raise FileNotFoundError(f"Error: {locations_csv} file not found.")

# Read the locations from CSV
df = pd.read_csv(locations_csv_file)

# Create a zero-filled DataFrame to store the distances: one row per location
# position, one column per location name. It is deliberately NOT called
# `distance_matrix`, because that name is the function imported from
# scipy.spatial and rebinding it breaks cells that call the function.
num_locations = len(df)
distance_df = pd.DataFrame(0.0, index=range(num_locations), columns=df['name'])

# The OSRM endpoint does not change between requests
osrm_endpoint = 'http://localhost:5000/route/v1/driving'

# Iterate through each unordered pair of locations in the DataFrame
for i, j in combinations(range(num_locations), 2):
    # Extract location details
    lat1 = df.loc[i, 'latitude']
    lon1 = df.loc[i, 'longitude']
    lat2 = df.loc[j, 'latitude']
    lon2 = df.loc[j, 'longitude']

    # Prepare API request URL (OSRM expects longitude,latitude order)
    request_url = f"{osrm_endpoint}/{lon1},{lat1};{lon2},{lat2}?steps=true&geometries=polyline"

    try:
        # Send request to OSRM; the timeout keeps a stalled server from
        # hanging the whole cell
        response = requests.get(request_url, timeout=30)
        route_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # Leave the pair at 0 and carry on with the remaining pairs
        print(f"Error occurred during OSRM API request: {e}")
        continue

    # Check if response contains a route
    if 'routes' in route_data and len(route_data['routes']) > 0:
        # Extract distance
        distance = route_data['routes'][0]['distance']

        # Update the distance matrix symmetrically
        distance_df.at[i, df.loc[j, 'name']] = distance
        distance_df.at[j, df.loc[i, 'name']] = distance

# Save the distance matrix to a CSV file for pruning
distance_df.to_csv(f'{region}-distances.csv', index_label='name')

print(f"Distances saved as {region}-distances.csv")


#### Implementing Geo Pruning

In [None]:
def calculate_distance(coord1, coord2):
    """Return the geodesic distance between ``coord1`` and ``coord2`` in metres."""
    separation = geodesic(coord1, coord2)
    return separation.meters

def geo_pruning(x, df, k, b):
    """Zero out geodesic distances beyond ``b`` times the k-th nearest one.

    Only ``x.name`` is read from ``x``; the distances are recomputed from the
    coordinates in ``df``.

    Args:
        x: Series whose ``name`` is the name of the origin location.
        df: DataFrame with 'name', 'latitude' and 'longitude' columns.
        k: Rank (0-based, in ascending distance order, the origin itself at
            rank 0) of the distance used as reference. Ranks past the end are
            clamped to the farthest location.
        b: Multiplier applied to the reference distance.

    Returns:
        List with one entry per row of ``df``, in row order: the geodesic
        distance in metres from the origin, or 0 where it exceeds the cutoff.

    Raises:
        IndexError: If no row of ``df`` is named ``x.name``.
    """
    # Take the first matching row as scalars; passing the whole one-element
    # array to geodesic only works through implicit array-to-float conversion.
    origin_row = df.loc[df['name'] == x.name].iloc[0]
    origin = (origin_row['latitude'], origin_row['longitude'])

    # One pass over the rows instead of a name lookup per destination
    distances = [
        geodesic(origin, (lat, lon)).meters
        for lat, lon in zip(df['latitude'], df['longitude'])
    ]

    # Clamp the rank so small data sets (fewer than k + 1 locations) do not
    # raise IndexError
    reference_rank = min(k, len(distances) - 1)
    knn = sorted(distances)[reference_rank]
    return [d if d <= knn * b else 0 for d in distances]

def get_polyline_string(origin, destination,
                        base_url="http://router.project-osrm.org", timeout=30):
    """Fetch the encoded polyline of the driving route between two points.

    Args:
        origin: (latitude, longitude) of the start point.
        destination: (latitude, longitude) of the end point.
        base_url: Root URL of the OSRM server. Defaults to the public demo
            server; pass e.g. "http://localhost:5000" to use a local engine.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The encoded polyline string, or None when the request fails, the
        response is not valid JSON, or OSRM reports no route.
    """
    # OSRM expects coordinates in longitude,latitude order
    url = f"{base_url}/route/v1/driving/{origin[1]},{origin[0]};{destination[1]},{destination[0]}"
    try:
        response = requests.get(url, timeout=timeout)
        data = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # Network failure or non-JSON body: report "no route" to the caller,
        # which already handles a None result
        return None

    if response.status_code == 200 and data.get("code") == "Ok":
        found_routes = data.get("routes") or []
        if found_routes:
            return found_routes[0].get("geometry")
    return None

def visualise_geo_pruned_routes(locations_df, distances_geo_df, region, k, b):
    """Draw the geo-pruned route network on a Folium map and export it.

    For every location named in the columns of ``distances_geo_df``,
    ``geo_pruning`` decides which other locations stay connected. Each
    surviving pair is routed once with ``get_polyline_string`` and drawn.

    Args:
        locations_df: DataFrame with 'name', 'latitude' and 'longitude'
            columns; the row labelled 0 is used to centre the map.
        distances_geo_df: DataFrame whose column labels are location names.
        region: Prefix used for the output file names.
        k: Neighbour rank passed to ``geo_pruning``.
        b: Distance multiplier passed to ``geo_pruning``.

    Returns:
        The folium.Map with routes and location markers. As side effects,
        writes ``{region}-geo-pruned-route-coords.csv`` and
        ``{region}-geo-pruned-route-map.html``.
    """
    # Create a map centered around the first location
    start_lat = locations_df.loc[0, 'latitude']
    start_lon = locations_df.loc[0, 'longitude']
    m = folium.Map(location=[start_lat, start_lon], zoom_start=6)

    # Build the list of (origin label, destination label) pairs to route.
    # geo_pruning returns one distance per row of locations_df, in row order,
    # with pruned entries (and the origin itself) set to 0.
    row_labels = list(locations_df.index)
    route_pairs = []
    seen_pairs = set()
    for name in distances_geo_df.columns:
        matches = locations_df.index[locations_df['name'] == name]
        if len(matches) == 0:
            # Column does not correspond to a known location
            continue
        origin_label = matches[0]
        pruned = geo_pruning(distances_geo_df[name], locations_df, k, b)
        for destination_label, distance in zip(row_labels, pruned):
            pair = frozenset((origin_label, destination_label))
            # Route every undirected pair only once
            if distance > 0 and pair not in seen_pairs:
                seen_pairs.add(pair)
                route_pairs.append((origin_label, destination_label))

    # Request, decode and draw the route of every surviving pair
    route_coordinates = []
    for idx1, idx2 in route_pairs:
        origin = (locations_df.loc[idx1, 'latitude'], locations_df.loc[idx1, 'longitude'])
        destination = (locations_df.loc[idx2, 'latitude'], locations_df.loc[idx2, 'longitude'])
        geometry_str = get_polyline_string(origin, destination)

        # Skip pairs without a geometry
        if pd.isna(geometry_str):
            print(f"Warning: No route between {idx1} and {idx2}, skipping.")
            continue

        # Attempt to decode the polyline
        try:
            coordinates = polyline.decode(geometry_str)
        except ValueError:
            print(f"Warning: Invalid 'geometry' value for {idx1} to {idx2}, skipping.")
            continue
        route_coordinates.append(coordinates)

        folium.PolyLine(
            locations=coordinates,
            color='blue',
            weight=2,
            opacity=0.9,
            popup=f"{idx1} to {idx2}"
        ).add_to(m)

    # Add markers for each location
    for _, row in locations_df.iterrows():
        folium.CircleMarker(
            location=(row['latitude'], row['longitude']),
            radius=6,
            color='red',
            fill=True,
            fill_color='blue',
            popup=row['name']
        ).add_to(m)

    # Save the route_coordinates to the CSV file
    csv_file = f"{region}-geo-pruned-route-coords.csv"
    with open(csv_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["latitude", "longitude"])
        for coordinates in route_coordinates:
            writer.writerows(coordinates)

    print(f"Coordinates saved as {csv_file}")

    # Save the map as an HTML file
    map_file = f'{region}-geo-pruned-route-map.html'
    m.save(map_file)
    print(f"Map saved as {map_file}")

    # Return the map so the caller can display it
    return m

# Load data here (Locations and Distances DataFrames)
# Locations: one row per place, read from the region's locations CSV
Locations = pd.read_csv(f'{region}-locations.csv')

# Distances: the pairwise distance table saved by the distance-matrix cell;
# its first column is labelled 'name' and is used as the index
Distances = pd.read_csv(f'{region}-distances.csv', index_col='name')

# Set parameters for geo_pruning
# k_value: rank into the sorted distance list used as the reference distance
k_value = 6
# b_value: multiplier applied to that reference distance to get the cutoff
b_value = 1.1

# Visualise geo_pruning results
m_geo_pruned = visualise_geo_pruned_routes(Locations, Distances, region, k_value, b_value)

# Display map
m_geo_pruned

#### Implementing Triangle Pruning

In [None]:
def calculate_distance(coord1, coord2):
    """Return the geodesic distance between ``coord1`` and ``coord2`` in metres."""
    separation = geodesic(coord1, coord2)
    return separation.meters

def triangle_pruning(x, df, t):
    """Return the distances of the three closest entries that are within ``t``.

    Only ``x.name`` is read from ``x``; distances are geodesic, in metres,
    computed from the coordinates in ``df``. The origin itself (distance 0)
    is among the candidates.

    Args:
        x: Series whose ``name`` is the name of the origin location.
        df: DataFrame with 'name', 'latitude' and 'longitude' columns.
        t: Upper bound in metres for a distance to be kept.

    Returns:
        List of at most three distances, in ascending order.
    """
    def first_match_coords(label):
        # Coordinates of the first row carrying this name
        lat = df.loc[df['name'] == label, 'latitude'].values[0]
        lon = df.loc[df['name'] == label, 'longitude'].values[0]
        return (lat, lon)

    origin = first_match_coords(x.name)
    distances = [
        geodesic(origin, first_match_coords(other)).meters
        for other in df['name']
    ]

    # Three smallest distances, then the threshold filter
    closest_three = sorted(distances)[:3]
    return [d for d in closest_three if d <= t]

def visualise_triangle_pruned_routes(locations_df, distances_geo_df, region, t):
    """Draw the triangle-pruned route network on a Folium map and export it.

    For every location named in the columns of ``distances_geo_df`` the three
    geodesically closest entries (the location itself included) are
    considered, and those no farther than ``t`` metres are connected to it.
    Each pair is routed once with ``get_polyline_string`` and drawn.

    Args:
        locations_df: DataFrame with 'name', 'latitude' and 'longitude'
            columns; the row labelled 0 is used to centre the map.
        distances_geo_df: DataFrame whose column labels are location names.
        region: Prefix used for the output file names.
        t: Maximum geodesic distance in metres for a connection to be kept.

    Returns:
        The folium.Map with routes and location markers. As side effects,
        writes ``{region}-triangle-pruned-route-coords.csv`` and
        ``{region}-triangle-pruned-route-map.html``.
    """
    # Create a map centered around the first location
    start_lat = locations_df.loc[0, 'latitude']
    start_lon = locations_df.loc[0, 'longitude']
    m = folium.Map(location=[start_lat, start_lon], zoom_start=6)

    # Select the pairs to route. The selection is done here by row position
    # because the pruned distances alone do not say which neighbour they
    # belong to.
    names = list(locations_df['name'])
    coords = list(zip(locations_df['latitude'], locations_df['longitude']))
    row_labels = list(locations_df.index)

    route_pairs = []
    seen_pairs = set()
    for name in distances_geo_df.columns:
        if name not in names:
            # Column does not correspond to a known location
            continue
        origin_pos = names.index(name)
        distances = [geodesic(coords[origin_pos], point).meters for point in coords]
        nearest = sorted(enumerate(distances), key=lambda item: item[1])[:3]
        for neighbor_pos, distance in nearest:
            if neighbor_pos == origin_pos or distance > t:
                continue
            pair = frozenset((origin_pos, neighbor_pos))
            # Route every undirected pair only once
            if pair in seen_pairs:
                continue
            seen_pairs.add(pair)
            route_pairs.append((row_labels[origin_pos], row_labels[neighbor_pos]))

    # Request, decode and draw the route of every selected pair
    route_coordinates = []
    for idx1, idx2 in route_pairs:
        # Names of the two end points, used for the popup label
        name1 = locations_df.loc[idx1, 'name']
        name2 = locations_df.loc[idx2, 'name']

        origin = (locations_df.loc[idx1, 'latitude'], locations_df.loc[idx1, 'longitude'])
        destination = (locations_df.loc[idx2, 'latitude'], locations_df.loc[idx2, 'longitude'])
        geometry_str = get_polyline_string(origin, destination)

        # Skip pairs without a geometry
        if pd.isna(geometry_str):
            print(f"Warning: No route between {idx1} and {idx2}, skipping.")
            continue

        # Attempt to decode the polyline
        try:
            coordinates = polyline.decode(geometry_str)
        except ValueError:
            print(f"Warning: Invalid 'geometry' value for {idx1} to {idx2}, skipping.")
            continue
        route_coordinates.append(coordinates)

        folium.PolyLine(
            locations=coordinates,
            color='blue',
            weight=2,
            opacity=0.9,
            popup=f"{name1} to {name2}"
        ).add_to(m)

    # Add markers for each location
    for _, row in locations_df.iterrows():
        folium.CircleMarker(
            location=(row['latitude'], row['longitude']),
            radius=6,
            color='red',
            fill=True,
            fill_color='blue',
            popup=row['name']
        ).add_to(m)

    # Save the route_coordinates to the CSV file
    csv_file = f"{region}-triangle-pruned-route-coords.csv"
    with open(csv_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["latitude", "longitude"])
        for coordinates in route_coordinates:
            writer.writerows(coordinates)

    print(f"Coordinates saved as {csv_file}")

    # Save the map as an HTML file
    map_file = f'{region}-triangle-pruned-route-map.html'
    m.save(map_file)
    print(f"Map saved as {map_file}")

    # Return the map so the caller can display it
    return m

# Load data here (Locations and Distances DataFrames)
# Locations: one row per place, read from the region's locations CSV
Locations = pd.read_csv(f'{region}-locations.csv')

# Distances: the pairwise distance table saved by the distance-matrix cell;
# its first column is labelled 'name' and is used as the index
Distances = pd.read_csv(f'{region}-distances.csv', index_col='name')

# Set parameter for triangle_pruning
t_value = 10000  # Threshold compared against geodesic distances in metres; adjust as needed

# Visualise triangle_pruning results
m_triangle_pruned = visualise_triangle_pruned_routes(Locations, Distances, region, t_value)

# Display map
m_triangle_pruned

#### Implementing KNN Pruning

In [None]:
def calculate_distance(coord1, coord2):
    """Return the geodesic distance between ``coord1`` and ``coord2`` in metres."""
    separation = geodesic(coord1, coord2)
    return separation.meters

def knn_route(x, df, k):
    """Return the geodesic distances to the ``k`` nearest other locations.

    Only ``x.name`` is read from ``x``; distances are in metres and computed
    from the coordinates in ``df``.

    Args:
        x: Object whose ``name`` is the name of the origin location.
        df: DataFrame with 'name', 'latitude' and 'longitude' columns.
        k: Number of neighbours to keep.

    Returns:
        List of up to ``k`` distances in ascending order. The smallest
        distance overall (normally the origin itself) is skipped.
    """
    def first_match_coords(label):
        # Coordinates of the first row carrying this name
        lat = df.loc[df['name'] == label, 'latitude'].values[0]
        lon = df.loc[df['name'] == label, 'longitude'].values[0]
        return (lat, lon)

    origin = first_match_coords(x.name)
    distances = [
        geodesic(origin, first_match_coords(other)).meters
        for other in df['name']
    ]

    # Rank 0 is the location itself, so take ranks 1..k
    ranked = sorted(distances)
    return ranked[1:k + 1]

def visualise_knn_routes(locations_df, k):
    """Draw routes from every location to its ``k`` nearest neighbours.

    Neighbours are chosen by geodesic distance. Each pair is routed once with
    ``get_polyline_string`` and drawn on a Folium map.

    Args:
        locations_df: DataFrame with 'name', 'latitude' and 'longitude'
            columns; the row labelled 0 is used to centre the map.
        k: Number of nearest neighbours to connect to each location.

    Returns:
        The folium.Map with routes and location markers. As side effects,
        writes ``{region}-knn-pruned-route-coords.csv`` (``region`` is read
        from the module-level variable) and ``knn-route-map.html``.
    """
    # Create a map centered around the first location
    start_lat = locations_df.loc[0, 'latitude']
    start_lon = locations_df.loc[0, 'longitude']
    m = folium.Map(location=[start_lat, start_lon], zoom_start=6)

    # Select the pairs to route. The selection works on row positions so that
    # each kept distance stays attached to the neighbour it belongs to.
    coords = list(zip(locations_df['latitude'], locations_df['longitude']))
    row_labels = list(locations_df.index)

    route_pairs = []
    seen_pairs = set()
    for origin_pos, origin_point in enumerate(coords):
        # (distance, position) of every other location, nearest first
        ranked = sorted(
            (geodesic(origin_point, point).meters, pos)
            for pos, point in enumerate(coords)
            if pos != origin_pos
        )
        for _, neighbor_pos in ranked[:k]:
            pair = frozenset((origin_pos, neighbor_pos))
            # Route every undirected pair only once
            if pair in seen_pairs:
                continue
            seen_pairs.add(pair)
            route_pairs.append((row_labels[origin_pos], row_labels[neighbor_pos]))

    # Request, decode and draw the route of every selected pair
    route_coordinates = []
    for idx1, idx2 in route_pairs:
        # Names of the two end points, used for the popup label
        name1 = locations_df.loc[idx1, 'name']
        name2 = locations_df.loc[idx2, 'name']

        origin = (locations_df.loc[idx1, 'latitude'], locations_df.loc[idx1, 'longitude'])
        destination = (locations_df.loc[idx2, 'latitude'], locations_df.loc[idx2, 'longitude'])
        geometry_str = get_polyline_string(origin, destination)

        # Skip pairs without a geometry
        if pd.isna(geometry_str):
            print(f"Warning: No route between {idx1} and {idx2}, skipping.")
            continue

        # Attempt to decode the polyline
        try:
            coordinates = polyline.decode(geometry_str)
        except ValueError:
            print(f"Warning: Invalid 'geometry' value for {idx1} to {idx2}, skipping.")
            continue
        route_coordinates.append(coordinates)

        folium.PolyLine(
            locations=coordinates,
            color='blue',
            weight=2,
            opacity=0.9,
            popup=f"{name1} to {name2}"
        ).add_to(m)

    # Add markers for each location
    for _, row in locations_df.iterrows():
        folium.CircleMarker(
            location=(row['latitude'], row['longitude']),
            radius=6,
            color='red',
            fill=True,
            fill_color='blue',
            popup=row['name']
        ).add_to(m)

    # Save the route_coordinates to the CSV file
    csv_file = f"{region}-knn-pruned-route-coords.csv"
    with open(csv_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["latitude", "longitude"])
        for coordinates in route_coordinates:
            writer.writerows(coordinates)

    print(f"Coordinates saved as {csv_file}")

    # Save the map as an HTML file
    map_file = 'knn-route-map.html'
    m.save(map_file)
    print(f"Map saved as {map_file}")

    # Return the map so the caller can display it
    return m

# Load data here (Locations DataFrame)
# Locations: one row per place, read from the region's locations CSV
Locations = pd.read_csv(f'{region}-locations.csv')

# Set parameter for k-NN route
k_value = 3  # Adjust this value for the number of nearest neighbors

# Visualise k-NN route results (returns the Folium map)
m_knn_route = visualise_knn_routes(Locations, k_value)

# Display map
m_knn_route


#### Implementing Sequential Pruning

In [None]:
def calculate_distance(coord1, coord2):
    """Return the geodesic distance between ``coord1`` and ``coord2`` in metres."""
    separation = geodesic(coord1, coord2)
    return separation.meters

def get_polyline_string(origin, destination,
                        base_url="http://router.project-osrm.org", timeout=30):
    """Fetch the encoded polyline of the driving route between two points.

    Args:
        origin: (latitude, longitude) of the start point.
        destination: (latitude, longitude) of the end point.
        base_url: Root URL of the OSRM server. Defaults to the public demo
            server; pass e.g. "http://localhost:5000" to use a local engine.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The encoded polyline string, or None when the request fails, the
        response is not valid JSON, or OSRM reports no route.
    """
    # OSRM expects coordinates in longitude,latitude order
    url = f"{base_url}/route/v1/driving/{origin[1]},{origin[0]};{destination[1]},{destination[0]}"
    try:
        response = requests.get(url, timeout=timeout)
        data = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # Network failure or non-JSON body: report "no route" to the caller,
        # which already handles a None result
        return None

    if response.status_code == 200 and data.get("code") == "Ok":
        found_routes = data.get("routes") or []
        if found_routes:
            return found_routes[0].get("geometry")
    return None

def visualise_routes(locations_df, routes_df, region):
    """Draw the routes listed in ``routes_df`` on a Folium map and export them.

    Args:
        locations_df: DataFrame with 'name', 'latitude' and 'longitude'
            columns; the row labelled 0 is used to centre the map.
        routes_df: DataFrame with 'name1' and 'name2' columns holding row
            labels of ``locations_df``. A 'geometry' column is filled in
            place with the polyline string fetched for each row.
        region: Prefix used for the output file names.

    Returns:
        The folium.Map with routes and location markers. As side effects,
        writes ``{region}-pruned-route-coords.csv`` and
        ``{region}-pruned-route-map.html``.
    """
    def lat_lon(label):
        # (latitude, longitude) of the location stored under this row label
        return (locations_df.loc[label, 'latitude'], locations_df.loc[label, 'longitude'])

    route_coordinates = []
    centre = [locations_df.loc[0, 'latitude'], locations_df.loc[0, 'longitude']]
    m = folium.Map(location=centre, zoom_start=6)

    # Fetch, record and draw the geometry of every requested route
    for row_label, route_row in routes_df.iterrows():
        idx1 = route_row['name1']
        idx2 = route_row['name2']

        geometry_str = get_polyline_string(lat_lon(idx1), lat_lon(idx2))
        routes_df.at[row_label, 'geometry'] = geometry_str

        # Nothing to draw when no geometry came back
        if pd.isna(geometry_str):
            print(f"Warning: No route between {idx1} and {idx2}, skipping.")
            continue

        # Decode the polyline; malformed strings are reported and skipped
        try:
            coordinates = polyline.decode(geometry_str)
        except ValueError:
            print(f"Warning: Invalid 'geometry' value for {idx1} to {idx2}, skipping.")
            continue
        route_coordinates.append(coordinates)

        folium.PolyLine(
            locations=coordinates,
            color='blue',
            weight=2,
            opacity=0.9,
            popup=f"{idx1} to {idx2}"
        ).add_to(m)

    # One circle marker per location
    for _, place in locations_df.iterrows():
        folium.CircleMarker(
            location=(place['latitude'], place['longitude']),
            radius=6,
            color='red',
            fill=True,
            fill_color='blue',
            popup=place['name']
        ).add_to(m)

    # Dump all decoded points, route after route, under a single header
    csv_file = f"{region}-pruned-route-coords.csv"
    with open(csv_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["latitude", "longitude"])
        writer.writerows(point for leg in route_coordinates for point in leg)

    print(f"Coordinates saved as {csv_file}")

    # Save the map as an HTML file
    map_file = f'{region}-pruned-route-map.html'
    m.save(map_file)
    print(f"Map saved as {map_file}")

    # Hand the map back so the caller can display it
    return m

def triangle_pruning(x, df, t):
    """Return the geodesic distances from one location to every location.

    Only ``x.name`` is read from ``x``; ``t`` is accepted but not used, so no
    distance is removed by this version.

    Args:
        x: Series whose ``name`` is the name of the origin location.
        df: DataFrame with 'name', 'latitude' and 'longitude' columns.
        t: Unused threshold.

    Returns:
        List of distances in metres, one per entry of ``df['name']``.
    """
    def first_match_coords(label):
        # Coordinates of the first row carrying this name
        lat = df.loc[df['name'] == label, 'latitude'].values[0]
        lon = df.loc[df['name'] == label, 'longitude'].values[0]
        return (lat, lon)

    origin = first_match_coords(x.name)
    return [
        geodesic(origin, first_match_coords(other)).meters
        for other in df['name']
    ]

def geo_pruning(x, df, k, b):
    """Zero out geodesic distances beyond ``b`` times the k-th nearest one.

    Only ``x.name`` is read from ``x``; the distances are recomputed from the
    coordinates in ``df``.

    Args:
        x: Series whose ``name`` is the name of the origin location.
        df: DataFrame with 'name', 'latitude' and 'longitude' columns.
        k: Rank (0-based, in ascending distance order, the origin itself at
            rank 0) of the distance used as reference. Ranks past the end are
            clamped to the farthest location.
        b: Multiplier applied to the reference distance.

    Returns:
        List with one entry per row of ``df``, in row order: the geodesic
        distance in metres from the origin, or 0 where it exceeds the cutoff.

    Raises:
        IndexError: If no row of ``df`` is named ``x.name``.
    """
    origin_row = df.loc[df['name'] == x.name].iloc[0]
    origin = (origin_row['latitude'], origin_row['longitude'])

    # One pass over the rows instead of a name lookup per destination
    distances = [
        geodesic(origin, (lat, lon)).meters
        for lat, lon in zip(df['latitude'], df['longitude'])
    ]

    # Clamp the rank so small data sets (fewer than k + 1 locations) do not
    # raise IndexError
    reference_rank = min(k, len(distances) - 1)
    knn = sorted(distances)[reference_rank]
    return [d if d <= knn * b else 0 for d in distances]


def knn_route(x, df, k):
    """Return the geodesic distances to the ``k`` nearest other locations.

    Only ``x.name`` is read from ``x``; distances are in metres and computed
    from the coordinates in ``df``.

    Args:
        x: Object whose ``name`` is the name of the origin location.
        df: DataFrame with 'name', 'latitude' and 'longitude' columns.
        k: Number of neighbours to keep.

    Returns:
        List of up to ``k`` distances in ascending order. The smallest
        distance overall (normally the origin itself) is skipped.
    """
    def first_match_coords(label):
        # Coordinates of the first row carrying this name
        lat = df.loc[df['name'] == label, 'latitude'].values[0]
        lon = df.loc[df['name'] == label, 'longitude'].values[0]
        return (lat, lon)

    origin = first_match_coords(x.name)
    distances = [
        geodesic(origin, first_match_coords(other)).meters
        for other in df['name']
    ]

    # Rank 0 is the location itself, so take ranks 1..k
    ranked = sorted(distances)
    return ranked[1:k + 1]


# Load data here (Locations and Distances DataFrames)
Locations = pd.read_csv(f'{region}-locations.csv')

Distances = pd.read_csv(f'{region}-distances.csv', index_col='name')

# Set parameters for pruning and k-NN route
t_value = 10000  # Adjust this value for triangle pruning
k_value = 3  # Adjust this value for the number of nearest neighbors
k_distance = 10000  # Adjust this value for k-NN route

# Triangle Pruning (applied per column, i.e. per origin location name)
pruned_distances_df = Distances.apply(triangle_pruning, args=(Locations, t_value,))

# Geo Pruning: entries beyond the cutoff become 0
geo_pruned_distances_df = pruned_distances_df.apply(geo_pruning, args=(Locations, k_value, 1.1))

# k-NN Route: the k smallest neighbour distances per origin (distances only)
knn_routes_df = geo_pruned_distances_df.apply(knn_route, args=(Locations, k_value))

# Create routes DataFrame from the pruning results.
# knn_routes_df holds k rows of bare distances, so its rows cannot identify
# the neighbours. The pairs are therefore taken from the geo-pruned matrix,
# where row r / column c is the distance from the location named by column c
# to the r-th row of Locations; the k nearest surviving entries are kept.
geo_pruned_values = geo_pruned_distances_df.to_numpy(dtype=float)
location_labels = list(Locations.index)

routes_data = []
seen_pairs = set()
for column_pos, name in enumerate(geo_pruned_distances_df.columns):
    matches = Locations.index[Locations['name'] == name]
    if len(matches) == 0:
        # Column does not correspond to a known location
        continue
    origin_label = matches[0]

    # (distance, row position) of every surviving neighbour, nearest first
    candidates = sorted(
        (distance, row_pos)
        for row_pos, distance in enumerate(geo_pruned_values[:, column_pos])
        if distance > 0
    )
    for _, row_pos in candidates[:k_value]:
        neighbor_label = location_labels[row_pos]
        pair = frozenset((origin_label, neighbor_label))
        # Route every undirected pair only once
        if pair in seen_pairs:
            continue
        seen_pairs.add(pair)
        routes_data.append({'name1': origin_label, 'name2': neighbor_label})
routes_df = pd.DataFrame(routes_data, columns=['name1', 'name2'])

# Visualise routes
m_routes = visualise_routes(Locations, routes_df, region)

# Display map
m_routes
