## AskStreets: Querying and Visualizing Street Networks using OpenStreetMap, ArangoDB, and LangGraph
Author: Adam Munawar Rahman, March 2025

Using powerful libraries like OSMnx, we can retrieve street networks and feature datasets from OpenStreetMap and persist them as graphs and collections in ArangoDB. Then, with a ReACT agent model, we can feed natural language queries to LLMs to execute complex lookups, run GPU-backed graph algorithms, and visualize geospatial coordinates - all to enable streamlined insights into the network properties of the geographic area we are analyzing.

In [None]:
import json
import os
import random
import re
import time
import traceback
import yaml

import folium
import geopandas as gpd

import networkx as nx
import numpy as np
import nx_arangodb as nxadb
import osmnx as ox
import pandas as pd

from arango import ArangoClient
from geopy.exc import GeocoderTimedOut, GeocoderUnavailable
from geopy.geocoders import Nominatim
from IPython.display import Markdown, display

from langchain.tools import Tool
from langchain_community.chains.graph_qa.arangodb import ArangoGraphQAChain
from langchain_community.graphs import ArangoGraph
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_ibm import ChatWatsonx
from langgraph.prebuilt import create_react_agent

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

### I. Loading OpenStreetMap Networks and Features to ArangoDB
We can retrieve both street networks (consisting of nodes and edges representing the different types of paths that can be traversed between the geographic locations) and features (consisting of points associated with specific attributes like amenity type) by calling `ox.graph_from_address` and `ox.features_from_address`. 

In this instance, I'm using my workplace in NYC - IBM at One Madison Avenue - as the location to build our datasets from.

In [None]:
# Ask OSMnx for the OpenStreetMap street network surrounding the given address
G_ox = ox.graph_from_address("1 Madison Ave, Manhattan, NY", dist=250.0)

# shapely LineString values cannot be serialized to JSON, which would block persisting
# the graph to ArangoDB, so remove the 'geometry' attribute from any edge that has one
for *_endpoints, edge_attrs in G_ox.edges(data=True):
    edge_attrs.pop('geometry', None)

print(f"Graph of street network within a 250 meter radius of One Madison Avenue: {G_ox}")

In [None]:
# Draw the retrieved street network with OSMnx's plotting helper as a quick visual check
ox.plot_graph(G_ox)

In [None]:
# Since the nodes in the OSMnx graph do not include the address, we reverse geocode each node
# with Nominatim and store the result under the node's 'address' attribute
geolocator = Nominatim(user_agent="osmnx_geocoder")
for node_id, node_data in G_ox.nodes(data=True):
    lat = node_data['y']
    lon = node_data['x']

    try:
        # Use reverse geocoding on points to get address information
        location = geolocator.reverse(f"{lat}, {lon}", exactly_one=True, timeout=None)
        if location:
            # node_data is the node's own attribute dict, so this updates the graph in place
            node_data['address'] = location.address

    except Exception as e:
        # Best effort: a node that fails to geocode is simply left without an address
        print(f"Error geocoding node {node_id}: {e}")

    finally:
        # Wait between requests to avoid rate limiting. The pause lives in `finally` so it
        # also happens after a failed request; otherwise one error (for example a
        # rate-limit response) is followed immediately by the next request.
        time.sleep(1)

In [None]:
# ArangoDB assigns different identifiers when the graph is persisted. Copy each node's current
# NetworkX ID into an 'ID' attribute so the tools can refer to the same node consistently,
# e.g. when OSMnx results are handed to the AQL tool.
for original_id, attrs in G_ox.nodes(data=True):
    attrs['ID'] = original_id

In [None]:
# Location of the GraphML file used below to save and reload the street network graph
madison_filepath = "./data/madison.graphml"

In [None]:
# Save the street network graph to the GraphML file at madison_filepath
# (if the preceding cells were run, the nodes carry the 'address' and 'ID' attributes at this point)
ox.io.save_graphml(G_ox, madison_filepath)

In [None]:
# Load street network graph from saved file, replacing the in-memory G_ox
# NOTE(review): presumably this lets a later session skip the download and the slow
# reverse-geocoding cells - confirm that is the intended workflow
G_ox = ox.io.load_graphml(madison_filepath)

In [None]:
# Print a few sample nodes and edges as a sanity check on the loaded graph.
# The positions are arbitrary; any position past the end of the list is skipped so a smaller
# network (different address, radius, or OSM snapshot) does not raise IndexError.
sample_indices = (0, 65, 120, 34)

node_list_with_addresses = list(G_ox.nodes(data=True))
for sample_index in sample_indices:
    if sample_index < len(node_list_with_addresses):
        print(f"Sample node data with address: {node_list_with_addresses[sample_index]}")

edge_list = list(G_ox.edges(data=True))
for sample_index in sample_indices:
    if sample_index < len(edge_list):
        print(f"Sample edge data: {edge_list[sample_index]}")

In [None]:
# Fetch OpenStreetMap features for the same address and radius used for the street network.
# The available feature keys are listed at https://wiki.openstreetmap.org/wiki/Map_features

# A value of True requests every feature carrying that tag, whatever its value;
# a broad set of tags widens the range of questions we can ask later.
tags = dict.fromkeys(
    ['building', 'amenity', 'healthcare', 'office', 'public_transport', 'craft', 'historic'],
    True,
)
features_gdf = ox.features.features_from_address(
    "1 Madison Ave, Manhattan, NY", tags, dist=250.0
)

In [None]:
# Some geometries returned by OSMnx are polygons outlining a feature rather than points.
# To give every feature a single lat/lon pair, we use the centroid of its geometry,
# which is sufficient for the calculations performed on the features later.
#
# The centroid is computed in a projected (UTM) CRS and converted back to the original CRS;
# this avoids the geopandas warning about centroid accuracy in a geographic CRS.
utm_crs = ox.projection.project_gdf(features_gdf).crs
# Reproject and compute the centroids once, then reuse them for both coordinates
centroids = features_gdf.to_crs(utm_crs).centroid.to_crs(features_gdf.crs)
features_gdf['lat'] = centroids.y
features_gdf['lon'] = centroids.x

features_gdf_adjusted = features_gdf.drop(columns=['geometry','type'])

features_json_str = features_gdf_adjusted.reset_index().to_json(orient='records')

# OSMnx still pulls in many tag labels that resolve to None, let's remove them
features_records = [
    {key: value for key, value in feature.items() if value is not None}
    for feature in json.loads(features_json_str)
]

# Sample records by position. NOTE(review): the labels describe what sat at these positions
# when the notebook was written; OpenStreetMap data changes over time, so they may no longer
# match. Positions past the end of the list are skipped instead of raising IndexError.
sample_features = {
    1: "Courthouse",
    43: "Citi Bike Rental",
    418: "Subway Platform",
    97: "Beam Charging Station",
    221: "School",
}
for sample_index, sample_label in sample_features.items():
    if sample_index < len(features_records):
        print(f"Sample feature data ({sample_label}): {features_records[sample_index]}\n")

Our graph network and features dictionary are now ready to load into ArangoDB. Let's connect to the ArangoDB instance and create the database that will store our One Madison data.

In [None]:
# Loads in credentials file to dictionary, includes API keys and ArangoDB info
# The context manager closes the file handle once the YAML has been parsed
with open('credentials.yml', encoding='utf-8') as credentials_file:
    credentials = yaml.safe_load(credentials_file)

In [None]:
# Pull the ArangoDB connection settings (host, username, password) out of the credentials dictionary
adb_host, adb_user, adb_pass = (
    credentials[setting]
    for setting in ("DATABASE_HOST", "DATABASE_USERNAME", "DATABASE_PASSWORD")
)

In [None]:
# Open a client against the running ArangoDB instance - in my case, the Community Edition Docker container
client = ArangoClient(hosts=adb_host)

# The same login settings are used for both the "_system" and the "madison" database handles
_adb_login = {"username": adb_user, "password": adb_pass, "verify": True}

# The "_system" database is used to check for and create other databases
sys_db = client.db('_system', **_adb_login)

# Make sure the database holding the One Madison Avenue graph and features collection exists
if not sys_db.has_database('madison'):
    sys_db.create_database('madison')

madison_db = client.db('madison', **_adb_login)

In [None]:
# OSMnx returns a MultiDiGraph - a directed graph in which a pair of nodes can be joined by
# several edges - so the ArangoDB-backed copy is created as the same graph type
G_ox_adb = nxadb.MultiDiGraph(
    incoming_graph_data=G_ox,
    db=madison_db,
    name="G_ox_adb",
    overwrite_graph=True,
)
print(f"Graph of One Madison Ave street network persisted to ArangoDB: {G_ox_adb}")

# Jupyter Notebook occasionally misreports the graph as having 0 edges when loaded into ArangoDB,
# so query the node and edge counts explicitly to verify them
for element_label, element_count in (
    ("nodes", G_ox_adb.number_of_nodes()),
    ("edges", G_ox_adb.number_of_edges()),
):
    print(f"True number of {element_label} in G_ox_adb is: {element_count}")

# Sometimes the true number of edges is also reported as 0, so I re-run the cell to reimport the graph

In [None]:
# Store the feature records in an ArangoDB collection so they can be queried with AQL.
# An existing collection of the same name is dropped first, so re-running the cell starts clean.
_FEATURES_COLLECTION = 'features'
if madison_db.has_collection(_FEATURES_COLLECTION):
    madison_db.delete_collection(_FEATURES_COLLECTION)
features_collection = madison_db.create_collection(_FEATURES_COLLECTION)

features_collection.insert_many(features_records)
print(f"Added {len(features_records)} feature records to ArangoDB 'features' collection in the Madison database")

In [None]:
# Initialize the ArangoGraph LangChain wrapper for the One Madison Avenue database
# It is passed as `graph=` to ArangoGraphQAChain in the AQL tool, and its `.schema`
# is embedded in the code-generation prompt of the OSMnx tool
arango_graph = ArangoGraph(madison_db)

### II. Defining the LLM-based tools for the ReACT Agent App
In this portion of the code we define four AI tools:
1. `text_to_aql_to_text` - Generates ArangoDB Query Language based on the natural language query
2. `text_to_osmnx_algorithm_to_text` - Generates OSMnx/NetworkX Python code based on the natural language query
3. `text_to_geocoder_to_coordinates` - Extracts the geographic location from the natural language query and geocodes it
4. `text_to_coordinates_to_folium_map` - Extracts geospatial coordinates from the query and plots them on a Folium map

In [None]:
# Sets the LLM provider
# Choice of 'OPENAI', 'ANTHROPIC', and 'WATSONX'
# Each tool below branches on this value to construct its chat model; the branches have no
# fallback, so any other value leaves the tools without an LLM
AI_PLATFORM = "OPENAI"

In [None]:
# Export the API key of the selected provider as an environment variable so the matching
# LangChain chat model can pick it up. The first name in each tuple is also the key under
# which the secret is stored in credentials.yml.
_API_KEY_ENV_VARS = {
    "ANTHROPIC": ("ANTHROPIC_API_KEY",),
    "OPENAI": ("OPENAI_API_KEY",),
    # NOTE(review): langchain-ibm's documentation uses WATSONX_APIKEY (no underscore before
    # KEY) - confirm against the installed version. The original WATSONX_API_KEY variable is
    # still exported for backward compatibility.
    "WATSONX": ("WATSONX_API_KEY", "WATSONX_APIKEY"),
}

# Fail fast on a typo instead of failing later inside a tool with an unbound `llm`
if AI_PLATFORM not in _API_KEY_ENV_VARS:
    raise ValueError(
        f"Unsupported AI_PLATFORM {AI_PLATFORM!r}; expected one of {sorted(_API_KEY_ENV_VARS)}"
    )

_credential_name = _API_KEY_ENV_VARS[AI_PLATFORM][0]
for _env_var in _API_KEY_ENV_VARS[AI_PLATFORM]:
    os.environ[_env_var] = credentials[_credential_name]

In [None]:
@tool
def text_to_aql_to_text(query):
    """
    Translates natural language to AQL, executes against Arango database, returns results as text.

    USE THIS TOOL WHEN:
    - Querying the graph in ArangoDB (nodes, edges, features collections)
    - Finding relationships/paths between locations or buildings
    - Performing aggregations, filters, or sorting on stored data
    - Looking up properties like names, addresses, or building types
    - Counting or listing street network elements with specific attributes

    DO NOT USE WHEN:
    - Working with the in-memory OSMnx graph object
    - Calculating network metrics like centrality or clustering
    - Performing advanced spatial operations
    """
    # NOTE: the docstring above is the tool description handed to the agent by @tool,
    # so it is runtime text - edit it with the same care as a prompt.
    #
    # Returns the chain's "result" as a string on success, or an error message string
    # if anything inside the try block fails (nothing is raised to the caller).
    print(f"🔍 AQL Tool: Starting to process query: '{query}'")

    # Pick the chat model for the configured provider. There is no fallback branch: with an
    # unknown AI_PLATFORM `llm` stays unbound and the failure surfaces (as an error string)
    # when the chain is built inside the try block below.
    if AI_PLATFORM == "ANTHROPIC":
        llm = ChatAnthropic(temperature=0, model_name="claude-3-7-sonnet-20250219")
    elif AI_PLATFORM == "OPENAI":
        llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
    elif AI_PLATFORM == "WATSONX":
        llm = ChatWatsonx(temperature=0, model_name="ibm/granite-34b-code-instruct")

    print("🔍 AQL Tool: Initialized LLM")

    # Enhance the query by adding context to the original question posed by the user
    # referencing the attributes of the graphs and collections stored in ArangoDB

    schema_context = """
    You are an experienced graph network analyst proficient in writing ArangoDB Query Language.

    This query is about data including but not limited to these collections:

    1. G_ox_adb_node: Street intersections with attributes:
    - x, y: Coordinates (longitude, latitude)
    - osmid: OpenStreetMap ID
    - highway: Type of intersection (junction, traffic_signals, etc.)
    - address: Real world location of the node
    - ID: The original ID from the NetworkX version of the graph

    2. G_ox_adb_edge: Street segments connecting intersections
    - length: length in meters
    - name: Street name
    - oneway: Boolean indicating if it's a one-way street

    3. features: Features of the area with attributes:
    - osmid: OpenStreetMap ID
    - element_type: 'node' or 'way'
    - the following OpenStreetMap feature tags
        - building: Boolean yes/no indicating if the feature is a building
        - amenity: The type of amenity the feature is e.g. cafe, bank
        - healthcare: The health specialty e.g. nurse, optometrist
        - office: The type of company the office is for e.g. financial, airline
        - public_transport: The type of public transport feature e.g. platform, station
        - craft: Describes the type of craft the feature is for e.g. bakery, jeweller
        - historic: Describes the type of historic site, e.g. house, memorial
    - name: Name of the feature
    - lat, lon: geospatial coordinates of the feature

    # TASK
    Generate executable AQL to answer this query. 
    
    If the query asks for a path to the nearest feature, first find a node on the graph that is closest to that feature, 
    then run the algorithm.

    If the query specifies node IDs, use the ID attribute of the node. 
    For example, if the query asks "Find the address of the intersection with node ID 247798296.", then use node.ID instead of node._key.

    If the query specifies a generic location type like 'cafe', 'office', 'subway' or 'bank', use the amenity attribute. 
    Use the other OpenStreetMap feature tags if the query is more specific.

    For example, if there is a building located at a certain latitude and longitude, and the query asks for a path to get to that building,
    first find a node in the graph that has the nearest straight line distance to that building, then use that node for the traversal algorithm.

    Focus on writing efficient AQL queries that:
    1. Use FILTER, SORT, LIMIT as needed to optimize performance
    2. Use graph traversals for finding paths and connections
    3. Return well-structured data that directly answers the question
    4. Include relevant coordinates and attributes in the results
    """
    enhanced_query = f"{schema_context} Query: {query}"

    try:
        # NOTE: allow_dangerous_requests=True is an explicit opt-in required by the chain,
        # which runs LLM-generated AQL against the database - use database credentials with
        # appropriately narrow permissions.
        chain = ArangoGraphQAChain.from_llm(
            llm=llm, graph=arango_graph, verbose=True, allow_dangerous_requests=True
        )

        print("🔍 AQL Tool: Executing query against ArangoDB")
        result = chain.invoke(enhanced_query)
        # Log completion before returning (this print used to sit after the return
        # statement and could never run)
        print("🔍 AQL Tool: Completed AQL query ✅")
        return str(result["result"])

    except Exception as e:
        # Tool boundary: report the failure to the agent as text instead of raising
        print(f"🔍 AQL Tool: ERROR executing query: {str(e)} ❌")
        return f"Error executing the database query: {str(e)}"


In [None]:
@tool
def text_to_osmnx_algorithm_to_text(query):
    """
    Executes OSMnx/NetworkX algorithms on the in-memory street network graph.

    USE THIS TOOL WHEN:
    - Calculating network metrics (centrality, connectivity, clustering)
    - Finding paths using specialized algorithms
    - Analyzing network topology and structure
    - Performing spatial operations (nearest nodes, isochrones)
    - Computing network statistics or identifying critical components

    DO NOT USE WHEN:
    - Querying existing database records
    - Looking up specific street or building properties
    - Creating visualizations
    - Geocoding addresses
    """
    # NOTE: the docstring above is the tool description handed to the agent by @tool,
    # so it is runtime text - edit it with the same care as a prompt.
    #
    # Pipeline: 1) ask the LLM for Python code answering `query`, 2) exec that code against
    # G_ox (asking the LLM to repair it up to MAX_ATTEMPTS times on failure), 3) ask the LLM
    # to phrase the FINAL_RESULT produced by the code in natural language.
    # Returns the natural-language answer, or an error message string when the code cannot be
    # executed or does not set FINAL_RESULT.
    print(f"🚏 OSMnx Tool: Starting to process query: '{query}'")

    if AI_PLATFORM == "ANTHROPIC":
        llm = ChatAnthropic(temperature=0, model_name="claude-3-7-sonnet-20250219")
    elif AI_PLATFORM == "OPENAI":
        llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
    elif AI_PLATFORM == "WATSONX":
        llm = ChatWatsonx(temperature=0, model_name="ibm/granite-34b-code-instruct")

    print("🚏 OSMnx Tool: Initialized LLM")

    # Step 1: Generate OSMnx code
    print("🚏 OSMnx Tool: 1) Generating OSMnx code")

    code_generation_prompt = f"""
    You are an expert in network analysis using OSMnx and NetworkX. 
    I need you to translate a natural language query into precise Python code.
    
    # GRAPH SCHEMA
    I have an OSMnx Graph called `G_ox` with the following schema: {arango_graph.schema}
    
    # QUERY TO ANALYZE
    Natural language query: {query}
    
    # TASK
    Generate executable Python code to answer this query using NetworkX/OSMnx algorithms.
    
    # GRAPH DETAILS
    The `G_ox` graph is an in-memory OSMnx graph representing a street network with these characteristics:
    - Each node represents an intersection with geospatial coordinates
    - Each edge represents a street segment connecting intersections
    - Nodes have attributes like 'x' and 'y' coordinates, 'osmid', and sometimes 'address'
    - Edges have attributes like 'length', 'name', 'highway' type, and sometimes 'oneway'
    
    # ALGORITHM SELECTION GUIDELINES
    - For centrality: Use nx.betweenness_centrality(), nx.closeness_centrality(), or nx.degree_centrality()
    - For connectivity: Use nx.node_connectivity(), nx.edge_connectivity(), or nx.is_strongly_connected()
    - For clustering: Use nx.clustering(), or community detection algorithms
    - For shortest paths: Use nx.shortest_path(), nx.shortest_path_length(), or ox.distance.shortest_path()
    - For accessibility: Compute isochrones or service areas using nx.ego_graph() or custom functions
    - For statistics: Use ox.stats.basic_stats() or custom calculations
    
    # CODE STRUCTURE
    - Include descriptive variable names and brief comments
    - For node lookups by coordinates: Use `node_id = ox.distance.nearest_nodes(G_ox, lon, lat)`
    - For path finding between points: First find nearest nodes, then compute shortest path
    - Focus on the specific algorithm needed rather than general exploration
    - Set the final answer as `FINAL_RESULT` - this should be a CONCISE representation of the answer
    - For visualization queries or path analyses, include relevant coordinates in the result
    - Format results for human readability (round numbers, provide units, use descriptive labels)
    
    Only provide Python code that can be executed via `exec()`. No explanations or markdown.
    """

    text_to_osmnx = llm.invoke(code_generation_prompt).content
    # Strip a surrounding markdown code fence if the model added one anyway
    text_to_osmnx_cleaned = re.sub(
        r"^```python\n|```$", "", text_to_osmnx, flags=re.MULTILINE
    ).strip()

    print("🚏 OSMnx Tool: Generated code:")
    print("-" * 50)
    print(text_to_osmnx_cleaned)
    print("-" * 50)

    # Step 2: Execute OSMnx code
    print("\n🚏 OSMnx Tool: 2) Executing OSMnx code")

    def run_generated_code(code):
        """Exec `code` in a fresh namespace and return that namespace."""
        # SECURITY NOTE: this executes LLM-generated code with the full privileges of the
        # notebook process. Only run it in an environment you are prepared to expose.
        #
        # A single dict serves as both globals and locals. With two separate dicts the code
        # runs as if it were a class body, so functions defined by the generated code cannot
        # see its own top-level variables and fail with NameError. A fresh dict per run also
        # keeps half-finished state of a failed attempt out of the next one.
        namespace = {"G_ox": G_ox, "ox": ox, "nx": nx, "pd": pd, "np": np}
        exec(code, namespace)
        return namespace

    try:
        result_vars = run_generated_code(text_to_osmnx_cleaned)
        print("🚏 OSMnx Tool: Code executed successfully! ✅")

    except Exception as e:
        print(f"🚏 OSMnx Tool: EXEC ERROR: {str(e)} ❌")

        # Code correction mechanism: each attempt shows the LLM the most recent failing code
        # and its error, so consecutive attempts do not repeat the same request.
        MAX_ATTEMPTS = 3
        failing_code = text_to_osmnx_cleaned
        last_error = str(e)

        for attempt in range(1, MAX_ATTEMPTS + 1):
            print(
                f"🚏 OSMnx Tool: Attempting code correction: Attempt {attempt}/{MAX_ATTEMPTS}"
            )

            correction_prompt = f"""
            The following OSMnx/NetworkX code failed with error: {last_error}
            
            Original code:
            ```python
            {failing_code}
            ```
            
            Fix the code to properly execute and answer the query: "{query}"
            
            Common issues to check:
            1. Node or edge access methods (G_ox.nodes vs G_ox.nodes() or attribute access)
            2. Parameter types or values (ensure coordinates are float, IDs are correct types)
            3. Missing error handling (add try/except for node lookups, path finding)
            4. Incorrect attribute names (verify the attributes actually exist in the graph)
            5. Algorithm parameter requirements (some functions need specific inputs)
            
            Provide only the corrected code, no explanations.
            """

            corrected_code = llm.invoke(correction_prompt).content
            corrected_code_cleaned = re.sub(
                r"^```python\n|```$", "", corrected_code, flags=re.MULTILINE
            ).strip()

            print(f"🚏 OSMnx Tool: Corrected code (attempt {attempt}):")
            print("-" * 50)
            print(corrected_code_cleaned)
            print("-" * 50)

            try:
                result_vars = run_generated_code(corrected_code_cleaned)
                print(f"🚏 OSMnx Tool: Correction successful on attempt {attempt} ✅")
                break
            except Exception as e2:
                print(f"🚏 OSMnx Tool: Correction attempt {attempt} failed: {str(e2)} ❌")
                failing_code = corrected_code_cleaned
                last_error = str(e2)
        else:
            # The loop ended without a successful run
            error_msg = f"Unable to execute the OSMnx algorithm after {MAX_ATTEMPTS} attempts. Last error: {last_error}"
            print(f"🚏 OSMnx Tool: {error_msg} ❌")
            return error_msg

    if "FINAL_RESULT" not in result_vars:
        error_msg = (
            "Error: The code executed but did not set the FINAL_RESULT variable."
        )
        print(f"🚏 OSMnx Tool: {error_msg} ❌")
        return error_msg

    FINAL_RESULT = result_vars["FINAL_RESULT"]
    print("🚏 OSMnx Tool: Execution completed with result:")
    print("-" * 50)
    print(f"FINAL_RESULT: {FINAL_RESULT}")
    print("-" * 50)

    # Step 3: Formulate final answer
    print("🚏 OSMnx Tool: 3) Formulating final answer")

    natural_language_prompt = f"""
        # QUERY AND RESULTS
        Original query: "{query}"
        
        Analysis result: {FINAL_RESULT}
        
        # TASK
        Provide a clear, concise response that directly answers the original query based on the analysis result.
        
        Guidelines:
        - Explain what the result means in plain language
        - If the result is numeric, provide context for interpreting it
        - If the result identifies specific locations/streets, name them explicitly
        - For network metrics, explain their significance briefly (e.g., "high betweenness centrality means...")
        - If the result contains coordinates, format them clearly for potential visualization
        - Do not mention the code or algorithms used unless specifically asked
        
        Your response:
    """

    osmnx_to_text = llm.invoke(natural_language_prompt).content
    print("🚏 OSMnx Tool: Final answer:")
    print("-" * 50)
    print(osmnx_to_text)
    print("-" * 50)
    print("🚏 OSMnx Tool: Processing complete ✅")

    return osmnx_to_text


In [None]:
@tool
def text_to_geocoder_to_coordinates(query):
    """
    Extracts location names from query, geocodes them, returns latitude and longitude values.

    USE THIS TOOL WHEN:
    - A specific location name is mentioned in the query and geospatial coordinates are required

    DO NOT USE THIS TOOL WHEN:
    - Nonspecific location names like 'cafe' or 'park' are named in the query
    """
    # NOTE: the docstring above is the tool description handed to the agent by @tool,
    # so it is runtime text - edit it with the same care as a prompt.
    #
    # Returns a dict. On success:
    #   {"success": True, "locations": [...], "primary_location": {name, lat, lon}, "query": query}
    # On failure:
    #   {"success": False, "error": <message>, "query": query}
    print(f"📍 GEOCODER: Processing query: '{query}'")

    geolocator = Nominatim(user_agent="react_agent_geocoder")

    if AI_PLATFORM == "ANTHROPIC":
        llm = ChatAnthropic(temperature=0, model_name="claude-3-7-sonnet-20250219")
    elif AI_PLATFORM == "OPENAI":
        llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
    elif AI_PLATFORM == "WATSONX":
        llm = ChatWatsonx(temperature=0, model_name="ibm/granite-34b-code-instruct")

    print("📍 GEOCODER: Initialized LLM")

    # Extract location entities using LLM
    location_extraction_prompt = f"""
    Extract all location names from the following query. 
    Include cities, addresses, landmarks, neighborhoods, regions, or any other geographic references.
    Return ONLY the location names separated by semicolons.
    If no locations are found, return "NO_LOCATION".
    
    Query: {query}
    
    Location names:
    """

    location_extraction = llm.invoke(location_extraction_prompt).content

    # Process extracted locations. The reply is stripped before it is compared with the
    # sentinel (models often add a trailing newline), and empty fragments produced by stray
    # semicolons are dropped so they are never sent to the geocoder.
    extraction_text = (location_extraction or "").strip()
    possible_locations = [
        loc.strip() for loc in extraction_text.split(";") if loc.strip()
    ]
    if not possible_locations or extraction_text == "NO_LOCATION":
        # Nothing usable was extracted: fall back to geocoding the raw query
        possible_locations = [query]
    print(f"📍 GEOCODER: Extracted locations: {possible_locations}")

    # Process locations
    results = []
    for location in possible_locations:
        try:
            geocode_result = geolocator.geocode(
                location, exactly_one=True, language="en", timeout=None
            )

            if geocode_result:
                print(
                    f"📍 GEOCODER: Found {location} at {geocode_result.latitude}, {geocode_result.longitude}"
                )
                results.append(
                    {
                        "query_text": location,
                        "geocoded": {
                            "name": geocode_result.address,
                            "lat": geocode_result.latitude,
                            "lon": geocode_result.longitude,
                        },
                    }
                )
        except Exception as e:
            # Best effort: one location failing must not stop the others
            print(f"📍 GEOCODER: Error with {location}: {str(e)} ❌")
            continue

    # Try suggestion if no results found
    if not results:
        suggestion_prompt = """
        I could not geocode any locations from the query: "{query}"
        Suggest a better location reference or return "NO_SUGGESTION".
        """

        suggestion = (
            llm.invoke(suggestion_prompt.format(query=query)).content or ""
        ).strip()

        if suggestion and suggestion != "NO_SUGGESTION":
            try:
                geocode_result = geolocator.geocode(
                    suggestion, exactly_one=True, language="en", timeout=None
                )
                if geocode_result:
                    results.append(
                        {
                            "query_text": suggestion,
                            "geocoded": {
                                "name": geocode_result.address,
                                "lat": geocode_result.latitude,
                                "lon": geocode_result.longitude,
                            },
                        }
                    )
            except Exception as e:
                # Still best effort, but leave a trace instead of swallowing the error silently
                print(f"📍 GEOCODER: Error with suggested location {suggestion}: {str(e)} ❌")

    # Return error if no results found
    if not results:
        return {
            "success": False,
            "error": "Could not geocode any locations from the query",
            "query": query,
        }

    # Narrow it down further: with several hits, ask the LLM which one the query is about.
    # The first result stays the primary location unless a match is found below.
    primary_result = results[0]
    if len(results) > 1:
        location_names = [r["geocoded"]["name"] for r in results]

        primary_prompt = """
        Which ONE location is the primary focus of this query: "{query}"
        Locations: {locations}
        Return ONLY the name of the primary location.
        """

        primary_name = (
            llm.invoke(
                primary_prompt.format(query=query, locations=", ".join(location_names))
            ).content
            or ""
        ).strip()

        # Find closest match (case-insensitive substring match on the geocoded name)
        for result in results:
            if primary_name.lower() in result["geocoded"]["name"].lower():
                primary_result = result
                break

    print(
        f"📍 GEOCODER: Identified primary location: {primary_result['geocoded']['name']} ✅"
    )

    return {
        "success": True,
        "locations": results,
        "primary_location": primary_result["geocoded"],
        "query": query,
    }


In [None]:
@tool
def text_to_coordinates_to_folium_map(query):
    """
    Visualize coordinates extracted from a query on a Folium map in a Jupyter notebook.

    This tool is always delivered as the end result of a visualization query.

    USE THIS TOOL WHEN:
    - The query mentions visualization or mapping and the result from the previous tool returns a list of coordinates
    """
    # NOTE: the docstring above is the tool description handed to the agent by @tool,
    # so it is runtime text - edit it with the same care as a prompt.
    #
    # Returns a status string: a confirmation with the number of plotted points, or an
    # error message when no usable coordinates could be extracted from `query`.

    print(f"🌎 VISUALIZATION: Starting to process query: '{query}'")

    if AI_PLATFORM == "ANTHROPIC":
        llm = ChatAnthropic(temperature=0, model_name="claude-3-7-sonnet-20250219")
    elif AI_PLATFORM == "OPENAI":
        llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
    elif AI_PLATFORM == "WATSONX":
        llm = ChatWatsonx(temperature=0, model_name="ibm/granite-34b-code-instruct")

    print("🌎 VISUALIZATION: Initialized LLM")

    # Extract coordinates using LLM
    extraction_prompt = f"""
    Extract all geographic coordinates from the following text.
    Return ONLY a JSON array of objects with 'lat' and 'lon' properties.
    If a location name is mentioned instead of coordinates, do not include it.
    Only include explicitly mentioned coordinates.
    
    Example valid response:
    [
      {{"lat": 40.7411, "lon": -73.9897, "name": "Madison Square Park"}},
      {{"lat": 40.7128, "lon": -74.0060}}
    ]
    
    If you can identify names for the points, include them as a "name" property.
    If no coordinates are found, return an empty array: []
    
    Text: {query}
    
    JSON array:
    """

    print("🌎 VISUALIZATION: Sending coordinate extraction prompt to LLM")

    # Get coordinates from LLM
    extraction_result = llm.invoke(extraction_prompt).content
    print("🌎 VISUALIZATION: Received extraction result from LLM")

    # Clean up the response to ensure it's valid JSON
    # Sometimes the LLM adds text before or after the JSON array
    json_match = re.search(r"\[\s*\{.*\}\s*\]", extraction_result, re.DOTALL)
    if json_match is None:
        # Also recognize an empty array wrapped in extra text (e.g. a code fence) so that
        # "no coordinates" is reported as such instead of as a JSON parsing failure
        json_match = re.search(r"\[\s*\]", extraction_result)
    clean_json = json_match.group(0) if json_match else extraction_result

    # Parse the JSON response
    try:
        parsed_points = json.loads(clean_json)
        print("🌎 VISUALIZATION: Successfully parsed JSON response")
    except json.JSONDecodeError as e:
        print(f"🌎 VISUALIZATION: Error parsing JSON response: {str(e)}")
        error_msg = "Failed to extract coordinates from the query. Please provide coordinates in a clear format. ❌"
        print(f"🌎 VISUALIZATION: Error - {error_msg}")
        return error_msg

    # Validate extracted coordinates: keep only objects carrying numeric 'lat' and 'lon'.
    # The LLM output is not guaranteed to follow the requested shape, and a malformed entry
    # would otherwise crash the marker loop below.
    if isinstance(parsed_points, dict):
        # A single object instead of an array: treat it as a one-element list
        parsed_points = [parsed_points]
    elif not isinstance(parsed_points, list):
        parsed_points = []

    coordinates_list = []
    for point in parsed_points:
        if not isinstance(point, dict):
            print(f"🌎 VISUALIZATION: Skipping malformed entry: {point}")
            continue
        try:
            lat = float(point["lat"])
            lon = float(point["lon"])
        except (KeyError, TypeError, ValueError):
            print(f"🌎 VISUALIZATION: Skipping entry without usable lat/lon: {point}")
            continue
        coordinates_list.append({**point, "lat": lat, "lon": lon})

    if not coordinates_list:
        error_msg = """
        No coordinates found in the query. 
        Please provide coordinates in formats like '40.7411, -73.9897' or 'latitude 40.7411 longitude -73.9897'.
        """
        print(f"🌎 VISUALIZATION: Error - {error_msg} ❌")
        return error_msg

    print(f"🌎 VISUALIZATION: Extracted {len(coordinates_list)} coordinate pairs from query")

    print("🌎 VISUALIZATION: Creating Folium map")
    folium_map = folium.Map(tiles="Cartodb dark_matter")

    for point in coordinates_list:
        # Fall back to the coordinates when the name is missing or null
        popup_text = point.get("name") or f"Lat: {point['lat']}, Lon: {point['lon']}"
        folium.Marker(
            location=[point["lat"], point["lon"]], popup=popup_text, tooltip=popup_text
        ).add_to(folium_map)

    # Adjust map bounding box so every marker is in view
    # (coordinates_list is known to be non-empty here)
    lats = [point["lat"] for point in coordinates_list]
    lons = [point["lon"] for point in coordinates_list]
    folium_map.fit_bounds([[min(lats), min(lons)], [max(lats), max(lons)]])

    print(f"🌎 VISUALIZATION: Successfully created map with {len(coordinates_list)} points ✅")

    # Display map immediately in Jupyter Notebook
    display(folium_map)

    return f"Map with {len(coordinates_list)} points created and displayed."


Once all four tools have been prepared, we can now load them into the agentic app, which will share the results between tools to more effectively answer queries.

In [None]:
def query_street_network(query, display_result=True):
    """
    Process street network queries using a ReACT agent with AQL, OSMnx, Geocoding, and Visualization tools.

    Args:
        query: Natural language question, passed to the agent as the human message.
        display_result: When True (default), render the agent's final answer as Markdown
            and return None; when False, return the answer instead of displaying it.

    Returns:
        None when display_result is True, otherwise the content of the agent's final message.

    Raises:
        ValueError: If AI_PLATFORM is not "ANTHROPIC", "OPENAI", or "WATSONX".
    """

    # Initialize LLM
    if AI_PLATFORM == "ANTHROPIC":
        llm = ChatAnthropic(temperature=0, model_name="claude-3-7-sonnet-20250219")
    elif AI_PLATFORM == "OPENAI":
        llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
    elif AI_PLATFORM == "WATSONX":
        # NOTE(review): ChatWatsonx is documented with `model_id` (plus url/project_id);
        # confirm that `model_name` is accepted by the installed langchain_ibm version
        llm = ChatWatsonx(temperature=0, model_name="ibm/granite-34b-code-instruct")
    else:
        # Fail fast with a clear message instead of hitting an unbound `llm` further down
        raise ValueError(
            f"Unsupported AI_PLATFORM {AI_PLATFORM!r}; expected 'ANTHROPIC', 'OPENAI', or 'WATSONX'."
        )

    # Cache the results stored by each tool so it can be passed to the next tool
    # Schema: { <tool_name>: <tool_result> }
    tool_results = {}

    def add_memory(tool):
        """
        Equip each tool with the ability to call itself AND include the results of the preceding tool
        The output is a new LangChain tool where the called function is the wrapped original tool

        This allows the ReACT agent to call the wrapped tool, which generates the intermediate query based
        on the cached result, then invokes the tool with the enhanced query

        This should enable the output of the agent to better handle hybrid queries reliant on multiple tools
        """

        def tool_with_memory(inter_query):
            # As the ReACT agent passes intermediate queries between tools,
            # we want to check for the string USE_PREVIOUS_RESULT<tool_name> in the query,
            # If the string is present, then we swap in the results from the previous tool
            # to assemble the full enhanced intermediate query

            for tool_name, result in tool_results.items():
                placeholder = f"USE_PREVIOUS_RESULT<{tool_name}>"
                if placeholder in inter_query:
                    inter_query = inter_query.replace(placeholder, str(result))

            # Execute the tool with the enhanced intermediate query
            result = tool.invoke(inter_query)
            # Only the most recent result per tool name is kept
            tool_results[tool.name] = result
            return result

        # Create a new tool with the memory-enabled function
        return Tool(
            name=tool.name,
            description=tool.description
            + "\nYou can refer to results from other tools using USE_PREVIOUS_RESULT<tool_name>.",
            func=tool_with_memory,
        )

    tools = [
        text_to_aql_to_text,
        text_to_osmnx_algorithm_to_text,
        text_to_geocoder_to_coordinates,
        text_to_coordinates_to_folium_map,
    ]

    # Wrap all tools
    memory_tools = [add_memory(tool) for tool in tools]

    # Create the Agentic App
    app = create_react_agent(llm, memory_tools)

    # Create system message to explain tool result references
    # Note from the tool_with_memory function above that the USE_PREVIOUS_RESULT string will be replaced
    # by the results of the preceding tool, before being passed to the next tool
    system_message = SystemMessage(
        content="""
    You have access to multiple tools and can use the results of previous tool calls in new ones.
    To use a previous result, include USE_PREVIOUS_RESULT<tool_name> in your action input.
    For example: 
    Action: text_to_osmnx_algorithm_to_text
    Action Input: Analyze the graph from USE_PREVIOUS_RESULT<text_to_aql_to_text> using PageRank algorithm."""
    )

    # Run the agent
    final_state = app.invoke(
        {"messages": [system_message, HumanMessage(content=query)]}
    )
    # The last message in the returned state is taken as the agent's answer
    response = final_state["messages"][-1].content

    # Display the result if specified, if not then return it for potential further processing
    if display_result:
        display(Markdown(f"{response}"))
        return None
    else:
        return response


### III. Querying the Agentic App to Retrieve Insights into an Urban Street Network
Now that the data and tools have been created, let's run a series of natural language queries to generate useful information about the area around One Madison Avenue.

In [None]:
# Two-part question: pick an intersection for a new corner store, then find the closest bank
query_street_network("Which intersection in the street network is the best place to set up my new corner store, and what's the closest bank?")

In [None]:
# Ask for a visualization of the three healthcare facilities that are hardest to reach
query_street_network("Visualize the top three most difficult to reach healthcare facilities in the street network.")

In [None]:
# Ask for the bike shelter nearest One Madison Avenue and the walking distance to it in meters
query_street_network("What is the nearest bike shelter to One Madison Avenue, and how many meters does it take to walk there via a pathway?")

In [None]:
# Compare straight-line and walking distances between the public transport locations
query_street_network("How far are all of the public transport locations from each other? Compare the straight line with their walking distance.")

In [None]:
# Ask for food places near the National Museum of Mathematics and which is easiest to walk to
query_street_network("What are all the food places near the National Museum of Mathematics? Which one is easiest to get to by walking, and how long?")

In [None]:
# Ask for a visualization and the coordinates of the five most crowded intersections
query_street_network("Visualize the top five most crowded intersections in the street network and tell me their coordinates.")

In [None]:
# Ask for the length of a round trip that starts at the Flatiron Building and passes
# the Museum of Mathematics and Madison Square Park
query_street_network("Starting from the Flatiron Building, how long is the path to the Museum of Mathematics, through Madison Square Park, and back?")

In [None]:
# Compare cycling times, in minutes, from 23rd St Station to three candidate destinations
query_street_network("In minutes, which is faster by bike from 23rd St Station - Met Life Insurance Tower, Gramercy Tavern, or Chelsea Piers Fitness?")

### IV. Querying the Agentic App to Retrieve Insights into a Rural Street Network and Health Facility Data
Let's build a new graph in my father's village of Gomnati in Nilphamari District of Bangladesh, to analyze an area with significantly different geography from New York City.

In [None]:
# Retrieve the map of Gomnati village using Gomnati Primary School as the center point
# We overwrite the in-memory OSMnx graph as the tools refer to this variable name specifically
gomnati_center = (26.195655, 88.852847)
gomnati_dist_m = 3500.00
G_ox = ox.graph_from_point(gomnati_center, dist=gomnati_dist_m)

# Remove the 'geometry' attribute from every edge that carries one
for _, _, edge_dict in G_ox.edges(data=True):
    edge_dict.pop('geometry', None)

ox.plot_graph(G_ox)
# Report the distance that was actually requested (the message used to hard-code 3300)
print(f"Graph of street network within a {gomnati_dist_m:.0f} meter radius of Gomnati Primary School: {G_ox}")

In [None]:
# Since the nodes in the OSMnx graph do not include the address, we geocode each node
geolocator = Nominatim(user_agent="osmnx_geocoder")
for node_id, node_data in G_ox.nodes(data=True):
    lat = node_data['y']
    lon = node_data['x']

    try:
        # Use reverse geocoding on points to get address information
        # NOTE(review): timeout=None appears to disable geopy's request timeout, so a stalled
        # request could block this loop indefinitely — confirm and consider a finite value
        location = geolocator.reverse(f"{lat}, {lon}", exactly_one=True, timeout=None)
        if location:
            # Add address information to the node
            G_ox.nodes[node_id]['address'] = location.address

    except Exception as e:
        # Best effort: report the failure and continue with the next node
        print(f"Error geocoding node {node_id}: {e}")

    finally:
        # Avoid possible rate limiting by waiting a bit between each node;
        # in `finally` so the pause also happens after a failed request
        time.sleep(1)

In [None]:
# Record every node's own identifier on the node under the 'ID' attribute
for node_id in G_ox.nodes:
    G_ox.nodes[node_id]['ID'] = node_id

In [None]:
# Path of the GraphML file used to save and reload the Gomnati graph
gomnati_filepath = "./data/gomnati.graphml"

In [None]:
# Write the in-memory graph G_ox to gomnati_filepath as GraphML
ox.io.save_graphml(G_ox, gomnati_filepath)

In [None]:
# Rebind G_ox to the graph read back from the GraphML file at gomnati_filepath
G_ox = ox.io.load_graphml(gomnati_filepath)

In [None]:
# Print a handful of sample nodes and edges for inspection.
# Indices beyond the end of a list are skipped so a small graph does not raise IndexError.
node_list_with_addresses = list(G_ox.nodes(data=True))
for sample_index in (0, 65, 82, 34):
    if sample_index < len(node_list_with_addresses):
        print(f"Sample node data with address: {node_list_with_addresses[sample_index]}")

edge_list = list(G_ox.edges(data=True))
for sample_index in (0, 65, 120, 34):
    if sample_index < len(edge_list):
        print(f"Sample edge data: {edge_list[sample_index]}")

In [None]:
# Tag keys to request from OpenStreetMap; each key is mapped to True
tags = {
    tag_key: True
    for tag_key in (
        'building',
        'amenity',
        'healthcare',
        'office',
        'public_transport',
        'craft',
        'historic',
    )
}
# NOTE(review): this center point differs from the one passed to ox.graph_from_point
# for the Gomnati graph — confirm that the offset is intended
features_gdf = ox.features.features_from_point((26.200, 88.850), tags, dist=3500.00)

In [None]:
# Compute each feature's centroid in a projected (UTM) CRS, then convert the centroid
# back to the GeoDataFrame's original CRS. The reprojection is done once and reused
# for both the lat and lon columns.
utm_crs = ox.projection.project_gdf(features_gdf).crs
centroids = features_gdf.to_crs(utm_crs).centroid.to_crs(features_gdf.crs)
features_gdf['lat'] = centroids.y
features_gdf['lon'] = centroids.x
features_gdf_adjusted = features_gdf.drop(columns=['geometry'])

# Round-trip through JSON to obtain a list of plain dict records
features_json_str = features_gdf_adjusted.reset_index().to_json(orient='records')

# Keep only the keys that actually carry a value in each record
features_records = [
    {key: value for key, value in feature.items() if value is not None}
    for feature in json.loads(features_json_str)
]

# Show up to three samples; slicing avoids an IndexError when fewer records exist
for sample_feature in features_records[:3]:
    print(f"Sample feature data: {sample_feature}\n")

To augment the potential of our insights, let's pull in some critical data. The [healthsites.io](https://healthsites.io/) platform was created by the Global Healthsites Mapping Project, with the aim of making global health facility data open and accessible. It provides per-country datasets that can be retrieved via its API or downloaded as geospatial files.

In [None]:
# The Bangladesh healthsites data was downloaded as a Shapefile from healthsites.io
# Load the Shapefile into a GeoDataFrame to access the Point data
bd_healthsites_df = gpd.read_file('data/Bangladesh-node.shp')

In [None]:
# There are no Polygon shapes in this dataset, just points, so no need to use centroids
# Use the vectorized GeoSeries accessors instead of a per-row lambda
bd_healthsites_df['lon'] = bd_healthsites_df.geometry.x
bd_healthsites_df['lat'] = bd_healthsites_df.geometry.y

# Drop the geometry, duplicate, and empty columns that prevent serialization
bd_healthsites_df_adjusted = bd_healthsites_df.drop(columns=['geometry','is_in_heal','changeset_'])
# Round-trip through JSON to obtain a list of plain dict records
bd_healthsites_json_str = bd_healthsites_df_adjusted.reset_index().to_json(orient='records')
bd_healthsites_records = json.loads(bd_healthsites_json_str)

# Show up to three samples; slicing avoids an IndexError when fewer records exist
for sample_healthsite in bd_healthsites_records[:3]:
    print(f"Sample healthsite data: {sample_healthsite}\n")

In [None]:
# Create the database to store graph and features collection for Gomnati
# (creation is skipped when a database named 'gomnati' already exists)
if not sys_db.has_database('gomnati'):
    sys_db.create_database('gomnati')

# Open a handle to the 'gomnati' database; client, adb_user and adb_pass are defined outside this cell
gomnati_db = client.db('gomnati',username=adb_user,password=adb_pass,verify=True)

In [None]:
# Build an nx_arangodb MultiDiGraph named "G_ox_adb" in the Gomnati database,
# seeded from the in-memory OSMnx graph G_ox
# NOTE(review): overwrite_graph=True presumably replaces an existing graph of the same name — confirm
G_ox_adb = nxadb.MultiDiGraph(
    name="G_ox_adb",
    db=gomnati_db,
    incoming_graph_data=G_ox,
    overwrite_graph=True
)
print(f"Graph of Gomnati, Bangladesh rural network persisted to ArangoDB: {G_ox_adb}")

# Report the node and edge counts as returned by the ArangoDB-backed graph object
print(f"True number of nodes in G_ox_adb is: {G_ox_adb.number_of_nodes()}")
print(f"True number of edges in G_ox_adb is: {G_ox_adb.number_of_edges()}")

In [None]:
# Recreate the 'features' collection from scratch so a rerun starts from an empty collection
if gomnati_db.has_collection('features'):
    gomnati_db.delete_collection('features')
features_collection = gomnati_db.create_collection('features')

# insert_many returns per-document failures as exception objects in its result list
# rather than raising, so count them instead of assuming every record was stored
insert_results = features_collection.insert_many(features_records)
failed_inserts = []
if isinstance(insert_results, list):
    failed_inserts = [item for item in insert_results if isinstance(item, Exception)]

print(f"Added {len(features_records) - len(failed_inserts)} feature records to ArangoDB 'features' collection in the Gomnati database")
if failed_inserts:
    print(f"Failed to insert {len(failed_inserts)} feature records; first error: {failed_inserts[0]}")

In [None]:
# Recreate the 'healthsites' collection from scratch so a rerun starts from an empty collection
if gomnati_db.has_collection('healthsites'):
    gomnati_db.delete_collection('healthsites')
healthsites_collection = gomnati_db.create_collection('healthsites')

# insert_many returns per-document failures as exception objects in its result list
# rather than raising, so count them instead of assuming every record was stored
insert_results = healthsites_collection.insert_many(bd_healthsites_records)
failed_inserts = []
if isinstance(insert_results, list):
    failed_inserts = [item for item in insert_results if isinstance(item, Exception)]

# The message names the healthsites collection (it previously said 'features' by copy-paste)
print(f"Added {len(bd_healthsites_records) - len(failed_inserts)} healthsite records to ArangoDB 'healthsites' collection in the Gomnati database")
if failed_inserts:
    print(f"Failed to insert {len(failed_inserts)} healthsite records; first error: {failed_inserts[0]}")

In [None]:
# Initialize ArangoGraph for the Gomnati database
# NOTE(review): presumably the query tools read this module-level name, so rebinding it
# points them at the Gomnati data — confirm against the tool definitions
arango_graph = ArangoGraph(gomnati_db)

In [None]:
# Ask for the shortest path from the place named in the query to its nearest health facility
query_street_network("What is the shortest path to the nearest health facility to গোমনাতি মহাবিদ্যালয়?")

In [None]:
# Ask for a visualization of the three most isolated addresses in the current street network
query_street_network("Visualize the three most isolated addresses in this street network.")