# Accessing Street-Level Imagery with OpenStreetMap

Author: Thomas Lake

Using OSMnx to create street networks from OpenStreetMap.

See research paper: Boeing, 2017. OSMnx: New methods for acquiring, constructing, analyzing, and visualizing complex street networks.

The following notebook explores the Python OSMnx package (https://osmnx.readthedocs.io/en/stable/getting-started.html#introducing-osmnx)

You can download a street network by providing OSMnx any of the following:

- a bounding box
- a lat-long point plus a distance
- an address plus a distance
- a place name or list of place names (to automatically geocode and get the boundary of)
- a polygon of the desired street network's boundaries
- a .osm formatted xml file

You can also specify several different network types:

- 'drive' - get drivable public streets (but not service roads)
- 'drive_service' - get drivable streets, including service roads
- 'walk' - get all streets and paths that pedestrians can use (this network type ignores one-way directionality)
- 'bike' - get all streets and paths that cyclists can use
- 'all' - download all non-private OSM streets and paths (this is the default network type unless you specify a different one)
- 'all_private' - download all OSM streets and paths, including private-access ones

Once created, street networks derived from OpenStreetMap can be used to create regularly spaced points along publicly accessible streets, and then to sample Google Street View locations from those points along the road network.

This process is outlined in the research paper: Vazquez Sanchez and Labib, 2024. Accessing eye-level greenness visibility from open-source street view images: A methodological development and implementation in multi-city and multi-country contexts. See: https://github.com/Spatial-Data-Science-and-GEO-AI-Lab/StreetView-NatureVisibility

To programmatically download Google Street View images from locations using the Google API, see the Python package 'streetview': https://github.com/robolyst/streetview/tree/master


# Imports

In [None]:
# Imports for OSMnx
# https://osmnx.readthedocs.io/en/stable/user-reference.html
import networkx as nx
import osmnx as ox
print(ox.__version__) # OSMnx plans to update to version 2.0 in 2024.

# Imports for Google Street View Image downloader
# https://github.com/robolyst/streetview/tree/master
from streetview import search_panoramas, get_panorama_meta, get_streetview, get_panorama

# Other Imports
import time
import os
from tqdm import tqdm
from datetime import datetime
import requests
import json
import pandas as pd
import geopandas as gpd
import numpy as np
import folium
import matplotlib.pyplot as plt
from scipy.spatial import cKDTree
from shapely.geometry import Point
import geopy.distance



In [None]:
# Load the Google Street View Static API key used by the panorama metadata requests.
# The key is kept in a separate text file so the secret never lives in the notebook.
# NOTE(review): the path below is specific to one machine; point it at your own key file.
key_path = r'C:\Users\talake2\Desktop\auto_arborist_cvpr2022_v015\api_keys\Google_Street_View_Static_API_Key.txt'
with open(key_path) as key_file:
    # Strip surrounding whitespace/newlines so the key can be passed to the API as-is
    GOOGLE_MAPS_API_KEY = key_file.read().strip()

# OSMnx Functions


In [None]:
# Following implementation of OSMnx in the manuscript: https://www.sciencedirect.com/science/article/pii/S221067072400091X?via%3Dihub#bib0020
# and code implementation of OSMnx from the manuscript: https://github.com/Spatial-Data-Science-and-GEO-AI-Lab/StreetView-NatureVisibility

def get_road_network_from_point(location, dist):
    """Build a drivable road network around a point, without reverse-direction duplicates.

    Args:
        location: Centre point, forwarded unchanged to ``ox.graph_from_point``
            (this notebook passes ``(latitude, longitude)`` tuples).
        dist: Distance forwarded unchanged to ``ox.graph_from_point`` with
            ``dist_type='network'`` (presumably meters; see the OSMnx docs).

    Returns:
        tuple: ``(G, edges)`` where ``G`` is the de-duplicated graph in its
        original (unprojected) CRS and ``edges`` is a GeoDataFrame of the
        edges of the projected graph.
    """
    # Download the simplified, drivable street network around the point
    G = ox.graph_from_point(location, dist, dist_type = 'network', network_type='drive', simplify=True)

    # Two-way streets appear as both (u, v) and (v, u). Walk the edges once,
    # remember every direction already seen, and collect any edge whose
    # reverse direction was seen earlier.
    seen_directions = set()
    reverse_duplicates = []
    for u, v, key in G.edges(keys=True):
        if (v, u) in seen_directions:
            reverse_duplicates.append((u, v, key))
        else:
            seen_directions.add((u, v))

    # Work on a copy so the downloaded graph object itself is never mutated
    G = G.copy()
    G.remove_edges_from(reverse_duplicates)

    # Project from latitude-longitude to a local projected CRS so that
    # lengths along the edges can be measured
    # (per the OSMnx docs, the default target is the UTM zone of the graph's centroid)
    G_proj = ox.project_graph(G)

    # Only the edge GeoDataFrame is needed; the node GeoDataFrame is discarded
    _, edges = ox.graph_to_gdfs(G_proj)

    return G, edges



def select_points_on_road_network(roads, N=15):
    """Sample points at a fixed spacing along every road geometry.

    Args:
        roads: GeoDataFrame whose ``geometry`` column holds the road lines.
            Spacing is measured in the units of ``roads.crs``.
        N: Integer spacing between consecutive points along each line
            (default 15), in the units of ``roads.crs``.

    Returns:
        GeoDataFrame with columns ``geometry`` (the sampled point) and
        ``road_index`` (the index label of the road it was sampled from),
        in the same CRS as ``roads``, with duplicate geometries removed and
        a fresh 0..n-1 index.
    """
    # One [point, road label] row for every N-th unit of length along each road.
    # Offsets start at 0 and stop before the line's length truncated to an int.
    samples = [
        [line.interpolate(offset), road_label]
        for road_label, line in zip(roads.index, roads['geometry'])
        for offset in range(0, int(line.length), N)
    ]

    gdf_points = gpd.GeoDataFrame(samples, columns=["geometry", "road_index"], geometry="geometry")

    # The points were interpolated in the roads' coordinate system, so tag them with it
    gdf_points = gdf_points.set_crs(roads.crs)

    # Roads that share a start node produce the same point more than once; keep one copy
    return gdf_points.drop_duplicates(subset=['geometry']).reset_index(drop=True)

# Setup. Define Locations to Sample Road Networks for Tree Geolocation Analyses

In [None]:

# Candidate testing locations for tree geolocation analyses.
# Each location is a separate (latitude, longitude) tuple variable (21 in total, not a dictionary).
apple_park = (37.33354, -122.00567)
buffalo = (42.92952, -78.87408)
columbus = (40.15238321, -82.97449574)
la = (33.75995068, -118.286375)
montreal = (45.46182151, -73.60870962)
nyc = (40.59332846238751, -73.96461341067977)
nyc_2 = (40.67957647, -73.73748243)
pittsburgh = (40.44788537, -80.01430011)
sanfran = (37.72658289, -122.4719503)
sanfran_lombard = (37.80247707, -122.4181456)
sanjose = (37.28612748, -121.8084685)
seattle = (47.62319504, -122.3574537)
seattle_3 = (47.57606272, -122.3865466)
siouxfalls = (43.49218953, -96.72747222)
siouxfalls_2 = (43.49514608, -96.73300202)
siouxfalls_3 = (43.5491668, -96.73438307)
vancouver = (49.25261711, -123.023932)
vancouver_4 = (49.21252079, -123.0568194)
washington = (38.94273133, -76.99800397)
washington_2 = (38.88023974, -77.01280141)
st_helena = (38.50500615673824, -122.46982548134353)


# Location used by the rest of the notebook; change this assignment to sample a different site
sel_location = st_helena


# Step 1. Get the road network from point + distance

The first step of the code is to retrieve the road network for a specific place using OpenStreetMap data with the help of the OSMnx library. It begins by fetching the road network graph, focusing on roads that are suitable for driving. One important thing to note is that for bidirectional streets, OSMnx returns one edge per direction, which produces duplicate lines. In this code, we take care to remove these duplicates and keep only the unique road segments to ensure that samples are not taken on the same road multiple times, preventing redundancy in subsequent analysis.

Following that, the code proceeds to project the graph from its original latitude-longitude coordinates to a local projection in meters. This projection is crucial for achieving accurate measurements in subsequent steps where we need to calculate distances between points. By converting the graph to a local projection, we ensure that our measurements align with the real-world distances on the ground, enabling precise analysis and calculations based on the road network data.


In [None]:
# Set distance to get road network
# (forwarded to ox.graph_from_point by get_road_network_from_point)
distance = 1000

# Create road network graph from point
# `graph` is the unprojected graph; `road` is the GeoDataFrame of projected edges
graph, road = get_road_network_from_point(sel_location, distance)

# View first 5 road edges
# NOTE(review): this is not the last expression of the cell, so a notebook will not render it
road.head(5)

# Calculate summary statistics for road network graph
# The returned graph is unprojected, so project it again to measure areas
road_proj = ox.project_graph(graph)
nodes_proj = ox.graph_to_gdfs(road_proj, edges=False)
# Area of the convex hull around all projected nodes, used as the network's bounding area
graph_area_m = nodes_proj.unary_union.convex_hull.area

# Output summary statistics of road network
# To get density-based statistics, you must also pass the network's bounding area in square meters
# Information on summary statistics: see `basic_stats` in the OSMnx user reference
# (https://osmnx.readthedocs.io/en/stable/user-reference.html)
stats = ox.basic_stats(road_proj, area=graph_area_m, clean_int_tol=15)
pd.Series(stats)


In [None]:
# Draw the sampled road network on an interactive Folium map

# Folium works in latitude/longitude, so bring the projected edges back to WGS84 (EPSG:4326)
road_wgs84 = road.to_crs('EPSG:4326')

# Base map centred on the selected location
m = folium.Map(location=sel_location, zoom_start=15)

# Add one polyline per road edge
for road_line in road_wgs84['geometry']:
    # Shapely stores coordinates as (x=lon, y=lat); Folium expects (lat, lon)
    lat_lon_path = [(xy[1], xy[0]) for xy in road_line.coords]
    folium.PolyLine(locations=lat_lon_path, color='blue', weight=2).add_to(m)

# Leaving the map as the last expression renders it in the notebook
#m.savefig(f"C:/Users/talake2/Desktop/tree-geolocation/geolocation-panos-testing-cities-maps/geolocation-pano-testing-{testing_city}.png")
m


# Alternatively, create a custom road network

In [None]:
# Create a custom road network with OSMnx
# First, create a road graph, and save it as a GeoPackage to file
#fp = 'c:/users/talake2/desktop/raleigh_custom_road_network.gpkg'
# G = ox.graph_from_place("Nashville, TN", network_type="drive") # alternatively, use ox.graph_from_point
#ox.save_graph_geopackage(G, fp)
#fix, ax = ox.plot_graph(G)



In [None]:

# Second, edit the graph as needed in QGIS
# In QGIS, import the graph .gpkg file to obtain the nodes and edges.
# For each layer in the .gpkg (nodes/ edges), select your custom region and delete unnecessary nodes/edges.
# Once you are finished editing your features, save and overwrite the original file.
# Ensure the index attributes are non-null when you're finished


# Third, load your custom edited road network back to OSMnx
# load GeoPackage as node/edge GeoDataFrames indexed as described in OSMnx docs
#gdf_nodes = gpd.read_file(fp, layer='nodes').set_index('osmid')
#gdf_edges = gpd.read_file(fp, layer='edges').set_index(['u', 'v', 'key'])
#assert gdf_nodes.index.is_unique and gdf_edges.index.is_unique

# Convert the node/edge GeoDataFrames to a MultiDiGraph
#graph_attrs = {'crs': 'epsg:4326', 'simplified': True}
#G2 = ox.graph_from_gdfs(gdf_nodes, gdf_edges, graph_attrs)
# Display your graph
#fig, ax = ox.plot_graph(G2)


# Step 2. Select the sample points on the road network

The second step of the code generates a list of evenly distributed points along the road network, with a specified distance between each point. This is achieved using a function that takes the road network data and an optional distance parameter N, which defaults to 15 meters; in this notebook the function is called with N = 25 meters.

The function iterates over each road in the roads dataframe and creates points at regular intervals of the specified distance (N). By doing so, it ensures that the generated points are evenly spaced along the road network.

To maintain a consistent spatial reference, the function sets the Coordinate Reference System (CRS) of the gdf_points dataframe to match the CRS of the roads dataframe. This ensures that the points and the road network are in the same local projected CRS, measured in meters.

Furthermore, to avoid duplication and redundancy, the function removes any duplicate points in the gdf_points dataframe based on the geometry column. This ensures that each point in the resulting dataframe is unique and represents a distinct location along the road network.

In [None]:
# Create a set of approx. equally-spaced points along the road network
# (25 is the spacing N, measured in the units of the projected `road` CRS)
points = select_points_on_road_network(road, 25)

# Convert points into latitude/longitude with CRS
# (EPSG:4326, so that .x is longitude and .y is latitude in the cells below)
points_coords = points.to_crs(4326)
points_coords.head(5)

In [None]:
# Draw the sampled street points on an interactive Folium map

# Make sure the points are expressed in WGS84 (EPSG:4326) for Folium
points_wgs84 = points_coords.to_crs('EPSG:4326')

# Base map centred on the selected location
m = folium.Map(location=sel_location, zoom_start=15)

# Add one small circle per sampled point
for sample_point in points_wgs84['geometry']:
    # Shapely points expose x=lon and y=lat; Folium expects (lat, lon)
    folium.CircleMarker(location=(sample_point.y, sample_point.x), color='blue', radius=2).add_to(m)

# Leaving the map as the last expression renders it in the notebook
#m.savefig(f"C:/Users/talake2/Desktop/tree-geolocation/geolocation-panos-testing-cities-maps/geolocation-pano-testing-{testing_city}.png")
m

# Step 3: Query panoramic images near road points


The next step in the pipeline focuses on finding the closest images for each point.

To calculate the distances between the features and the points, a k-dimensional tree (KDTree) approach is employed using the local projected crs in meters. The KDTree is built using the geometry coordinates of the feature points. By querying the KDTree, the nearest neighbors of the points in the points dataframe are identified. The closest feature and distance information are then assigned to each point accordingly.

In [None]:
# Function to remove panoramic images captured within a certain distance
def remove_adjacent_panoramics(pano_df, distance):
    """Thin panoramas so that no two kept panoramas lie within ``distance`` meters.

    Rows are visited in their existing order. A row is kept unless it lies
    within ``distance`` of a row that was already kept, so the first panorama
    of every close group always survives and the result is deterministic.

    Args:
        pano_df: DataFrame with ``Panorama_Latitude`` and
            ``Panorama_Longitude`` columns in decimal degrees. Any index is
            accepted; rows are addressed by position.
        distance: Minimum allowed separation between kept panoramas, in meters.

    Returns:
        A new DataFrame holding the kept rows, with their original index
        labels and order preserved.
    """
    n_panos = len(pano_df)
    # With fewer than two panoramas nothing can be adjacent to anything else
    if n_panos < 2:
        return pano_df.copy()

    lat = pano_df['Panorama_Latitude'].to_numpy(dtype=float)
    lon = pano_df['Panorama_Longitude'].to_numpy(dtype=float)

    # Local equirectangular approximation: one degree of latitude is ~111,139 m,
    # while one degree of longitude shrinks by cos(latitude). Without this factor
    # east-west separations are overestimated away from the equator.
    meters_per_degree = 111139
    lon_scale = np.cos(np.radians(lat.mean()))
    coords_m = np.column_stack([lon * lon_scale, lat]) * meters_per_degree

    pano_kd_tree = cKDTree(coords_m)

    # Greedy thinning: every kept panorama knocks out all later panoramas
    # that fall inside its radius.
    keep = np.ones(n_panos, dtype=bool)
    for i in range(n_panos):
        if not keep[i]:
            continue
        for j in pano_kd_tree.query_ball_point(coords_m[i], r=distance):
            if j > i:
                keep[j] = False

    # Positional selection, so a non-default index on pano_df is handled correctly
    return pano_df.iloc[keep].copy()

In [None]:

# Collect metadata for every panorama found near the sampled street points.

# Column layout of the resulting table. Passing it explicitly keeps the columns
# present even when no panorama is found, so the filters below do not fail.
pano_columns = ['Point_Index', 'Point_Latitude', 'Point_Longitude',
                'Panorama_ID', 'Panorama_Date',
                'Panorama_Latitude', 'Panorama_Longitude', 'Panorama_Rotation']

# Hold point location and available panoramic image location metadata
pano_data = []

# Panorama IDs already handled. Neighbouring street points usually return the
# same panoramas, so remembering them avoids repeated metadata requests; the
# first point that finds a panorama is the one recorded for it.
seen_pano_ids = set()

# Longitude (x) and latitude (y) of each street point, in row order
point_lats = points_coords.geometry.y.to_numpy()
point_lons = points_coords.geometry.x.to_numpy()

# Iterate over each point in the road network, and get metadata for the nearest panoramic images
for i, (point_lat, point_lon) in enumerate(tqdm(zip(point_lats, point_lons), total=len(point_lats))):

    # Search for all available panoramic images closest to this point
    panos = search_panoramas(lat=point_lat, lon=point_lon)

    # For each new pano image, get the metadata by supplying the unique pano_id string
    for pano in panos:
        if pano.pano_id in seen_pano_ids:
            continue
        seen_pano_ids.add(pano.pano_id)

        meta = get_panorama_meta(pano_id=pano.pano_id, api_key=GOOGLE_MAPS_API_KEY)

        # Panoramas without a capture date cannot be filtered by date; skip them
        if not meta.date:
            continue

        # The date filter below compares 'YYYY-MM' strings, so skip anything
        # that does not follow that format instead of aborting the whole run
        try:
            datetime.strptime(meta.date, '%Y-%m')
        except ValueError:
            continue

        # Append data on point and panoramic image location
        pano_data.append({'Point_Index': i,
            'Point_Latitude': point_lat,
            'Point_Longitude': point_lon,
            'Panorama_ID': pano.pano_id,
            'Panorama_Date': meta.date,
            'Panorama_Latitude': pano.lat,
            'Panorama_Longitude': pano.lon,
            'Panorama_Rotation': pano.heading})


# All available panoramic images sampled from road points (one row per Panorama_ID)
pano_df = pd.DataFrame(pano_data, columns=pano_columns)

print('Total Available Panoramic Images:', len(pano_df))

# Keep only images captured in or after 2016-01 (older images are removed).
# 'YYYY-MM' strings sort chronologically, so a plain string comparison is enough.
pano_df = pano_df[pano_df['Panorama_Date'] >= '2016-01']
pano_df = pano_df.reset_index(drop=True)
print('Total Panoramic Images After 2016:', len(pano_df))

# Remove any panoramic images closer than 5 meters
pano_df_simple = remove_adjacent_panoramics(pano_df, 5).reset_index(drop=True)

print('Total Panoramic Images After De-Duplication:', len(pano_df_simple))


# Step 4: Visualize Panoramic Image Locations Before Download

In [None]:
# Plot Pano Locations and Street Sampling Points
# Overlays all available panoramas, the de-duplicated panoramas, and the
# street sampling points on a satellite-imagery basemap.

from eomaps import Maps
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from cartopy.io import img_tiles
from shapely.geometry import Point, LineString
%matplotlib inline


# Define the bounding box for the plotting area
# Order is [lon_min, lon_max, lat_min, lat_max], taken from the street points
# recorded in pano_df and padded by 0.0005 degrees on every side
bbox = [
    pano_df['Point_Longitude'].min() - 0.0005,
    pano_df['Point_Longitude'].max() + 0.0005,
    pano_df['Point_Latitude'].min() - 0.0005,
    pano_df['Point_Latitude'].max() + 0.0005
]

# Convert panoramic image data to a Geodataframe
# (all panoramas that passed the date filter; points are built as (lon, lat))
pano_geometry = [Point(xy) for xy in zip(pano_df['Panorama_Longitude'], pano_df['Panorama_Latitude'])]
pano_locations_gdf = gpd.GeoDataFrame(pano_df, geometry=pano_geometry, crs="EPSG:4326")

# Convert the de-duplicated (filtered) panoramic image data to a Geodataframe
sel_pano_geometry = [Point(xy) for xy in zip(pano_df_simple['Panorama_Longitude'], pano_df_simple['Panorama_Latitude'])]
sel_pano_locations_gdf = gpd.GeoDataFrame(pano_df_simple, geometry=sel_pano_geometry, crs="EPSG:4326")


# Plotting:
# Define a map and extent
m = Maps(crs=Maps.CRS.Mercator.GOOGLE, figsize=(20, 20))
# NOTE(review): the extent is given in degrees with no explicit crs argument - confirm eomaps interprets it as lon/lat
m.set_extent((bbox[0], bbox[1], bbox[2], bbox[3]))

# Plot the GeoDataFrame onto the map
# blue 'x' = every available panorama, white '+' = panoramas kept after de-duplication,
# red 'o' = street sampling points
m.add_gdf(pano_locations_gdf, marker='x', color='blue', alpha=0.80, markersize = 250, label='Available Panoramic Images')
m.add_gdf(sel_pano_locations_gdf, marker='+', color='white', alpha=0.80, markersize = 250, label='Filtered Panoramic Images')
m.add_gdf(points_coords, marker='o', color='red', alpha=0.80, markersize = 250, label='Street Points')

# Satellite imagery basemap (ESRI World Imagery)
m.add_wms.ESRI_ArcGIS.SERVICES.World_Imagery.add_layer.xyz_layer()

# Show the map
#m.savefig(f"C:/Users/talake2/Desktop/tree-geolocation/geolocation-panos-testing-cities-maps/geolocation-pano-testing-{testing_city}.png")
m.show();


# Download Panoramic Images

In [None]:
# Download every de-duplicated panorama one at a time, together with a small
# JSON metadata file stored next to the image.

# Imported here so the error handler below works even when this cell is run
# before the parallel-download cell (which is where the notebook imports it).
from PIL import UnidentifiedImageError

# Folder that receives both the images and their metadata files
output_dir = 'C:/Users/talake2/Desktop/pano_download_testing'
os.makedirs(output_dir, exist_ok=True)

# Loop over panoramic images sampled in the pano_df
for pano_row in tqdm(pano_df_simple.itertuples(index=False), total=len(pano_df_simple)):

    # Get panoramic image unique ID
    panoID = pano_row.Panorama_ID

    # Define the file paths for the image and its metadata
    image_path = f'{output_dir}/{panoID}.jpg'
    metadata_path = f'{output_dir}/{panoID}.metadata.json'

    # Check if image already exists. If it does, skip that image.
    if os.path.exists(image_path):
        print(f"Image {panoID} already exists. Skipping...")
        continue

    print("Downloading Panoramic Image and Metadata:")

    # Attempt to download single panoramic image
    try:
        image = get_panorama(pano_id=panoID)
    except UnidentifiedImageError as e:
        print(f"Error downloading image {panoID}: {e}")
        # Pause briefly before moving on to the next panorama (this one is not retried)
        time.sleep(2)
        continue

    # Save the image
    image.save(image_path, "jpeg")

    # Create the matching panoramic image metadata .json file.
    # It is written only after the image is saved, so a failed download
    # never leaves a metadata file without an image.
    metadata = {
        'panoId': panoID,
        'panoDate': pano_row.Panorama_Date,
        'lat': pano_row.Panorama_Latitude,
        'lng': pano_row.Panorama_Longitude,
        'rotation': pano_row.Panorama_Rotation
        }
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f)

    print("Saved Panoramic Image and Metadata: ", panoID)

# Download Panoramic Images in Parallel with ThreadPoolExecutor

In [None]:
import os
import json
import time
from concurrent.futures import ThreadPoolExecutor
from PIL import UnidentifiedImageError
from tqdm import tqdm

# Function to download a single panoramic image
def download_panorama(pano_info, output_dir=r'C:/Users/talake2/Desktop/pano_download_testing'):
    """Download one panorama and write its metadata file next to it.

    Args:
        pano_info: Tuple ``(panoID, pano_date, lat, lng, rotation)``.
        output_dir: Existing folder that receives ``<panoID>.jpg`` and
            ``<panoID>.metadata.json``. Defaults to the folder this notebook
            has always used.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    start_time = time.time()
    panoID, pano_date, lat, lng, rotation = pano_info
    image_path = f'{output_dir}/{panoID}.jpg'
    metadata_path = f'{output_dir}/{panoID}.metadata.json'

    # Already downloaded on a previous run: leave the existing files untouched
    if os.path.exists(image_path):
        print(f"Image {panoID} already exists. Skipping...")
        return None

    print(f"Downloading Panoramic Image and Metadata: {panoID}")
    try:
        image = get_panorama(pano_id=panoID)
    except UnidentifiedImageError as e:
        print(f"Error downloading image {panoID}: {e}")
        # Pause briefly before this worker picks up its next panorama (no retry is made)
        time.sleep(2)
        return None

    image.save(image_path, "jpeg")

    # Metadata is written only after the image is saved, so a failed download
    # never leaves a metadata file without an image
    metadata = {
        'panoId': panoID,
        'panoDate': pano_date,
        'lat': lat,
        'lng': lng,
        'rotation': rotation
    }
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f)

    duration = time.time() - start_time
    print(f"Saved Panoramic Image and Metadata: {panoID}. Time taken: {duration:.2f} seconds")
    return None

# Number of threads to run in parallel: one fewer than the CPU count, but never
# fewer than one. os.cpu_count() can return None, and a single-core machine
# would otherwise request zero workers, which ThreadPoolExecutor rejects.
num_threads = max(1, (os.cpu_count() or 2) - 1)

# Prepare the arguments for the parallel execution:
# one (id, date, latitude, longitude, rotation) tuple per panorama, in row order
pano_info_list = list(zip(pano_df_simple['Panorama_ID'],
                          pano_df_simple['Panorama_Date'],
                          pano_df_simple['Panorama_Latitude'],
                          pano_df_simple['Panorama_Longitude'],
                          pano_df_simple['Panorama_Rotation']))

# Create a ThreadPoolExecutor with the desired number of threads
with ThreadPoolExecutor(max_workers=num_threads) as executor:
    # Submit tasks for each panoramic image download
    futures = [executor.submit(download_panorama, pano_info) for pano_info in pano_info_list]

    # Track the progress of the parallel downloads using tqdm
    # (results are awaited in submission order)
    for future in tqdm(futures, total=len(futures)):
        try:
            future.result()  # Wait for the task to complete
        except Exception as e:
            # Report the failure and keep going so one bad panorama does not stop the batch
            print(f"Error downloading panorama: {e}")

In [None]:
# EOF