# OSM Tag Stats
In this notebook we will calculate the availability of different tags in the OpenStreetMap data of different locations.

This means we will look at the percentage of edges that have a given tag.

Example: 10% of cycleways have a width tag.

Small Disclaimer: This is only a rough estimate because we don't look at all possible Subtag combinations. But from experience, this gives a good overview of the state of the data.

If this interests you, you should also check out the [Ohsome Examples Notebook](example-ohsome).

In [None]:
import pandas as pd
import osmnx as ox
import networkx as nx

from pathlib import Path
import copy
import sys
import pyproj # we need this for jupyter-book to build right...

from helpers import osmhelper as oh

In [None]:
# OSM way tags this notebook wants registered in ox.settings.useful_tags_way.
# Each tag is listed exactly once.
useful_tags_way = ['bridge', 'tunnel', 'oneway', 'lanes', 'ref', 'name',
                   'highway', 'maxspeed', 'service', 'access', 'area',
                   'landuse', 'width', 'est_width', 'junction',
                   'sidewalk', 'cycleway', 'bicycle', 'footway',
                   'cyclestreet', 'path', 'foot',
                   "sidewalk:right", "sidewalk:left", "sidewalk:both",
                   "cycleway:right", "cycleway:left", "cycleway:both",
                   "surface", "smoothness",
                   "lanes:width", "lanes:surface", "lanes:smoothness",
                   "maxwidth", "lit"]

# Merge with the tags OSMnx already has configured. dict.fromkeys keeps the
# first-seen order and drops repeats, so re-running this cell does not make
# the settings list grow.
ox.settings.useful_tags_way = list(
    dict.fromkeys([*ox.settings.useful_tags_way, *useful_tags_way]))

In [None]:
def filter_graph_by_dict(G, filter_dict, drop_nodes=False):
    """
    Remove every edge of ``G`` that matches none of the filter criteria.

    An edge is kept as soon as one key of ``filter_dict`` is present in its
    attribute dict and either the filter value is None (any value accepted)
    or the edge's value (or, for list-valued attributes, one of its values)
    is among the allowed values.

    The graph is modified IN PLACE and also returned; pass a copy if the
    original must stay intact.

    :param G: Multigraph whose ``edges(keys=True, data=True)`` yields
        ``(u, v, key, data)`` tuples.
    :param filter_dict: Mapping of tag name to allowed values. A value may be
        None, a collection of allowed values, or a single string.
    :param drop_nodes: If True, also remove nodes left without any edge.
    :return: The same graph object ``G`` after filtering.
    """
    # A bare string would otherwise be matched by substring ("foot" in
    # "footway" is True); treat it as a single allowed value instead.
    criteria = {
        key: (allowed,) if isinstance(allowed, str) else allowed
        for key, allowed in filter_dict.items()
    }

    def edge_to_keep(edge_data):
        for key, allowed in criteria.items():
            if key not in edge_data:
                continue
            # None means: the mere presence of the key is enough
            if allowed is None:
                return True
            tag_value = edge_data[key]
            # Edge attributes may hold a list of values; one hit suffices
            candidates = tag_value if isinstance(tag_value, list) else [tag_value]
            if any(candidate in allowed for candidate in candidates):
                return True
        return False

    # Collect first, then remove, so the edge view is not mutated while iterating
    edges_to_remove = [(u, v, k) for u, v, k, d in G.edges(
        keys=True, data=True) if not edge_to_keep(d)]
    G.remove_edges_from(edges_to_remove)

    # Optionally, remove nodes with no edges
    if drop_nodes:
        G.remove_nodes_from(
            [node for node, degree in G.degree() if degree == 0])

    return G

In [None]:
def generate_graph_path(placename, graph_path, simplify=True, file_identifier=""):
    """
    Generate the file path for a graph based on the given parameters.

    The file name has the form
    ``<placename>[_<file_identifier>][_complex].graphml``, where ``_complex``
    is appended only when ``simplify`` is False.

    :param placename: Name of the place; used as the start of the file name.
    :param graph_path: Directory the file name is joined onto.
    :param simplify: Whether the graph is simplified.
    :param file_identifier: Optional extra label. None or an empty string adds
        nothing to the file name.
    :return: ``pathlib.Path`` pointing to the GraphML file.
    """
    # A missing identifier (None / "") must not end up as the text "None"
    identifier_part = f"_{file_identifier}" if file_identifier else ""
    complexity_part = "" if simplify else "_complex"
    base_filename = f"{placename}{identifier_part}{complexity_part}.graphml"
    return Path(graph_path, base_filename)

In [None]:
def load_graph_from_path(file_graph, force_download=False):
    """
    Try to read a previously saved graph from disk, reporting progress with print.

    :param file_graph: Path object of the GraphML file to look for.
    :param force_download: When True the file on disk is ignored so that the
        caller downloads the graph again.
    :return: The graph read by ``ox.load_graphml``, or None when the file is
        missing or a download is forced.
    """
    print(f"Checking: {file_graph}")

    # Guard clause: nothing usable on disk (or the caller wants fresh data)
    if not file_graph.is_file() or force_download:
        print(f"File not found or download forced: {file_graph}")
        return None

    print(f"Loading from disk: {file_graph}")
    return ox.load_graphml(file_graph)

In [None]:
def is_area_reasonable(df, max_diagonal=1):
    """
    Checks if the area defined by a DataFrame's bounding box is within a reasonable size.

    The size is measured as the straight-line (Euclidean) length of the
    bounding-box diagonal of the first row, in the units of the bbox columns
    (presumably degrees - confirm against the geocoder output).

    :param df: GeoDataFrame with bbox_north, bbox_east, bbox_south, bbox_west columns.
    :param max_diagonal: Exclusive upper limit for the diagonal length.
    :return: Boolean indicating if the area is reasonable.
    """
    # Positional access: works whatever the index labels of df are
    north = df["bbox_north"].iloc[0]
    south = df["bbox_south"].iloc[0]
    east = df["bbox_east"].iloc[0]
    west = df["bbox_west"].iloc[0]

    # Plain Pythagoras instead of ox.distance.euclidean_dist_vec, which is
    # not available in every OSMnx release.
    diagonal = ((north - south) ** 2 + (east - west) ** 2) ** 0.5
    return bool(diagonal < max_diagonal)

In [None]:
def download_graph(placename, network_type, simplify, clean_periphery, retain_all, df):
    """
    Fetch a graph for a place, first by place name and, if that fails, by bounding box.

    :param placename: Name of the place to download.
    :param network_type: Type of network to download.
    :param simplify: Whether to simplify the graph.
    :param clean_periphery: Whether to clean the periphery of the graph.
    :param retain_all: Passed through as ``retain_all`` to the OSMnx download call.
    :param df: GeoDataFrame for the place; supplies the bounding box.
    :return: The downloaded graph, or None if the area is too big or both
        download attempts fail.
    """
    # Both download calls share the same keyword options
    options = dict(network_type=network_type, simplify=simplify,
                   clean_periphery=clean_periphery, retain_all=retain_all)

    # First attempt: resolve the place by name
    try:
        if not is_area_reasonable(df):
            print("Area too big.")
            return None
        by_place = ox.graph_from_place(placename, **options)
        print(f"Downloaded graph for {placename}")
        return by_place
    except Exception as place_error:
        print(place_error)
        print(f"{placename} did not resolve as Polygon, trying BBox instead.")

    # Second attempt: fall back to the bounding box of the geocoded place
    try:
        north, south = df.bbox_north[0], df.bbox_south[0]
        east, west = df.bbox_east[0], df.bbox_west[0]
        by_bbox = ox.graph_from_bbox(north, south, east, west, **options)
        print("Downloaded graph for BBox successfully.")
    except Exception as bbox_error:
        print(bbox_error)
        return None
    return by_bbox

In [None]:
def get_place(placename, graph_paths=("osm",), simplify=True, network_type="all_private",
              force_download=False, file_identifier=None, clean_periphery=True, retain_all=True, which_result=0):
    """
    Return the graph for a place, loading it from disk when possible and
    downloading (then saving) it otherwise.

    :param placename: Name of the place to geocode and download.
    :param graph_paths: Sequence of directories; only the first entry is used
        as the location of the GraphML file.
    :param simplify: Whether to simplify the graph.
    :param network_type: Type of network to download.
    :param force_download: If True, ignore a graph file already on disk.
    :param file_identifier: Optional label added to the file name.
    :param clean_periphery: Whether to clean the periphery of the graph.
    :param retain_all: Passed through to the download call.
    :param which_result: Which geocoding result to use.
    :return: The graph, or None if it could be neither loaded nor downloaded.
    """
    # Normalise a missing identifier so it never shows up as "None" in the name
    file_graph = generate_graph_path(
        placename, graph_paths[0], simplify, file_identifier or "")

    # Try the file on disk first: this path needs no network access at all
    graph = load_graph_from_path(file_graph, force_download)
    if graph is not None:
        return graph

    # Geocode only now, because the result is needed solely for downloading
    df = ox.geocoder.geocode_to_gdf(placename, which_result=which_result)

    graph = download_graph(placename, network_type,
                           simplify, clean_periphery, retain_all, df)
    if graph is None:
        return None

    ox.save_graphml(graph, file_graph)
    print(f"Graph saved to {file_graph}")
    return graph

In [None]:
def count_tags(G, tag="width"):
    """Return how many edges of ``G`` carry ``tag`` in their attribute dict."""
    matches = 0
    for _u, _v, _key, attributes in G.edges(keys=True, data=True):
        if tag in attributes:
            matches += 1
    return matches

In [None]:
def stats_tag(G, tag="width"):
    """
    Summarise how common ``tag`` is among the edges of ``G``.

    :param G: Graph to inspect.
    :param tag: Edge attribute name to look for.
    :return: Tuple ``(tagged_edges, all_edges, percentage)`` with the
        percentage rounded to two decimals (0 for a graph without edges).
    """
    edge_total = len(G.edges())
    tagged = count_tags(G, tag)

    if edge_total > 0:
        share = (tagged / edge_total) * 100
    else:
        # No edges at all: report 0 instead of dividing by zero
        share = 0

    return tagged, edge_total, round(share, 2)

In [None]:
def tag_stats(
    G_nx,
    filter_names=["intercity", "local", "cycleway", "footway", "path", "sidewalk", "bicycle"],
    tags=["name", "surface", "width", "smoothness", "maxwidth", "est_width", "maxspeed", "access", "lit", "bicycle"],
):
    """
    Build a table of tag availability (in percent) per named edge filter.

    :param G_nx: Graph to analyse; None is passed straight through.
    :param filter_names: Names handed to ``oh.get_filter_by_name``; one table
        column per name.
    :param tags: Edge attribute names to measure; one table row per tag.
    :return: DataFrame indexed by tag with one column per filter name, or None
        when ``G_nx`` is None.
    """
    if G_nx is None:
        return None

    result = pd.DataFrame(columns=filter_names, index=tags)

    for column in filter_names:
        # presumably a dict in the shape filter_graph_by_dict expects - confirm in osmhelper
        criteria = oh.get_filter_by_name(column)

        # Filtering removes edges in place, so work on a private deep copy
        subgraph = filter_graph_by_dict(copy.deepcopy(G_nx), criteria)

        for tag in tags:
            # stats_tag returns (count, total, percent); only the percentage is stored
            result.at[tag, column] = stats_tag(subgraph, tag)[2]

    return result

In [None]:
def stats_for_place(latitude, longitude, distance, network_type='all', plot=True):
    """
    Download the graph around a point, show it, and compute its tag statistics.

    :param latitude: Latitude of the centre point.
    :param longitude: Longitude of the centre point.
    :param distance: Passed as ``dist`` to ``ox.graph_from_point``.
    :param network_type: Network type passed to ``ox.graph_from_point``.
    :param plot: If True (default) the graph is drawn with ``ox.plot_graph``;
        set to False to skip plotting, e.g. in batch runs.
    :return: The result of ``tag_stats`` for the downloaded graph.
    """
    G_nx = ox.graph_from_point((latitude, longitude),
                               dist=distance, network_type=network_type)

    # Print the graph summary
    print(G_nx)

    # Plot the graph
    if plot:
        ox.plot_graph(G_nx)

    return tag_stats(G_nx)

## Dresden Schloss

In [None]:
# Dresden Schloss 51.05245523397302, 13.736736634153816
latitude = 51.05245523397302
longitude = 13.736736634153816
distance = 1000

# stats_for_place downloads the graph itself, so it is not fetched separately here
stats = stats_for_place(latitude, longitude, distance)
print(stats)
# stats.to_csv('results/{filename}.csv')

## Berlin, Siegessäule

In [None]:
# Berlin, Siegessäule 52.51450061937166, 13.350077836872178
latitude = 52.51450061937166
longitude = 13.350077836872178
distance = 1000

# stats_for_place downloads the graph itself, so it is not fetched separately here
stats = stats_for_place(latitude, longitude, distance)
print(stats)

## Berlin, Karl-Marx-Platz (Neukölln)

In [None]:
# Berlin Karl-Marx-Platz 52.473669052264555, 13.44103486341092
# The group "Verkehrswende Gruppe" has mapped the area of Neukölln very well
latitude = 52.473669052264555
longitude = 13.44103486341092
distance = 1000

# stats_for_place downloads the graph itself, so it is not fetched separately here
stats = stats_for_place(latitude, longitude, distance)
print(stats)

## Hamburg, Rathaus

In [None]:
# Hamburg Rathaus 53.550419301331544, 9.992272601103517
latitude = 53.550419301331544
longitude = 9.992272601103517
distance = 1000

# stats_for_place downloads the graph itself, so it is not fetched separately here
stats = stats_for_place(latitude, longitude, distance)
print(stats)

## München, Max-Joseph-Platz

In [None]:
# Max-Joseph-Platz, München 48.140007569741435, 11.57765210639961
latitude = 48.140007569741435
longitude = 11.57765210639961
distance = 1000

# stats_for_place downloads the graph itself, so it is not fetched separately here
stats = stats_for_place(latitude, longitude, distance)
print(stats)

## München, Lerchenauer See

In [None]:
# München, Lerchenauer See, 48.19984724772561, 11.535569565376347
# An active mapper mapped this entire district for her thesis
latitude = 48.19984724772561
longitude = 11.535569565376347
distance = 1000

# stats_for_place downloads the graph itself, so it is not fetched separately here
stats = stats_for_place(latitude, longitude, distance)
print(stats)

## Model Regionen
Schkeuditz, Köthen, Eisleben, Taucha

## Schkeuditz

In [None]:
# Schkeuditz, 51.393711233983396, 12.220401148266053
latitude = 51.393711233983396
longitude = 12.220401148266053
distance = 1000

# stats_for_place downloads the graph itself, so it is not fetched separately here
stats = stats_for_place(latitude, longitude, distance)
print(stats)

## Köthen

In [None]:
# Köthen, Anhalt, 51.75122818820332, 11.973634212924361
latitude = 51.75122818820332
longitude = 11.973634212924361
distance = 1000

# stats_for_place downloads the graph itself, so it is not fetched separately here
stats = stats_for_place(latitude, longitude, distance)
print(stats)

## Eisleben

In [None]:
# Eisleben, 51.52790584132251, 11.547197528369814
latitude = 51.52790584132251
longitude = 11.547197528369814
distance = 1000

# stats_for_place downloads the graph itself, so it is not fetched separately here
stats = stats_for_place(latitude, longitude, distance)
print(stats)

## Taucha

In [None]:
# Taucha, 51.38024397088443, 12.492732676363739
latitude = 51.38024397088443
longitude = 12.492732676363739
distance = 1000

# stats_for_place downloads the graph itself, so it is not fetched separately here
stats = stats_for_place(latitude, longitude, distance)
print(stats)