In [None]:
import duckdb
from quackosm import PbfFileReader

In [None]:
# ST_ReadOSM comes from DuckDB's `spatial` extension. Loading fails when the
# extension has never been installed for this DuckDB version, so install it first;
# installing an extension that is already present does nothing.
duckdb.install_extension('spatial')
duckdb.load_extension('spatial')

In [None]:
# Show kind, id and tags for every element of the test PBF file that carries tags.
tagged_elements_query = """
    SELECT
        kind, id, tags
    FROM
        ST_ReadOSM('tests/test_files/monaco.osm.pbf')
    WHERE
        tags IS NOT NULL
    """
duckdb.sql(tagged_elements_query)

In [None]:
def _sql_escape(value: str) -> str:
    """Escape value for SQL query."""
    return value.replace("'", "''")

In [None]:
# Hand-assembled WHERE predicates for the example filter
# { "*speed": "*0", "highway": "primary" }.

# Step 1: find every distinct tag key in the file that ends with "speed".
speed_tags_query = """
    WITH distinct_tags AS (
        SELECT
            DISTINCT unnest(map_keys(tags)) tag
        FROM
            ST_ReadOSM('tests/test_files/monaco.osm.pbf')
        WHERE
            tags IS NOT NULL
    )
    SELECT
        tag
    FROM
        distinct_tags
    WHERE
        tag LIKE '%speed'
    """
speed_tags = duckdb.sql(speed_tags_query).fetchnumpy()["tag"]

# Step 2: one predicate for the exact `highway = primary` match, followed by one
# predicate per discovered speed key whose value ends with "0".
highway_key = _sql_escape('highway')
sql_where_clauses = [
    f"""(list_contains(map_keys(tags), '{highway_key}') AND list_extract(map_extract(tags, '{highway_key}'), 1) = 'primary')"""
] + [
    f"(list_contains(map_keys(tags), '{escaped_key}') AND list_extract(map_extract(tags, '{escaped_key}'), 1) LIKE '%0')"
    for escaped_key in map(_sql_escape, speed_tags)
]

In [None]:
# Any single tag predicate is enough for an element to match, so join them with OR.
tag_predicates = ' OR '.join(sql_where_clauses)

# Keep nodes with coordinates, ways with at least two refs, and boundary/multipolygon
# relations with refs, then apply the tag predicates on top.
r = duckdb.sql(
    f"""
    SELECT
        *
    FROM
        ST_ReadOSM('tests/test_files/monaco.osm.pbf')
    WHERE
        (
            (
                kind = 'node'
                AND lat IS NOT NULL
                AND lon IS NOT NULL
            )
            OR
            (
                kind = 'way'
                AND len(refs) >= 2
            )
            OR
            (
                kind = 'relation' AND len(refs) > 0
                AND list_contains(map_keys(tags), 'type')
                AND list_has_any(map_extract(tags, 'type'), ['boundary', 'multipolygon'])
            )
        )
        AND tags IS NOT NULL
        AND ({tag_predicates})
    ORDER BY kind, id
    """
)
# Print the SQL text first; the bare `r` below renders the result as the cell output.
print(r.sql_query())
r

In [None]:
# Flat (ungrouped) tags filters passed one by one to PbfFileReader further down.
# Order matters: later cells index into the results by position.
TAGS_FILTERS_EXAMPLES = [
    # None,
    # Exact keys, with True / value / list of values / False
    {"building": True},
    {"amenity": True, "leisure": True},
    {"amenity": "parking", "leisure": ["park", "garden"], "office": True},
    {"office": False},
    {"building": True, "office": False},
    {"name:en": True},
    # Wildcards in keys
    {"name:*": True},
    {"name:*": False},
    {"*": True},
    {"*": False},
    {"building": True, "addr:*": False},
    {"building": True, "addr:*": True},
    {"building": True, "addr:*": True, "source:*": False},
    {"name:*": "Monaco"},
    {"name:*": ["Monaco", "France"]},
    # Wildcards in values, alone or combined with wildcard keys
    {"highway": "primary", "maxspeed": False},
    {"highway": "*ary"},
    {"highway": "*ary", "maxspeed": False},
    {"*speed": "*0"},
    {"*speed": "*0", "railway": False, "waterway": False},
    {"*speed": ["*0", "90"]},
    {"*speed": "*0", "highway": "primary"},
    {"*speed": "*0", "highspeed": True},
]

In [None]:
# Run every flat filter against the test file; results stay in the same order as
# TAGS_FILTERS_EXAMPLES so they can be looked up by position afterwards.
flat_results = []
for flat_filter in TAGS_FILTERS_EXAMPLES:
    print(flat_filter)
    reader = PbfFileReader(tags_filter=flat_filter, silent_mode=True)
    flat_gdf = reader.get_features_gdf(
        file_paths='tests/test_files/monaco.osm.pbf',
        ignore_cache=True,
    )
    flat_results.append(flat_gdf)
    print()

In [None]:
# Result for the filter at position 22 of TAGS_FILTERS_EXAMPLES, displayed sorted by index.
# NOTE(review): with the 23 entries currently listed, position 22 is the last one
# ({ "*speed": "*0", "highspeed": True }), i.e. the same frame as flat_results[-1].
flat_results[22].sort_index()

In [None]:
# Result for the last filter in TAGS_FILTERS_EXAMPLES, displayed sorted by index.
flat_results[-1].sort_index()

In [None]:
# Show the `maxspeed` and `source:maxspeed` values of every element matched by the
# listed speed-related predicates.
#
# The OR-ed predicates are wrapped in one parenthesised group: AND binds tighter than
# OR, so without the parentheses `tags IS NOT NULL` applied only to the first
# predicate. The string has no placeholders, so it is a plain (non-f) literal.
duckdb.sql(
    """
    SELECT
        kind, id, list_extract(map_extract(tags, 'maxspeed'), 1) as "maxspeed",  list_extract(map_extract(tags, 'source:maxspeed'), 1) as "source:maxspeed_2",
    FROM
        ST_ReadOSM('tests/test_files/monaco.osm.pbf')
    WHERE
        tags IS NOT NULL
        AND (
            (list_extract(map_extract(tags, 'def:highway=motorway;maxspeed'), 1) LIKE '%0')
            OR (list_extract(map_extract(tags, 'def:highway=primary|highway=secondary|highway=tertiary;maxspeed'), 1) LIKE '%0')
            OR (list_extract(map_extract(tags, 'def:highway=residential;maxspeed'), 1) LIKE '%0')
            OR (list_extract(map_extract(tags, 'highspeed'), 1) LIKE '%0')
            OR (list_contains(map_keys(tags), 'maxspeed'))
            OR (list_extract(map_extract(tags, 'def:highway=living_street;maxspeed'), 1) LIKE '%0')
            OR (list_extract(map_extract(tags, 'source:maxspeed'), 1) LIKE '%0')
            OR (list_extract(map_extract(tags, 'def:highway=trunk;maxspeed'), 1) LIKE '%0')
        )
    """
)

In [None]:
# Show kind, id and the full tags map of every element matched by the listed
# speed-related predicates.
#
# The OR-ed predicates are wrapped in one parenthesised group: AND binds tighter than
# OR, so without the parentheses `tags IS NOT NULL` applied only to the first
# predicate. The string has no placeholders, so it is a plain (non-f) literal.
duckdb.sql(
    """
    SELECT
        kind, id, tags
    FROM
        ST_ReadOSM('tests/test_files/monaco.osm.pbf')
    WHERE
        tags IS NOT NULL
        AND (
            (list_extract(map_extract(tags, 'def:highway=motorway;maxspeed'), 1) LIKE '%0')
            OR (list_extract(map_extract(tags, 'def:highway=primary|highway=secondary|highway=tertiary;maxspeed'), 1) LIKE '%0')
            OR (list_extract(map_extract(tags, 'def:highway=residential;maxspeed'), 1) LIKE '%0')
            OR (list_extract(map_extract(tags, 'highspeed'), 1) LIKE '%0')
            OR (list_contains(map_keys(tags), 'maxspeed'))
            OR (list_extract(map_extract(tags, 'def:highway=living_street;maxspeed'), 1) LIKE '%0')
            OR (list_extract(map_extract(tags, 'source:maxspeed'), 1) LIKE '%0')
            OR (list_extract(map_extract(tags, 'def:highway=trunk;maxspeed'), 1) LIKE '%0')
        )
    """
)

In [None]:
# Grouped tags filters: each example maps a group name to a flat tags filter.
GROUPED_TAGS_FILTERS_EXAMPLES = [
    {"buildings": {"building": True}},
    {
        "buildings_all": {"building": True},
        "buildings_star_all": {"building": "*"},
    },
    {
        "buildings_all": {"building": True},
        "buildings_office": {"building": "office"},
    },
    {
        "english_name": {"name:en": True},
        "all_names": {"name:*": True},
    },
    {
        "buildings_and_features_with_addr": {"building": True, "addr:*": True},
        "buildings_and_features_with_names": {"building": True, "name:*": True},
    },
    # will result in both without names and addresses
    {
        "buildings_without_addr": {"building": True, "addr:*": False},
        "buildings_without_names": {"building": True, "name:*": False},
    },
    {
        "highways_and_ways_with_decimal_speeds": {"highway": True, "*speed": "*0"},
        "railways": {"railway": True},
    },
]

In [None]:
# Run every grouped filter against the test file; results stay in the same order as
# GROUPED_TAGS_FILTERS_EXAMPLES.
grouped_results = []
for grouped_filter in GROUPED_TAGS_FILTERS_EXAMPLES:
    print(grouped_filter)
    reader = PbfFileReader(tags_filter=grouped_filter, silent_mode=True)
    grouped_gdf = reader.get_features_gdf(
        file_paths="tests/test_files/monaco.osm.pbf",
        ignore_cache=True,
    )
    grouped_results.append(grouped_gdf)
    print()

In [None]:
# WHERE-clause fragments for filters without wildcards; each is appended after
# `tags IS NOT NULL AND` by the cell that iterates over this list.
SQL_EXAMPLES_NO_STAR = [
    "(list_contains(map_keys(tags), 'building'))",
    (
        "((list_contains(map_keys(tags), 'amenity'))"
        " OR (list_contains(map_keys(tags), 'leisure')))"
    ),
    # Disabled drafts, written against tag_key / tag_value columns:
    # "((tag_key = 'amenity' and tag_value = 'parking') OR (tag_key = 'leisure' and tag_value in ('park', 'garden')) OR (tag_key = 'office'))",
    # "(tag_key != 'office')",
    # "((tag_key = 'building')) AND ((tag_key != 'office'))",
    # "(tag_key = 'name:en')"
]

In [None]:
from collections import namedtuple

# Lightweight record with three positional fields: find_tags, positive, negative.
# NOTE(review): not used in the visible cells; the intended meaning of each field
# should be confirmed with the author.
OSMTagFilterExample = namedtuple(
    "OSMTagFilterExample",
    ["find_tags", "positive", "negative"],
)

In [None]:
# Placeholder list, currently empty.
SQL_EXAMPLES = []

In [None]:
# For each hand-written predicate, print the SQL text and then the matching rows.
for where_filter in SQL_EXAMPLES_NO_STAR:
    # Build the relation once and reuse it for both prints. The original repeated the
    # same query text twice, which invites the two copies drifting apart.
    relation = duckdb.sql(
        f"""
            SELECT
                kind, id, tags
            FROM
                ST_ReadOSM('tests/test_files/monaco.osm.pbf')
            WHERE
                tags IS NOT NULL
                AND {where_filter}
            """
    )
    print(relation.sql_query())
    print(relation)