In [1]:
# Import packages
from dataclasses import dataclass
from geopandas import GeoDataFrame
from OSMPythonTools.overpass import Overpass, OverpassResult
from shapely import Point

# Constants
# OSM `amenity` tag values requested from Overpass.
AMENITY_VALUES = [
    'bar', 'cafe', 'fast_food', 'food_court', 'pub', 'restaurant'
]
# OSM `shop` tag values requested from Overpass. This list is the single source
# of truth: QUERY below is generated from it, and check_missing_shops() compares
# scraped data against it.
SHOP_VALUES = [
    'alcohol', 'appliance', 'bakery', 'bed', 'beverages', 'bicycle', 'boat',
    'books', 'butcher', 'car', 'car_parts', 'clothes', 'coffee', 'computer',
    'confectionery', 'convenience', 'cosmetics', 'craft', 'dairy', 'deli',
    'electrical', 'electronics', 'fabric', 'fashion_accessories', 'food',
    'furniture', 'garden_centre', 'gas', 'greengrocer', 'hardware', 'health_food',
    'houseware', 'ice_cream', 'interior_decoration', 'jewelry', 'kitchen',
    'lighting', 'mobile_phone', 'motorcycle', 'optician', 'outdoor', 'paint',
    'pasta', 'pastry', 'pet', 'seafood', 'scooter', 'shoes', 'sports',
    'stationery', 'supermarket', 'tea', 'tiles', 'toys', 'tyres', 'watches', 'wine'
]
# Area id substituted into QUERY.
# NOTE(review): presumably OSM relation 1293250 offset by 3600000000 (the
# Overpass area-id convention) - confirm it is the intended boundary.
TAIPEI = 3601293250

# One `nwr` statement per requested tag value. The indentation is kept exactly
# as in the previously hand-written query so the generated text is unchanged.
_QUERY_STATEMENT = '            nwr["{key}"="{value}"](area.searchArea);'
_QUERY_STATEMENTS = [
    _QUERY_STATEMENT.format(key='amenity', value=value) for value in AMENITY_VALUES
] + [
    _QUERY_STATEMENT.format(key='shop', value=value) for value in SHOP_VALUES
]

# Overpass QL template. The single `{}` placeholder receives the area id via
# str.format(); `out center;` asks for a centre point on each returned element.
QUERY = (
    '\n'
    '        area({})->.searchArea;\n'
    '        (\n'
    + '\n'.join(_QUERY_STATEMENTS) + '\n'
    '        );\n'
    '        out center;\n'
    '        '
)

@dataclass
class Category:
    """Tag summary of a single OSM element.

    Every field has a default, so ``Category()`` is the "nothing found" value:
    empty strings and ``valid=False``. Positional construction in the order
    ``(amenity, name, shop, valid)`` keeps working.

    Attributes:
        amenity: Value of the element's ``amenity`` tag, or ``''`` when absent.
        name: Value of the element's ``name`` tag, or ``''`` when absent.
        shop: Value of the element's ``shop`` tag, or ``''`` when absent.
        valid: ``True`` when the element carries an ``amenity`` or ``shop`` tag.
    """
    amenity: str = ''
    name: str = ''
    shop: str = ''
    valid: bool = False

def check_missing_shops(gdf: GeoDataFrame) -> list[str]:
    if 'shop' not in gdf.columns:
        raise ValueError('GeoDataFrame must contain a "shop" column.')
    
    observed = set(gdf["shop"].dropna().unique())
    expected_set = set(SHOP_VALUES)

    missing = sorted(list(expected_set - observed))
    return missing

def get_category(tags: dict | None) -> Category:
    """Summarise an element's tag mapping as a Category.

    Args:
        tags: The element's tags, or anything that is not a dict (e.g. None)
            when the element has none.

    Returns:
        A Category holding the ``amenity``, ``name`` and ``shop`` tag values
        ('' for each missing tag). ``valid`` is True when at least one of
        ``amenity`` / ``shop`` is present. Non-dict input yields the empty,
        invalid Category.
    """
    # Guard clause: nothing to read from a missing or malformed tag container.
    if not isinstance(tags, dict):
        return Category('', '', '', False)

    has_amenity = 'amenity' in tags
    has_shop = 'shop' in tags

    return Category(
        tags['amenity'] if has_amenity else '',
        tags['name'] if 'name' in tags else '',
        tags['shop'] if has_shop else '',
        has_amenity or has_shop,
    )

# Column layout shared by the populated result and every empty fallback.
_SHOP_COLUMNS = ['id', 'name', 'amenity', 'shop', 'geometry']
_SHOP_CRS = 'EPSG:4326'


def _empty_shops() -> GeoDataFrame:
    """Return a zero-row frame with the result's columns, geometry column and CRS."""
    return GeoDataFrame(columns=_SHOP_COLUMNS, geometry='geometry', crs=_SHOP_CRS)


def _is_coordinate(value) -> bool:
    """Tell whether ``value`` is a real number usable as a coordinate."""
    # A JSON number without a fractional part may decode as int; bool is an
    # int subclass and is never a coordinate.
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _element_lon_lat(element) -> tuple:
    """Return ``(lon, lat)`` for an element: own position for nodes, centre otherwise."""
    if element.type() == 'node':
        return element.lon(), element.lat()
    return element.centerLon(), element.centerLat()


def get_shops(area_id: int | None = None, timeout: int = 300) -> GeoDataFrame:
    """Query Overpass for the amenities/shops in QUERY and return them as points.

    Args:
        area_id: Area id substituted into QUERY. Defaults to ``TAIPEI``.
        timeout: Timeout forwarded to ``Overpass.query``.

    Returns:
        A GeoDataFrame in EPSG:4326 with columns ``id``, ``name``, ``amenity``,
        ``shop`` and ``geometry`` (a Point), sorted by ``id``. Elements without
        an ``amenity``/``shop`` tag or without numeric coordinates are dropped.
        If the query raises, returns something other than an OverpassResult,
        or yields no usable element, a zero-row frame with the same columns
        and CRS is returned; the first two cases also print a message.
    """
    overpass = Overpass()
    query = QUERY.format(TAIPEI if area_id is None else area_id)

    try:
        scraped = overpass.query(query, timeout=timeout)
    except Exception as ex:
        # Deliberate best effort: report the failure and hand back an empty frame.
        print(f'An exception occured during the query. {ex}')
        return _empty_shops()

    if not isinstance(scraped, OverpassResult):
        print('The returned object is not an OverpassResult object.')
        return _empty_shops()

    elements = scraped.elements()
    records: list[dict[str, int | str | Point]] = []

    if isinstance(elements, list):
        for element in elements:
            category = get_category(element.tags())
            if not category.valid:
                continue

            lon, lat = _element_lon_lat(element)
            if not (_is_coordinate(lon) and _is_coordinate(lat)):
                continue

            records.append({
                'id': element.id(),
                'name': category.name,
                'amenity': category.amenity,
                'shop': category.shop,
                'geometry': Point(lon, lat)
            })

    # With no records there is no 'geometry' column to activate and no 'id'
    # column to sort on, so return the typed empty frame instead.
    if not records:
        return _empty_shops()

    gdf = GeoDataFrame(records, geometry='geometry', crs=_SHOP_CRS)
    return gdf.sort_values('id')

In [None]:
# Fetch the POIs and persist them as GeoJSON.
gdf = get_shops()

# get_shops() returns a zero-row frame when the query fails; writing it would
# replace a previously saved dataset with an empty file, so skip the write.
if gdf.empty:
    print('No POIs were retrieved; ../data/demand/poi_osm.geojson was not written.')
else:
    gdf.to_file('../data/demand/poi_osm.geojson', index=False)

display(gdf)