In [1]:
import pickle
from io import StringIO # wraps HTML text as a file-like object for pandas.read_html

import pandas as pd # library for data analysis
import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML documents

In [2]:
def _values_from_wikitable(table_element, key, excluded):
    """Return the values listed for ``key`` in a wiki table element.

    Args:
        table_element: The parsed ``<table class="wikitable">`` element.
        key: The OSM key whose rows should be kept.
        excluded: Placeholder values to leave out of the result.

    Returns:
        The non-missing entries of the table's ``Value`` column, or ``None``
        when the table has no ``Value`` column at all.
    """
    # Wrap the markup in StringIO: passing literal HTML text straight to
    # pandas.read_html is deprecated in recent pandas releases.
    df = pd.read_html(StringIO(str(table_element)))[0]
    if isinstance(df.columns, pd.MultiIndex):
        # Keep only the top header level, however many levels there are.
        df.columns = df.columns.get_level_values(0)

    if 'Value' not in df.columns:
        return None
    if 'Key' in df.columns:
        # Section-heading rows repeat their caption in every column, so
        # keeping only rows whose Key cell equals the key drops them.
        df = df[df['Key'].eq(key)]
    # Drop NaN cells so missing values never end up in the result list.
    return [item for item in df['Value'].dropna() if item not in excluded]


def _values_from_taglist(taglist_element, key, excluded):
    """Return the values listed for ``key`` in a tag list div.

    Args:
        taglist_element: The parsed ``<div class="taglist">`` element.
        key: The OSM key; a leading ``"<key>="`` is removed from the tag string.
        excluded: Placeholder values to leave out of the result.

    Returns:
        The comma-separated entries of the ``data-taginfo-taglist-tags``
        attribute, stripped of surrounding whitespace, without empty entries.
        An absent attribute yields an empty list.
    """
    tags_string = taglist_element.get('data-taginfo-taglist-tags', '')
    # Assuming the tags are comma-separated
    candidates = (item.strip() for item in tags_string.removeprefix(f'{key}=').split(','))
    return [item for item in candidates if item and item not in excluded]


def scrape_osm_wiki_tags(keys, timeout=30):
    """Scrape the documented values of OSM keys from the OpenStreetMap wiki.

    For each key, the page ``https://wiki.openstreetmap.org/wiki/Key:<key>`` is
    fetched and the first ``table.wikitable`` or ``div.taglist`` element found
    on it is used as the source of values.

    Args:
        keys: Iterable of OSM key names (e.g. ``'amenity'``).
        timeout: Seconds to wait for each HTTP request before requests raises
            a timeout error, so a stalled connection cannot hang the run.

    Returns:
        dict mapping every requested key to a list of its values. Keys whose
        page has no usable table or tag list map to an empty list. The
        placeholder values 'yes', 'Yes', 'user defined' and 'User defined'
        are never included.
    """
    result_dict = {}  # Initialize dictionary to store results
    remove_items = ('yes', 'user defined', 'User defined', 'Yes')
    for key in keys:
        wikiurl = f"https://wiki.openstreetmap.org/wiki/Key:{key}"
        response = requests.get(wikiurl, timeout=timeout)
        # Parse data from the HTML into a BeautifulSoup object
        soup = BeautifulSoup(response.text, 'html.parser')
        # Try finding a table or a div with the specified classes
        result_element = soup.select_one('table.wikitable, div.taglist')
        classes = result_element.get('class', []) if result_element else []

        if 'wikitable' in classes:  # It's a table
            print(f"Table found for {key}")
            values = _values_from_wikitable(result_element, key, remove_items)
            if values is None:
                # One oddly shaped page should not abort the whole scrape.
                print(f"No 'Value' column in table for {key}")
                values = []
            result_dict[key] = values
        elif 'taglist' in classes:  # It's a tag list div
            print(f"Tag list div found for {key}")
            result_dict[key] = _values_from_taglist(result_element, key, remove_items)
        else:
            # Every requested key gets an entry, even when nothing was found.
            print(f"No data found for {key}")
            result_dict[key] = []

    return result_dict



In [3]:
# Scrape the wiki values for every OSM key of interest, then display the mapping
result_dict = scrape_osm_wiki_tags(
    [
        'amenity',
        'shop',
        'craft',
        'office',
        'sport',
        'tourism',
        'leisure',
        'building',
        'military',
        'public_transport',
        'aeroway',
    ]
)
result_dict

Table found for amenity
Table found for shop
Tag list div found for craft
Tag list div found for office
Table found for sport
Table found for tourism
Table found for leisure
Table found for building
Tag list div found for military
Tag list div found for public_transport
Table found for aeroway


{'amenity': ['bar',
  'biergarten',
  'cafe',
  'fast_food',
  'food_court',
  'ice_cream',
  'pub',
  'restaurant',
  'college',
  'dancing_school',
  'driving_school',
  'first_aid_school',
  'kindergarten',
  'language_school',
  'library',
  'surf_school',
  'toy_library',
  'research_institute',
  'training',
  'music_school',
  'school',
  'traffic_park',
  'university',
  'bicycle_parking',
  'bicycle_repair_station',
  'bicycle_rental',
  'bicycle_wash',
  'boat_rental',
  'boat_sharing',
  'bus_station',
  'car_rental',
  'car_sharing',
  'car_wash',
  'compressed_air',
  'vehicle_inspection',
  'charging_station',
  'driver_training',
  'ferry_terminal',
  'fuel',
  'grit_bin',
  'motorcycle_parking',
  'parking',
  'parking_entrance',
  'parking_space',
  'taxi',
  'weighbridge',
  'atm',
  'payment_terminal',
  'bank',
  'bureau_de_change',
  'money_transfer',
  'payment_centre',
  'baby_hatch',
  'clinic',
  'dentist',
  'doctors',
  'hospital',
  'nursing_home',
  'pharma

In [4]:
# Persist the scraped key -> values mapping to a pickle file
with open('osm_tags.pkl', mode='wb') as pickle_file:
    pickle.dump(result_dict, pickle_file)