# OSM POI additions to the database

This notebook adds OSM POIs to the database for the cities that already exist in the database. Categories currently added:
- Hospitals (including clinics)
- Schools and kindergartens
- Restaurants
- Grocery stores
- Cinemas and Theaters

For amenity types available in OSM, see their [wiki](https://wiki.openstreetmap.org/wiki/Key:amenity).

NOTE: **multiple runs will result in duplicate information in the database!**

In [3]:
import psycopg2 as pg
import h3
from psycopg2.extras import execute_values
import itertools as itt
import configparser

from OSMPythonTools.overpass import Overpass, overpassQueryBuilder

H3_LEVEL = 9

category_map = {
    "amenity" : {
        'Clinics and Hospitals': ['clinic', 'hospital'],
        'Schools and Kindergartners': ['school', 'kindergarten'],
        'Restaurants': ['restaurant'],
        'Cinemas and Theaters': ['cinema', 'theatre'],
    },
    "shop": {
        'Grocery stores and supermarkets': ['convenience', 'greengrocer', 'seafood', 'mall', 'wholesale', 'supermarket']
    }
}

#reverse map for lookup afterwards
cat_tuples = [[[(v,key) for v in values] for key, values in cat.items()] for cat in category_map.values()]
lookup_map = {key: value for key, value in itt.chain(*itt.chain(*cat_tuples))}


## Helper functions

A couple of helper functions that use OSMPythonTools to build custom Overpass queries that allow us retrieving all types for a given city at once. This is a bit more flexible than what we had previously as we can work with our own bounding boxes and ensure that our "city" definitions are consistent everywhere.

In [97]:
#helper function that builds an Overpass query to retrieve multiple nodes at once with bounding box parameters
def build_query(node_map, bbox):
    bbox_string = ", ".join([str(b) for b in bbox])

    nodes = []
    for tag, tag_values in node_map.items():
        for _, types in tag_values.items():
            for t in types:
                nodes.append("""node["{:s}"="{:s}"]({:s});""".format(tag, t, bbox_string))

    return """
    (
        {}
    ); out center; out body;""".format("\n ".join(nodes))

#helper function to parse Overpass results
#yields a generator 
def parse_query_results(results, lookup_map):
    for i, place in enumerate(results):
        #find tag type
        for c in category_map:
            if place.tag(c) is not None:
                #in some cases, places have multiple tags, of which only some are of interest (e.g. amenity=restaurant and shop=bakery)
                #we only pick up the ones where the lookup in the predefined categories does not fail
                try:
                    category = lookup_map[place.tag(c)]
                    name = place.tag("name") if place.tag("name") is not None else "Unknown " + place.tag(c)
                    yield {
                        "Category": category,
                        "Name": name,
                        "Lat": place.lat(),
                        "Long": place.lon(),
                        "H3ID": h3.geo_to_h3(place.lat(), place.lon(), H3_LEVEL)
                    }
                except KeyError:
                    pass                

#helper function that saves to database in batches
def save_to_database(conn, places, template, batch_size=100):
    #helper function for batching
    def grouper(n, iterable):
        it = iter(iterable)
        while True:
            chunk = list(itt.islice(it, n))
            if not chunk:
                return
            yield chunk

    with conn.cursor() as cur:
        #save information into DB in batches of 100 locations                        
        for batch in grouper(batch_size, places):            
            execute_values(
                cur, 
                'INSERT INTO public.pois (Name, H3ID, Lat, Long, Category) VALUES %s', 
                batch, 
                template=template
            )      

## Execution

For each city in the DB, retrieve the bounding box, build the Overpass query, retrieve objects and save them to the database.

In [6]:
config = configparser.ConfigParser()
config.read("../../../config/config.ini")    
db_params = config['DB']

In [105]:
with pg.connect(**db_params) as conn:
    with conn.cursor() as cur:
        cur.execute("SELECT CityName, BoundingBox from public.cities")
        for city, bbox in cur.fetchall():
            print("Fetching OSM data for {}..".format(city))

            #flipping the bbox as lat/long is in reversed order..
            flipped_bbox = [bbox[1], bbox[0], bbox[3], bbox[2]]
            
            #run the overpass query
            overpass = Overpass()
            query = build_query(category_map, flipped_bbox)
            result = overpass.query(query, timeout=360)
            
            #generator with results parsed
            places = parse_query_results(result.elements(), lookup_map)
            print("Found a total of {} OSM elements in the city".format(result.countElements()))

            #save everything to DB
            template="(%(Name)s, %(H3ID)s, %(Lat)s, %(Long)s, %(Category)s)"
            save_to_database(conn, places=places, template=template, batch_size=100)

            

Fetching OSM data for Atlanta..
Found a total of 3352 OSM elements in the city
