# Demonstration of semantic annotation

In [1]:
import pandas as pd

from osm_annotation import geofabrik_database, semantic_annotation

## Build a local OpenStreetMap (Geofabrik) database

In [2]:
# Root folder of the local Geofabrik database. The same path is handed to
# geofabrik_database.build() and to SemanticAnnotator in the cells below.
database_folder_path = "/work/mhealthresearchgroup/Geofabrik/"

In [4]:
# Build the local OpenStreetMap (Geofabrik) database, passing the folder
# configured in database_folder_path.
geofabrik_database.build(database_folder_path)

In [5]:
# Create the annotator from the same database folder; every annotation call
# later in this notebook goes through this object.
semantic_annotator = semantic_annotation.SemanticAnnotator(database_folder_path)

"""
The SemanticAnnotator class annotates location data using the Geofabrik database created by geofabrik_database.py.

There are three annotation methods:
    Method 1. annotate_single_point(lat, lon): annotates a single point with semantic labels from the OpenStreetMap database.
        - pro: returns the distances to all POI types
        - con: time-consuming (~2-3 hours/query). Method 3 is recommended for a batch of points.
    Method 2. annotate_single_shape(lat_list, lon_list): annotates a single shape (e.g., bounding box, polygon) with semantic labels from the OpenStreetMap database.
        - pro: most accurate method
        - con: needs a set of points that define the query shape
    Method 3. annotate_batch_points(dataframe, latitude_colname, longitude_colname): annotates a batch of points (usually centroids of places) with semantic labels from the OpenStreetMap database.
        - pro: fastest method; suited to annotating many place centroids at once.
        - con: only returns the label of the nearest POI and the distance to it.
    
This script uses the geodf and dist functions from the GPS2space package (https://gps2space.readthedocs.io/en/latest/).
    
"""

## Annotate location data with Point-of-interest tags

### Method 1: annotate a single point

"""annotate single point with semantic labels from OpenStreetMap database.
Matches the nearest POI to the query point.

Parameters:
   lat (float): latitude of the query point, in degrees
   lon (float): longitude of the query point, in degrees
Returns:
   a dict with
        matched_labels: semantic label matched with the query point
        min_distance: the distance from the query point to the matched POI, in meters
        distances_to_pois: distances from the query point to the other POI types
"""

In [8]:
# Query point: Fenway Park in Boston, given as (latitude, longitude) in degrees.
centroid_latitude, centroid_longitude = 42.34653831212525, -71.09724395926423

In [8]:
# Annotate the single query point. The recorded run shown below took about
# 2 h 50 min, so expect this cell to be slow.
semantic_annotator.annotate_single_point(centroid_latitude, centroid_longitude)

100%|██████████| 5/5 [2:50:42<00:00, 2048.51s/it]


{'matched_labels': 'recreational;outdoor;pitch (polygon)',
 'min_distance': 6.169761255410299,
 'distances_to_pois': {'busines;busines;company (polygon)': 326501.593160387,
  'busines;busines;convention_center (polygon)': 2682942.101031607,
  'busines;busines;factory (polygon)': 3612.955363467255,
  'busines;busines;industrial (polygon)': 486.88772114370636,
  'busines;busines;office (polygon)': 932.2980449124797,
  'commercial;food;bakery (point)': 738.1374807550822,
  'commercial;food;bakery (polygon)': 928.5980506023864,
  'commercial;food;beverage (point)': 309.4560856957147,
  'commercial;food;beverage (polygon)': 2916.136263146877,
  'commercial;food;cafe (point)': 144.24793650512296,
  'commercial;food;cafe (polygon)': 947.7997197480698,
  'commercial;food;dining_hall (polygon)': 6265.0338543036705,
  'commercial;food;fast_food (point)': 79.80060752479443,
  'commercial;food;fast_food (polygon)': 1491.0348538773333,
  'commercial;food;food (polygon)': 301543.4250849768,
  'comme

### Method 2: annotate a single shape

"""annotate single shape with semantic labels from OpenStreetMap database.
The query shape can be any geometry that can be described by a list of latitudes and a list of longitudes,
    e.g., a bounding box or a polygon.
The shape is matched with the labels of point POIs (OSM POIs represented by a point) that lie within it
    and with the labels of polygon POIs (OSM POIs represented by a polygon) that intersect it.

Parameters:
   lat_list (list of float): latitudes of the points defining the query shape, in degrees
   lon_list (list of float): longitudes of the points defining the query shape, in degrees
Returns:
   a dict with
        matched_labels: semantic labels matched with the query shape
        point_labels: semantic labels of point POIs matched with the query shape
        poly_labels: semantic labels of polygon POIs matched with the query shape
        matched_geometries: geometries of the POIs matched with the query shape
"""

In [9]:
# Bounding box of the Museum of Fine Arts in Boston. The two lists are
# index-aligned and ordered NW, NE, SE, SW corner.
lat_list = [
    42.33969558839377,   # NW
    42.34039653732734,   # NE
    42.339235761638996,  # SE
    42.33847311473655,   # SW
]
lon_list = [
    -71.09563225696323,  # NW
    -71.09348529667446,  # NE
    -71.09270768730832,  # SE
    -71.0948470612041,   # SW
]

In [63]:
# Annotate the bounding box defined by lat_list / lon_list. The recorded run
# shown below took about 33 minutes.
semantic_annotator.annotate_single_shape(lat_list, lon_list)

100%|██████████| 5/5 [33:23<00:00, 400.63s/it]


{'matched_labels': ['commercial;food;cafe (point)',
  'commercial;shopping;shop (point)',
  'commercial;leisure;museum (polygon)',
  'service;transportation;parking (polygon)',
  'recreational;outdoor;nature (polygon)'],
 'point_labels': ['commercial;food;cafe (point)',
  'commercial;shopping;shop (point)'],
 'poly_labels': ['commercial;leisure;museum (polygon)',
  'recreational;outdoor;nature (polygon)',
  'service;transportation;parking (polygon)'],
 'matched_geometries': [<shapely.geometry.point.Point at 0x2b8b25338410>,
  <shapely.geometry.polygon.Polygon at 0x2b8b25353b10>,
  <shapely.geometry.point.Point at 0x2b8b253415d0>,
  <shapely.geometry.polygon.Polygon at 0x2b8b250b0910>,
  <shapely.geometry.polygon.Polygon at 0x2b8b25331690>]}

### Method 3: annotate a batch of points

"""annotate a batch of points (usually centroids of places) with semantic labels from OpenStreetMap database.
The batch of points should be stored in a pandas DataFrame with a latitude column and a longitude column.
Matches the nearest POI to each query point.

Parameters:
    dataframe: a pandas DataFrame holding the query points
    latitude_colname (str): name of the column with the latitudes of the query points, in degrees
    longitude_colname (str): name of the column with the longitudes of the query points, in degrees
Returns:
   a dataframe with
        matched_labels: semantic labels matched with the query points
        min_distance: the distance from the query point to the matched POI, in meters
"""

In [12]:
import warnings
# Suppress every warning for the rest of the kernel session (no category or
# module filter is given), presumably to keep the batch-annotation output clean.
# NOTE(review): this also hides warnings that may matter; consider narrowing
# the filter to the specific category being silenced.
warnings.filterwarnings("ignore")

In [12]:
# Four query points around the Northeastern University campus, one
# [latitude, longitude] pair per place. `pd` is pandas, imported in the
# notebook's first cell, so this cell also works after Restart & Run All.
locations = [
    [42.33833, -71.08795],  # library
    [42.33909, -71.08758],  # cafe
    [42.34033, -71.09038],  # gym
    [42.33661, -71.08944],  # train station
]
# The column names chosen here are the ones passed to annotate_batch_points
# as latitude_colname / longitude_colname.
location_dataframe = pd.DataFrame(data=locations, columns=["latitude", "longitude"])

In [14]:
# Show the query points as plain text before annotating them.
print(location_dataframe)

   latitude  longitude
0  42.33833  -71.08795
1  42.33909  -71.08758
2  42.34033  -71.09038
3  42.33661  -71.08944


In [15]:
# Annotate all query points in one call; the two *_colname arguments name the
# coordinate columns of location_dataframe. The recorded run shown below took
# about 2 h 53 min.
semantic_annotator.annotate_batch_points(dataframe = location_dataframe, latitude_colname = 'latitude', longitude_colname = 'longitude')

100%|██████████| 5/5 [2:53:23<00:00, 2080.62s/it]


Unnamed: 0,latitude,longitude,geometry,matched_labels,min_distance
0,42.33833,-71.08795,POINT (-71.08795 42.33833),service;education;library (polygon),5.761234
1,42.33909,-71.08758,POINT (-71.08758 42.33909),commercial;food;fast_food (point),1.928266
2,42.34033,-71.09038,POINT (-71.09038 42.34033),service;education;university (polygon),2.672806
3,42.33661,-71.08944,POINT (-71.08944 42.33661),service;transportation;train_station (point),5.179144
