# Manmade features




In [None]:
import numpy as np
import pandas as pd
import pyproj
import shapely
import osmnx
import geopandas as gpd

import plotly
from plotly.subplots import make_subplots
from plotly.graph_objects import Scatter

from geo_encodings import draw_shape

import polars as pl
pl.Config.set_tbl_rows(25)


In [None]:
# Define the bounding box to pull features from.
#
# Exactly one candidate region is active at a time; the alternatives are kept
# commented out so they can be swapped in by hand. Each region is given by its
# (lat0, lon0) minimum corner and (lat1, lon1) maximum corner, in degrees.

# # Somewhere in Belarus.
# lat0, lon0 = 53.107846, 28.792399
# lat1, lon1 = 53.221441, 28.989799

# # Minsk.
# lat0, lon0 = 53.894162, 27.536065
# lat1, lon1 = 53.910152, 27.570615

# # Milkovo, Kamchatka.
# lat0, lon0 = 54.680048, 158.585918
# lat1, lon1 = 54.714276, 158.656632

# # Asagiri, Japan.
# lat0, lon0 = 32.228572, 130.881277
# lat1, lon1 = 32.262670, 130.906660

# # Melo Uruguay.
# lat0, lon0 = -32.395782, -54.201407
# lat1, lon1 = -32.338938, -54.122384

# # Libreville Gabon.
# lat0, lon0 = 0.354229, 9.393414
# lat1, lon1 = 0.477928, 9.496069

# Portsmouth NH. (The active region.)
lat0, lon0 = 43.065028, -70.793336
lat1, lon1 = 43.094909, -70.722353

# # Downtown Tulsa OK.
# lat0, lon0 = 36.145202, -95.989340
# lat1, lon1 = 36.164382, -95.969949

# Handy derived stuff.
# The bounds are ordered [min lon, min lat, max lon, max lat]; the centre of
# the box is the midpoint of each coordinate pair.
query_bounds = [lon0, lat0, lon1, lat1]
center_lon = (lon0 + lon1) / 2
center_lat = (lat0 + lat1) / 2


In [None]:
# Define a local map projection: a transverse Mercator centred on the middle
# of the query box, in metres, with the origin shifted by `offset` in both x
# and y (the +x_0 / +y_0 terms).
offset = 20000
proj_def = f"""
+proj=tmerc +lat_0={center_lat} +lon_0={center_lon} 
+k=1.0 +x_0={offset} +y_0={offset} +datum=WGS84 +units=m +no_defs
"""
wgs84_crs = pyproj.CRS.from_epsg(4326)
ltm_crs = pyproj.CRS.from_proj4(proj_def)

# always_xy=True: both callables take and return (x, y) / (lon, lat) order.
_wgs84_to_ltm = pyproj.Transformer.from_crs(wgs84_crs, ltm_crs, always_xy=True)
_ltm_to_wgs84 = pyproj.Transformer.from_crs(ltm_crs, wgs84_crs, always_xy=True)
proj_forward = _wgs84_to_ltm.transform  # lon/lat -> local metres
proj_inverse = _ltm_to_wgs84.transform  # local metres -> lon/lat


## Get and process features

In [None]:
# Query OSM for everything inside the bounding box that carries at least one
# of these tags. (In osmnx a value of True requests any value of the tag.)
tags = {
    'amenity': True,
    'shop': True,
    'leisure': True,
}
features = osmnx.features.features_from_bbox(query_bounds, tags=tags).reset_index()

# Annotate each feature with its geometry type and with a copy of its geometry
# projected into the local metric coordinate system. Only the geometry column
# is needed here, so iterate it directly instead of converting every row to a
# dict.
features['gtype'] = [g.geom_type for g in features['geometry']]
features['geomxy'] = [
    shapely.ops.transform(proj_forward, g) for g in features['geometry']
]
print('%d features total' % len(features))

In [None]:
# Re-processing.
# Amenities are a bit erratic in how they are specified. Here I ensure that
# they are all represented as Point entities (any non-point geometry is
# replaced by its centroid), with the exception of parking, which keeps its
# original geometry. The "shop" and "leisure" categories are also folded into
# "amenity": the label is the first non-missing value among those three tags.

new_records = []

for rec in features.to_dict('records'):
    # Use .get so that a tag column which is absent from the query result is
    # treated as missing instead of raising KeyError.
    if rec['gtype'] == 'Point' or rec.get('amenity') == 'parking':
        geomxy = rec['geomxy']
    else:
        geomxy = rec['geomxy'].centroid

    # pd.notna rejects both NaN and None, so neither can leak through as the
    # literal label 'nan' / 'None'.
    label = next(
        (
            str(rec[tag])
            for tag in ('amenity', 'shop', 'leisure')
            if pd.notna(rec.get(tag))
        ),
        'unknown',
    )

    new_records.append({
        'amenity': label,
        'geomxy': geomxy,
        # Recomputed because the geometry may have been replaced by a centroid.
        'gtype': geomxy.geom_type,
    })

# From here on "features" holds only the simplified records.
ff = gpd.GeoDataFrame(new_records)
features = ff

In [None]:
# Tally the features by (amenity label, geometry type) and print one line per
# combination. The column is named explicitly: no "key_tag" variable is defined
# in this notebook, and the loop below reads the 'amenity' field.
# NOTE(review): rec['count'] relies on value_counts() naming its result column
# "count" -- confirm against the installed pandas version.
vcs = features[['amenity', 'gtype']].value_counts().reset_index()
for rec in vcs.to_dict('records'):
    print('%20s %20s %4d' % (rec['amenity'], rec['gtype'], rec['count']))

In [None]:
# Inspection only: evaluate the type of the value-counts result. The value is
# not stored or used anywhere else.
type(vcs)

In [None]:
# Translation table from raw amenity values to the coarse feature classes we
# keep. Each entry names a class ('label'), the geometry type a feature must
# have to belong to it ('gtype'), and, under 'keys', the accepted values for
# each feature field that is checked.
def _amenity_class(label, gtype, values):
    """Return one translator entry that matches `amenity` against `values`."""
    return {
        'category': 'amenity',
        'label': label,
        'gtype': gtype,
        'keys': {'amenity': list(values)},
    }


_FOOD_AND_DRINK = ['restaurant', 'bar', 'pub', 'cafe', 'coffee', 'nightclub']

_RECREATION = [
    'pitch', 'park', 'fountain', 'playground', 'outdoor_seating', 'stadium',
    'garden',
]

_COMMERCIAL = [
    'bank', 'car_repair', 'cannabis', 'fitness_center', 'alcohol', 'mall',
    'tattoo', 'garden_centre', 'hairdresser', 'clothes', 'fast_food', 'car',
    'books', 'beauty', 'florist', 'bowling_alley', 'swimming_pool', 'theatre',
    'arts_centre', 'arts', 'veterinary', 'appliance', 'music', 'events_venue',
    'erotic', 'butcher', 'car_wash', 'car_rental', 'car_parts', 'furniture',
    'shoes', 'fitness_centre', 'water_park', 'second_hand', 'gas', 'craft',
    'trade', 'gift', 'money_lender', 'storage_rental', 'escape_game',
    'convenience', 'fuel', 'travel_agency', 'wine', 'chocolate', 'dance',
    'beverages',
]

translator = [
    _amenity_class('parking lot', 'Polygon', ['parking']),
    _amenity_class('food and drink', 'Point', _FOOD_AND_DRINK),
    _amenity_class('religious', 'Point', ['place_of_worship']),
    _amenity_class('recreation', 'Point', _RECREATION),
    _amenity_class('commercial', 'Point', _COMMERCIAL),
]


In [None]:
# Apply the translator: compare every feature against each class defined in
# "translator". A class claims a feature when the geometry types agree and
# every field listed under the class's 'keys' is present on the feature with
# one of the accepted values. Claimed features are stored in "keepers" (once
# per claiming class) together with the class label; features that no class
# claims are collected in "missed" for the report.

keepers = []
missed = []

for feature in features.to_dict('records'):

    claiming = [
        fclass
        for fclass in translator
        # Geometry type is checked first, then the class's keys.
        if feature['gtype'] == fclass['gtype']
        and all(
            name in feature and feature[name] in accepted
            for name, accepted in fclass['keys'].items()
        )
    ]

    keepers.extend(
        {
            'category': fclass['category'],
            'label': fclass['label'],
            'gtype': feature['gtype'],
            'geom': feature['geomxy'],
        }
        for fclass in claiming
    )

    if not claiming:
        missed.append({
            'amenity': feature['amenity'],
            'gtype': feature['gtype']
        })

In [None]:
# Report how many features no translator class claimed and, when there are
# any, how they break down by (amenity, gtype), largest group first.
n_missed = len(missed)
print('%d records unmatched' % n_missed)
if n_missed > 0:
    m = pl.DataFrame(missed).select(['amenity', 'gtype'])
    by_kind = m.group_by(['amenity', 'gtype']).agg(pl.len())
    print(by_kind.sort(by='len', descending=True))

In [None]:
# Collect the matched features into a polars frame and print the number of
# records per (label, gtype), largest group first.
df = pl.DataFrame(keepers)
print('%d records matched' % len(df))
pl.Config.set_tbl_rows(25)
per_class = df.group_by(pl.col(['label', 'gtype'])).agg(pl.len().alias('n'))
print(per_class.sort(by='n', descending=True))

In [None]:
# Vis: draw every matched feature on a single 800x800 figure, coloured by its
# label, with one legend entry per label.
#
# Colour values must be valid CSS colour names ('darkgreen', not
# 'dark green'). Labels without an entry here fall back to gray.
# NOTE(review): most keys below do not correspond to labels produced by the
# translator in this notebook -- confirm whether the map should be updated.
colors = {
    'unknown': 'green',
    'apartment': 'red',
    'meadow': 'yellow',
    'recreation': 'darkgreen',
    'residential': 'orange',
    'commercial': 'teal',
    'retail': 'purple',
    'industrial': 'gray',
    'cemetery': 'cyan'
}
used = set()  # labels that already have a legend entry
fig = make_subplots(1, 1)
for rec in df.rows(named=True):
    label = rec['label']
    if label in ['pedestrian way', 'service road']:
        continue
    color = colors.get(label, 'gray')
    # Show the legend entry only for the first shape drawn with each label.
    showlegend = label not in used
    used.add(label)
    draw_shape(rec['geom'], fig, name=label, color=color, showlegend=showlegend)
fig['layout']['width'] = 800
fig['layout']['height'] = 800
fig.show()
