# Apply the trained embedding model to selected locations

The real purpose of this notebook is to develop an end-to-end process
that pulls geo data, computes MPP encodings, and then applies the initial
and final embedding models.

## Processing Setup

In [None]:
# Google colab
# import os
# from google.colab import drive
# drive.mount('/content/drive')
# project_home = '/content/drive/MyDrive/Projects/verge'
# os.chdir(project_home)
# !pip install geo_encodings

In [1]:
# Local processing setup.
# The project root is addressed relative to the current working directory;
# every data and module path below is built from this value.
project_home = '..'

## Notebook Setup

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from typing import List, Tuple, Optional

import pickle
import json
import copy
import pandas as pd
import numpy as np
import pyproj
import shapely
import osmnx
import geopandas

import sys
sys.path.append('%s/03-embeddings' % project_home)
from embedderv5 import *

sys.path.append(project_home)
from utils.verge import rules


## Parameters

In [3]:
# Unique identifier of the model run to be used.
run_id = '201b'

# Name of the region of interest (ROI) to process.
roi_name = 'ne-laptop'

# General-purpose data directory, under the project root.
data_home = '%s/data' % project_home

# Data directory specific to the selected ROI.
roi_home = '%s/data/%s' % (project_home, roi_name)



## Preliminaries

In [4]:
# Load the ROI definition (a JSON document) from the ROI-specific directory.
fname = '%s/roi.json' % roi_home
with open(fname) as source:
    roi = json.load(source)

# Keep the two values the tiling / encoding steps need close at hand.
tile_size, encoding_resolution = roi['tile_size'], roi['encoding_resolution']

# Show the full definition as the cell output.
roi

{'name': 'ne-laptop',
 'lon0': -73.564321,
 'lat0': 41.253746,
 'lon1': -68.058533,
 'lat1': 45.116468,
 'proj_def': '\n+proj=tmerc +lat_0=43.185107 +lon_0=-70.81142700000001\n+k=1.0 +x_0=231000.0 +y_0=211000.0 +datum=WGS84 +units=m +no_defs\n',
 'tile_size': 2000,
 'tile_shift': 1000,
 'encoding_resolution': 100}

400 elements in encodings


In [6]:
# Local map projection helpers, driven by the definition in the ROI file.
def get_projections(proj_def):
    """Build forward and inverse coordinate transforms for a local projection.

    Args:
        proj_def: PROJ string describing the local projected CRS.

    Returns:
        A ``(forward, inverse)`` pair of callables. ``forward`` is the
        ``transform`` method of an EPSG:4326 -> local transformer and
        ``inverse`` is the same for the opposite direction. Both
        transformers are created with ``always_xy=True``.
    """
    local_crs = pyproj.CRS.from_proj4(proj_def)
    geographic_crs = pyproj.CRS.from_epsg(4326)
    to_local = pyproj.Transformer.from_crs(geographic_crs, local_crs, always_xy=True)
    to_geographic = pyproj.Transformer.from_crs(local_crs, geographic_crs, always_xy=True)
    return to_local.transform, to_geographic.transform


# Instantiate the transforms for the current ROI.
proj_forward, proj_inverse = get_projections(roi['proj_def'])

In [7]:
# Load the coastline polygons stored alongside the ROI definition.
fname = '%s/coastlines' % roi_home
coastlines_gdf = geopandas.read_file(fname)

# Report how many polygons were read.
print('%d coastline polygons' % len(coastlines_gdf))

def get_land_water(bounds, features, coastlines=None):
    """Compute the part of ``bounds`` covered by coastline polygons, minus water.

    The result is ``bounds`` intersected with the union of all coastline
    polygons, with every polygonal feature tagged ``natural == 'water'``
    subtracted from it.

    Args:
        bounds: Polygon delimiting the area of interest.
            NOTE(review): presumably in the same CRS as the coastline and
            feature geometries (lon/lat at the call site) -- confirm.
        features: Table of OSM features with a ``geometry`` column and,
            optionally, a ``natural`` column. If ``natural`` is absent
            no water polygons are subtracted.
        coastlines: Optional table with a ``geometry`` column of coastline
            polygons. Defaults to the notebook-level ``coastlines_gdf``.

    Returns:
        The resulting geometry (may be empty).
    """
    if coastlines is None:
        # Backward-compatible default: the coastline table read by the notebook.
        coastlines = coastlines_gdf

    # Clip the merged coastline polygons to the area of interest. The
    # intersection returns a new geometry, so no defensive copy is needed.
    merged_coastlines = shapely.union_all(coastlines['geometry'].values)
    landwater = bounds.intersection(merged_coastlines)

    # Subtract out any polygonal water feature.
    for _, f in features.iterrows():
        geom = f['geometry']
        if geom is None:
            # Rows without a geometry cannot contribute any water area.
            continue
        if geom.geom_type not in ('Polygon', 'MultiPolygon'):
            continue
        # `.get` avoids a KeyError when the query returned no 'natural' column.
        if f.get('natural') == 'water':
            landwater = shapely.difference(landwater, geom)

    return landwater

3514 coastline polygons


## Processing


### Set a tile center location

In [8]:
# Tile center in geographic coordinates: Stratham Subaru.
center_lat = 43.000659
center_lon = -70.921196

### Pull OSM data for the area around this location

In [9]:
# Bounding box for the OSM query: half a tile plus a 200-unit margin on
# every side of the center, measured in projected coordinates.
buffer = 200 + roi['tile_size'] / 2
center_x, center_y = proj_forward(center_lon, center_lat)
x0 = center_x - buffer
y0 = center_y - buffer
x1 = center_x + buffer
y1 = center_y + buffer

# Convert the two opposite corners back to lon/lat for the query.
lon0, lat0 = proj_inverse(x0, y0)
lon1, lat1 = proj_inverse(x1, y1)
query_bounds = [lon0, lat0, lon1, lat1]
print(query_bounds)


[-70.93589085254257, 42.98984217038265, -70.90649599145738, 43.01147392753617]


In [10]:
# Pull every geospatial entity we care about inside the bounding box.
# `osmnx` is already imported with the rest of the notebook imports.
# Keys deliberately left out for now: 'aeroway', 'power', 'building'.
osm_keys = [
    'landuse',
    'place',
    'highway',
    'railway',
    'bridge',
    'tunnel',
    'natural',
    'waterway',
    'landcover',
    'amenity',
    'shop',
    'leisure',
]

# Each key maps to True, i.e. no restriction on the tag's value.
tags = dict.fromkeys(osm_keys, True)

features = osmnx.features.features_from_bbox(query_bounds, tags=tags).reset_index()
print('%d features from OSM' % len(features))



1823 features from OSM


In [11]:
# Just retain the relevant columns.
# Not every key is guaranteed to come back from the OSM query, so only the
# columns that are actually present are kept.
columns_in_rules = {'geometry', 'amenity', 'highway', 'landuse', 'railway', 'water', 'waterway', 'natural'}
columns_in_features = set(features.columns)

# Walk the frame's own columns instead of listing a set intersection: set
# iteration order for strings can change between kernel sessions, which made
# the column order of the result non-reproducible.
columns_to_keep = [column for column in features.columns if column in columns_in_rules]
features = features[columns_to_keep]
features.head(3)

Unnamed: 0,water,natural,highway,waterway,geometry,amenity,landuse
0,,,traffic_signals,,POINT (-70.92247 42.99919),,
1,,,motorway_junction,,POINT (-70.93157 42.99691),,
2,,,traffic_signals,,POINT (-70.92713 42.99249),,


### Re-organize geo info for this tile

In [13]:
# Pick out the geospatial entities ("gents") that match one of the rules and
# store them together with their projected geometry.
gents = []
for feature in features.to_dict('records'):

    # Work in projected coordinates; nothing to do for empty geometries.
    geomxy = shapely.ops.transform(proj_forward, feature['geometry'])
    if geomxy.is_empty:
        continue
    gtype = geomxy.geom_type

    # A feature yields one gent per rule it satisfies.
    for rule in rules:
        # The rule must target this geometry type...
        if gtype != rule['gtype']:
            continue

        # ...the feature must carry the rule's OSM key...
        osm_key = rule['osm_key']
        if osm_key not in feature:
            continue

        # ...and the tag value (compared as a string) must be accepted.
        osm_value = str(feature[osm_key])
        if osm_value not in rule['osm_values']:
            continue

        gents.append(dict(
            feature=feature,
            category=rule['gent_category'],
            label=rule['gent_label'],
            geomxy=geomxy,
            gtype=gtype,
        ))
print('%d features selected' % len(gents))


300 features selected


In [14]:
# Build the "land/water" polygon for the query area and register it as one
# more entity.
# Closed ring around the lon/lat query box (first corner repeated at the end).
lons = [lon0, lon1, lon1, lon0, lon0]
lats = [lat0, lat0, lat1, lat1, lat0]
lonlat_bounds = shapely.Polygon([(lon, lat) for lon, lat in zip(lons, lats)])

# Computed in lon/lat, then moved into projected coordinates.
landwater = get_land_water(lonlat_bounds, features)
landwaterxy = shapely.ops.transform(proj_forward, landwater)

gents.append(dict(
    category='waterway',
    label='land',
    geomxy=landwaterxy,
    gtype=landwaterxy.geom_type,
))


In [16]:
# Tile footprint in projected coordinates: a square of side `tile_size`
# centered on the chosen location (no extra margin this time).
buffer = roi['tile_size'] / 2
center_x, center_y = proj_forward(center_lon, center_lat)
x0, x1 = center_x - buffer, center_x + buffer
y0, y1 = center_y - buffer, center_y + buffer

# Closed ring: lower-left, lower-right, upper-right, upper-left, lower-left.
xx = [x0, x1, x1, x0, x0]
yy = [y0, y0, y1, y1, y0]
tile_bbox = shapely.Polygon([(x, y) for x, y in zip(xx, yy)])

219 geospatial entities


In [19]:
# Clip every entity to the tile and shift it so that the tile's lower-left
# corner (x0, y0) becomes the origin.
tile_gents = []
for gent in gents:
    clipped = gent['geomxy'].intersection(tile_bbox)
    geomxy = shapely.affinity.translate(clipped, xoff=-x0, yoff=-y0)

    # Entities that do not touch the tile are dropped.
    if geomxy.is_empty:
        continue

    # The offsets are kept alongside the shifted geometry.
    tile_gents.append(dict(
        category=gent['category'],
        label=gent['label'],
        geometry=geomxy,
        gtype=gent['gtype'],
        xoff=x0,
        yoff=y0,
    ))
print('%d geospatial entities' % len(tile_gents))
pd.DataFrame(tile_gents).head(3)

219 geospatial entities


Unnamed: 0,category,label,geometry,gtype,xoff,yoff
0,amenity,commercial,POINT (418.4354396635317 1.3595715950650629),Point,221049.424774,189514.680608
1,amenity,commercial,POINT (1194.7591855522187 1242.6530638959666),Point,221049.424774,189514.680608
2,amenity,commercial,POINT (671.1252223148767 236.52948677458335),Point,221049.424774,189514.680608


### Apply MPP encoding to all entities in this tile

In [20]:
# Set up the MPP encoder used for every entity in the tile.
from geo_encodings import MPPEncoder

# The encoder region spans the tile in tile-local coordinates, i.e.
# [0, 0, tile_size, tile_size], at the resolution given in the ROI file.
encoder_options = dict(
    region=[0, 0, tile_size, tile_size],
    resolution=encoding_resolution,
    center=True,
)
encoder = MPPEncoder(**encoder_options)

# The encoder's length is the number of elements in each encoding.
geo_encoding_dim = len(encoder)
print('%d elements in encodings' % geo_encoding_dim)


400 elements in encodings


In [27]:
# Encode each entity's tile-local geometry and store the result on the
# entity itself under the 'encoding' key.
for gent in tile_gents:
    gent.update(encoding=encoder.encode(gent['geometry']).values())


### Get one-hot label vectors for each entity

In [28]:
# We will also need the one-hot label vectors for each entity.
# TODO: not implemented yet -- the loop body is a placeholder and computes
# nothing. Its only effect is to leave `gent` bound to the last element of
# `tile_gents`, which the print below displays for inspection.
# NOTE(review): if `tile_gents` is empty, `gent` keeps whatever value an
# earlier cell left behind.
for gent in tile_gents:
    pass
print(gent)

{'category': 'waterway', 'label': 'land', 'geometry': <POLYGON ((1.074 101.314, 16.452 110.357, 27.075 120.14, 55.089 131.875, 90....>, 'gtype': 'MultiPolygon', 'xoff': 221049.42477433695, 'yoff': 189514.6806076507, 'encoding': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1.,

In [None]:
# Get initial embedding for this tile.

In [None]:
# Get final embedding for this tile.