### VERGE: Vector-mode Regional Geospatial Embedding

# Define Areas Of Interest

An AOI is some lon/lat box that will be further sub-divided into tiles,
which are the units of analysis in this project.

This notebook defines the AOIs.


## Processing setup

In [4]:
# Google colab setup
# Mounts Google Drive, makes the project folder the working directory, and
# installs the extra packages this notebook needs.
import os
from google.colab import drive
# Expose Google Drive under /content/drive so the project files are reachable.
drive.mount('/content/drive')
# Root folder of the project; later cells build their data paths from it.
project_home = '/content/drive/MyDrive/Projects/verge'
# Work from the project root.
os.chdir(project_home)
# NOTE(review): the packages are installed unpinned; consider `%pip` with
# pinned versions so a fresh runtime reproduces the same environment.
!pip install osmnx pygeohash geo-encodings rtree

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Collecting rtree
  Downloading rtree-1.4.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.1 kB)
Downloading rtree-1.4.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (541 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m541.1/541.1 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rtree
Successfully installed rtree-1.4.0


In [None]:
# # Local processing setup
# project_home = '..'

## Notebook Setup

In [5]:
import pandas as pd
import numpy as np
import geopandas
from rtree import index
import folium
import json
import shapely
import os

## Parameters

In [6]:
# Which region of interest (ROI) this run works on.
roi_name = 'newengland'

# Shared data directory, common to every ROI.
data_home = project_home + '/data'

# Data directory that belongs to the selected ROI (a sub-folder of data_home).
roi_home = data_home + '/' + roi_name


In [7]:
# Load the ROI definition from the JSON file in the ROI directory.
fname = roi_home + '/roi.json'
with open(fname) as source:
    roi = json.load(source)

# Keep the bounding-box corners in short names: (lon0, lat0) is the lower
# corner and (lon1, lat1) the upper corner, as used by the cells below.
lon0, lon1 = roi['lon0'], roi['lon1']
lat0, lat1 = roi['lat0'], roi['lat1']


In [11]:
# Read the shapefile that defines the urban areas.
# An alternative source path is kept here for reference:
# fname = '%s/tl_rd22_us_uac20/tl_rd22_us_uac20.shp' % data_home
fname = roi_home + '/urban/urban.shp'
print(fname)
global_gdf = geopandas.read_file(fname)
print('%d urban area polygons' % global_gdf.shape[0])

/content/drive/MyDrive/Projects/verge/data/newengland/urban/urban.shp
90 urban area polygons


In [12]:
# Put all of the urban-area geometries into an R-tree spatial index.
# Each entry is keyed by the row's position in `global_gdf`, so query results
# can be used directly as positional indices.
spatial_index = index.Index()

for idx, geom in enumerate(global_gdf.geometry):
    # Rows with a missing geometry, or with an empty one, have no usable
    # bounding box and must not be inserted into the index.
    if geom is None or geom.is_empty:
        continue
    spatial_index.insert(idx, geom.bounds)  # bounds = (minx, miny, maxx, maxy)


In [13]:
# Select the urban polygons whose bounding boxes intersect the ROI box.
query_bounds = (lon0, lat0, lon1, lat1)  # (minx, miny, maxx, maxy)
matches = [hit for hit in spatial_index.intersection(query_bounds)]

# The index ids are row positions in global_gdf, hence positional lookup.
regional_gdf = global_gdf.iloc[matches]
print('%d urban polygons in our region [%s]' % (regional_gdf.shape[0], roi['name']))

90 urban polygons in our region [newengland]


In [14]:
# Visual check: draw the regional urban polygons on a map centred on the ROI.
center_lon = 0.5 * (lon0 + lon1)
center_lat = 0.5 * (lat0 + lat1)
map_center = [center_lat, center_lon]  # folium expects [lat, lon]

m = folium.Map(location=map_center, zoom_start=7)

# Only polygonal geometries are drawn; anything else is skipped.
polygonal_types = ('Polygon', 'MultiPolygon')
for geom in regional_gdf['geometry']:
    if geom.geom_type in polygonal_types:
        folium.GeoJson(geom).add_to(m)

# Last expression of the cell: renders the map.
m

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Save it.
# Writes the regional subset of urban polygons as an ESRI Shapefile under the
# ROI directory.
# NOTE(review): the target '<roi_home>/urban' looks like the same directory the
# load cell reads 'urban/urban.shp' from, so running this presumably overwrites
# this notebook's own input -- confirm that is intended.
fname = '%s/urban' % (roi_home)
try:
    regional_gdf.to_file(fname, driver="ESRI Shapefile", mode='w')
except PermissionError:
    # Best-effort: report the failure and let the notebook continue.
    print("Write failed. Probably because the file exists and can't be overwritten.")

## Get a list of AOIs
An AOI is a lon/lat box that has some minimum degree of overlap
with an urban area.

In [15]:
# Flatten every regional geometry into simple polygons and combine them into
# a single MultiPolygon that covers all urban areas in the region.

polys = []
for geom in regional_gdf['geometry']:
    kind = geom.geom_type
    if kind == 'MultiPolygon':
        # Unpack the parts so the result is one flat list of Polygons.
        polys.extend(geom.geoms)
    elif kind == 'Polygon':
        polys.append(geom)
    # Any other geometry type is ignored.

all_urban = shapely.geometry.MultiPolygon(polys)

In [16]:
# Loop over possible AOIs, checking their overlap with urban areas.

# Size of one candidate AOI box in degrees (same in longitude and latitude).
dlon = dlat = 0.1
# Minimum fraction of a box's area that must be urban for it to be kept.
coverage_threshold = 0.2
# Running count of accepted AOIs, and the accepted AOI records themselves.
n, aois = 0, []

def aoi_name(lon, lat):
    """Build the tag that identifies an AOI from its lower-corner coordinates.

    Each coordinate is encoded as its absolute value times ten, rounded to a
    whole number and zero-padded (4 digits for longitude, 3 for latitude),
    followed by a hemisphere letter: 'w'/'e' for longitude, 's'/'n' for
    latitude. Zero counts as east / north.

    Example: aoi_name(-71.3, 42.1) -> '0713w-421n'
    """
    lon_part = '%04.0f%s' % (np.abs(lon) * 10, 'w' if lon < 0 else 'e')
    lat_part = '%03.0f%s' % (np.abs(lat) * 10, 's' if lat < 0 else 'n')
    return '-'.join([lon_part, lat_part])

# Scan a regular grid of candidate AOI boxes inside the ROI and keep those
# whose urban coverage reaches `coverage_threshold`.
#
# The grid is laid out on integer (column, row) indices and every edge is
# computed as index * step. Stepping with `x += step` lets float rounding
# error accumulate across the grid, which makes the strict `< lon1` / `< lat1`
# tests unpredictable for boxes that end on the ROI boundary.
#
# NOTE: `aoi_name` encodes coordinates at 0.1-degree resolution, so tags are
# only guaranteed unique while dlon and dlat are multiples of 0.1.

# Edges are snapped to this many decimal places to strip float noise.
coord_decimals = 6

# Index of the first grid line at or above the ROI's lower bounds. The grid is
# aligned to multiples of the step itself (not a hard-coded 0.1), and the
# ratio is rounded before `ceil` so that noise on an already-aligned bound
# cannot push it to the next index.
first_col = int(np.ceil(np.round(lon0 / dlon, coord_decimals)))
first_row = int(np.ceil(np.round(lat0 / dlat, coord_decimals)))

col = first_col
while True:
    aoi_lon0 = np.round(col * dlon, coord_decimals)
    aoi_lon1 = np.round((col + 1) * dlon, coord_decimals)
    # A box must end strictly inside the ROI; otherwise the column scan is done.
    if not aoi_lon1 < lon1:
        break

    row = first_row
    while True:
        aoi_lat0 = np.round(row * dlat, coord_decimals)
        aoi_lat1 = np.round((row + 1) * dlat, coord_decimals)
        if not aoi_lat1 < lat1:
            break

        # Closed ring around the candidate box.
        coords = [
            (aoi_lon0, aoi_lat0),
            (aoi_lon1, aoi_lat0),
            (aoi_lon1, aoi_lat1),
            (aoi_lon0, aoi_lat1),
            (aoi_lon0, aoi_lat0)
        ]
        aoi = shapely.geometry.Polygon(coords)

        # Get overlap with urban areas.
        overlap = aoi.intersection(all_urban)

        # Proportion of the box that is urban.
        coverage = overlap.area / aoi.area

        if coverage >= coverage_threshold:
            nom = aoi_name(aoi_lon0, aoi_lat0)
            # The corners are already snapped, so they are stored as-is.
            aois.append({
                'aoi_tag': nom,
                'lon0': aoi_lon0,
                'lat0': aoi_lat0,
                'lon1': aoi_lon1,
                'lat1': aoi_lat1,
            })
            n += 1

        row += 1
    col += 1
print('%d aois' % n)

263 aois


In [17]:
# Write the accepted AOIs to a CSV file in the ROI directory (no index column).
fname = roi_home + '/aois.csv'
aoi_table = pd.DataFrame(aois)
aoi_table.to_csv(fname, index=False)
print('%d records to %s' % (len(aois), fname))

263 records to /content/drive/MyDrive/Projects/verge/data/newengland/aois.csv
