# Collect shapes and generate relations

In this folder, we are building a dataset of geospatial shapes that have certain prescribed
relations, like "polygon intersection" or "point-on-line". 
The actual entities and their specific attributes don't matter. 

This dataset is to be a collection of geometries that spans all six standard types --
Point, LineString, Polygon, MultiPoint, MultiLineString, and MultiPolygon.

All shapes will be pulled from OpenStreetMap using the `osmnx` package. 
Since OSM is short on the Multi* type of entities, I will create them 
by combining random subsets of the other types.

We will not be encoding things in lon/lat space.
Instead, we will focus on rectangular subsets of, say, 10 km x 10 km.
The mapping between lon/lat and local x/y will be done based on a local
transverse Mercator projection.




In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import osmnx
import shapely
import pyproj

import plotly
from plotly.subplots import make_subplots
from plotly.graph_objects import Scatter

from geo_encodings.vis import px_draw

In [None]:
import sys
sys.path.append('../src')
from harvesters import ShapeHarvester
from generators import RelationGenerator

## Setup

In [None]:
# Region of interest: a lon/lat center plus a radius from which shapes are
# pulled. Exactly one center is active; uncomment another line to switch areas.
# center_lat, center_lon = 43.000273, -71.088411 # Brentwood NH
# center_lat, center_lon = 43.283307, -72.576623 # Chester VT
# center_lat, center_lon = 42.631024, -70.993787 # Boxford MA
# center_lat, center_lon = 41.688562, -73.030388 # Terryville CT
# center_lat, center_lon = 42.681636, -74.920189 # Middlefield NY
(center_lat, center_lon) = (42.006502, -71.808369)  # Thompson CT

extent = 20_000.0  # meters

# Ten-entry plotting palette (credit: suggested by ChatGPT). The trailing
# notes are only hints about what each color suits.
colors = [
    "#1f77b4",  # blue: water features
    "#ff7f0e",  # orange: roads or paths
    "#2ca02c",  # green: natural features such as parks
    "#d62728",  # red: important landmarks or warnings
    "#9467bd",  # purple: alternative highlight
    "#8c564b",  # brown: earthy terrain tones
    "#e377c2",  # pink: soft accent for points of interest
    "#7f7f7f",  # gray: neutral elements
    "#bcbd22",  # yellow-green: highlighted areas
    "#17becf",  # cyan: water-adjacent features or paths
]


In [None]:
# Harvest points, linestrings and polygons around the configured center.
# The class imported from `harvesters` is `ShapeHarvester`; the bare name
# `Harvester` is never defined in this notebook and raised a NameError.
harvester = ShapeHarvester(center_lon, center_lat, extent)
shapes = harvester.harvest(['points', 'linestrings', 'polygons'])
# Show how many shapes of each type were collected.
shapes['type'].value_counts()

In [None]:
# Harvest tiled polygons around the same center, using twice the extent.
# The class imported from `harvesters` is `ShapeHarvester`; the bare name
# `Harvester` is never defined in this notebook and raised a NameError.
harvester = ShapeHarvester(center_lon, center_lat, extent * 2)
tiles = harvester.harvest(['tiled-polygons'])
# Show how many shapes of each type were collected.
tiles['type'].value_counts()

## Generate shape pairs with given relationships

In [None]:
# Preview cell: draw a handful of example shape pairs for a single relation.
# Top row shows pairs generated with sense=True, bottom row with sense=False.
aoi_width = 100
aoi_height = 100
relation = 'point-on-linestring'
# relation = 'point-in-polygon'
# relation = 'linestring-intersects-linestring'
# relation = 'linestring-intersects-polygon'
# relation = 'polygon-intersects-polygon'
# relation = 'polygon-borders-polygon'

# The border relation draws from the tiled polygons; every other relation
# draws from the general harvested shapes.
fodder = tiles if relation == 'polygon-borders-polygon' else shapes
# `RelationGenerator` is the class imported from `generators`; the bare name
# `Generator` is never defined in this notebook.
generator = RelationGenerator(fodder, bounds=[0, 0, aoi_width, aoi_height], scale=25)

ncases = 4

fig = make_subplots(2, ncases)

# (subplot row, sense flag, extra keyword arguments for generate)
row_specs = (
    (1, True, {'max_attempts': 100}),
    (2, False, {}),
)

for irow, sense, generate_kwargs in row_specs:
    for i in range(ncases):
        a, b = generator.generate(relation, sense, **generate_kwargs)
        # The case-building cell checks for None results from generate(), so
        # a failed attempt is assumed possible here too: leave the panel empty
        # rather than crash on `None.geom_type`.
        if a is None or b is None:
            continue
        px_draw(a, fig, irow=irow, icol=i+1, name=a.geom_type, color='red')
        px_draw(b, fig, irow=irow, icol=i+1, name=b.geom_type, color='blue')

fig.update_layout(title=relation, width=1000, height=500)

# Apply the AOI range to every subplot axis, however many panels exist
# (previously hard-coded to 8 axes, which only matched ncases == 4).
fig.update_xaxes(range=[0, aoi_width])
fig.update_yaxes(range=[0, aoi_height])
fig
# fig.print_grid()

In [None]:
def _collect_cases(generator, relation, sense, count, **generate_kwargs):
    """Generate labeled shape pairs for one relation and one sense.

    Calls ``generator.generate(relation, sense, **generate_kwargs)`` `count`
    times and keeps only the attempts in which both returned shapes are not
    None, so fewer than `count` records may come back.

    Args:
        generator: object exposing ``generate(relation, sense, ...)`` that
            returns a pair of shapes.
        relation: relation name, e.g. 'point-in-polygon'.
        sense: flag passed to ``generate`` and stored as the record's label.
        count: number of generation attempts.
        **generate_kwargs: forwarded to ``generate`` (e.g. ``max_attempts``).

    Returns:
        A list of dicts with keys 'relation', 'sense', 'shape_a', 'shape_b';
        both shapes are snapped to a 0.001 grid via ``shapely.set_precision``.
    """
    collected = []
    for _ in range(count):
        aa, bb = generator.generate(relation, sense, **generate_kwargs)
        if aa is None or bb is None:
            continue
        collected.append({
            'relation': relation,
            'sense': sense,
            'shape_a': shapely.set_precision(aa, 0.001),
            'shape_b': shapely.set_precision(bb, 0.001),
        })
    return collected


cases = []

#
# Most cases
#

# `RelationGenerator` is the class imported from `generators`; the bare name
# `Generator` is never defined in this notebook.
generator = RelationGenerator(shapes, bounds=[0, 0, 100, 100], scale=25)

relations = [
    'point-on-linestring',
    'point-in-polygon',
    'linestring-intersects-linestring',
    'linestring-intersects-polygon',
    'polygon-intersects-polygon',
]

for relation in relations:
    print(relation)
    # point-in-polygon is given a larger attempt budget for sense=True cases.
    ma = 100 if relation == 'point-in-polygon' else 20
    cases.extend(_collect_cases(generator, relation, True, 100, max_attempts=ma))
    # The sense=False loop previously referenced an undefined name `g2` for
    # shape_b; the helper always uses the second returned shape.
    cases.extend(_collect_cases(generator, relation, False, 200))

#
# polygon border case
#

# The tiled polygons are harvested into `tiles`; `tiled_polygons` was never
# defined.
generator = RelationGenerator(tiles, bounds=[0, 0, 100, 100], scale=25)

relations = [
    'polygon-borders-polygon',
]

for relation in relations:
    print(relation)
    cases.extend(_collect_cases(generator, relation, True, 100))
    # Previously this loop generated with sense=True but labeled the records
    # 'sense': False; the generated pair and its label now agree.
    cases.extend(_collect_cases(generator, relation, False, 200))

print(len(cases))


In [None]:
import geopandas

# Tabulate how many cases exist for each combination of the non-shape columns.
a = geopandas.GeoDataFrame(cases)
labels_only = a.drop(columns=['shape_a', 'shape_b'])
labels_only.value_counts().sort_index()

In [None]:
# Write the de-duplicated cases to a GeoJSON file whose name carries an
# 8-character geohash of the harvest center.
import pygeohash

tag = pygeohash.encode(center_lat, center_lon, 8)
fname = f'relations-{tag}.geojson'

unique_cases = geopandas.GeoDataFrame(cases).drop_duplicates()
out = geopandas.GeoDataFrame(unique_cases)
# NOTE(review): the frame holds two shape columns ('shape_a', 'shape_b') and
# no active geometry column is set here -- confirm that to_file() accepts
# this with the installed geopandas version.
out.to_file(fname, driver='GeoJSON')
print(fname)