# Shape clustering using multi-point proximity embedding (MPPE)

This notebook gives an example of how MPPE can be used to cluster
a set of overlapping geometric objects based on their spatial similarity. 

## Package setup

In [None]:
from geo_encodings.encoders import MPPEncoder

## Setup

In [None]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_array
from sklearn.cluster import DBSCAN
import shapely
import shapely.wkt
import plotly
import plotly.subplots
from plotly.graph_objs import Scatter, Figure

from geo_encodings import MPPEncoder


## Create an embedding for a domain

In [None]:
# Bounds of the domain, handed to the encoder as region=[x0, y0, x1, y1].
x0 = y0 = 0
x1 = y1 = 100

# Encoder settings, passed through unchanged to MPPEncoder.
resolution = 20
scale = 20

encoder = MPPEncoder(
    region=[x0, y0, x1, y1],
    resolution=resolution,
    scale=scale,
    center=True,
)

# len(encoder) is reported as the number of reference points.
n_ref_points = len(encoder)
print('%d reference points in encoder' % n_ref_points)

## Create a few line segments
Each segment is centered on the middle of our domain and drawn at a given angle.
Each segment has one of two lengths (40 or 80 units, i.e. a half-length of 20 or 40).

In [None]:
# Center of the domain. This is the midpoint of the bounds, not half of the
# extent, so the lines stay centered even if the domain does not start at 0.
cx = (x0 + x1) / 2
cy = (y0 + y1) / 2

# NOTE(review): line_length is not referenced anywhere else in this cell; the
# segment lengths are controlled by `radii` below.
line_length = (x1 - x0) * 0.2

# Orientation of each line in degrees (converted with np.radians below) and the
# matching half-length: each segment extends `radius` from the center in both
# directions.
angles = [10, 15, 20, 100, 105, 110]
radii = [40, 20, 20, 40, 40, 40]
things = []

for angle, radius in zip(angles, radii):
    theta = np.radians(angle)

    # Offset from the center to one endpoint; the other endpoint is mirrored.
    dx = radius * np.cos(theta)
    dy = radius * np.sin(theta)
    xx0, yy0 = cx + dx, cy + dy
    xx1, yy1 = cx - dx, cy - dy

    # Keep both the raw coordinates (for plotting) and the parsed geometry
    # (for encoding).
    wkt = 'LINESTRING(%f %f, %f %f)' % (xx0, yy0, xx1, yy1)
    thing = {
        'xx': np.array([xx0, xx1]),
        'yy': np.array([yy0, yy1]),
        'geom': shapely.wkt.loads(wkt)
    }
    things.append(thing)
    print(thing)
    

In [None]:
# Draw the lines that we just created.

fig = plotly.subplots.make_subplots(rows=1, cols=1)

# Encoder reference points, drawn as grey crosses for context.
tr = Scatter(
    x=encoder.ref_x, y=encoder.ref_y, mode='markers', name='ref points',
    marker_symbol='cross-thin-open', marker_color='grey'
)
# add_trace with row/col is the replacement for the deprecated append_trace.
fig.add_trace(tr, row=1, col=1)

for k, thing in enumerate(things):
    # All lines share one legend group; only the first one adds a legend
    # entry, so the legend shows a single "shapes" item.
    show = k == 0
    tr = Scatter(
        x=thing['xx'], y=thing['yy'],
        mode='lines', marker={'color': 'black'},
        name='shapes', legendgroup='shapes', showlegend=show
    )
    fig.add_trace(tr, row=1, col=1)

fig['layout']['width'] = 500
fig['layout']['height'] = 480
fig['layout']['xaxis1']['title'] = 'x-coordinate'
fig['layout']['yaxis1']['title'] = 'y-coordinate'
# Axis ranges follow the domain bounds used to build the encoder.
fig['layout']['xaxis1']['range'] = [x0, x1]
fig['layout']['yaxis1']['range'] = [y0, y1]

fig

## Cluster the lines based on their encodings

In [None]:
# Encode every line, then stack the encodings with np.vstack to form the
# data matrix that is handed to the clustering model.
encodings = [encoder.encode(thing['geom']).values() for thing in things]
xx = np.vstack(encodings)

# Fit a DBSCAN model on the encodings and report the label assigned to each line.
model = DBSCAN(eps=0.5, min_samples=1).fit(xx)
print('cluster labels:', model.labels_)


In [None]:
# Plot the lines colored by cluster.

colors = ['red', 'blue', 'green']
fig = plotly.subplots.make_subplots(rows=1, cols=1)

# Encoder reference points, drawn as grey crosses for context.
tr = Scatter(
    x=encoder.ref_x, y=encoder.ref_y, mode='markers', name='ref points',
    marker_symbol='cross-thin-open', marker_color='grey'
)
# add_trace with row/col is the replacement for the deprecated append_trace.
fig.add_trace(tr, row=1, col=1)

already_seen = set()
for label, thing in zip(model.labels_, things):
    label = int(label)
    if label < 0:
        # DBSCAN marks noise samples with -1; show them separately instead of
        # letting the negative index silently pick the last palette color.
        name, color = 'noise', 'black'
    else:
        name = 'cluster %d' % (label + 1)
        # Cycle through the palette so more clusters than colors cannot
        # raise an IndexError.
        color = colors[label % len(colors)]

    # Only the first line of each cluster adds a legend entry; the shared
    # legendgroup makes that entry toggle every line of the cluster.
    show = label not in already_seen
    already_seen.add(label)
    tr = Scatter(
        x=thing['xx'], y=thing['yy'], name=name,
        mode='markers+lines', marker={'color': color},
        legendgroup=name, showlegend=show
    )
    fig.add_trace(tr, row=1, col=1)

fig['layout']['width'] = 500
fig['layout']['height'] = 480
fig['layout']['xaxis1']['title'] = 'x-coordinate'
fig['layout']['yaxis1']['title'] = 'y-coordinate'
# Axis ranges follow the domain bounds used to build the encoder.
fig['layout']['xaxis1']['range'] = [x0, x1]
fig['layout']['yaxis1']['range'] = [y0, y1]

fig