# Shape clustering using multi-point proximity embedding (MPPE)

This notebook gives an example of how MPPE can be used to cluster
a set of overlapping geometric objects based on their spatial similarity. 

## Package setup

In [None]:
import numpy as np
from odyssey_geo.encoders import MPPEncoder

## Additional imports

In [None]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_array
from sklearn.cluster import DBSCAN
import shapely
import shapely.wkt
import plotly
import plotly.subplots
from plotly.graph_objs import Scatter, Figure


## Create an embedding for a domain

In [None]:
# Corners of the rectangular working domain, passed to the encoder as [x0, y0, x1, y1].
(x0, y0), (x1, y1) = (0, 0), (100, 100)

# Encoder settings, forwarded unchanged to MPPEncoder.
# NOTE(review): presumably `resolution` controls the spacing of the reference points and
# `scale` the distance scale of the proximity measure -- confirm against the MPPEncoder docs.
resolution = scale = 20

encoder = MPPEncoder(
    domain=[x0, y0, x1, y1],
    resolution=resolution,
    scale=scale,
    center=True,
)

# Report how many reference points the encoder exposes via `n_ref`.
print('%d reference points in encoder' % encoder.n_ref)

## Create a few line segments
Each segment passes through the center of our domain at a given angle.
Each segment has one of two lengths.

In [None]:
# Centre of the domain: the midpoint of each axis. Half the extent, (x1 - x0) / 2, only
# coincides with the midpoint when the domain starts at the origin, so use the midpoint
# directly to stay correct for any domain.
cx = (x0 + x1) / 2
cy = (y0 + y1) / 2

# NOTE(review): `line_length` is not read by any cell visible in this notebook; it is kept
# only so that code outside this view that may rely on it keeps working.
line_length = (x1 - x0) * 0.2

# One segment per (angle, radius) pair. Angles are in degrees (converted with np.radians
# below); each radius is half the segment length, since the end points lie at
# centre +/- radius along the segment's direction.
angles = [10, 15, 20, 100, 105, 110]
radii = [40, 20, 20, 40, 40, 40]

# zip() silently truncates to the shorter list, so fail loudly on a mismatch instead.
assert len(angles) == len(radii), 'angles and radii must have the same length'

things = []

for angle, radius in zip(angles, radii):
    theta = np.radians(angle)

    # Offset from the centre to either end point of the segment.
    dx = radius * np.cos(theta)
    dy = radius * np.sin(theta)
    xx0, yy0 = cx + dx, cy + dy
    xx1, yy1 = cx - dx, cy - dy

    # Keep the raw end-point coordinates (for plotting) next to the shapely geometry
    # (for encoding).
    wkt = 'LINESTRING(%f %f, %f %f)' % (xx0, yy0, xx1, yy1)
    thing = {
        'xx': np.array([xx0, xx1]),
        'yy': np.array([yy0, yy1]),
        'geom': shapely.wkt.loads(wkt)
    }
    things.append(thing)
    print(thing)
    

In [None]:
# Preview all segments in a single-panel figure before clustering them.
fig = plotly.subplots.make_subplots(rows=1, cols=1)

for thing in things:
    tr = Scatter(x=thing['xx'], y=thing['yy'], mode='markers+lines', marker={'color': 'black'})
    # add_trace(row=, col=) is the supported replacement for the deprecated append_trace.
    fig.add_trace(tr, row=1, col=1)

# Fixed square canvas so that the angles of the segments are not visually distorted.
fig.update_layout(width=500, height=500)
fig

## Cluster the lines based on their encodings

In [None]:
# Encode every segment and stack the encodings into one matrix, one row per segment.
# NOTE(review): assumes `encoder.encode(...).sparse()` returns a sparse container whose
# `todense()` yields a row that np.vstack can stack -- confirm against the encoder API.
encoded_rows = [
    encoder.encode(entry['geom']).sparse().todense()
    for entry in things
]
xx = np.vstack(encoded_rows)

# Density-based clustering on the encoded vectors. Per the scikit-learn DBSCAN docs,
# min_samples=1 makes every sample a core point, so no segment is labelled as noise (-1).
model = DBSCAN(eps=0.5, min_samples=1)
model.fit(xx)

# One integer cluster label per segment, in the same order as `things`.
print(model.labels_)


In [None]:
# Palette for the cluster labels. It is cycled with a modulo below so that a clustering
# with more labels than colours still plots instead of raising IndexError.
colors = ['red', 'blue', 'green']
fig = plotly.subplots.make_subplots(rows=1, cols=1)

# Labels and segments are in the same order, so pair them up directly.
for label, thing in zip(model.labels_, things):
    color = colors[label % len(colors)]
    tr = Scatter(x=thing['xx'], y=thing['yy'], name='cluster %d' % label,
                 mode='markers+lines', marker={'color': color})
    # add_trace(row=, col=) is the supported replacement for the deprecated append_trace.
    fig.add_trace(tr, row=1, col=1)

# Same fixed square canvas as the unclustered preview, for easy comparison.
fig.update_layout(width=500, height=500)

fig

In [None]:
# Display the stacked encoding of the first segment (row 0 of the matrix fed to DBSCAN).
xx[0]