/
point_clusterer.py
123 lines (104 loc) · 4.1 KB
/
point_clusterer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import math
from geopandas import GeoDataFrame
from pandas import DataFrame
from shapely.geometry import Point
from movingpandas.geometry_utils import C_EARTH, measure_distance_euclidean
class PointClusterer:
"""
Clusters points using a grid-based approach. Cluster seeds are a regular
point grid.
References
----------
* Andrienko, N., & Andrienko, G. (2011). Spatial generalization and
aggregation of massive movement data. IEEE Transactions on
visualization and computer graphics, 17(2), 205-219.
"""
def __init__(self, points, max_distance, is_latlon):
df = DataFrame(points, columns=["geometry"])
bbox = GeoDataFrame(df).total_bounds
cell_size = max_distance
if is_latlon:
cell_size = cell_size / C_EARTH * 360
self.grid = _Grid(bbox, cell_size)
self.grid.insert_points(points)
self.grid.redistribute_points(points)
def get_clusters(self):
return self.grid.resulting_clusters
class _PointCluster:
def __init__(self, pt):
self.points = [pt]
self.centroid = pt
def add_point(self, pt):
self.points.append(pt)
def delete_points(self):
self.points = []
def recompute_centroid(self):
x = [pt.x for pt in self.points]
y = [pt.y for pt in self.points]
self.centroid = Point(sum(x) / len(x), sum(y) / len(y))
class _Grid:
def __init__(self, bbox, cell_size):
w = bbox[2] - bbox[0]
h = bbox[3] - bbox[1]
self.x_min = bbox[0]
self.y_min = bbox[1]
self.cell_size = cell_size
self.cells = []
# in the rare case that the points are horizontal or vertical,
# fallback to a 1x1 cell matrix
self.n_rows = max(1, int(math.ceil(h / self.cell_size)))
self.n_cols = max(1, int(math.ceil(w / self.cell_size)))
for i in range(0, self.n_cols):
self.cells.append([])
for j in range(0, self.n_rows):
self.cells[i].append(None)
self.resulting_clusters = []
def insert_points(self, points):
for pt in points:
c = self.get_closest_centroid(pt, self.cell_size)
if not c:
g = _PointCluster(pt)
self.resulting_clusters.append(g)
(i, j) = self.get_grid_position(g.centroid)
self.cells[i][j] = g
else:
(i, j) = c
g = self.cells[i][j]
if g:
g.add_point(pt)
g.recompute_centroid()
else:
print("Error: no group in cell {0},{1}".format(i, j))
print(pt)
def get_group(self, centroid):
"""returns the group with the provided centroid"""
for g in self.resulting_clusters:
if g.centroid.compare(centroid):
return g
def get_closest_centroid(self, pt, max_dist=100000000):
(i, j) = self.get_grid_position(pt)
shortest_dist = self.cell_size * 100
nearest_centroid = None
for k in range(max(i - 1, 0), min(i + 2, self.n_cols)):
for m in range(max(j - 1, 0), min(j + 2, self.n_rows)):
if not self.cells[k][m]: # no centroid in this cell yet
continue
dist = measure_distance_euclidean(pt, self.cells[k][m].centroid)
if dist <= max_dist and dist < shortest_dist:
nearest_centroid = (k, m)
shortest_dist = dist
return nearest_centroid
def get_grid_position(self, pt):
i = int(math.floor((pt.x - self.x_min) / self.cell_size))
j = int(math.floor((pt.y - self.y_min) / self.cell_size))
return i, j
def redistribute_points(self, points):
for g in self.resulting_clusters:
g.delete_points()
for pt in points:
(i, j) = self.get_closest_centroid(pt, self.cell_size * 20)
if i is not None and j is not None:
g = self.cells[i][j]
g.add_point(pt)
else:
print("Discarding {}".format(pt))