Skip to content

Commit

Permalink
ST_ClusterKMeans: handle effective K=0 when all the inputs are EMPTY
Browse files Browse the repository at this point in the history
Closes #4101
Closes #254



git-svn-id: http://svn.osgeo.org/postgis/trunk@16605 b70326c6-7e19-0410-871a-916f4a2858ee
  • Loading branch information
Komzpa committed Jun 3, 2018
1 parent d8d92f2 commit b8d56f0
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 17 deletions.
37 changes: 20 additions & 17 deletions liblwgeom/lwkmeans.c
Expand Up @@ -135,20 +135,8 @@ kmeans_init(POINT2D** objs, int* clusters, uint32_t n, POINT2D** centers, POINT2
double max_dst = -1;
double dst_p1, dst_p2;

assert(k > 0);

/* k = 1: first non-null is ok, and input check guarantees there's one */
if (k == 1)
{
for (i = 0; i < n; i++)
{
if (!objs[i]) continue;
centers_raw[0] = *((POINT2D *)objs[i]);
centers[0] = &(centers_raw[0]);
return;
}
assert(0);
}
/* k=0, k=1: "clustering" is just input validation */
assert(k > 1);

/* k >= 2: find two distant points greedily */
for (i = 1; i < n; i++)
Expand Down Expand Up @@ -333,9 +321,24 @@ lwgeom_cluster_2d_kmeans(const LWGEOM** geoms, uint32_t n, uint32_t k)
k = num_non_empty;
}

kmeans_init(objs, clusters, n, centers, centers_raw, k);

result = kmeans(objs, clusters, n, centers, k);
if (k > 1)
{
kmeans_init(objs, clusters, n, centers, centers_raw, k);
result = kmeans(objs, clusters, n, centers, k);
}
else
{
/* k=0: everything is unclusterable
* k=1: mark up NULL and non-NULL */
for (i = 0; i < n; i++)
{
if (k == 0 || !objs[i])
clusters[i] = KMEANS_NULL_CLUSTER;
else
clusters[i] = 0;
}
result = LW_TRUE;
}

/* Before error handling, might as well clean up all the inputs */
lwfree(objs);
Expand Down
10 changes: 10 additions & 0 deletions regress/cluster.sql
Expand Up @@ -50,3 +50,13 @@ select '#4100a', count(distinct result) from (SELECT ST_ClusterKMeans(foo1.the_g
( ST_GeomFromEWKT('SRID=4326;POLYGON((-71.1261 42.2703 1,-71.1257 42.2703 1,-71.1257 42.2701 1,-71.126 42.2701 1,-71.1261 42.2702 1,-71.1261 42.2703 1))') ) ) As g(geom) CROSS JOIN generate_series(1,3) As i GROUP BY i )) As foo1 LIMIT 10) kmeans;

-- #4100b: two identical points clustered with k = 2.
-- The expected output shows one distinct cluster id, preceded by a NOTICE
-- about duplicate inputs.
SELECT
    '#4100b',
    count(DISTINCT cluster_id)
FROM (
    SELECT ST_ClusterKMeans(pts.geom, 2) OVER () AS cluster_id
    FROM (
        VALUES
            ('POINT(0 0)'::geometry),
            ('POINT(0 0)')
    ) AS pts (geom)
) AS clustered;


-- #4101a: every input geometry is EMPTY while 3 clusters are requested.
-- The expected output shows one distinct result value, preceded by a NOTICE
-- that there are fewer non-empty geometries than requested clusters.
SELECT
    '#4101a',
    count(DISTINCT cluster_id)
FROM (
    SELECT ST_ClusterKMeans(empties.the_geom, 3) OVER () AS cluster_id
    FROM (
        SELECT ST_GeomFromText('POINT EMPTY', 4326) AS the_geom
        UNION ALL
        SELECT ST_GeomFromText('MULTIPOINT EMPTY', 4326) AS the_geom
        UNION ALL
        SELECT ST_GeomFromText('MULTIPOLYGON EMPTY', 4326) AS the_geom
        UNION ALL
        SELECT ST_GeomFromText('LINESTRING EMPTY', 4326) AS the_geom
        UNION ALL
        SELECT ST_GeomFromText('MULTILINESTRING EMPTY', 4326) AS the_geom
    ) AS empties
    LIMIT 10
) AS clustered;

-- #4101b: two EMPTY points clustered with k = 2 (no non-empty input at all).
-- The expected output shows one distinct result value, preceded by a NOTICE
-- that there are fewer non-empty geometries than requested clusters.
SELECT
    '#4101b',
    count(DISTINCT cluster_id)
FROM (
    SELECT ST_ClusterKMeans(pts.geom, 2) OVER () AS cluster_id
    FROM (
        VALUES
            ('POINT EMPTY'::geometry),
            ('POINT EMPTY')
    ) AS pts (geom)
) AS clustered;
4 changes: 4 additions & 0 deletions regress/cluster_expected
Expand Up @@ -34,3 +34,7 @@ NOTICE: kmeans_init: there are at least 3 duplicate inputs, number of output cl
#4100a|1
NOTICE: kmeans_init: there are at least 2 duplicate inputs, number of output clusters may be less than you requested
#4100b|1
NOTICE: lwgeom_cluster_2d_kmeans: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data
#4101a|1
NOTICE: lwgeom_cluster_2d_kmeans: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data
#4101b|1

0 comments on commit b8d56f0

Please sign in to comment.