diff --git a/NEWS b/NEWS
index 1bbbc4aeba1..2c0318d3b6f 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,11 @@
+PostGIS 3.1.0alpha4
+2020/xx/xx
+Only tickets not included in 3.1.0alpha3
+
+ * New features*
+ - #4801, ST_ClusterKMeans supports weights in POINT[Z]M geometries (Darafei Praliaskouski)
+
+
PostGIS 3.1.0alpha3
2020/11/19
Only tickets not included in 3.1.0alpha2
@@ -118,7 +126,6 @@ PostGIS 3.1.0alpha1
- #4149, ST_Simplify(geom, 0) is now O(N).
ST_Affine (ST_Translate, ST_TransScale, ST_Rotate) optimized.
ST_SnapToGrid optimized. (Darafei Praliaskouski)
- - #4574, Link Time Optimizations enabled (Darafei Praliaskouski)
- #4578, Add parallellism and cost properties to brin functions (Raúl Marín)
- #4473, Silence yacc warnings (Raúl Marín)
- #4589, Disable C asserts when building without "--enable-debug" (Raúl Marín)
diff --git a/doc/reference_cluster.xml b/doc/reference_cluster.xml
index 1ae50b3c2ee..6d0a693bce4 100644
--- a/doc/reference_cluster.xml
+++ b/doc/reference_cluster.xml
@@ -234,8 +234,9 @@ GEOMETRYCOLLECTION(LINESTRING(6 6,7 7))
K-means
cluster number for each input geometry. The distance used for clustering is the
distance between the centroids for 2D geometries, and distance between bounding box centers for 3D geometries.
+ For POINT inputs, M coordinate will be treated as weight of input and has to be larger than 0.
- Enhanced: 3.1.0 Support for 3D geometries
+ Enhanced: 3.1.0 Support for 3D geometries and weights
Availability: 2.3.0
diff --git a/liblwgeom/lwkmeans.c b/liblwgeom/lwkmeans.c
index bdb244a2675..c5c6a4ef961 100644
--- a/liblwgeom/lwkmeans.c
+++ b/liblwgeom/lwkmeans.c
@@ -19,23 +19,33 @@
*/
#define KMEANS_MAX_ITERATIONS 1000
+inline static double
+distance3d_sqr_pt4d_pt4d(const POINT4D *p1, const POINT4D *p2)
+{
+ double hside = p2->x - p1->x;
+ double vside = p2->y - p1->y;
+ double zside = p2->z - p1->z;
+
+ return hside * hside + vside * vside + zside * zside;
+}
+
static uint8_t
-update_r(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t k)
+update_r(POINT4D *objs, int *clusters, uint32_t n, POINT4D *centers, uint32_t k)
{
uint8_t converged = LW_TRUE;
for (uint32_t i = 0; i < n; i++)
{
- POINT3D obj = objs[i];
+ POINT4D obj = objs[i];
/* Initialize with distance to first cluster */
- double curr_distance = distance3d_sqr_pt_pt(&obj, ¢ers[0]);
+ double curr_distance = distance3d_sqr_pt4d_pt4d(&obj, ¢ers[0]);
int curr_cluster = 0;
/* Check all other cluster centers and find the nearest */
- for (uint32_t cluster = 1; cluster < k; cluster++)
+ for (int cluster = 1; cluster < k; cluster++)
{
- double distance = distance3d_sqr_pt_pt(&obj, ¢ers[cluster]);
+ double distance = distance3d_sqr_pt4d_pt4d(&obj, ¢ers[cluster]);
if (distance < curr_distance)
{
curr_distance = distance;
@@ -44,44 +54,42 @@ update_r(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t k)
}
/* Store the nearest cluster this object is in */
- if (clusters[i] != (int)curr_cluster)
+ if (clusters[i] != curr_cluster)
{
converged = LW_FALSE;
- clusters[i] = (int)curr_cluster;
+ clusters[i] = curr_cluster;
}
}
return converged;
}
static void
-update_means(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t *weights, uint32_t k)
+update_means(POINT4D *objs, int *clusters, uint32_t n, POINT4D *centers, uint32_t k)
{
- memset(weights, 0, sizeof(uint32_t) * k);
- memset(centers, 0, sizeof(POINT3D) * k);
+ memset(centers, 0, sizeof(POINT4D) * k);
for (uint32_t i = 0; i < n; i++)
{
int cluster = clusters[i];
- centers[cluster].x += objs[i].x;
- centers[cluster].y += objs[i].y;
- centers[cluster].z += objs[i].z;
- weights[cluster] += 1;
+ centers[cluster].x += objs[i].x * objs[i].m;
+ centers[cluster].y += objs[i].y * objs[i].m;
+ centers[cluster].z += objs[i].z * objs[i].m;
+ centers[cluster].m += objs[i].m;
}
for (uint32_t i = 0; i < k; i++)
{
- if (weights[i])
+ if (centers[i].m)
{
- centers[i].x /= weights[i];
- centers[i].y /= weights[i];
- centers[i].z /= weights[i];
+ centers[i].x /= centers[i].m;
+ centers[i].y /= centers[i].m;
+ centers[i].z /= centers[i].m;
}
}
}
static uint8_t
-kmeans(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t k)
+kmeans(POINT4D *objs, int *clusters, uint32_t n, POINT4D *centers, uint32_t k)
{
uint8_t converged = LW_FALSE;
- uint32_t *weights = lwalloc(sizeof(uint32_t) * k);
for (uint32_t i = 0; i < KMEANS_MAX_ITERATIONS; i++)
{
@@ -89,16 +97,15 @@ kmeans(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t k)
converged = update_r(objs, clusters, n, centers, k);
if (converged)
break;
- update_means(objs, clusters, n, centers, weights, k);
+ update_means(objs, clusters, n, centers, k);
}
- lwfree(weights);
if (!converged)
lwerror("%s did not converge after %d iterations", __func__, KMEANS_MAX_ITERATIONS);
return converged;
}
static void
-kmeans_init(POINT3D *objs, uint32_t n, POINT3D *centers, uint32_t k)
+kmeans_init(POINT4D *objs, uint32_t n, POINT4D *centers, uint32_t k)
{
double *distances;
uint32_t p1 = 0, p2 = 0;
@@ -114,8 +121,8 @@ kmeans_init(POINT3D *objs, uint32_t n, POINT3D *centers, uint32_t k)
for (i = 1; i < n; i++)
{
/* if we found a larger distance, replace our choice */
- dst_p1 = distance3d_sqr_pt_pt(&objs[i], &objs[p1]);
- dst_p2 = distance3d_sqr_pt_pt(&objs[i], &objs[p2]);
+ dst_p1 = distance3d_sqr_pt4d_pt4d(&objs[i], &objs[p1]);
+ dst_p2 = distance3d_sqr_pt4d_pt4d(&objs[i], &objs[p2]);
if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
{
if (dst_p1 > dst_p2)
@@ -152,7 +159,7 @@ kmeans_init(POINT3D *objs, uint32_t n, POINT3D *centers, uint32_t k)
/* initialize array with distance to first object */
for (j = 0; j < n; j++)
- distances[j] = distance3d_sqr_pt_pt(&objs[j], ¢ers[0]);
+ distances[j] = distance3d_sqr_pt4d_pt4d(&objs[j], ¢ers[0]);
distances[p1] = -1;
distances[p2] = -1;
@@ -170,7 +177,7 @@ kmeans_init(POINT3D *objs, uint32_t n, POINT3D *centers, uint32_t k)
continue;
/* update minimal distance with previosuly accepted cluster */
- current_distance = distance3d_sqr_pt_pt(&objs[j], ¢ers[i - 1]);
+ current_distance = distance3d_sqr_pt4d_pt4d(&objs[j], ¢ers[i - 1]);
if (current_distance < distances[j])
distances[j] = current_distance;
@@ -199,7 +206,7 @@ int *
lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
{
uint32_t num_non_empty = 0;
- uint8_t result = LW_FALSE;
+ uint8_t converged = LW_FALSE;
assert(k > 0);
assert(n > 0);
@@ -214,7 +221,7 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
}
/* An array of objects to be analyzed. */
- POINT3D *objs = lwalloc(sizeof(POINT3D) * n);
+ POINT4D *objs = lwalloc(sizeof(POINT4D) * n);
/* Array to mark unclusterable objects. Will be returned as KMEANS_NULL_CLUSTER. */
uint8_t *geom_valid = lwalloc(sizeof(uint8_t) * n);
@@ -225,14 +232,15 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
memset(clusters, 0, sizeof(int) * n);
/* An array of clusters centers for the algorithm. */
- POINT3D *centers = lwalloc(sizeof(POINT3D) * k);
- memset(centers, 0, sizeof(POINT3D) * k);
+ POINT4D *centers = lwalloc(sizeof(POINT4D) * k);
+ memset(centers, 0, sizeof(POINT4D) * k);
/* Prepare the list of object pointers for K-means */
for (uint32_t i = 0; i < n; i++)
{
const LWGEOM *geom = geoms[i];
- POINT3D out = {0, 0, 0};
+ /* Unset M values will be 1 */
+ POINT4D out = {0, 0, 0, 1};
/* Null/empty geometries get geom_valid=LW_FALSE set earlier with memset */
if ((!geom) || lwgeom_is_empty(geom))
@@ -245,6 +253,13 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
out.y = lwpoint_get_y(lwgeom_as_lwpoint(geom));
if (lwgeom_has_z(geom))
out.z = lwpoint_get_z(lwgeom_as_lwpoint(geom));
+ if (lwgeom_has_m(geom))
+ {
+ out.m = lwpoint_get_m(lwgeom_as_lwpoint(geom));
+ if (out.m <= 0)
+ lwerror("%s has an input point geometry with weight in M less or equal to 0",
+ __func__);
+ }
}
else if (!lwgeom_has_z(geom))
{
@@ -291,9 +306,9 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
memset(clusters_dense, 0, sizeof(int) * num_non_empty);
kmeans_init(objs, num_non_empty, centers, k);
- result = kmeans(objs, clusters_dense, num_non_empty, centers, k);
+ converged = kmeans(objs, clusters_dense, num_non_empty, centers, k);
- if (result)
+ if (converged)
{
uint32_t d = 0;
for (uint32_t i = 0; i < n; i++)
@@ -309,7 +324,7 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
/* k=0: everything is unclusterable */
for (uint32_t i = 0; i < n; i++)
clusters[i] = KMEANS_NULL_CLUSTER;
- result = LW_TRUE;
+ converged = LW_TRUE;
}
else
{
@@ -321,7 +336,7 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
else
clusters[i] = 0;
}
- result = LW_TRUE;
+ converged = LW_TRUE;
}
/* Before error handling, might as well clean up all the inputs */
@@ -330,7 +345,7 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
lwfree(geom_valid);
/* Good result */
- if (result)
+ if (converged)
return clusters;
/* Bad result, not going to need the answer */