Skip to content

Commit

Permalink
Polish over lwgeom_remove_repeated_points_in_place and NEWS entry
Browse files Browse the repository at this point in the history
  • Loading branch information
Komzpa committed Jun 6, 2021
1 parent 3555834 commit 423b519
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 114 deletions.
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ PostGIS 3.2.0
- #4881, #4884, Store sign of edge_id for lineal TopoGeometry in relation table
to retain direction (Sandro Santilli)
- #4628, Add an option to disable ANALYZE when loading shapefiles (Stefan Corneliu Petrea)
- #4924, Faster ST_RemoveRepeatedPoints on large multipoints, O(NlogN) instead of O(N^2)
(Aliaksandr Kalenik, Darafei Praliaskouski)

* New features*
- #4841, FindTopology to quickly get a topology record (Sandro Santilli)
Expand Down
4 changes: 2 additions & 2 deletions liblwgeom/cunit/cu_algorithm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1170,7 +1170,7 @@ static void test_lwgeom_remove_repeated_points(void)
modified = lwgeom_remove_repeated_points_in_place(g, 1);
ASSERT_INT_EQUAL(modified, LW_TRUE);
ewkt = lwgeom_to_ewkt(g);
ASSERT_STRING_EQUAL(ewkt, "MULTIPOINT(0 0,10 0,5 5,0 10,10 10)");
ASSERT_STRING_EQUAL(ewkt, "MULTIPOINT(0 0,0 10,5 5,10 0,10 10)");
lwgeom_free(g);
lwfree(ewkt);

Expand All @@ -1187,7 +1187,7 @@ static void test_lwgeom_remove_repeated_points(void)
ASSERT_INT_EQUAL(modified, LW_TRUE);
ewkt = lwgeom_to_ewkt(g);
ASSERT_STRING_EQUAL(
ewkt, "MULTIPOINT(0 0,10 0,5 5,8 5,5 8,8 8,0 10,10 10,50 50,60 50,55 55,58 55,55 58,58 58,50 60,60 60)");
ewkt, "MULTIPOINT(0 0,0 10,5 5,5 8,8 5,8 8,10 0,10 10,50 50,50 60,55 55,55 58,58 55,58 58,60 50,60 60)");

This comment has been minimized.

Copy link
@pramsey

pramsey Jun 10, 2021

Member

"The algorithms implemented by qsort(), qsort_r(), and heapsort() are not stable; that is, if two members compare as equal, their order in the sorted array is undefined." I'm getting a different result, as the second sort on x on my platform is coming up with a different ordering than yours.

This comment has been minimized.

Copy link
@pramsey

pramsey Jun 10, 2021

Member

I've patched things up with normalize in the tests

lwgeom_free(g);
lwfree(ewkt);

Expand Down
212 changes: 102 additions & 110 deletions liblwgeom/lwgeom.c
Original file line number Diff line number Diff line change
Expand Up @@ -1579,145 +1579,137 @@ lwgeom_remove_repeated_points_in_place(LWGEOM *geom, double tolerance)
int geometry_modified = LW_FALSE;
switch (geom->type)
{
/* No-op! Cannot remote points */
case POINTTYPE:
case TRIANGLETYPE:
return geometry_modified;
case LINETYPE:
/* No-op! Cannot remove points */
case POINTTYPE:
case TRIANGLETYPE:
return geometry_modified;
case LINETYPE: {
LWLINE *g = (LWLINE *)(geom);
POINTARRAY *pa = g->points;
uint32_t npoints = pa->npoints;
ptarray_remove_repeated_points_in_place(pa, tolerance, 2);
geometry_modified = npoints != pa->npoints;
/* Invalid input, discard the collapsed line */
if (pa->npoints < 2)
{
LWLINE *g = (LWLINE*)(geom);
POINTARRAY *pa = g->points;
pa->npoints = 0;
geometry_modified = LW_TRUE;
}
break;
}
case POLYGONTYPE: {
uint32_t j = 0;
LWPOLY *g = (LWPOLY *)(geom);
for (uint32_t i = 0; i < g->nrings; i++)
{
POINTARRAY *pa = g->rings[i];
uint32_t npoints = pa->npoints;
ptarray_remove_repeated_points_in_place(pa, tolerance, 2);
geometry_modified = npoints != pa->npoints;
/* Invalid output */
if (pa->npoints == 1 && pa->maxpoints > 1)
ptarray_remove_repeated_points_in_place(pa, tolerance, 4);
geometry_modified |= npoints != pa->npoints;
/* Drop collapsed rings */
if (pa->npoints < 4)
{
/* Use first point as last point */
pa->npoints = 2;
ptarray_copy_point(pa, 0, 1);
geometry_modified = LW_TRUE;
ptarray_free(pa);
continue;
}
break;
g->rings[j++] = pa;
}
case POLYGONTYPE:
/* Update ring count */
g->nrings = j;
break;
}
case MULTIPOINTTYPE: {
double tolsq = tolerance * tolerance;
LWMPOINT *mpt = (LWMPOINT *)geom;

for (uint8_t dim = 0; dim < 2; dim++)
{
uint32_t i, j = 0;
LWPOLY *g = (LWPOLY*)(geom);
for (i = 0; i < g->nrings; i++)
/* sort by y, then by x - this way the result is sorted by x */
qsort(mpt->geoms, mpt->ngeoms, sizeof(LWPOINT *), dim ? cmp_point_x : cmp_point_y);
for (uint32_t i = 0; i < mpt->ngeoms; i++)
{
POINTARRAY *pa = g->rings[i];
int minpoints = 4;
uint32_t npoints = 0;
/* Skip zero'ed out rings */
if(!pa)
if (!mpt->geoms[i])
continue;
npoints = pa->npoints;
ptarray_remove_repeated_points_in_place(pa, tolerance, minpoints);
geometry_modified |= npoints != pa->npoints;
/* Drop collapsed rings */
if(pa->npoints < 4)
{
geometry_modified = LW_TRUE;
ptarray_free(pa);
continue;
}
g->rings[j++] = pa;
}
/* Update ring count */
g->nrings = j;
break;
}
case MULTIPOINTTYPE:
{
double tolsq = tolerance * tolerance;
LWMPOINT *mpt = (LWMPOINT *)geom;

for (uint8_t dim = 0; dim < 2; dim++)
{
qsort(mpt->geoms, mpt->ngeoms, sizeof(LWPOINT *), dim ? cmp_point_y : cmp_point_x);
for (uint32_t i = 0; i < mpt->ngeoms; i++)
const POINT2D *pti = getPoint2d_cp(mpt->geoms[i]->point, 0);

/* check upcoming points if they're within tolerance of current one */
for (uint32_t j = i + 1; j < mpt->ngeoms; j++)
{
if (!mpt->geoms[i])
if (!mpt->geoms[j])
continue;

const POINT2D *pti = getPoint2d_cp(mpt->geoms[i]->point, 0);
for (uint32_t j = i + 1; j < mpt->ngeoms; j++)
{
if (!mpt->geoms[j])
continue;
const POINT2D *ptj = getPoint2d_cp(mpt->geoms[j]->point, 0);

const POINT2D *ptj = getPoint2d_cp(mpt->geoms[j]->point, 0);
if ((dim ? ptj->y - pti->y : ptj->x - pti->x) > tolerance)
break;
/* check that the point is in the strip of tolerance around the point */
if ((dim ? ptj->x - pti->x : ptj->y - pti->y) > tolerance)
break;

if (distance2d_sqr_pt_pt(pti, ptj) <= tolsq)
{
lwpoint_free(mpt->geoms[j]);
mpt->geoms[j] = NULL;
}
/* remove any upcoming point that is within tolerance circle */
if (distance2d_sqr_pt_pt(pti, ptj) <= tolsq)
{
lwpoint_free(mpt->geoms[j]);
mpt->geoms[j] = NULL;
geometry_modified = LW_TRUE;
}
}

uint32_t i = 0;
for (uint32_t j = 0; j < mpt->ngeoms; j++)
{
if (mpt->geoms[j])
mpt->geoms[i++] = mpt->geoms[j];
}

geometry_modified |= mpt->ngeoms != i;
mpt->ngeoms = i;
}

break;
/* compactify array of points */
uint32_t i = 0;
for (uint32_t j = 0; j < mpt->ngeoms; j++)
if (mpt->geoms[j])
mpt->geoms[i++] = mpt->geoms[j];
mpt->ngeoms = i;
}

case CIRCSTRINGTYPE:
/* Dunno how to handle these, will return untouched */
return geometry_modified;
break;
}

/* Can process most multi* types as generic collection */
case MULTILINETYPE:
case MULTIPOLYGONTYPE:
case TINTYPE:
case COLLECTIONTYPE:
/* Curve types we mostly ignore, but allow the linear */
/* portions to be processed by recursing into them */
case MULTICURVETYPE:
case CURVEPOLYTYPE:
case MULTISURFACETYPE:
case COMPOUNDTYPE:
case CIRCSTRINGTYPE:
/* Dunno how to handle these, will return untouched */
return geometry_modified;

/* Can process most multi* types as generic collection */
case MULTILINETYPE:
case MULTIPOLYGONTYPE:
case TINTYPE:
case COLLECTIONTYPE:
/* Curve types we mostly ignore, but allow the linear */
/* portions to be processed by recursing into them */
case MULTICURVETYPE:
case CURVEPOLYTYPE:
case MULTISURFACETYPE:
case COMPOUNDTYPE: {
uint32_t i, j = 0;
LWCOLLECTION *col = (LWCOLLECTION *)(geom);
for (i = 0; i < col->ngeoms; i++)
{
uint32_t i, j = 0;
LWCOLLECTION *col = (LWCOLLECTION*)(geom);
for (i = 0; i < col->ngeoms; i++)
LWGEOM *g = col->geoms[i];
if (!g)
continue;
geometry_modified |= lwgeom_remove_repeated_points_in_place(g, tolerance);
/* Drop zero'ed out geometries */
if (lwgeom_is_empty(g))
{
LWGEOM *g = col->geoms[i];
if (!g) continue;
geometry_modified |= lwgeom_remove_repeated_points_in_place(g, tolerance);
/* Drop zero'ed out geometries */
if(lwgeom_is_empty(g))
{
lwgeom_free(g);
continue;
}
col->geoms[j++] = g;
lwgeom_free(g);
continue;
}
/* Update geometry count */
col->ngeoms = j;
break;
}
default:
{
lwerror("%s: unsupported geometry type: %s", __func__, lwtype_name(geom->type));
break;
col->geoms[j++] = g;
}
/* Update geometry count */
col->ngeoms = j;
break;
}
default: {
lwerror("%s: unsupported geometry type: %s", __func__, lwtype_name(geom->type));
break;
}
}

if (geometry_modified)
{
lwgeom_drop_bbox(geom);
}
return geometry_modified;
}

Expand Down
4 changes: 2 additions & 2 deletions regress/core/remove_repeated_points_expected
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
3|MULTILINESTRING((0 0,1 1,2 2,3 3),(5 5,4 4,2 2))
4|POLYGON((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5))
5|MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5)),((50 50,50 60,60 60,60 50,50 50),(55 55,55 58,58 58,58 55,55 55)))
6|MULTIPOINT(0 0,10 0,5 5,8 5,5 8,8 8,0 10,10 10,50 50,60 50,55 55,58 55,55 58,58 58,50 60,60 60)
7|GEOMETRYCOLLECTION(MULTIPOINT(0 0,10 0,5 5,8 5,5 8,8 8,0 10,10 10,50 50,60 50,55 55,58 55,55 58,58 58,50 60,60 60),MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5)),((50 50,50 60,60 60,60 50,50 50),(55 55,55 58,58 58,58 55,55 55))))
6|MULTIPOINT(0 0,0 10,5 5,5 8,8 5,8 8,10 0,10 10,50 50,50 60,55 55,55 58,58 55,58 58,60 50,60 60)
7|GEOMETRYCOLLECTION(MULTIPOINT(0 0,0 10,5 5,5 8,8 5,8 8,10 0,10 10,50 50,50 60,55 55,55 58,58 55,58 58,60 50,60 60),MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5)),((50 50,50 60,60 60,60 50,50 50),(55 55,55 58,58 58,58 55,55 55))))
8|POINT(0 0)
9|CURVEPOLYGON ZM (CIRCULARSTRING ZM (-2 0 0 0,-1 -1 1 2,0 0 2 4,1 -1 3 6,2 0 4 8,0 2 2 4,-2 0 0 0),(-1 0 1 2,0 0.5 2 4,1 0 3 6,0 1 3 4,-1 0 1 2))
10|LINESTRING(0 0,0 0)
Expand Down

0 comments on commit 423b519

Please sign in to comment.