Skip to content
Permalink
Browse files

[processing] Port Delete Duplicate Geometries to c++

  • Loading branch information
nyalldawson committed Dec 30, 2019
1 parent 2433ee5 commit 12006de1aa990a29baa3e696192e49d23daf3d50
@@ -84,9 +84,6 @@ qgis:delaunaytriangulation: >
qgis:deletecolumn: >
This algorithm takes a vector layer and generates a new one that has the exact same content but without the selected columns.

qgis:deleteduplicategeometries: >
This algorithm finds duplicated geometries and removes them. Attributes are not checked, so in case two features have identical geometries but different attributes, only one of them will be added to the result layer.

qgis:deleteholes: >
This algorithm takes a polygon layer and removes holes in polygons. It creates a new vector layer in which polygons with holes have been replaced by polygons with only their external ring. Attributes are not modified.

This file was deleted.

@@ -39,7 +39,6 @@
from .DefineProjection import DefineProjection
from .Delaunay import Delaunay
from .DeleteColumn import DeleteColumn
from .DeleteDuplicateGeometries import DeleteDuplicateGeometries
from .EliminateSelection import EliminateSelection
from .ExecuteSQL import ExecuteSQL
from .ExportGeometryInfo import ExportGeometryInfo
@@ -123,7 +122,6 @@ def getAlgs(self):
DefineProjection(),
Delaunay(),
DeleteColumn(),
DeleteDuplicateGeometries(),
EliminateSelection(),
ExecuteSQL(),
ExportGeometryInfo(),
@@ -1363,7 +1363,7 @@ tests:
name: expected/split_linez_by_length.shp
type: vector

- algorithm: qgis:deleteduplicategeometries
- algorithm: native:deleteduplicategeometries
name: Delete Duplicates with null geometries
params:
INPUT:
@@ -40,6 +40,7 @@ SET(QGIS_ANALYSIS_SRCS
processing/qgsalgorithmconstantraster.cpp
processing/qgsalgorithmconvexhull.cpp
processing/qgsalgorithmdbscanclustering.cpp
processing/qgsalgorithmdeleteduplicategeometries.cpp
processing/qgsalgorithmdensifygeometriesbycount.cpp
processing/qgsalgorithmdensifygeometriesbyinterval.cpp
processing/qgsalgorithmdifference.cpp
@@ -0,0 +1,199 @@
/***************************************************************************
qgsalgorithmdeleteduplicategeometries.cpp
-----------------------------------------
begin : December 2019
copyright : (C) 2019 by Nyall Dawson
email : nyall dot dawson at gmail dot com
***************************************************************************/

/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/

#include "qgsalgorithmdeleteduplicategeometries.h"
#include "qgsvectorlayer.h"
#include "qgsgeometryengine.h"

///@cond PRIVATE

QString QgsDeleteDuplicateGeometriesAlgorithm::name() const
{
return QStringLiteral( "deleteduplicategeometries" );
}

QString QgsDeleteDuplicateGeometriesAlgorithm::displayName() const
{
return QObject::tr( "Delete duplicate geometries" );
}

QStringList QgsDeleteDuplicateGeometriesAlgorithm::tags() const
{
return QObject::tr( "drop,remove,same,points,coincident,overlapping,filter" ).split( ',' );
}

QString QgsDeleteDuplicateGeometriesAlgorithm::group() const
{
return QObject::tr( "Vector general" );
}

QString QgsDeleteDuplicateGeometriesAlgorithm::groupId() const
{
return QStringLiteral( "vectorgeneral" );
}

void QgsDeleteDuplicateGeometriesAlgorithm::initAlgorithm( const QVariantMap & )
{
addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ) ) );
addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Cleaned" ) ) );
addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
}

QString QgsDeleteDuplicateGeometriesAlgorithm::shortHelpString() const
{
return QObject::tr( "This algorithm finds duplicated geometries and removes them.\n\nAttributes are not checked, "
"so in case two features have identical geometries but different attributes, only one of "
"them will be added to the result layer." );
}

QString QgsDeleteDuplicateGeometriesAlgorithm::shortDescription() const
{
return QObject::tr( "Finds duplicated geometries in a layer and removes them." );
}

QgsDeleteDuplicateGeometriesAlgorithm *QgsDeleteDuplicateGeometriesAlgorithm::createInstance() const
{
return new QgsDeleteDuplicateGeometriesAlgorithm();
}

bool QgsDeleteDuplicateGeometriesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback * )
{
mSource.reset( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
if ( !mSource )
throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );

return true;
}

QVariantMap QgsDeleteDuplicateGeometriesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
{
QString destId;
std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, destId, mSource->fields(),
mSource->wkbType(), mSource->sourceCrs() ) );
if ( !sink )
throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );

QgsFeatureIterator it = mSource->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) );

double step = mSource->featureCount() > 0 ? 100.0 / mSource->featureCount() : 0;
QHash< QgsFeatureId, QgsGeometry > geometries;
QSet< QgsFeatureId > nullGeometryFeatures;
QgsSpatialIndex index;
QgsFeature f;
long current = 0;
while ( it.nextFeature( f ) )
{
if ( feedback->isCanceled() )
break;

if ( !f.hasGeometry() )
{
nullGeometryFeatures.insert( f.id() );
}
else
{
geometries.insert( f.id(), f.geometry() );
index.addFeature( f );
}

// overall this loop takes about 10% of time
current++;
feedback->setProgress( 0.10 * current * step );
}

// start by assuming everything is unique, and chop away at this list
QHash< QgsFeatureId, QgsGeometry > uniqueFeatures = geometries;
current = 0;
long removed = 0;

for ( auto it = geometries.constBegin(); it != geometries.constEnd(); ++it )
{
const QgsFeatureId featureId = it.key();
const QgsGeometry geometry = it.value();

if ( feedback->isCanceled() )
break;

if ( !uniqueFeatures.contains( featureId ) )
{
// feature was already marked as a duplicate
}
else
{
const QList<QgsFeatureId> candidates = index.intersects( geometry.boundingBox() );

for ( const QgsFeatureId candidateId : candidates )
{
if ( candidateId == featureId )
continue;

if ( !uniqueFeatures.contains( candidateId ) )
{
// candidate already marked as a duplicate (not sure if this is possible,
// since it would mean the current feature would also have to be a duplicate!
// but let's be safe!)
continue;
}
else if ( geometry.isGeosEqual( geometries.value( candidateId ) ) )
{
// candidate is a duplicate of feature
uniqueFeatures.remove( candidateId );
removed++;
}
}
}

current++;
feedback->setProgress( 0.80 * current * step + 10 ); // takes about 80% of time
}

// now, fetch all the feature attributes for the unique features only
// be super-smart and don't re-fetch geometries
QSet< QgsFeatureId > outputFeatureIds = uniqueFeatures.keys().toSet();
outputFeatureIds.unite( nullGeometryFeatures );
step = outputFeatureIds.empty() ? 1 : 100.0 / outputFeatureIds.size();

QgsFeatureRequest request = QgsFeatureRequest().setFilterFids( outputFeatureIds ).setFlags( QgsFeatureRequest::NoGeometry );
it = mSource->getFeatures( request );
current = 0;
while ( it.nextFeature( f ) )
{
if ( feedback->isCanceled() )
break;

// use already fetched geometry
if ( !nullGeometryFeatures.contains( f.id() ) )
{
f.setGeometry( uniqueFeatures.value( f.id() ) );
}
sink->addFeature( f, QgsFeatureSink::FastInsert );

current++;
feedback->setProgress( 0.10 * current * step + 90 ); // takes about 10% of time
}

feedback->pushInfo( QObject::tr( "%1 duplicate features removed" ).arg( removed ) );

QVariantMap outputs;
outputs.insert( QStringLiteral( "OUTPUT" ), destId );
outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), static_cast< long long >( removed ) );
outputs.insert( QStringLiteral( "RETAINED_COUNT" ), outputFeatureIds.size() );
return outputs;
}

///@endcond

0 comments on commit 12006de

Please sign in to comment.
You can’t perform that action at this time.