-
-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FEATURE][processing] New dbscan spatial clustering algorithm
Implements an optimised DBSCAN density based scanning cluster approach for clustering 2d point features.
- Loading branch information
1 parent
e9f85f5
commit c21fe59
Showing
4 changed files
with
375 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
301 changes: 301 additions & 0 deletions
301
src/analysis/processing/qgsalgorithmdbscanclustering.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,301 @@ | ||
/*************************************************************************** | ||
qgsalgorithmdbscanclustering.cpp | ||
--------------------- | ||
begin : July 2018 | ||
copyright : (C) 2018 by Nyall Dawson | ||
email : nyall dot dawson at gmail dot com | ||
***************************************************************************/ | ||
|
||
/*************************************************************************** | ||
* * | ||
* This program is free software; you can redistribute it and/or modify * | ||
* it under the terms of the GNU General Public License as published by * | ||
* the Free Software Foundation; either version 2 of the License, or * | ||
* (at your option) any later version. * | ||
* * | ||
***************************************************************************/ | ||
|
||
#include "qgsalgorithmdbscanclustering.h" | ||
#include "qgsspatialindexkdbush.h" | ||
#include <unordered_set> | ||
|
||
///@cond PRIVATE | ||
|
||
QString QgsDbscanClusteringAlgorithm::name() const | ||
{ | ||
return QStringLiteral( "dbscanclustering" ); | ||
} | ||
|
||
QString QgsDbscanClusteringAlgorithm::displayName() const | ||
{ | ||
return QObject::tr( "DBSCAN clustering" ); | ||
} | ||
|
||
QString QgsDbscanClusteringAlgorithm::shortDescription() const | ||
{ | ||
return QObject::tr( "Clusters point features using a density based scan algorithm." ); | ||
} | ||
|
||
QStringList QgsDbscanClusteringAlgorithm::tags() const | ||
{ | ||
return QObject::tr( "clustering,clusters,density,based,points" ).split( ',' ); | ||
} | ||
|
||
QString QgsDbscanClusteringAlgorithm::group() const | ||
{ | ||
return QObject::tr( "Vector analysis" ); | ||
} | ||
|
||
QString QgsDbscanClusteringAlgorithm::groupId() const | ||
{ | ||
return QStringLiteral( "vectoranalysis" ); | ||
} | ||
|
||
void QgsDbscanClusteringAlgorithm::initAlgorithm( const QVariantMap & ) | ||
{ | ||
addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), | ||
QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorPoint ) ); | ||
addParameter( new QgsProcessingParameterNumber( QStringLiteral( "MIN_SIZE" ), QObject::tr( "Minimum cluster size" ), | ||
QgsProcessingParameterNumber::Integer, 5, false, 1 ) ); | ||
addParameter( new QgsProcessingParameterDistance( QStringLiteral( "EPS" ), | ||
QObject::tr( "Maximum distance between clustered points" ), 1, QStringLiteral( "INPUT" ), false, 0 ) ); | ||
|
||
auto dbscanStarParam = qgis::make_unique<QgsProcessingParameterBoolean>( QStringLiteral( "DBSCAN*" ), | ||
QObject::tr( "Treat border points as noise (DBSCAN*)" ), false, true ); | ||
dbscanStarParam->setFlags( dbscanStarParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced ); | ||
addParameter( dbscanStarParam.release() ); | ||
|
||
auto fieldNameParam = qgis::make_unique<QgsProcessingParameterString>( QStringLiteral( "FIELD_NAME" ), | ||
QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) ); | ||
fieldNameParam->setFlags( fieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced ); | ||
addParameter( fieldNameParam.release() ); | ||
|
||
|
||
addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorPoint ) ); | ||
} | ||
|
||
QString QgsDbscanClusteringAlgorithm::shortHelpString() const | ||
{ | ||
return QObject::tr( "Clusters point features based on a 2D implementation of Density-based spatial clustering of applications with noise (DBSCAN) algorithm.\n\n" | ||
"The algorithm requires two parameters, a minimum cluster size (“minPts”), and the maximum distance allowed between clustered points (“eps”)." ); | ||
} | ||
|
||
QgsDbscanClusteringAlgorithm *QgsDbscanClusteringAlgorithm::createInstance() const | ||
{ | ||
return new QgsDbscanClusteringAlgorithm(); | ||
} | ||
|
||
struct KDBushDataEqualById | ||
{ | ||
bool operator()( const QgsSpatialIndexKDBushData &a, const QgsSpatialIndexKDBushData &b ) const | ||
{ | ||
return a.id == b.id; | ||
} | ||
}; | ||
|
||
struct KDBushDataHashById | ||
{ | ||
std::size_t operator()( const QgsSpatialIndexKDBushData &a ) const | ||
{ | ||
return std::hash< QgsFeatureId > {}( a.id ); | ||
} | ||
}; | ||
|
||
bool operator <( const QgsSpatialIndexKDBushData &data, const QgsFeatureId id ) | ||
{ | ||
return data.id < id; | ||
}; | ||
|
||
bool operator <( const QgsFeatureId id, const QgsSpatialIndexKDBushData &data ) | ||
{ | ||
return id < data.id; | ||
}; | ||
|
||
QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap ¶meters, QgsProcessingContext &context, QgsProcessingFeedback *feedback ) | ||
{ | ||
std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) ); | ||
if ( !source ) | ||
throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) ); | ||
|
||
const std::size_t minSize = static_cast< std::size_t>( parameterAsInt( parameters, QStringLiteral( "MIN_SIZE" ), context ) ); | ||
const double eps = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context ); | ||
const bool borderPointsAreNoise = parameterAsBool( parameters, QStringLiteral( "DBSCAN*" ), context ); | ||
|
||
QgsFields outputFields = source->fields(); | ||
const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context ); | ||
QgsFields newFields; | ||
newFields.append( QgsField( clusterFieldName, QVariant::Int ) ); | ||
outputFields = QgsProcessingUtils::combineFields( outputFields, newFields ); | ||
|
||
QString dest; | ||
std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) ); | ||
if ( !sink ) | ||
throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) ); | ||
|
||
// build spatial index | ||
feedback->pushInfo( QObject::tr( "Building spatial index" ) ); | ||
QgsSpatialIndexKDBush index( *source, feedback ); | ||
if ( feedback->isCanceled() ) | ||
return QVariantMap(); | ||
|
||
// dbscan! | ||
feedback->pushInfo( QObject::tr( "Analysing clusters" ) ); | ||
std::unordered_map< QgsFeatureId, int> idToCluster; | ||
idToCluster.reserve( index.size() ); | ||
QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) ); | ||
const long featureCount = source->featureCount(); | ||
|
||
dbscan( minSize, eps, borderPointsAreNoise, featureCount, features, index, idToCluster, feedback ); | ||
|
||
// write clusters | ||
const double writeStep = featureCount > 0 ? 10.0 / featureCount : 1; | ||
features = source->getFeatures(); | ||
int i = 0; | ||
QgsFeature feat; | ||
while ( features.nextFeature( feat ) ) | ||
{ | ||
i++; | ||
if ( feedback->isCanceled() ) | ||
{ | ||
break; | ||
} | ||
|
||
feedback->setProgress( 90 + i * writeStep ); | ||
QgsAttributes attr = feat.attributes(); | ||
auto cluster = idToCluster.find( feat.id() ); | ||
if ( cluster != idToCluster.end() ) | ||
{ | ||
attr << cluster->second; | ||
} | ||
else | ||
{ | ||
attr << QVariant(); | ||
} | ||
feat.setAttributes( attr ); | ||
sink->addFeature( feat, QgsFeatureSink::FastInsert ); | ||
} | ||
|
||
QVariantMap outputs; | ||
outputs.insert( QStringLiteral( "OUTPUT" ), dest ); | ||
return outputs; | ||
} | ||
|
||
|
||
void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize, | ||
const double eps, | ||
const bool borderPointsAreNoise, | ||
const long featureCount, | ||
QgsFeatureIterator features, | ||
QgsSpatialIndexKDBush &index, | ||
std::unordered_map< QgsFeatureId, int> &idToCluster, | ||
QgsProcessingFeedback *feedback ) | ||
{ | ||
const double step = featureCount > 0 ? 90.0 / featureCount : 1; | ||
|
||
std::unordered_set< QgsFeatureId > visited; | ||
visited.reserve( index.size() ); | ||
|
||
QgsFeature feat; | ||
int clusterIdx = 0; | ||
int i = 0; | ||
|
||
while ( features.nextFeature( feat ) ) | ||
{ | ||
if ( feedback->isCanceled() ) | ||
{ | ||
break; | ||
} | ||
|
||
if ( !feat.hasGeometry() ) | ||
{ | ||
feedback->setProgress( ++i * step ); | ||
continue; | ||
} | ||
|
||
if ( visited.find( feat.id() ) != visited.end() ) | ||
{ | ||
// already visited! | ||
continue; | ||
} | ||
|
||
QgsPointXY point; | ||
if ( QgsWkbTypes::flatType( feat.geometry().wkbType() ) == QgsWkbTypes::Point ) | ||
point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) ); | ||
else | ||
{ | ||
// not a point geometry | ||
feedback->reportError( QObject::tr( "Feature %1 is a %2 feature, not a point." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) ); | ||
feedback->setProgress( ++i * step ); | ||
continue; | ||
} | ||
|
||
std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within; | ||
|
||
if ( minSize > 1 ) | ||
{ | ||
index.within( point, eps, [ &within]( const QgsSpatialIndexKDBushData & data ) | ||
{ | ||
within.insert( data ); | ||
} ); | ||
if ( within.size() < minSize ) | ||
continue; | ||
|
||
visited.insert( feat.id() ); | ||
} | ||
else | ||
{ | ||
// optimised case for minSize == 1, we can skip the initial check | ||
within.insert( QgsSpatialIndexKDBushData( feat.id(), point.x(), point.y() ) ); | ||
} | ||
|
||
// start new cluster | ||
clusterIdx++; | ||
idToCluster[ feat.id() ] = clusterIdx; | ||
feedback->setProgress( ++i * step ); | ||
|
||
while ( !within.empty() ) | ||
{ | ||
if ( feedback->isCanceled() ) | ||
{ | ||
break; | ||
} | ||
|
||
QgsSpatialIndexKDBushData j = *within.begin(); | ||
within.erase( within.begin() ); | ||
|
||
if ( visited.find( j.id ) != visited.end() ) | ||
{ | ||
// already visited! | ||
continue; | ||
} | ||
|
||
visited.insert( j.id ); | ||
feedback->setProgress( ++i * step ); | ||
|
||
// check from this point | ||
QgsPointXY point2 = j.point(); | ||
|
||
std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2; | ||
index.within( point2, eps, [&within2]( const QgsSpatialIndexKDBushData & data ) | ||
{ | ||
within2.insert( data ); | ||
} ); | ||
if ( within2.size() >= minSize ) | ||
{ | ||
// expand neighbourhood | ||
std::set_difference( within2.begin(), | ||
within2.end(), | ||
visited.begin(), visited.end(), | ||
std::inserter( within, within.end() ) ); | ||
} | ||
if ( !borderPointsAreNoise || within2.size() >= minSize ) | ||
{ | ||
idToCluster[ j.id ] = clusterIdx; | ||
} | ||
} | ||
} | ||
} | ||
|
||
///@endcond | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/*************************************************************************** | ||
qgsalgorithmdbscanclustering.h | ||
--------------------- | ||
begin : July 2018 | ||
copyright : (C) 2018 by Nyall Dawson | ||
email : nyall dot dawson at gmail dot com | ||
***************************************************************************/ | ||
|
||
/*************************************************************************** | ||
* * | ||
* This program is free software; you can redistribute it and/or modify * | ||
* it under the terms of the GNU General Public License as published by * | ||
* the Free Software Foundation; either version 2 of the License, or * | ||
* (at your option) any later version. * | ||
* * | ||
***************************************************************************/ | ||
|
||
#ifndef QGSALGORITHMDBSCANCLUSTERING_H | ||
#define QGSALGORITHMDBSCANCLUSTERING_H | ||
|
||
#define SIP_NO_FILE | ||
|
||
#include "qgis.h" | ||
#include "qgis_analysis.h" | ||
#include "qgsprocessingalgorithm.h" | ||
#include <unordered_map> | ||
|
||
class QgsSpatialIndexKDBush; | ||
|
||
///@cond PRIVATE | ||
|
||
|
||
/** | ||
* Native k-means clustering algorithm. | ||
*/ | ||
class ANALYSIS_EXPORT QgsDbscanClusteringAlgorithm : public QgsProcessingAlgorithm | ||
{ | ||
|
||
public: | ||
|
||
QgsDbscanClusteringAlgorithm() = default; | ||
void initAlgorithm( const QVariantMap &configuration = QVariantMap() ) override; | ||
QString name() const override; | ||
QString displayName() const override; | ||
QString shortDescription() const override; | ||
QStringList tags() const override; | ||
QString group() const override; | ||
QString groupId() const override; | ||
QString shortHelpString() const override; | ||
QgsDbscanClusteringAlgorithm *createInstance() const override SIP_FACTORY; | ||
|
||
protected: | ||
|
||
QVariantMap processAlgorithm( const QVariantMap ¶meters, | ||
QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override; | ||
private: | ||
static void dbscan( std::size_t minSize, | ||
double eps, | ||
bool borderPointsAreNoise, | ||
long featureCount, | ||
QgsFeatureIterator features, | ||
QgsSpatialIndexKDBush &index, | ||
std::unordered_map< QgsFeatureId, int> &idToCluster, | ||
QgsProcessingFeedback *feedback ); | ||
}; | ||
|
||
///@endcond PRIVATE | ||
|
||
#endif // QGSALGORITHMDBSCANCLUSTERING_H | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters