-
-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FEATURE][processing] Add 'Join by location (summary)' algorithm
Like the main Join Attributes by Location algorithm, this algorithm takes two layers and combines their attributes based on spatial criteria. However, this algorithm calculates summaries of the attributes of all matching features, e.g. calculating the mean/min/max/etc. The list of fields to summarise, and the summaries to calculate for those, can be selected.
- Loading branch information
1 parent
458e994
commit be88da8
Showing
22 changed files
with
1,802 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
341 changes: 341 additions & 0 deletions
341
python/plugins/processing/algs/qgis/SpatialJoinSummary.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,341 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
""" | ||
*************************************************************************** | ||
SpatialJoinSummary.py | ||
--------------------- | ||
Date : September 2017 | ||
Copyright : (C) 2017 by Nyall Dawson | ||
Email : nyall dot dawson at gmail dot com | ||
*************************************************************************** | ||
* * | ||
* This program is free software; you can redistribute it and/or modify * | ||
* it under the terms of the GNU General Public License as published by * | ||
* the Free Software Foundation; either version 2 of the License, or * | ||
* (at your option) any later version. * | ||
* * | ||
*************************************************************************** | ||
""" | ||
from builtins import range | ||
|
||
__author__ = 'Nyall Dawson' | ||
__date__ = 'September 2017' | ||
__copyright__ = '(C) 2017, Nyall Dawson' | ||
|
||
# This will get replaced with a git SHA1 when you do a git archive | ||
|
||
__revision__ = '$Format:%H$' | ||
|
||
import os | ||
|
||
from collections import defaultdict | ||
|
||
from qgis.PyQt.QtGui import QIcon | ||
from qgis.PyQt.QtCore import QVariant | ||
from qgis.core import (NULL, | ||
QgsField, | ||
QgsFields, | ||
QgsFeatureSink, | ||
QgsFeatureRequest, | ||
QgsGeometry, | ||
QgsCoordinateTransform, | ||
QgsStatisticalSummary, | ||
QgsDateTimeStatisticalSummary, | ||
QgsStringStatisticalSummary, | ||
QgsProcessing, | ||
QgsProcessingParameterBoolean, | ||
QgsProcessingParameterFeatureSource, | ||
QgsProcessingParameterEnum, | ||
QgsProcessingParameterField, | ||
QgsProcessingParameterFeatureSink) | ||
|
||
from processing.algs.qgis.QgisAlgorithm import QgisAlgorithm | ||
from processing.tools import vector | ||
|
||
# Directory two levels above the directory containing this file; image
# resources are located relative to it.
_this_dir = os.path.dirname(__file__)
pluginPath = os.path.dirname(os.path.dirname(_this_dir))
|
||
|
||
class SpatialJoinSummary(QgisAlgorithm):
    """
    Processing algorithm which joins attributes by location and summarises
    the values of all matching join features.

    For every feature of the input layer, the join layer features which
    satisfy at least one of the selected geometric predicates are collected.
    The requested statistics are then calculated for each selected join field
    and appended to the attributes of the input feature.
    """

    # Parameter / output identifiers
    INPUT = "INPUT"
    JOIN = "JOIN"
    PREDICATE = "PREDICATE"
    JOIN_FIELDS = "JOIN_FIELDS"
    SUMMARIES = "SUMMARIES"
    DISCARD_NONMATCHING = "DISCARD_NONMATCHING"
    OUTPUT = "OUTPUT"

    def icon(self):
        """
        Returns the algorithm icon, loaded from the plugin's image folder.
        """
        return QIcon(os.path.join(pluginPath, 'images', 'ftools', 'join_location.png'))

    def group(self):
        """
        Returns the name of the group this algorithm belongs to.
        """
        return self.tr('Vector general')

    def __init__(self):
        super().__init__()

    def initAlgorithm(self, config=None):
        """
        Defines the available predicates and statistics, and registers the
        algorithm's parameters and output.
        """
        # (engine method name, display name) - the first item is looked up as
        # a method on the geometry engine in processAlgorithm()
        self.predicates = (
            ('intersects', self.tr('intersects')),
            ('contains', self.tr('contains')),
            ('equals', self.tr('equals')),
            ('touches', self.tr('touches')),
            ('overlaps', self.tr('overlaps')),
            ('within', self.tr('within')),
            ('crosses', self.tr('crosses')))

        # (statistic id, display name) - the id is used as the suffix of the
        # generated output field names
        self.statistics = [
            ('count', self.tr('count')),
            ('unique', self.tr('unique')),
            ('min', self.tr('min')),
            ('max', self.tr('max')),
            ('range', self.tr('range')),
            ('sum', self.tr('sum')),
            ('mean', self.tr('mean')),
            ('median', self.tr('median')),
            ('stddev', self.tr('stddev')),
            ('minority', self.tr('minority')),
            ('majority', self.tr('majority')),
            ('q1', self.tr('q1')),
            ('q3', self.tr('q3')),
            ('iqr', self.tr('iqr')),
            ('empty', self.tr('empty')),
            ('filled', self.tr('filled')),
            ('min_length', self.tr('min_length')),
            ('max_length', self.tr('max_length')),
            ('mean_length', self.tr('mean_length'))]

        self.addParameter(QgsProcessingParameterFeatureSource(self.INPUT,
                                                              self.tr('Input layer'),
                                                              [QgsProcessing.TypeVectorAnyGeometry]))
        self.addParameter(QgsProcessingParameterFeatureSource(self.JOIN,
                                                              self.tr('Join layer'),
                                                              [QgsProcessing.TypeVectorAnyGeometry]))
        self.addParameter(QgsProcessingParameterEnum(self.PREDICATE,
                                                     self.tr('Geometric predicate'),
                                                     options=[p[1] for p in self.predicates],
                                                     allowMultiple=True, defaultValue=[0]))
        self.addParameter(QgsProcessingParameterField(self.JOIN_FIELDS,
                                                      self.tr('Fields to summarise (leave empty to use all fields)'),
                                                      parentLayerParameterName=self.JOIN,
                                                      allowMultiple=True, optional=True))
        self.addParameter(QgsProcessingParameterEnum(self.SUMMARIES,
                                                     self.tr(
                                                         'Summaries to calculate (leave empty to use all available)'),
                                                     options=[p[1] for p in self.statistics],
                                                     allowMultiple=True, optional=True))
        self.addParameter(QgsProcessingParameterBoolean(self.DISCARD_NONMATCHING,
                                                        self.tr('Discard records which could not be joined'),
                                                        defaultValue=False))
        self.addParameter(QgsProcessingParameterFeatureSink(self.OUTPUT,
                                                            self.tr('Joined layer')))

    def name(self):
        """
        Returns the algorithm's identifier.
        """
        return 'joinbylocationsummary'

    def displayName(self):
        """
        Returns the user-visible name of the algorithm.
        """
        return self.tr('Join attributes by location (summary)')

    def tags(self):
        """
        Returns the list of search tags for the algorithm.
        """
        return self.tr(
            "summary,aggregate,join,intersects,intersecting,touching,within,contains,overlaps,relation,spatial").split(
            ',')

    def processAlgorithm(self, parameters, context, feedback):
        """
        Runs the join.

        Each input feature is written to the sink with one extra attribute per
        (join field, selected statistic) pair. Input features without geometry
        or without any matching join feature are either skipped (when
        DISCARD_NONMATCHING is set) or written with NULL summary values.

        Returns a dict mapping OUTPUT to the destination id of the sink.
        """
        source = self.parameterAsSource(parameters, self.INPUT, context)
        join_source = self.parameterAsSource(parameters, self.JOIN, context)
        join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS, context)
        discard_nomatch = self.parameterAsBool(parameters, self.DISCARD_NONMATCHING, context)
        summaries = [self.statistics[i][0] for i in
                     sorted(self.parameterAsEnums(parameters, self.SUMMARIES, context))]

        if not summaries:
            # none selected, so use all
            summaries = [s[0] for s in self.statistics]

        source_fields = source.fields()
        join_layer_fields = join_source.fields()
        fields_to_join = QgsFields()
        join_field_indexes = []
        if not join_fields:
            # no fields selected, use all
            join_fields = [join_layer_fields.at(i).name() for i in range(len(join_layer_fields))]

        def add_summary_field(original, stat, variant_type=None):
            """
            Adds a field named "<original name>_<stat>" to the output. The
            original field's data type is kept unless variant_type is given.
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            if variant_type is not None:
                field.setType(variant_type)
                if variant_type == QVariant.Double:
                    field.setLength(20)
                    field.setPrecision(6)
            fields_to_join.append(field)

        # Each entry is (statistic id, output field type or None to keep the
        # join field's type, name of the getter on the summary object).
        # Entries without a getter name are special-cased when calculated.
        numeric_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'variety'),
            ('min', QVariant.Double, 'min'),
            ('max', QVariant.Double, 'max'),
            ('range', QVariant.Double, 'range'),
            ('sum', QVariant.Double, 'sum'),
            ('mean', QVariant.Double, 'mean'),
            ('median', QVariant.Double, 'median'),
            ('stddev', QVariant.Double, 'stDev'),
            ('minority', QVariant.Double, 'minority'),
            ('majority', QVariant.Double, 'majority'),
            ('q1', QVariant.Double, 'firstQuartile'),
            ('q3', QVariant.Double, 'thirdQuartile'),
            ('iqr', QVariant.Double, 'interQuartileRange')
        )

        datetime_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'countDistinct'),
            ('empty', QVariant.Int, 'countMissing'),
            ('filled', QVariant.Int),
            ('min', None),
            ('max', None)
        )

        string_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'countDistinct'),
            ('empty', QVariant.Int, 'countMissing'),
            ('filled', QVariant.Int),
            ('min', None, 'min'),
            ('max', None, 'max'),
            ('min_length', QVariant.Int, 'minLength'),
            ('max_length', QVariant.Int, 'maxLength'),
            ('mean_length', QVariant.Double, 'meanLength')
        )

        # One (kind, selected statistic entries) pair per resolved join field,
        # in the same order as join_field_indexes. Resolving the selection
        # here avoids re-filtering the tables for every feature.
        field_plans = []
        for field_name in join_fields:
            idx = join_layer_fields.lookupField(field_name)
            if idx < 0:
                # unknown field name, nothing to summarise
                continue
            join_field_indexes.append(idx)

            join_field = join_layer_fields.at(idx)
            if join_field.isNumeric():
                kind, table = 'numeric', numeric_fields
            elif join_field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime):
                kind, table = 'datetime', datetime_fields
            else:
                kind, table = 'string', string_fields

            wanted = [entry for entry in table if entry[0] in summaries]
            for entry in wanted:
                add_summary_field(join_field, entry[0], entry[1])
            field_plans.append((kind, wanted))

        out_fields = vector.combineFields(source_fields, fields_to_join)
        out_field_count = len(out_fields)

        (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                               out_fields, source.wkbType(), source.sourceCrs())

        def write_unjoined(feature):
            """
            Writes a feature which received no summary values, padding its
            attributes with NULL so that they match the output fields.
            """
            attrs = feature.attributes()
            missing = out_field_count - len(attrs)
            if missing > 0:
                attrs.extend([NULL] * missing)
                feature.setAttributes(attrs)
            sink.addFeature(feature, QgsFeatureSink.FastInsert)

        # do the join
        predicates = [self.predicates[i][0] for i in self.parameterAsEnums(parameters, self.PREDICATE, context)]

        features = source.getFeatures()
        total = 100.0 / source.featureCount() if source.featureCount() else 0

        # bounding box transform
        bbox_transform = QgsCoordinateTransform(source.sourceCrs(), join_source.sourceCrs())

        for current, f in enumerate(features):
            if feedback.isCanceled():
                break

            if not f.hasGeometry():
                if not discard_nomatch:
                    write_unjoined(f)
                continue

            bbox = bbox_transform.transformBoundingBox(f.geometry().boundingBox())
            # the geometry engine is only created once a candidate is found
            engine = None

            # one list of join attribute values per matching join feature
            values = []

            request = (QgsFeatureRequest()
                       .setFilterRect(bbox)
                       .setSubsetOfAttributes(join_field_indexes)
                       .setDestinationCrs(source.sourceCrs()))
            for test_feat in join_source.getFeatures(request):
                if feedback.isCanceled():
                    break

                if engine is None:
                    engine = QgsGeometry.createGeometryEngine(f.geometry().geometry())
                    engine.prepareGeometry()

                # a join feature is counted once, as soon as any predicate holds
                if any(getattr(engine, predicate)(test_feat.geometry().geometry())
                       for predicate in predicates):
                    test_attrs = test_feat.attributes()
                    values.append([test_attrs[a] for a in join_field_indexes])

            if feedback.isCanceled():
                # the scan of the join layer was interrupted, so any summary
                # would be based on incomplete data - do not write it
                break

            feedback.setProgress(int(current * total))

            if not values:
                if not discard_nomatch:
                    write_unjoined(f)
                continue

            attrs = f.attributes()
            for i, (kind, wanted) in enumerate(field_plans):
                attribute_values = [v[i] for v in values]
                if kind == 'numeric':
                    stat = QgsStatisticalSummary()
                    for v in attribute_values:
                        stat.addVariant(v)
                    stat.finalize()
                    for s in wanted:
                        attrs.append(getattr(stat, s[2])())
                elif kind == 'datetime':
                    stat = QgsDateTimeStatisticalSummary()
                    stat.calculate(attribute_values)
                    for s in wanted:
                        if s[0] == 'filled':
                            attrs.append(stat.count() - stat.countMissing())
                        elif s[0] == 'min':
                            attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Min))
                        elif s[0] == 'max':
                            attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Max))
                        else:
                            attrs.append(getattr(stat, s[2])())
                else:
                    stat = QgsStringStatisticalSummary()
                    for v in attribute_values:
                        # NULL values are added as empty strings
                        if v == NULL:
                            stat.addString('')
                        else:
                            stat.addString(str(v))
                    stat.finalize()
                    for s in wanted:
                        if s[0] == 'filled':
                            attrs.append(stat.count() - stat.countMissing())
                        else:
                            attrs.append(getattr(stat, s[2])())

            f.setAttributes(attrs)
            sink.addFeature(f, QgsFeatureSink.FastInsert)

        return {self.OUTPUT: dest_id}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
1 change: 1 addition & 0 deletions
1
python/plugins/processing/tests/testdata/custom/points_with_date.prj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] |
1 change: 1 addition & 0 deletions
1
python/plugins/processing/tests/testdata/custom/points_with_date.qpj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]] |
Binary file added
BIN
+352 Bytes
python/plugins/processing/tests/testdata/custom/points_with_date.shp
Binary file not shown.
Binary file added
BIN
+172 Bytes
python/plugins/processing/tests/testdata/custom/points_with_date.shx
Binary file not shown.
Oops, something went wrong.