Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
[FEATURE][processing] Add 'Join by location (summary)' algorithm
Like the main Join Attributes by Location algorithm, this algorithm takes two layers and combines their attributes based on spatial criteria. However, this algorithm calculates summaries of the attributes for all matching features, e.g. calculating the mean/min/max/etc. The list of fields to summarise, and the summaries to calculate for those fields, can be selected.
- Loading branch information
Showing
with
1,802 additions
and 3 deletions.
- +7 −1 python/plugins/processing/algs/help/qgis.yaml
- +2 −0 python/plugins/processing/algs/qgis/QGISAlgorithmProvider.py
- +341 −0 python/plugins/processing/algs/qgis/SpatialJoinSummary.py
- +1 −1 python/plugins/processing/algs/qgis/StatisticsByCategories.py
- BIN python/plugins/processing/tests/testdata/custom/points_with_date.dbf
- +1 −0 python/plugins/processing/tests/testdata/custom/points_with_date.prj
- +1 −0 python/plugins/processing/tests/testdata/custom/points_with_date.qpj
- BIN python/plugins/processing/tests/testdata/custom/points_with_date.shp
- BIN python/plugins/processing/tests/testdata/custom/points_with_date.shx
- +64 −0 python/plugins/processing/tests/testdata/expected/join_by_location_summary_date.gfs
- +76 −0 python/plugins/processing/tests/testdata/expected/join_by_location_summary_date.gml
- +172 −0 python/plugins/processing/tests/testdata/expected/join_by_location_summary_intersect.gfs
- +142 −0 python/plugins/processing/tests/testdata/expected/join_by_location_summary_intersect.gml
- +172 −0 .../plugins/processing/tests/testdata/expected/join_by_location_summary_intersect_discardnomatch.gfs
- +121 −0 .../plugins/processing/tests/testdata/expected/join_by_location_summary_intersect_discardnomatch.gml
- +73 −0 python/plugins/processing/tests/testdata/expected/join_by_location_summary_string.gfs
- +131 −0 python/plugins/processing/tests/testdata/expected/join_by_location_summary_string.gml
- +62 −0 python/plugins/processing/tests/testdata/expected/join_by_location_summary_subset_stats.gfs
- +76 −0 python/plugins/processing/tests/testdata/expected/join_by_location_summary_subset_stats.gml
- +102 −0 python/plugins/processing/tests/testdata/expected/join_by_location_summary_touches.gfs
- +100 −0 python/plugins/processing/tests/testdata/expected/join_by_location_summary_touches.gml
- +158 −1 python/plugins/processing/tests/testdata/qgis_algorithm_tests.yaml
@@ -0,0 +1,341 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
""" | ||
*************************************************************************** | ||
SpatialJoinSummary.py
--------------------- | ||
Date : September 2017 | ||
Copyright : (C) 2017 by Nyall Dawson | ||
Email : nyall dot dawson at gmail dot com | ||
*************************************************************************** | ||
* * | ||
* This program is free software; you can redistribute it and/or modify * | ||
* it under the terms of the GNU General Public License as published by * | ||
* the Free Software Foundation; either version 2 of the License, or * | ||
* (at your option) any later version. * | ||
* * | ||
*************************************************************************** | ||
""" | ||
from builtins import range | ||
|
||
__author__ = 'Nyall Dawson' | ||
__date__ = 'September 2017' | ||
__copyright__ = '(C) 2017, Nyall Dawson' | ||
|
||
# This will get replaced with a git SHA1 when you do a git archive | ||
|
||
__revision__ = '$Format:%H$' | ||
|
||
import os | ||
|
||
from collections import defaultdict | ||
|
||
from qgis.PyQt.QtGui import QIcon | ||
from qgis.PyQt.QtCore import QVariant | ||
from qgis.core import (NULL, | ||
QgsField, | ||
QgsFields, | ||
QgsFeatureSink, | ||
QgsFeatureRequest, | ||
QgsGeometry, | ||
QgsCoordinateTransform, | ||
QgsStatisticalSummary, | ||
QgsDateTimeStatisticalSummary, | ||
QgsStringStatisticalSummary, | ||
QgsProcessing, | ||
QgsProcessingParameterBoolean, | ||
QgsProcessingParameterFeatureSource, | ||
QgsProcessingParameterEnum, | ||
QgsProcessingParameterField, | ||
QgsProcessingParameterFeatureSink) | ||
|
||
from processing.algs.qgis.QgisAlgorithm import QgisAlgorithm | ||
from processing.tools import vector | ||
|
||
# Directory two levels above the one containing this file; icon() joins
# 'images/ftools/join_location.png' onto it to locate the algorithm icon.
pluginPath = os.path.split(os.path.split(os.path.dirname(__file__))[0])[0] | ||
|
||
|
||
class SpatialJoinSummary(QgisAlgorithm):
    """
    Processing algorithm which joins attributes by location and summarises them.

    For every feature of the input layer, the features of the join layer which
    satisfy at least one of the selected geometric predicates are collected,
    and statistical summaries of the selected join fields are appended to the
    input feature's attributes.
    """

    # Parameter names
    INPUT = "INPUT"
    JOIN = "JOIN"
    PREDICATE = "PREDICATE"
    JOIN_FIELDS = "JOIN_FIELDS"
    SUMMARIES = "SUMMARIES"
    DISCARD_NONMATCHING = "DISCARD_NONMATCHING"
    OUTPUT = "OUTPUT"

    def icon(self):
        """
        Returns the algorithm's icon, loaded from the plugin's images folder
        """
        return QIcon(os.path.join(pluginPath, 'images', 'ftools', 'join_location.png'))

    def group(self):
        """
        Returns the translated name of the group this algorithm belongs to
        """
        return self.tr('Vector general')

    def __init__(self):
        super().__init__()

    def initAlgorithm(self, config=None):
        """
        Declares the algorithm's parameters.

        Also populates self.predicates and self.statistics, which are
        sequences of (internal key, translated label) pairs. The parameter
        enum indexes refer to positions within these sequences.
        """
        # The first item of each pair is the name of the geometry engine
        # method which processAlgorithm() looks up via getattr(). The engine's
        # equality test is called isEqual, not equals.
        self.predicates = (
            ('intersects', self.tr('intersects')),
            ('contains', self.tr('contains')),
            ('isEqual', self.tr('equals')),
            ('touches', self.tr('touches')),
            ('overlaps', self.tr('overlaps')),
            ('within', self.tr('within')),
            ('crosses', self.tr('crosses')))

        self.statistics = [
            ('count', self.tr('count')),
            ('unique', self.tr('unique')),
            ('min', self.tr('min')),
            ('max', self.tr('max')),
            ('range', self.tr('range')),
            ('sum', self.tr('sum')),
            ('mean', self.tr('mean')),
            ('median', self.tr('median')),
            ('stddev', self.tr('stddev')),
            ('minority', self.tr('minority')),
            ('majority', self.tr('majority')),
            ('q1', self.tr('q1')),
            ('q3', self.tr('q3')),
            ('iqr', self.tr('iqr')),
            ('empty', self.tr('empty')),
            ('filled', self.tr('filled')),
            ('min_length', self.tr('min_length')),
            ('max_length', self.tr('max_length')),
            ('mean_length', self.tr('mean_length'))]

        self.addParameter(QgsProcessingParameterFeatureSource(self.INPUT,
                                                              self.tr('Input layer'),
                                                              [QgsProcessing.TypeVectorAnyGeometry]))
        self.addParameter(QgsProcessingParameterFeatureSource(self.JOIN,
                                                              self.tr('Join layer'),
                                                              [QgsProcessing.TypeVectorAnyGeometry]))
        self.addParameter(QgsProcessingParameterEnum(self.PREDICATE,
                                                     self.tr('Geometric predicate'),
                                                     options=[p[1] for p in self.predicates],
                                                     allowMultiple=True, defaultValue=[0]))
        self.addParameter(QgsProcessingParameterField(self.JOIN_FIELDS,
                                                      self.tr('Fields to summarise (leave empty to use all fields)'),
                                                      parentLayerParameterName=self.JOIN,
                                                      allowMultiple=True, optional=True))
        self.addParameter(QgsProcessingParameterEnum(self.SUMMARIES,
                                                     self.tr(
                                                         'Summaries to calculate (leave empty to use all available)'),
                                                     options=[p[1] for p in self.statistics],
                                                     allowMultiple=True, optional=True))
        self.addParameter(QgsProcessingParameterBoolean(self.DISCARD_NONMATCHING,
                                                        self.tr('Discard records which could not be joined'),
                                                        defaultValue=False))
        self.addParameter(QgsProcessingParameterFeatureSink(self.OUTPUT,
                                                            self.tr('Joined layer')))

    def name(self):
        """
        Returns the algorithm's internal identifier
        """
        return 'joinbylocationsummary'

    def displayName(self):
        """
        Returns the translated, user-visible algorithm name
        """
        return self.tr('Join attributes by location (summary)')

    def tags(self):
        """
        Returns the list of search tags for the algorithm
        """
        return self.tr(
            "summary,aggregate,join,intersects,intersecting,touching,within,contains,overlaps,relation,spatial").split(
            ',')

    def processAlgorithm(self, parameters, context, feedback):
        """
        Runs the join.

        Builds the output fields (input fields followed by one field per
        requested summary of each join field), then for each input feature
        collects the attributes of the join features satisfying any selected
        predicate and appends the calculated summaries.

        Returns a dict mapping the OUTPUT parameter name to the destination
        sink's id.
        """
        source = self.parameterAsSource(parameters, self.INPUT, context)
        join_source = self.parameterAsSource(parameters, self.JOIN, context)
        join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS, context)
        discard_nomatch = self.parameterAsBool(parameters, self.DISCARD_NONMATCHING, context)
        summaries = [self.statistics[i][0] for i in
                     sorted(self.parameterAsEnums(parameters, self.SUMMARIES, context))]

        if not summaries:
            # none selected, so use all
            summaries = [s[0] for s in self.statistics]

        source_fields = source.fields()
        join_source_fields = join_source.fields()
        fields_to_join = QgsFields()
        join_field_indexes = []
        if not join_fields:
            # no fields selected, use all
            join_fields = [join_source_fields.at(i).name() for i in range(len(join_source_fields))]

        def addFieldKeepType(original, stat):
            """
            Adds a field to the output, keeping the same data type as the original
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            fields_to_join.append(field)

        def addField(original, stat, variant_type):
            """
            Adds a field to the output, with a specified type
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            field.setType(variant_type)
            if variant_type == QVariant.Double:
                field.setLength(20)
                field.setPrecision(6)
            fields_to_join.append(field)

        # Each entry is (summary key, output field type or None to keep the
        # join field's type, name of the statistics getter method). Entries
        # without a getter name are handled explicitly when calculating.
        numeric_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'variety'),
            ('min', QVariant.Double, 'min'),
            ('max', QVariant.Double, 'max'),
            ('range', QVariant.Double, 'range'),
            ('sum', QVariant.Double, 'sum'),
            ('mean', QVariant.Double, 'mean'),
            ('median', QVariant.Double, 'median'),
            ('stddev', QVariant.Double, 'stDev'),
            ('minority', QVariant.Double, 'minority'),
            ('majority', QVariant.Double, 'majority'),
            ('q1', QVariant.Double, 'firstQuartile'),
            ('q3', QVariant.Double, 'thirdQuartile'),
            ('iqr', QVariant.Double, 'interQuartileRange')
        )

        datetime_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'countDistinct'),
            ('empty', QVariant.Int, 'countMissing'),
            ('filled', QVariant.Int),
            ('min', None),
            ('max', None)
        )

        string_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'countDistinct'),
            ('empty', QVariant.Int, 'countMissing'),
            ('filled', QVariant.Int),
            ('min', None, 'min'),
            ('max', None, 'max'),
            ('min_length', QVariant.Int, 'minLength'),
            ('max_length', QVariant.Int, 'maxLength'),
            ('mean_length', QVariant.Double, 'meanLength')
        )

        # field_types is kept parallel to join_field_indexes: both only gain
        # an entry for join fields which were found in the join layer
        field_types = []
        for field_name in join_fields:
            idx = join_source_fields.lookupField(field_name)
            if idx >= 0:
                join_field_indexes.append(idx)

                join_field = join_source_fields.at(idx)
                if join_field.isNumeric():
                    field_types.append('numeric')
                    field_list = numeric_fields
                elif join_field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime):
                    field_types.append('datetime')
                    field_list = datetime_fields
                else:
                    field_types.append('string')
                    field_list = string_fields

                for summary in field_list:
                    if summary[0] in summaries:
                        if summary[1] is not None:
                            addField(join_field, summary[0], summary[1])
                        else:
                            addFieldKeepType(join_field, summary[0])

        out_fields = vector.combineFields(source_fields, fields_to_join)

        (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                               out_fields, source.wkbType(), source.sourceCrs())

        def addUnjoinedFeature(feature):
            """
            Adds a feature without any summaries to the sink, padding its
            attributes with NULL so that the attribute count matches the
            output fields
            """
            unjoined_attrs = feature.attributes()
            missing = len(out_fields) - len(unjoined_attrs)
            if missing > 0:
                unjoined_attrs += [NULL] * missing
                feature.setAttributes(unjoined_attrs)
            sink.addFeature(feature, QgsFeatureSink.FastInsert)

        # do the join
        predicates = [self.predicates[i][0] for i in self.parameterAsEnums(parameters, self.PREDICATE, context)]

        features = source.getFeatures()
        total = 100.0 / source.featureCount() if source.featureCount() else 0

        # bounding box transform
        bbox_transform = QgsCoordinateTransform(source.sourceCrs(), join_source.sourceCrs())

        for current, f in enumerate(features):
            if feedback.isCanceled():
                break

            if not f.hasGeometry():
                if not discard_nomatch:
                    addUnjoinedFeature(f)
                continue

            bbox = bbox_transform.transformBoundingBox(f.geometry().boundingBox())
            # the geometry engine is only created when there is at least one
            # candidate feature within the bounding box
            engine = None

            # one list of join attribute values per matching join feature
            values = []

            request = QgsFeatureRequest().setFilterRect(bbox).setSubsetOfAttributes(join_field_indexes).setDestinationCrs(source.sourceCrs())
            for test_feat in join_source.getFeatures(request):
                if feedback.isCanceled():
                    break

                if engine is None:
                    engine = QgsGeometry.createGeometryEngine(f.geometry().geometry())
                    engine.prepareGeometry()

                for predicate in predicates:
                    if getattr(engine, predicate)(test_feat.geometry().geometry()):
                        # only read the attributes once a predicate matches
                        test_attrs = test_feat.attributes()
                        values.append([test_attrs[a] for a in join_field_indexes])
                        break

            feedback.setProgress(int(current * total))

            if len(values) == 0:
                if discard_nomatch:
                    continue
                else:
                    addUnjoinedFeature(f)
            else:
                attrs = f.attributes()
                for i in range(len(join_field_indexes)):
                    attribute_values = [v[i] for v in values]
                    field_type = field_types[i]
                    if field_type == 'numeric':
                        stat = QgsStatisticalSummary()
                        for v in attribute_values:
                            stat.addVariant(v)
                        stat.finalize()
                        for s in numeric_fields:
                            if s[0] in summaries:
                                attrs.append(getattr(stat, s[2])())
                    elif field_type == 'datetime':
                        stat = QgsDateTimeStatisticalSummary()
                        stat.calculate(attribute_values)
                        for s in datetime_fields:
                            if s[0] in summaries:
                                if s[0] == 'filled':
                                    attrs.append(stat.count() - stat.countMissing())
                                elif s[0] == 'min':
                                    attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Min))
                                elif s[0] == 'max':
                                    attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Max))
                                else:
                                    attrs.append(getattr(stat, s[2])())
                    else:
                        stat = QgsStringStatisticalSummary()
                        for v in attribute_values:
                            if v == NULL:
                                stat.addString('')
                            else:
                                stat.addString(str(v))
                        stat.finalize()
                        for s in string_fields:
                            if s[0] in summaries:
                                if s[0] == 'filled':
                                    attrs.append(stat.count() - stat.countMissing())
                                else:
                                    attrs.append(getattr(stat, s[2])())

                f.setAttributes(attrs)
                sink.addFeature(f, QgsFeatureSink.FastInsert)

        return {self.OUTPUT: dest_id}
Binary file not shown.
@@ -0,0 +1 @@ | ||
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] |
@@ -0,0 +1 @@ | ||
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]] |
Binary file not shown.
Binary file not shown.
Oops, something went wrong.