Skip to content

Commit

Permalink
[FEATURE][processing] Add 'Join by location (summary)' algorithm
Browse files Browse the repository at this point in the history
Like the main Join Attributes by Location algorithm, this algorithm
takes two layers and combines the attributes based on a spatial
criteria.

However this algorithm calculates summaries for the attributes for
all matching features, e.g. calculating the mean/min/max/etc.

The list of fields to summaries, and the summaries to
calculate for those, can be selected.
  • Loading branch information
nyalldawson committed Sep 13, 2017
1 parent 458e994 commit be88da8
Show file tree
Hide file tree
Showing 22 changed files with 1,802 additions and 3 deletions.
8 changes: 7 additions & 1 deletion python/plugins/processing/algs/help/qgis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -267,12 +267,18 @@ qgis:joinattributesbylocation: >

The additional attributes and their values are taken from a second vector layer. A spatial criteria is applied to select the values from the second layer that are added to each feature from the first layer in the resulting one.


qgis:joinattributestable: >
This algorithm takes an input vector layer and creates a new vector layer that is an extended version of the input one, with additional attributes in its attribute table.

The additional attributes and their values are taken from a second vector layer. An attribute is selected in each of them to define the join criteria.

qgis:joinbylocationsummary: >
This algorithm takes an input vector layer and creates a new vector layer that is an extended version of the input one, with additional attributes in its attribute table.

The additional attributes and their values are taken from a second vector layer. A spatial criteria is applied to select the values from the second layer that are added to each feature from the first layer in the resulting one.

The algorithm calculates a statistical summary for the values from matching features in the second layer (e.g. maximum value, mean value, etc).

qgis:keepnbiggestparts: >
This algorithm takes a polygon layer and creates a new polygon layer in which multipart geometries have been removed, leaving only the n largest (in terms of area) parts.

Expand Down
2 changes: 2 additions & 0 deletions python/plugins/processing/algs/qgis/QGISAlgorithmProvider.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@
from .SpatialiteExecuteSQL import SpatialiteExecuteSQL
from .SpatialIndex import SpatialIndex
from .SpatialJoin import SpatialJoin
from .SpatialJoinSummary import SpatialJoinSummary
from .SplitWithLines import SplitWithLines
from .StatisticsByCategories import StatisticsByCategories
from .SumLines import SumLines
Expand Down Expand Up @@ -291,6 +292,7 @@ def getAlgs(self):
SpatialiteExecuteSQL(),
SpatialIndex(),
SpatialJoin(),
SpatialJoinSummary(),
SplitWithLines(),
StatisticsByCategories(),
SumLines(),
Expand Down
341 changes: 341 additions & 0 deletions python/plugins/processing/algs/qgis/SpatialJoinSummary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,341 @@
# -*- coding: utf-8 -*-

"""
***************************************************************************
SpatialJoin.py
---------------------
Date : September 2017
Copyright : (C) 2017 by Nyall Dawson
Email : nyall dot dawson at gmail dot com
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************
"""
from builtins import range

__author__ = 'Nyall Dawson'
__date__ = 'September 2017'
__copyright__ = '(C) 2017, Nyall Dawson'

# This will get replaced with a git SHA1 when you do a git archive

__revision__ = '$Format:%H$'

import os

from collections import defaultdict

from qgis.PyQt.QtGui import QIcon
from qgis.PyQt.QtCore import QVariant
from qgis.core import (NULL,
QgsField,
QgsFields,
QgsFeatureSink,
QgsFeatureRequest,
QgsGeometry,
QgsCoordinateTransform,
QgsStatisticalSummary,
QgsDateTimeStatisticalSummary,
QgsStringStatisticalSummary,
QgsProcessing,
QgsProcessingParameterBoolean,
QgsProcessingParameterFeatureSource,
QgsProcessingParameterEnum,
QgsProcessingParameterField,
QgsProcessingParameterFeatureSink)

from processing.algs.qgis.QgisAlgorithm import QgisAlgorithm
from processing.tools import vector

pluginPath = os.path.split(os.path.split(os.path.dirname(__file__))[0])[0]


class SpatialJoinSummary(QgisAlgorithm):
INPUT = "INPUT"
JOIN = "JOIN"
PREDICATE = "PREDICATE"
JOIN_FIELDS = "JOIN_FIELDS"
SUMMARIES = "SUMMARIES"
DISCARD_NONMATCHING = "DISCARD_NONMATCHING"
OUTPUT = "OUTPUT"

def icon(self):
return QIcon(os.path.join(pluginPath, 'images', 'ftools', 'join_location.png'))

def group(self):
return self.tr('Vector general')

def __init__(self):
super().__init__()

def initAlgorithm(self, config=None):
self.predicates = (
('intersects', self.tr('intersects')),
('contains', self.tr('contains')),
('equals', self.tr('equals')),
('touches', self.tr('touches')),
('overlaps', self.tr('overlaps')),
('within', self.tr('within')),
('crosses', self.tr('crosses')))

self.statistics = [
('count', self.tr('count')),
('unique', self.tr('unique')),
('min', self.tr('min')),
('max', self.tr('max')),
('range', self.tr('range')),
('sum', self.tr('sum')),
('mean', self.tr('mean')),
('median', self.tr('median')),
('stddev', self.tr('stddev')),
('minority', self.tr('minority')),
('majority', self.tr('majority')),
('q1', self.tr('q1')),
('q3', self.tr('q3')),
('iqr', self.tr('iqr')),
('empty', self.tr('empty')),
('filled', self.tr('filled')),
('min_length', self.tr('min_length')),
('max_length', self.tr('max_length')),
('mean_length', self.tr('mean_length'))]

self.addParameter(QgsProcessingParameterFeatureSource(self.INPUT,
self.tr('Input layer'),
[QgsProcessing.TypeVectorAnyGeometry]))
self.addParameter(QgsProcessingParameterFeatureSource(self.JOIN,
self.tr('Join layer'),
[QgsProcessing.TypeVectorAnyGeometry]))
self.addParameter(QgsProcessingParameterEnum(self.PREDICATE,
self.tr('Geometric predicate'),
options=[p[1] for p in self.predicates],
allowMultiple=True, defaultValue=[0]))
self.addParameter(QgsProcessingParameterField(self.JOIN_FIELDS,
self.tr('Fields to summarise (leave empty to use all fields)'),
parentLayerParameterName=self.JOIN,
allowMultiple=True, optional=True))
self.addParameter(QgsProcessingParameterEnum(self.SUMMARIES,
self.tr(
'Summaries to calculate (leave empty to use all available)'),
options=[p[1] for p in self.statistics],
allowMultiple=True, optional=True))
self.addParameter(QgsProcessingParameterBoolean(self.DISCARD_NONMATCHING,
self.tr('Discard records which could not be joined'),
defaultValue=False))
self.addParameter(QgsProcessingParameterFeatureSink(self.OUTPUT,
self.tr('Joined layer')))

def name(self):
return 'joinbylocationsummary'

def displayName(self):
return self.tr('Join attributes by location (summary)')

def tags(self):
return self.tr(
"summary,aggregate,join,intersects,intersecting,touching,within,contains,overlaps,relation,spatial").split(
',')

def processAlgorithm(self, parameters, context, feedback):
source = self.parameterAsSource(parameters, self.INPUT, context)
join_source = self.parameterAsSource(parameters, self.JOIN, context)
join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS, context)
discard_nomatch = self.parameterAsBool(parameters, self.DISCARD_NONMATCHING, context)
summaries = [self.statistics[i][0] for i in
sorted(self.parameterAsEnums(parameters, self.SUMMARIES, context))]

if not summaries:
# none selected, so use all
summaries = [s[0] for s in self.statistics]

source_fields = source.fields()
fields_to_join = QgsFields()
join_field_indexes = []
if not join_fields:
# no fields selected, use all
join_fields = [join_source.fields().at(i).name() for i in range(len(join_source.fields()))]

def addFieldKeepType(original, stat):
"""
Adds a field to the output, keeping the same data type as the original
"""
field = QgsField(original)
field.setName(field.name() + '_' + stat)
fields_to_join.append(field)

def addField(original, stat, type):
"""
Adds a field to the output, with a specified type
"""
field = QgsField(original)
field.setName(field.name() + '_' + stat)
field.setType(type)
if type == QVariant.Double:
field.setLength(20)
field.setPrecision(6)
fields_to_join.append(field)

numeric_fields = (
('count', QVariant.Int, 'count'),
('unique', QVariant.Int, 'variety'),
('min', QVariant.Double, 'min'),
('max', QVariant.Double, 'max'),
('range', QVariant.Double, 'range'),
('sum', QVariant.Double, 'sum'),
('mean', QVariant.Double, 'mean'),
('median', QVariant.Double, 'median'),
('stddev', QVariant.Double, 'stDev'),
('minority', QVariant.Double, 'minority'),
('majority', QVariant.Double, 'majority'),
('q1', QVariant.Double, 'firstQuartile'),
('q3', QVariant.Double, 'thirdQuartile'),
('iqr', QVariant.Double, 'interQuartileRange')
)

datetime_fields = (
('count', QVariant.Int, 'count'),
('unique', QVariant.Int, 'countDistinct'),
('empty', QVariant.Int, 'countMissing'),
('filled', QVariant.Int),
('min', None),
('max', None)
)

string_fields = (
('count', QVariant.Int, 'count'),
('unique', QVariant.Int, 'countDistinct'),
('empty', QVariant.Int, 'countMissing'),
('filled', QVariant.Int),
('min', None, 'min'),
('max', None, 'max'),
('min_length', QVariant.Int, 'minLength'),
('max_length', QVariant.Int, 'maxLength'),
('mean_length', QVariant.Double, 'meanLength')
)

field_types = []
for f in join_fields:
idx = join_source.fields().lookupField(f)
if idx >= 0:
join_field_indexes.append(idx)

join_field = join_source.fields().at(idx)
if join_field.isNumeric():
field_types.append('numeric')
field_list = numeric_fields
elif join_field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime):
field_types.append('datetime')
field_list = datetime_fields
else:
field_types.append('string')
field_list = string_fields

for f in field_list:
if f[0] in summaries:
if f[1] is not None:
addField(join_field, f[0], f[1])
else:
addFieldKeepType(join_field, f[0])

out_fields = vector.combineFields(source_fields, fields_to_join)

(sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
out_fields, source.wkbType(), source.sourceCrs())

# do the join
predicates = [self.predicates[i][0] for i in self.parameterAsEnums(parameters, self.PREDICATE, context)]

features = source.getFeatures()
total = 100.0 / source.featureCount() if source.featureCount() else 0

# bounding box transform
bbox_transform = QgsCoordinateTransform(source.sourceCrs(), join_source.sourceCrs())

for current, f in enumerate(features):
if feedback.isCanceled():
break

if not f.hasGeometry():
if not discard_nomatch:
sink.addFeature(f, QgsFeatureSink.FastInsert)
continue

bbox = bbox_transform.transformBoundingBox(f.geometry().boundingBox())
engine = None

values = []

request = QgsFeatureRequest().setFilterRect(bbox).setSubsetOfAttributes(join_field_indexes).setDestinationCrs(source.sourceCrs())
for test_feat in join_source.getFeatures(request):
if feedback.isCanceled():
break

join_attributes = []
for a in join_field_indexes:
join_attributes.append(test_feat.attributes()[a])

if engine is None:
engine = QgsGeometry.createGeometryEngine(f.geometry().geometry())
engine.prepareGeometry()

for predicate in predicates:
if getattr(engine, predicate)(test_feat.geometry().geometry()):
values.append(join_attributes)
break

feedback.setProgress(int(current * total))

if len(values) == 0:
if discard_nomatch:
continue
else:
sink.addFeature(f, QgsFeatureSink.FastInsert)
else:
attrs = f.attributes()
for i in range(len(join_field_indexes)):
attribute_values = [v[i] for v in values]
field_type = field_types[i]
if field_type == 'numeric':
stat = QgsStatisticalSummary()
for v in attribute_values:
stat.addVariant(v)
stat.finalize()
for s in numeric_fields:
if s[0] in summaries:
attrs.append(getattr(stat, s[2])())
elif field_type == 'datetime':
stat = QgsDateTimeStatisticalSummary()
stat.calculate(attribute_values)
for s in datetime_fields:
if s[0] in summaries:
if s[0] == 'filled':
attrs.append(stat.count() - stat.countMissing())
elif s[0] == 'min':
attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Min))
elif s[0] == 'max':
attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Max))
else:
attrs.append(getattr(stat, s[2])())
else:
stat = QgsStringStatisticalSummary()
for v in attribute_values:
if v == NULL:
stat.addString('')
else:
stat.addString(str(v))
stat.finalize()
for s in string_fields:
if s[0] in summaries:
if s[0] == 'filled':
attrs.append(stat.count() - stat.countMissing())
else:
attrs.append(getattr(stat, s[2])())

f.setAttributes(attrs)
sink.addFeature(f, QgsFeatureSink.FastInsert)

return {self.OUTPUT: dest_id}
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def addField(name):
"""
Adds a field to the output, keeping the same data type as the value_field
"""
field = value_field
field = QgsField(value_field)
field.setName(name)
fields.append(field)

Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit be88da8

Please sign in to comment.