Skip to content
Permalink
Browse files

Port Stats by Category to new API

Improvements:
- keep original field type and name for category field
- add unit test
  • Loading branch information
nyalldawson committed Aug 5, 2017
1 parent adda744 commit b93be39c2469edb776ad898a4693c92a9800ecbe
@@ -132,6 +132,7 @@
from .SpatialiteExecuteSQL import SpatialiteExecuteSQL
from .SpatialIndex import SpatialIndex
from .SplitWithLines import SplitWithLines
from .StatisticsByCategories import StatisticsByCategories
from .SumLines import SumLines
from .SymmetricalDifference import SymmetricalDifference
from .TextToFloat import TextToFloat
@@ -149,7 +150,6 @@
# from .SelectByLocation import SelectByLocation
# from .SpatialJoin import SpatialJoin
# from .GeometryConvert import GeometryConvert
# from .StatisticsByCategories import StatisticsByCategories
# from .FieldsCalculator import FieldsCalculator
# from .FieldPyculator import FieldsPyculator
# from .PointsDisplacement import PointsDisplacement
@@ -190,7 +190,7 @@ def getAlgs(self):
# SpatialJoin(),
# GeometryConvert(), FieldsCalculator(),
# FieldsPyculator(),
# StatisticsByCategories(),
#
# RasterLayerStatistics(), PointsDisplacement(),
# PointsFromPolygons(),
# PointsFromLines(),
@@ -298,6 +298,7 @@ def getAlgs(self):
SpatialiteExecuteSQL(),
SpatialIndex(),
SplitWithLines(),
StatisticsByCategories(),
SumLines(),
SymmetricalDifference(),
TextToFloat(),
@@ -26,19 +26,24 @@

__revision__ = '$Format:%H$'

from qgis.core import (QgsApplication,
QgsFeatureSink,
from qgis.core import (QgsProcessingParameterFeatureSource,
QgsStatisticalSummary,
QgsProcessingUtils)
from processing.core.outputs import OutputTable
QgsFeatureRequest,
QgsProcessingParameterField,
QgsProcessingParameterFeatureSink,
QgsFields,
QgsField,
QgsWkbTypes,
QgsCoordinateReferenceSystem,
QgsFeature,
QgsFeatureSink)
from qgis.PyQt.QtCore import QVariant
from processing.algs.qgis.QgisAlgorithm import QgisAlgorithm
from processing.core.parameters import ParameterVector
from processing.core.parameters import ParameterTableField


class StatisticsByCategories(QgisAlgorithm):

INPUT_LAYER = 'INPUT_LAYER'
INPUT = 'INPUT'
VALUES_FIELD_NAME = 'VALUES_FIELD_NAME'
CATEGORIES_FIELD_NAME = 'CATEGORIES_FIELD_NAME'
OUTPUT = 'OUTPUT'
@@ -50,16 +55,16 @@ def __init__(self):
super().__init__()

def initAlgorithm(self, config=None):
    """Declare the algorithm's inputs and its output sink.

    Registers, in order:

    * ``INPUT`` - the feature source to read from,
    * ``VALUES_FIELD_NAME`` - a numeric field of ``INPUT`` whose values
      are summarised,
    * ``CATEGORIES_FIELD_NAME`` - a field of ``INPUT`` (any type) used to
      group the values,
    * ``OUTPUT`` - the feature sink receiving the statistics.

    Only the ``QgsProcessingParameter*`` definitions are registered; the
    legacy ``ParameterVector`` / ``ParameterTableField`` / ``OutputTable``
    registrations reused the same parameter names and pointed at the old
    ``INPUT_LAYER`` key, so they are dropped.

    :param config: optional configuration; not used by this algorithm
    """
    self.addParameter(QgsProcessingParameterFeatureSource(
        self.INPUT,
        self.tr('Input vector layer')))

    # Both field parameters are tied to INPUT so they refer to that
    # source's fields.
    self.addParameter(QgsProcessingParameterField(
        self.VALUES_FIELD_NAME,
        self.tr('Field to calculate statistics on'),
        parentLayerParameterName=self.INPUT,
        type=QgsProcessingParameterField.Numeric))
    self.addParameter(QgsProcessingParameterField(
        self.CATEGORIES_FIELD_NAME,
        self.tr('Field with categories'),
        parentLayerParameterName=self.INPUT,
        type=QgsProcessingParameterField.Any))

    self.addParameter(QgsProcessingParameterFeatureSink(
        self.OUTPUT,
        self.tr('Statistics by category')))

def name(self):
    """Return the identifier string of this algorithm."""
    algorithm_id = 'statisticsbycategories'
    return algorithm_id
@@ -68,36 +73,51 @@ def displayName(self):
return self.tr('Statistics by categories')

def processAlgorithm(self, parameters, context, feedback):
    """Compute summary statistics of one field, grouped by another field.

    The values of the ``VALUES_FIELD_NAME`` field are converted to float
    and grouped by the raw value of the ``CATEGORIES_FIELD_NAME`` field.
    For every category one geometry-less feature is added to the
    ``OUTPUT`` sink, holding the category value followed by min, max,
    mean, sample standard deviation, sum and count of its values.

    Features whose value cannot be converted to float are skipped. If
    the run is cancelled, reading stops and statistics are written for
    the features collected so far.

    :param parameters: parameter values, resolved via ``parameterAs*``
    :param context: processing context handed to ``parameterAs*``
    :param feedback: used for cancellation checks and progress reports
    :returns: dict mapping ``self.OUTPUT`` to the sink's destination id
    """
    source = self.parameterAsSource(parameters, self.INPUT, context)
    value_field_name = self.parameterAsString(parameters, self.VALUES_FIELD_NAME, context)
    category_field_name = self.parameterAsString(parameters, self.CATEGORIES_FIELD_NAME, context)

    source_fields = source.fields()
    value_field_index = source_fields.lookupField(value_field_name)
    category_field_index = source_fields.lookupField(category_field_name)

    # Only attributes are read below, so geometries are not requested.
    request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry)
    features = source.getFeatures(request)

    # Guard against zero and against a negative "count unknown" result so
    # the progress value never goes negative.
    feature_count = source.featureCount()
    total = 100.0 / feature_count if feature_count > 0 else 0

    # category value -> list of float values seen for that category
    values = {}
    for current, feat in enumerate(features):
        if feedback.isCanceled():
            break

        feedback.setProgress(int(current * total))
        attrs = feat.attributes()
        try:
            value = float(attrs[value_field_index])
            values.setdefault(attrs[category_field_index], []).append(value)
        except (TypeError, ValueError, OverflowError):
            # Best effort: skip features whose value is not convertible
            # to float or whose category cannot be used as a dict key.
            continue

    # The first output field keeps the name and type of the source's
    # category field; the rest are the computed statistics.
    fields = QgsFields()
    fields.append(source_fields.at(category_field_index))
    for stat_name in ('min', 'max', 'mean', 'stddev', 'sum'):
        fields.append(QgsField(stat_name, QVariant.Double))
    fields.append(QgsField('count', QVariant.Int))

    (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                           fields, QgsWkbTypes.NoGeometry,
                                           QgsCoordinateReferenceSystem())

    stat = QgsStatisticalSummary(QgsStatisticalSummary.Min | QgsStatisticalSummary.Max |
                                 QgsStatisticalSummary.Mean | QgsStatisticalSummary.StDevSample |
                                 QgsStatisticalSummary.Sum | QgsStatisticalSummary.Count)

    for cat, cat_values in values.items():
        stat.calculate(cat_values)
        f = QgsFeature()
        f.setAttributes([cat, stat.min(), stat.max(), stat.mean(),
                         stat.sampleStDev(), stat.sum(), stat.count()])
        sink.addFeature(f, QgsFeatureSink.FastInsert)

    return {self.OUTPUT: dest_id}
@@ -0,0 +1,45 @@
<GMLFeatureClassList>
<GMLFeatureClass>
<Name>stats_by_category</Name>
<ElementPath>stats_by_category</ElementPath>
<GeometryType>100</GeometryType>
<DatasetSpecificInfo>
<FeatureCount>3</FeatureCount>
</DatasetSpecificInfo>
<PropertyDefn>
<Name>id2</Name>
<ElementPath>id2</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
<PropertyDefn>
<Name>min</Name>
<ElementPath>min</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
<PropertyDefn>
<Name>max</Name>
<ElementPath>max</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
<PropertyDefn>
<Name>mean</Name>
<ElementPath>mean</ElementPath>
<Type>Real</Type>
</PropertyDefn>
<PropertyDefn>
<Name>stddev</Name>
<ElementPath>stddev</ElementPath>
<Type>Real</Type>
</PropertyDefn>
<PropertyDefn>
<Name>sum</Name>
<ElementPath>sum</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
<PropertyDefn>
<Name>count</Name>
<ElementPath>count</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
</GMLFeatureClass>
</GMLFeatureClassList>
@@ -0,0 +1,42 @@
<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation=""
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:boundedBy><gml:null>missing</gml:null></gml:boundedBy>

<gml:featureMember>
<ogr:stats_by_category fid="stats_by_category.0">
<ogr:id2>2</ogr:id2>
<ogr:min>1</ogr:min>
<ogr:max>4</ogr:max>
<ogr:mean>2.5</ogr:mean>
<ogr:stddev>2.12132034355964</ogr:stddev>
<ogr:sum>5</ogr:sum>
<ogr:count>2</ogr:count>
</ogr:stats_by_category>
</gml:featureMember>
<gml:featureMember>
<ogr:stats_by_category fid="stats_by_category.1">
<ogr:id2>1</ogr:id2>
<ogr:min>2</ogr:min>
<ogr:max>5</ogr:max>
<ogr:mean>3.5</ogr:mean>
<ogr:stddev>2.12132034355964</ogr:stddev>
<ogr:sum>7</ogr:sum>
<ogr:count>2</ogr:count>
</ogr:stats_by_category>
</gml:featureMember>
<gml:featureMember>
<ogr:stats_by_category fid="stats_by_category.2">
<ogr:id2>0</ogr:id2>
<ogr:min>3</ogr:min>
<ogr:max>9</ogr:max>
<ogr:mean>6.6</ogr:mean>
<ogr:stddev>2.30217288664427</ogr:stddev>
<ogr:sum>33</ogr:sum>
<ogr:count>5</ogr:count>
</ogr:stats_by_category>
</gml:featureMember>
</ogr:FeatureCollection>
@@ -2489,6 +2489,19 @@ tests:
name: expected/single_to_multi.gml
type: vector

- algorithm: qgis:statisticsbycategories
name: stats by category
params:
VALUES_FIELD_NAME: id
CATEGORIES_FIELD_NAME: id2
INPUT:
name: points.gml
type: vector
results:
OUTPUT:
name: expected/stats_by_category.gml
type: vector

# - algorithm: qgis:zonalstatistics
# name: simple zonal statistics
# params:

0 comments on commit b93be39

Please sign in to comment.
You can’t perform that action at this time.