Skip to content

Commit f184ec9

Browse files
committed
Merge pull request #2471 from nyalldawson/processing_fixes
[processing] Misc improvements: use QgsStatisticalSummary for statistic calculations, and use a faster expression-based feature request in the ExtractByAttribute algorithm
2 parents fd50136 + 48afc42 commit f184ec9

File tree

6 files changed

+66
-80
lines changed

6 files changed

+66
-80
lines changed

python/core/qgsstatisticalsummary.sip

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class QgsStatisticalSummary
4343
/** Constructor for QgsStatisticalSummary
4444
* @param stats flags for statistics to calculate
4545
*/
46-
QgsStatisticalSummary( const QgsStatisticalSummary::Statistics& stats = QgsStatisticalSummary::Statistics( 0 ) );
46+
QgsStatisticalSummary( const QgsStatisticalSummary::Statistics& stats = QgsStatisticalSummary::All );
4747

4848
virtual ~QgsStatisticalSummary();
4949

python/plugins/processing/algs/qgis/BasicStatisticsNumbers.py

+47-35
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
import math
2929

30+
from qgis.core import QgsStatisticalSummary
3031
from processing.core.GeoAlgorithm import GeoAlgorithm
3132
from processing.core.parameters import ParameterVector
3233
from processing.core.parameters import ParameterTableField
@@ -51,6 +52,11 @@ class BasicStatisticsNumbers(GeoAlgorithm):
5152
RANGE = 'RANGE'
5253
MEDIAN = 'MEDIAN'
5354
UNIQUE = 'UNIQUE'
55+
MINORITY = 'MINORITY'
56+
MAJORITY = 'MAJORITY'
57+
FIRSTQUARTILE = 'FIRSTQUARTILE'
58+
THIRDQUARTILE = 'THIRDQUARTILE'
59+
IQR = 'IQR'
5460

5561
def defineCharacteristics(self):
5662
self.name, self.i18n_name = self.trAlgorithm('Basic statistics for numeric fields')
@@ -70,11 +76,16 @@ def defineCharacteristics(self):
7076
self.addOutput(OutputNumber(self.MAX, self.tr('Maximum value')))
7177
self.addOutput(OutputNumber(self.SUM, self.tr('Sum')))
7278
self.addOutput(OutputNumber(self.MEAN, self.tr('Mean value')))
79+
self.addOutput(OutputNumber(self.STD_DEV, self.tr('Standard deviation')))
7380
self.addOutput(OutputNumber(self.COUNT, self.tr('Count')))
7481
self.addOutput(OutputNumber(self.RANGE, self.tr('Range')))
7582
self.addOutput(OutputNumber(self.MEDIAN, self.tr('Median')))
7683
self.addOutput(OutputNumber(self.UNIQUE, self.tr('Number of unique values')))
77-
self.addOutput(OutputNumber(self.STD_DEV, self.tr('Standard deviation')))
84+
self.addOutput(OutputNumber(self.MINORITY, self.tr('Minority (rarest occurring value)')))
85+
self.addOutput(OutputNumber(self.MAJORITY, self.tr('Majority (most frequently occurring value)')))
86+
self.addOutput(OutputNumber(self.FIRSTQUARTILE, self.tr('First quartile')))
87+
self.addOutput(OutputNumber(self.THIRDQUARTILE, self.tr('Third quartile')))
88+
self.addOutput(OutputNumber(self.IQR, self.tr('Interquartile Range (IQR)')))
7889

7990
def processAlgorithm(self, progress):
8091
layer = dataobjects.getObjectFromUri(
@@ -92,6 +103,11 @@ def processAlgorithm(self, progress):
92103
meanValue = 0
93104
medianValue = 0
94105
stdDevValue = 0
106+
minority = 0
107+
majority = 0
108+
firstQuartile = 0
109+
thirdQuartile = 0
110+
iqr = 0
95111

96112
isFirst = True
97113
values = []
@@ -102,43 +118,30 @@ def processAlgorithm(self, progress):
102118
current = 0
103119
for ft in features:
104120
if ft.attributes()[index]:
105-
value = float(ft.attributes()[index])
106-
if isFirst:
107-
minValue = value
108-
maxValue = value
109-
isFirst = False
110-
else:
111-
if value < minValue:
112-
minValue = value
113-
if value > maxValue:
114-
maxValue = value
115-
116-
values.append(value)
117-
sumValue += value
121+
values.append(float(ft.attributes()[index]))
118122

119123
current += 1
120124
progress.setPercentage(int(current * total))
121125

122-
# Calculate additional values
123-
rValue = maxValue - minValue
124-
uniqueValue = vector.getUniqueValuesCount(layer, index)
125-
126-
if count > 0:
127-
meanValue = sumValue / count
128-
if meanValue != 0.00:
129-
for v in values:
130-
stdDevValue += (v - meanValue) * (v - meanValue)
131-
stdDevValue = math.sqrt(stdDevValue / count)
132-
cvValue = stdDevValue / meanValue
133-
134-
if count > 1:
135-
tmp = sorted(values)
136-
137-
# Calculate median
138-
if count % 2 == 0:
139-
medianValue = 0.5 * (tmp[(count - 1) / 2] + tmp[count / 2])
140-
else:
141-
medianValue = tmp[(count + 1) / 2 - 1]
126+
stat = QgsStatisticalSummary()
127+
stat.calculate(values)
128+
129+
count = stat.count()
130+
uniqueValue = stat.variety()
131+
minValue = stat.min()
132+
maxValue = stat.max()
133+
rValue = stat.range()
134+
sumValue = stat.sum()
135+
meanValue = stat.mean()
136+
medianValue = stat.median()
137+
stdDevValue = stat.stDev()
138+
if meanValue != 0.00:
139+
cvValue = stdDevValue / meanValue
140+
minority = stat.minority()
141+
majority = stat.majority()
142+
firstQuartile = stat.firstQuartile()
143+
thirdQuartile = stat.thirdQuartile()
144+
iqr = stat.interQuartileRange()
142145

143146
data = []
144147
data.append('Count: ' + unicode(count))
@@ -151,6 +154,11 @@ def processAlgorithm(self, progress):
151154
data.append('Median value: ' + unicode(medianValue))
152155
data.append('Standard deviation: ' + unicode(stdDevValue))
153156
data.append('Coefficient of Variation: ' + unicode(cvValue))
157+
data.append('Minority (rarest occurring value): ' + unicode(minority))
158+
data.append('Majority (most frequently occurring value): ' + unicode(majority))
159+
data.append('First quartile: ' + unicode(firstQuartile))
160+
data.append('Third quartile: ' + unicode(thirdQuartile))
161+
data.append('Interquartile Range (IQR): ' + unicode(iqr))
154162

155163
self.createHTML(outputFile, data)
156164

@@ -163,7 +171,11 @@ def processAlgorithm(self, progress):
163171
self.setOutputValue(self.MEAN, meanValue)
164172
self.setOutputValue(self.MEDIAN, medianValue)
165173
self.setOutputValue(self.STD_DEV, stdDevValue)
166-
self.setOutputValue(self.CV, cvValue)
174+
self.setOutputValue(self.MINORITY, minority)
175+
self.setOutputValue(self.MAJORITY, majority)
176+
self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
177+
self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
178+
self.setOutputValue(self.IQR, iqr)
167179

168180
def createHTML(self, outputFile, algData):
169181
f = open(outputFile, 'w')

python/plugins/processing/algs/qgis/ExtractByAttribute.py

+7-10
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
__revision__ = '$Format:%H$'
2727

2828
from PyQt4.QtCore import QVariant
29-
from qgis.core import QgsExpression
29+
from qgis.core import QgsExpression, QgsFeatureRequest
3030
from processing.core.GeoAlgorithm import GeoAlgorithm
3131
from processing.core.GeoAlgorithmExecutionException import GeoAlgorithmExecutionException
3232
from processing.core.parameters import ParameterVector
@@ -117,15 +117,12 @@ def processAlgorithm(self, progress):
117117
self.tr('Unsupported field type "%s"' % fields[idx].typeName()))
118118

119119
expression = QgsExpression(expr)
120-
expression.prepare(fields)
121-
122-
features = vector.features(layer)
120+
if not expression.hasParserError():
121+
req = QgsFeatureRequest(expression)
122+
else:
123+
raise GeoAlgorithmExecutionException(expression.parserErrorString())
123124

124-
count = len(features)
125-
total = 100.0 / float(count)
126-
for count, f in enumerate(features):
127-
if expression.evaluate(f, fields):
128-
writer.addFeature(f)
129-
progress.setPercentage(int(count * total))
125+
for f in layer.getFeatures(req):
126+
writer.addFeature(f)
130127

131128
del writer

python/plugins/processing/algs/qgis/StatisticsByCategories.py

+7-32
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
__revision__ = '$Format:%H$'
2727

28-
import math
28+
from qgis.core import QgsStatisticalSummary
2929
from processing.core.outputs import OutputTable
3030
from processing.core.GeoAlgorithm import GeoAlgorithm
3131
from processing.tools import dataobjects, vector
@@ -83,36 +83,11 @@ def processAlgorithm(self, progress):
8383

8484
fields = ['category', 'min', 'max', 'mean', 'stddev', 'sum', 'count']
8585
writer = output.getTableWriter(fields)
86+
stat = QgsStatisticalSummary(QgsStatisticalSummary.Min | QgsStatisticalSummary.Max |
87+
QgsStatisticalSummary.Mean | QgsStatisticalSummary.StDevSample |
88+
QgsStatisticalSummary.Sum | QgsStatisticalSummary.Count)
89+
8690
for (cat, v) in values.items():
87-
(min, max, mean, stddev, sum) = calculateStats(v)
88-
record = [cat, min, max, mean, stddev, sum, len(v)]
91+
stat.calculate(v)
92+
record = [cat, stat.min(), stat.max(), stat.mean(), stat.sampleStDev(), stat.sum(), stat.count()]
8993
writer.addRecord(record)
90-
91-
92-
def calculateStats(values):
93-
n = 0
94-
sum = 0
95-
mean = 0
96-
M2 = 0
97-
minvalue = None
98-
maxvalue = None
99-
100-
for v in values:
101-
sum += v
102-
n = n + 1
103-
delta = v - mean
104-
mean = mean + delta / n
105-
M2 = M2 + delta * (v - mean)
106-
if minvalue is None:
107-
minvalue = v
108-
maxvalue = v
109-
else:
110-
minvalue = min(v, minvalue)
111-
maxvalue = max(v, maxvalue)
112-
113-
if n > 1:
114-
variance = M2 / (n - 1)
115-
else:
116-
variance = 0
117-
stddev = math.sqrt(variance)
118-
return (minvalue, maxvalue, mean, stddev, sum)

src/core/qgsstatisticalsummary.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ class CORE_EXPORT QgsStatisticalSummary
6565
/** Constructor for QgsStatisticalSummary
6666
* @param stats flags for statistics to calculate
6767
*/
68-
QgsStatisticalSummary( const QgsStatisticalSummary::Statistics& stats = Statistics( 0 ) );
68+
QgsStatisticalSummary( const QgsStatisticalSummary::Statistics& stats = All );
6969

7070
virtual ~QgsStatisticalSummary();
7171

tests/src/core/testqgsstatisticalsummary.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,9 @@ void TestQgsStatisticSummary::individualStatCalculations()
156156
QgsStatisticalSummary::Statistic stat = ( QgsStatisticalSummary::Statistic ) statInt;
157157
QFETCH( double, expected );
158158

159-
QgsStatisticalSummary s;
159+
//start with a summary which calculates NO statistics
160+
QgsStatisticalSummary s( QgsStatisticalSummary::Statistics( 0 ) );
161+
//set it to calculate just a single statistic
160162
s.setStatistics( stat );
161163
QCOMPARE( s.statistics(), stat );
162164

0 commit comments

Comments
 (0)