diff --git a/python/core/core.sip b/python/core/core.sip
index 42de8669a717..655cf67c4d5d 100644
--- a/python/core/core.sip
+++ b/python/core/core.sip
@@ -43,6 +43,7 @@
 %Include qgsgeometry.sip
 %Include qgsgeometryvalidator.sip
 %Include qgsgeometrysimplifier.sip
+%Include qgshistogram.sip
 %Include qgsmaptopixelgeometrysimplifier.sip
 %Include qgsgml.sip
 %Include qgsgmlschema.sip
diff --git a/python/core/qgshistogram.sip b/python/core/qgshistogram.sip
new file mode 100644
index 000000000000..f63edbbff508
--- /dev/null
+++ b/python/core/qgshistogram.sip
@@ -0,0 +1,66 @@
+/** \ingroup core
+ * \class QgsHistogram
+ * \brief Calculator for a numeric histogram from a list of values.
+ *
+ * \note Added in version 2.9
+ */
+
+class QgsHistogram
+{
+%TypeHeaderCode
+#include "qgshistogram.h"
+%End
+
+  public:
+
+    QgsHistogram();
+
+    virtual ~QgsHistogram();
+
+    /** Assigns numeric source values for the histogram.
+     * @param values list of doubles
+     */
+    void setValues( const QList<double>& values );
+
+    /** Assigns numeric source values for the histogram from a vector layer's field or as the
+     * result of an expression.
+     * @param layer vector layer
+     * @param fieldOrExpression field name or expression to be evaluated
+     * @returns true if values were successfully set
+     */
+    bool setValues( QgsVectorLayer* layer, const QString& fieldOrExpression );
+
+    /** Calculates the optimal bin width using the Freedman-Diaconis rule. Bin widths are
+     * determined by the inter-quartile range of values and the number of values.
+     * @returns optimal width for bins
+     * @see optimalNumberBins
+     * @note values must first be specified using @link setValues @endlink
+     */
+    double optimalBinWidth() const;
+
+    /** Returns the optimal number of bins for the source values, calculated using the
+     * Freedman-Diaconis rule. The number of bins is determined by the inter-quartile range
+     * of values and the number of values.
+     * @returns optimal number of bins
+     * @see optimalBinWidth
+     * @note values must first be specified using @link setValues @endlink
+     */
+    int optimalNumberBins() const;
+
+    /** Returns a list of edges for the histogram for a specified number of bins. This list
+     * will be length bins + 1, as both the first and last value are also included.
+     * @param bins number of bins
+     * @return list of bin edges
+     * @note values must first be specified using @link setValues @endlink
+     */
+    QList<double> binEdges( int bins ) const;
+
+    /** Returns the calculated list of the counts for the histogram bins.
+     * @param bins number of histogram bins
+     * @return list of histogram counts
+     * @note values must first be specified using @link setValues @endlink
+     */
+    QList<int> counts( int bins ) const;
+
+};
+
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 5da20ca50499..ddb5e6c18d33 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -107,6 +107,7 @@ SET(QGIS_CORE_SRCS
   qgsgeometryvalidator.cpp
   qgsgml.cpp
   qgsgmlschema.cpp
+  qgshistogram.cpp
   qgslayerdefinition.cpp
   qgslabel.cpp
   qgslabelattributes.cpp
@@ -510,6 +511,7 @@ SET(QGIS_CORE_HDRS
   qgsfontutils.h
   qgsgeometry.h
   qgsgeometrycache.h
+  qgshistogram.h
   qgslayerdefinition.h
   qgslabel.h
   qgslabelattributes.h
diff --git a/src/core/qgshistogram.cpp b/src/core/qgshistogram.cpp
new file mode 100644
index 000000000000..535d1cb1c296
--- /dev/null
+++ b/src/core/qgshistogram.cpp
@@ -0,0 +1,125 @@
+/***************************************************************************
+                         qgshistogram.cpp
+                         ----------------
+    begin                : May 2015
+    copyright            : (C) 2015 by Nyall Dawson
+    email                : nyall dot dawson at gmail dot com
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "qgshistogram.h"
+
+#include "qgsstatisticalsummary.h"
+#include "qgsvectorlayer.h"
+#include <qmath.h>
+
+QgsHistogram::QgsHistogram()
+    : mMax( 0 )
+    , mMin( 0 )
+    , mIQR( 0 )
+{
+}
+
+QgsHistogram::~QgsHistogram()
+{
+}
+
+void QgsHistogram::prepareValues()
+{
+  // counts() walks the values in a single pass, which needs them in ascending order
+  qSort( mValues.begin(), mValues.end() );
+
+  QgsStatisticalSummary s;
+  s.setStatistics( QgsStatisticalSummary::Max | QgsStatisticalSummary::Min | QgsStatisticalSummary::InterQuartileRange );
+  s.calculate( mValues );
+  mMin = s.min();
+  mMax = s.max();
+  mIQR = s.interQuartileRange();
+}
+
+void QgsHistogram::setValues( const QList<double> &values )
+{
+  mValues = values;
+  prepareValues();
+}
+
+bool QgsHistogram::setValues( QgsVectorLayer *layer, const QString &fieldOrExpression )
+{
+  mValues.clear();
+  if ( !layer )
+    return false;
+
+  bool ok = false;
+  mValues = layer->getDoubleValues( fieldOrExpression, ok );
+  if ( !ok )
+    return false;
+
+  prepareValues();
+  return true;
+}
+
+double QgsHistogram::optimalBinWidth() const
+{
+  //Freedman-Diaconis rule (0 for an empty list, where the formula would give NaN)
+  return mValues.isEmpty() ? 0.0 : 2.0 * mIQR * qPow( mValues.count(), -1 / 3.0 );
+}
+
+int QgsHistogram::optimalNumberBins() const
+{
+  const double binWidth = optimalBinWidth();
+  // a width that is not positive (no values, or an inter-quartile range of 0) cannot be divided by
+  if ( !( binWidth > 0 ) )
+    return mValues.isEmpty() ? 0 : 1;
+
+  return ceil(( mMax - mMin ) / binWidth );
+}
+
+QList<double> QgsHistogram::binEdges( int bins ) const
+{
+  QList<double> edges;
+  if ( bins < 1 )
+    return edges;
+
+  const double binWidth = ( mMax - mMin ) / bins;
+  edges << mMin;
+  // multiply rather than accumulate, so rounding errors do not build up
+  for ( int i = 1; i < bins; ++i )
+    edges << mMin + i * binWidth;
+  edges << mMax;
+  return edges;
+}
+
+QList<int> QgsHistogram::counts( int bins ) const
+{
+  QList<int> binCounts;
+  if ( bins < 1 )
+    return binCounts;
+
+  const QList<double> edges = binEdges( bins );
+  binCounts.reserve( bins );
+  int currentValueIndex = 0;
+  for ( int i = 0; i < bins; ++i )
+  {
+    int count = 0;
+    // check the index first so an exhausted (or empty) value list is never read
+    while ( currentValueIndex < mValues.count() && mValues.at( currentValueIndex ) < edges.at( i + 1 ) )
+    {
+      count++;
+      currentValueIndex++;
+    }
+    binCounts << count;
+  }
+
+  // the last bin is closed on the right, so it takes every value not yet counted
+  binCounts[ bins - 1 ] += mValues.count() - currentValueIndex;
+
+  return binCounts;
+}
diff --git a/src/core/qgshistogram.h b/src/core/qgshistogram.h
new file mode 100644
index 000000000000..eacd92ca57d2
--- /dev/null
+++ b/src/core/qgshistogram.h
@@ -0,0 +1,97 @@
+/***************************************************************************
+                         qgshistogram.h
+                         --------------
+    begin                : May 2015
+    copyright            : (C) 2015 by Nyall Dawson
+    email                : nyall dot dawson at gmail dot com
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef QGSHISTOGRAM_H
+#define QGSHISTOGRAM_H
+
+#include <QList>
+
+class QgsVectorLayer;
+
+
+/** \ingroup core
+ * \class QgsHistogram
+ * \brief Calculator for a numeric histogram from a list of values.
+ *
+ * \note Added in version 2.9
+ */
+
+class CORE_EXPORT QgsHistogram
+{
+  public:
+
+    QgsHistogram();
+
+    virtual ~QgsHistogram();
+
+    /** Assigns numeric source values for the histogram.
+     * @param values list of doubles
+     */
+    void setValues( const QList<double>& values );
+
+    /** Assigns numeric source values for the histogram from a vector layer's field or as the
+     * result of an expression.
+     * @param layer vector layer
+     * @param fieldOrExpression field name or expression to be evaluated
+     * @returns true if values were successfully set
+     */
+    bool setValues( QgsVectorLayer* layer, const QString& fieldOrExpression );
+
+    /** Calculates the optimal bin width using the Freedman-Diaconis rule. Bin widths are
+     * determined by the inter-quartile range of values and the number of values.
+     * @returns optimal width for bins, or 0 if there are no values
+     * @see optimalNumberBins
+     * @note values must first be specified using @link setValues @endlink
+     */
+    double optimalBinWidth() const;
+
+    /** Returns the optimal number of bins for the source values, calculated using the
+     * Freedman-Diaconis rule. The number of bins is determined by the inter-quartile range
+     * of values and the number of values.
+     * @returns optimal number of bins (0 without values, 1 if the optimal bin width is 0)
+     * @see optimalBinWidth
+     * @note values must first be specified using @link setValues @endlink
+     */
+    int optimalNumberBins() const;
+
+    /** Returns a list of edges for the histogram for a specified number of bins. This list
+     * will be length bins + 1, as both the first and last value are also included.
+     * @param bins number of bins
+     * @return list of bin edges, or an empty list if bins is less than 1
+     * @note values must first be specified using @link setValues @endlink
+     */
+    QList<double> binEdges( int bins ) const;
+
+    /** Returns the calculated list of the counts for the histogram bins.
+     * @param bins number of histogram bins
+     * @return list of histogram counts, or an empty list if bins is less than 1
+     * @note values must first be specified using @link setValues @endlink
+     */
+    QList<int> counts( int bins ) const;
+
+  private:
+
+    QList<double> mValues; //!< source values, sorted ascending by prepareValues()
+    double mMax; //!< maximum of mValues
+    double mMin; //!< minimum of mValues
+    double mIQR; //!< inter-quartile range of mValues
+
+    void prepareValues();
+
+};
+
+#endif // QGSHISTOGRAM_H
diff --git a/tests/src/core/CMakeLists.txt b/tests/src/core/CMakeLists.txt
index 717c08cf847e..902074905fe5 100644
--- a/tests/src/core/CMakeLists.txt
+++ b/tests/src/core/CMakeLists.txt
@@ -156,4 +156,5 @@ ADD_QGIS_TEST(imageoperationtest testqgsimageoperation.cpp)
 ADD_QGIS_TEST(painteffecttest testqgspainteffect.cpp)
 ADD_QGIS_TEST(painteffectregistrytest testqgspainteffectregistry.cpp)
 ADD_QGIS_TEST(statisticalsummarytest testqgsstatisticalsummary.cpp)
+ADD_QGIS_TEST(histogramtest testqgshistogram.cpp)
 
diff --git a/tests/src/core/testqgshistogram.cpp b/tests/src/core/testqgshistogram.cpp
new file mode 100644
index 000000000000..8a1076aae195
--- /dev/null
+++ b/tests/src/core/testqgshistogram.cpp
@@ -0,0 +1,157 @@
+/***************************************************************************
+     testqgshistogram.cpp
+     --------------------
+    Date                 : May 2015
+    Copyright            : (C) 2015 by Nyall Dawson
+    Email                : nyall dot dawson at gmail dot com
+ ***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
* + * * + ***************************************************************************/ + +#include +#include + +#include "qgsapplication.h" +#include "qgsvectorlayer.h" +#include "qgsvectordataprovider.h" +#include "qgshistogram.h" + +/** \ingroup UnitTests + * This is a unit test for QgsHistogram + */ +class TestQgsHistogram : public QObject +{ + Q_OBJECT + + public: + TestQgsHistogram(); + + private slots: + void initTestCase(); + void cleanupTestCase(); + void init() {} + void cleanup() {} + void optimalBinWidth(); + void optimalBinCount(); + void binEdges(); + void counts(); + void fromLayer(); + + private: + +}; + +TestQgsHistogram::TestQgsHistogram() +{ + +} + +void TestQgsHistogram::initTestCase() +{ + QgsApplication::init(); + QgsApplication::initQgis(); + +} + +void TestQgsHistogram::cleanupTestCase() +{ + QgsApplication::exitQgis(); +} + +void TestQgsHistogram::optimalBinWidth() +{ + QList vals; + vals << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << 9 << 10; + + QgsHistogram h; + h.setValues( vals ); + QVERIFY( qgsDoubleNear( h.optimalBinWidth(), 4.641, 0.001 ) ); +} + +void TestQgsHistogram::optimalBinCount() +{ + QList vals; + vals << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << 9 << 10; + + QgsHistogram h; + h.setValues( vals ); + QCOMPARE( h.optimalNumberBins(), 2 ); +} + +void TestQgsHistogram::binEdges() +{ + QList vals; + vals << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << 9 << 10; + + QgsHistogram h; + h.setValues( vals ); + QList edges = h.binEdges( 3 ); + QCOMPARE( edges.count(), 4 ); + QCOMPARE( edges.at( 0 ), 1.0 ); + QCOMPARE( edges.at( 1 ), 4.0 ); + QCOMPARE( edges.at( 2 ), 7.0 ); + QCOMPARE( edges.at( 3 ), 10.0 ); +} + +void TestQgsHistogram::counts() +{ + QList vals; + vals << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << 9 << 10; + + QgsHistogram h; + h.setValues( vals ); + QList counts = h.counts( 1 ); + QList expected; + expected << 10; + QCOMPARE( counts, expected ); + + counts = h.counts( 2 ); + expected.clear(); + expected << 5 << 5; + QCOMPARE( counts, 
expected ); + + counts = h.counts( 5 ); + expected.clear(); + expected << 2 << 2 << 2 << 2 << 2; + QCOMPARE( counts, expected ); + + counts = h.counts( 20 ); + expected.clear(); + expected << 1 << 0 << 1 << 0 << 1 << 0 << 1 << 0 << 1 << 0 << 0 << 1 << 0 << 1 << 0 << 1 << 0 << 1 << 0 << 1; + QCOMPARE( counts, expected ); +} + +void TestQgsHistogram::fromLayer() +{ + QgsHistogram h; + + QVERIFY( !h.setValues( 0, QString() )); + + QgsVectorLayer* layer = new QgsVectorLayer( "Point?field=col1:real", "layer", "memory" ); + QVERIFY( layer->isValid() ); + QgsFeatureList features; + for ( int i = 1; i <= 10; ++i ) + { + QgsFeature f( layer->dataProvider()->fields(), i ); + f.setAttribute( "col1", i ); + features << f; + } + layer->dataProvider()->addFeatures( features ); + + QVERIFY( !h.setValues( layer, QString() )); + QVERIFY( h.setValues( layer, QString( "col1" ) ) ); + QListcounts = h.counts( 5 ); + QList expected; + expected << 2 << 2 << 2 << 2 << 2; + QCOMPARE( counts, expected ); + + delete layer; +} + +QTEST_MAIN( TestQgsHistogram ) +#include "testqgshistogram.moc"