Permalink
Show file tree
Hide file tree
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
New class QgsStringStatisticalSummary, for calculating statistics
on lists of strings
- Loading branch information
1 parent
ceba526
commit 0493cbf
Showing
7 changed files
with
508 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
/** \ingroup core | ||
* \class QgsStringStatisticalSummary | ||
* \brief Calculator for summary statistics and aggregates for a list of strings. | ||
* | ||
* Statistics are calculated by calling @link calculate @endlink and passing a list of strings. The | ||
* individual statistics can then be retrieved using the associated methods. Note that not all statistics | ||
* are calculated by default. Statistics which require slower computations are only calculated by | ||
* specifying the statistic in the constructor or via @link setStatistics @endlink. | ||
* | ||
* \note Added in version 2.16 | ||
*/ | ||
|
||
|
||
class QgsStringStatisticalSummary
{
%TypeHeaderCode
#include <qgsstringstatisticalsummary.h>
%End

  public:

    //! Enumeration of flags that specify statistics to be calculated
    enum Statistic
    {
      Count, //!< Count
      CountDistinct, //!< Number of distinct string values
      CountMissing, //!< Number of missing (null) values
      Min, //!< Minimum string value
      Max, //!< Maximum string value
      MinimumLength, //!< Minimum length of string
      MaximumLength, //!< Maximum length of string
      All, //!< All statistics
    };
    typedef QFlags<QgsStringStatisticalSummary::Statistic> Statistics;

    /** Constructor for QgsStringStatisticalSummary
     * @param stats flags for statistics to calculate
     */
    QgsStringStatisticalSummary( const QgsStringStatisticalSummary::Statistics& stats = All );

    /** Returns flags which specify which statistics will be calculated. Some statistics
     * are always calculated (eg count).
     * @see setStatistics
     */
    Statistics statistics() const;

    /** Sets flags which specify which statistics will be calculated. Some statistics
     * are always calculated (eg count).
     * @param stats flags for statistics to calculate
     * @see statistics
     */
    void setStatistics( const Statistics& stats );

    /** Resets the calculated values
     */
    void reset();

    /** Calculates summary statistics for a list of strings.
     * @param values list of strings
     */
    void calculate( const QStringList& values );

    /** Calculates summary statistics for a list of variants. Any non-string variants will be
     * ignored.
     * @param values list of variants
     */
    void calculate( const QVariantList& values );

    /** Returns the value of a specified statistic
     * @param stat statistic to return
     * @returns calculated value of statistic
     */
    QVariant statistic( Statistic stat ) const;

    /** Returns the calculated count of values.
     */
    int count() const;

    /** Returns the number of distinct string values.
     * @see distinctValues()
     */
    int countDistinct() const;

    /** Returns the set of distinct string values.
     * @see countDistinct()
     */
    QSet< QString > distinctValues() const;

    /** Returns the number of missing (null) string values.
     */
    int countMissing() const;

    /** Returns the minimum (non-null) string value.
     */
    QString min() const;

    /** Returns the maximum (non-null) string value.
     */
    QString max() const;

    /** Returns the minimum length of strings.
     */
    int minLength() const;

    /** Returns the maximum length of strings.
     */
    int maxLength() const;

    /** Returns the friendly display name for a statistic
     * @param statistic statistic to return name for
     */
    static QString displayName( Statistic statistic );

};
|
||
QFlags<QgsStringStatisticalSummary::Statistic> operator|(QgsStringStatisticalSummary::Statistic f1, QFlags<QgsStringStatisticalSummary::Statistic> f2); | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
/*************************************************************************** | ||
qgsstringstatisticalsummary.cpp | ||
------------------------------- | ||
Date : May 2016 | ||
Copyright : (C) 2016 by Nyall Dawson | ||
Email : nyall dot dawson at gmail dot com | ||
*************************************************************************** | ||
* * | ||
* This program is free software; you can redistribute it and/or modify * | ||
* it under the terms of the GNU General Public License as published by * | ||
* the Free Software Foundation; either version 2 of the License, or * | ||
* (at your option) any later version. * | ||
* * | ||
***************************************************************************/ | ||
|
||
#include "qgsstringstatisticalsummary.h" | ||
#include <QString> | ||
#include <QStringList> | ||
#include <QObject> | ||
#include <QVariant> | ||
#include <QVariantList> | ||
#include "limits.h" | ||
|
||
/*************************************************************************** | ||
* This class is considered CRITICAL and any change MUST be accompanied with | ||
* full unit tests in test_qgsstringstatisticalsummary.py. | ||
* See details in QEP #17 | ||
****************************************************************************/ | ||
|
||
// Stores the requested statistic flags, then puts every accumulator into its
// initial state by delegating to reset().
QgsStringStatisticalSummary::QgsStringStatisticalSummary( const QgsStringStatisticalSummary::Statistics& stats )
    : mStatistics( stats )
{
  this->reset();
}
|
||
void QgsStringStatisticalSummary::reset() | ||
{ | ||
mCount = 0; | ||
mValues.clear(); | ||
mCountMissing = 0; | ||
mMin.clear(); | ||
mMax.clear(); | ||
mMinLength = INT_MAX; | ||
mMaxLength = 0; | ||
} | ||
|
||
void QgsStringStatisticalSummary::calculate( const QStringList& values ) | ||
{ | ||
reset(); | ||
|
||
Q_FOREACH ( const QString& string, values ) | ||
{ | ||
testString( string ); | ||
} | ||
} | ||
|
||
void QgsStringStatisticalSummary::calculate( const QVariantList& values ) | ||
{ | ||
reset(); | ||
|
||
Q_FOREACH ( const QVariant& variant, values ) | ||
{ | ||
if ( variant.type() == QVariant::String ) | ||
{ | ||
testString( variant.toString() ); | ||
} | ||
} | ||
} | ||
|
||
void QgsStringStatisticalSummary::testString( const QString& string ) | ||
{ | ||
mCount++; | ||
|
||
if ( string.isEmpty() ) | ||
mCountMissing++; | ||
|
||
if ( mStatistics & CountDistinct ) | ||
{ | ||
mValues << string; | ||
} | ||
if ( mStatistics & Min ) | ||
{ | ||
if ( !mMin.isEmpty() && !string.isEmpty() ) | ||
{ | ||
mMin = qMin( mMin, string ); | ||
} | ||
else if ( mMin.isEmpty() && !string.isEmpty() ) | ||
{ | ||
mMin = string; | ||
} | ||
} | ||
if ( mStatistics & Max ) | ||
{ | ||
if ( !mMax.isEmpty() && !string.isEmpty() ) | ||
{ | ||
mMax = qMax( mMax, string ); | ||
} | ||
else if ( mMax.isEmpty() && !string.isEmpty() ) | ||
{ | ||
mMax = string; | ||
} | ||
} | ||
mMinLength = qMin( mMinLength, string.length() ); | ||
mMaxLength = qMax( mMaxLength, string.length() ); | ||
} | ||
|
||
// Returns the stored result for one statistic, wrapped in a QVariant.
// The All flag has no single value of its own and yields 0, as does any value
// that is not handled by the switch.
QVariant QgsStringStatisticalSummary::statistic( QgsStringStatisticalSummary::Statistic stat ) const
{
  QVariant result = 0;

  switch ( stat )
  {
    case Count:
      result = mCount;
      break;
    case CountDistinct:
      result = mValues.count();
      break;
    case CountMissing:
      result = mCountMissing;
      break;
    case Min:
      result = mMin;
      break;
    case Max:
      result = mMax;
      break;
    case MinimumLength:
      result = mMinLength;
      break;
    case MaximumLength:
      result = mMaxLength;
      break;
    case All:
      break;
  }

  return result;
}
|
||
// Returns the translated, user-facing label for a statistic.
// The All flag (and any value not handled by the switch) has no label and
// yields a null QString.
QString QgsStringStatisticalSummary::displayName( QgsStringStatisticalSummary::Statistic statistic )
{
  QString label;

  switch ( statistic )
  {
    case Count:
      label = QObject::tr( "Count" );
      break;
    case CountDistinct:
      label = QObject::tr( "Count (distinct)" );
      break;
    case CountMissing:
      label = QObject::tr( "Count (missing)" );
      break;
    case Min:
      label = QObject::tr( "Minimum" );
      break;
    case Max:
      label = QObject::tr( "Maximum" );
      break;
    case MinimumLength:
      label = QObject::tr( "Minimum length" );
      break;
    case MaximumLength:
      label = QObject::tr( "Maximum length" );
      break;
    case All:
      break;
  }

  return label;
}
|
Oops, something went wrong.