Skip to content
Permalink
Browse files

Merge pull request #41364 from domi4484/modeMajorityConfusion

Fix array mode/majority confusion
  • Loading branch information
m-kuhn committed Feb 20, 2021
2 parents f14edc3 + 5a9782d commit 14e5c6094df778aad6d5b0b5812a446d3ff4d0e3
@@ -2,10 +2,18 @@
"name": "array_majority",
"type": "function",
"groups": ["Arrays"],
"description": "Returns an array containing the most common value in an array. The returned array may contain more than one value if multiple values occur equally often.",
"arguments": [ {"arg":"array","description":"an array"} ],
"examples": [
{ "expression":"array_majority(array(0,1,42,42,43))", "returns":"[ 42 ]"},
{ "expression":"array_majority(array(0,0,1,2,2,42))", "returns":"[ 0, 2 ]"}
"description": "Returns the most common values in an array.",
"arguments": [
{"arg":"array","description":"an array"},
{"arg":"option='all'", "optional":true, "description":"a string specifying the return values handling. Valid options are:<br /><ul><li>all: Default, all most common values are returned in an array.</li><li>any: Returns one of the most common values.</li><li>median: Returns the median of the most common values. Non arithmetic values are ignored.</li><li>real_majority: Returns the value which occurs more than half the size of the array.</li></ul>"}
],
"examples": [
{ "expression":"array_majority(array(0,1,42,42,43), 'all')", "returns":"[ 42 ]"},
{ "expression":"array_majority(array(0,1,42,42,43,1), 'all')", "returns":"[ 42, 1 ]"},
{ "expression":"array_majority(array(0,1,42,42,43,1), 'any')", "returns":"1 or 42"},
{ "expression":"array_majority(array(0,1,1,2,2), 'median')", "returns":"1.5"},
{ "expression":"array_majority(array(0,1,42,42,43), 'real_majority')", "returns":"NULL"},
{ "expression":"array_majority(array(0,1,42,42,43,42), 'real_majority')", "returns":"NULL"},
{ "expression":"array_majority(array(0,1,42,42,43,42,42), 'real_majority')", "returns":"42"}
]
}
@@ -0,0 +1,19 @@
{
"name": "array_minority",
"type": "function",
"groups": ["Arrays"],
"description": "Returns the less common values in an array.",
"arguments": [
{"arg":"array","description":"an array"},
{"arg":"option='all'", "optional":true, "description":"a string specifying the return values handling. Valid options are:<br /><ul><li>all: Default, all less common values are returned in an array.</li><li>any: Returns one of the less common values.</li><li>median: Returns the median of the less common values. Non arithmetic values are ignored.</li><li>real_minority: Returns values which occur less than half the size of the array.</li></ul>"}
],
"examples": [
{ "expression":"array_minority(array(0,42,42), 'all')", "returns":"[ 0 ]"},
{ "expression":"array_minority(array(0,1,42,42), 'all')", "returns":"[ 0, 1 ]"},
{ "expression":"array_minority(array(0,1,42,42,43,1), 'any')", "returns":"0 or 43"},
{ "expression":"array_minority(array(1,2,3,3), 'median')", "returns":"1.5"},
{ "expression":"array_minority(array(0,1,42,42,43), 'real_minority')", "returns":"[ 42, 43, 0, 1 ]"},
{ "expression":"array_minority(array(0,1,42,42,43,42), 'real_minority')", "returns":"[ 42, 43, 0, 1 ]"},
{ "expression":"array_minority(array(0,1,42,42,43,42,42), 'real_minority')", "returns":"[ 43, 0, 1 ]"}
]
}
@@ -5419,19 +5419,6 @@ static QVariant fcnArrayMedian( const QVariantList &values, const QgsExpressionC
}
}

/**
 * Expression function returning the most frequent value(s) of an array.
 *
 * \param values function arguments; values.at( 0 ) is read as the input array
 * \param parent expression passed on to QgsExpressionUtils::getListValue()
 * \returns a list with every value sharing the highest occurrence count, or an
 *          invalid QVariant when the input list is empty
 */
static QVariant fcnArrayMajority( const QVariantList &values, const QgsExpressionContext *, QgsExpression *parent, const QgsExpressionNodeFunction * )
{
  const QVariantList list = QgsExpressionUtils::getListValue( values.at( 0 ), parent );

  // Bail out before looking for the maximum: std::max_element() on an empty
  // range returns the end iterator, which must never be dereferenced.
  if ( list.isEmpty() )
    return QVariant();

  // Count how many times each distinct value occurs.
  QHash< QVariant, int > hash;
  for ( const auto &item : list )
  {
    ++hash[item];
  }

  const QList< int > occurrences = hash.values();
  const int maxValue = *std::max_element( occurrences.constBegin(), occurrences.constEnd() );

  // All keys whose count equals the highest count (may be more than one).
  return QVariant( hash.keys( maxValue ) );
}

static QVariant fcnArraySum( const QVariantList &values, const QgsExpressionContext *, QgsExpression *parent, const QgsExpressionNodeFunction * )
{
const QVariantList list = QgsExpressionUtils::getListValue( values.at( 0 ), parent );
@@ -5462,6 +5449,93 @@ static QVariant convertToSameType( const QVariant &value, QVariant::Type type )
return result;
}

/**
 * Expression function returning the most common value(s) of an array.
 *
 * \param values function arguments: values.at( 0 ) is read as the input array,
 *        values.at( 1 ) as the option string (compared case-insensitively)
 * \param context forwarded unchanged to fcnArrayMedian() for the "median" option
 * \param parent expression receiving the evaluation error for an unknown option
 * \param node forwarded unchanged to fcnArrayMedian() for the "median" option
 * \returns depending on the option:
 *  - "all": the list of all values with the highest occurrence count, passed
 *    through convertToSameType() with the type of the first argument
 *  - "any": the first of these values, or an invalid QVariant for an empty array
 *  - "median": the result of fcnArrayMedian() applied to these values
 *  - "real_majority": the value occurring in more than half of the array
 *    entries, or an invalid QVariant if there is none
 *  - anything else: an invalid QVariant, with an evaluation error set on \a parent
 */
static QVariant fcnArrayMajority( const QVariantList &values, const QgsExpressionContext *context, QgsExpression *parent, const QgsExpressionNodeFunction *node )
{
  const QVariantList list = QgsExpressionUtils::getListValue( values.at( 0 ), parent );

  // Count how many times each distinct value occurs.
  QHash< QVariant, int > hash;
  for ( const auto &item : list )
  {
    ++hash[item];
  }
  const QList< int > occurrences = hash.values();

  // std::max_element() on an empty range returns the end iterator, which must
  // not be dereferenced. Using 0 for an empty input is safe: no key ever has a
  // count of 0, so hash.keys( maxValue ) is simply empty in that case.
  const int maxValue = occurrences.isEmpty()
                       ? 0
                       : *std::max_element( occurrences.constBegin(), occurrences.constEnd() );

  const QString option = values.at( 1 ).toString();
  if ( option.compare( QLatin1String( "all" ), Qt::CaseInsensitive ) == 0 )
  {
    return convertToSameType( hash.keys( maxValue ), values.at( 0 ).type() );
  }
  else if ( option.compare( QLatin1String( "any" ), Qt::CaseInsensitive ) == 0 )
  {
    if ( hash.isEmpty() )
      return QVariant();

    return QVariant( hash.keys( maxValue ).first() );
  }
  else if ( option.compare( QLatin1String( "median" ), Qt::CaseInsensitive ) == 0 )
  {
    return fcnArrayMedian( QVariantList() << convertToSameType( hash.keys( maxValue ), values.at( 0 ).type() ), context, parent, node );
  }
  else if ( option.compare( QLatin1String( "real_majority" ), Qt::CaseInsensitive ) == 0 )
  {
    // A real majority needs strictly more than half of all entries; this also
    // covers the empty array (0 * 2 <= 0).
    if ( maxValue * 2 <= list.size() )
      return QVariant();

    // At most one value can occur in more than half of the entries.
    return QVariant( hash.keys( maxValue ).first() );
  }
  else
  {
    parent->setEvalErrorString( QObject::tr( "No such option '%1'" ).arg( option ) );
    return QVariant();
  }
}

/**
 * Expression function returning the least common value(s) of an array.
 *
 * \param values function arguments: values.at( 0 ) is read as the input array,
 *        values.at( 1 ) as the option string (compared case-insensitively)
 * \param context forwarded unchanged to fcnArrayMedian() for the "median" option
 * \param parent expression receiving the evaluation error for an unknown option
 * \param node forwarded unchanged to fcnArrayMedian() for the "median" option
 * \returns depending on the option:
 *  - "all": the list of all values with the lowest occurrence count, passed
 *    through convertToSameType() with the type of the first argument
 *  - "any": the first of these values, or an invalid QVariant for an empty array
 *  - "median": the result of fcnArrayMedian() applied to these values
 *  - "real_minority": every distinct value except the one (if any) occurring in
 *    more than half of the array entries, or an invalid QVariant for an empty array
 *  - anything else: an invalid QVariant, with an evaluation error set on \a parent
 */
static QVariant fcnArrayMinority( const QVariantList &values, const QgsExpressionContext *context, QgsExpression *parent, const QgsExpressionNodeFunction *node )
{
  const QVariantList list = QgsExpressionUtils::getListValue( values.at( 0 ), parent );

  // Count how many times each distinct value occurs.
  QHash< QVariant, int > hash;
  for ( const auto &item : list )
  {
    ++hash[item];
  }
  const QList< int > occurrences = hash.values();

  // std::min_element() on an empty range returns the end iterator, which must
  // not be dereferenced. Using 0 for an empty input is safe: no key ever has a
  // count of 0, so hash.keys( minValue ) is simply empty in that case.
  const int minValue = occurrences.isEmpty()
                       ? 0
                       : *std::min_element( occurrences.constBegin(), occurrences.constEnd() );

  const QString option = values.at( 1 ).toString();
  if ( option.compare( QLatin1String( "all" ), Qt::CaseInsensitive ) == 0 )
  {
    return convertToSameType( hash.keys( minValue ), values.at( 0 ).type() );
  }
  else if ( option.compare( QLatin1String( "any" ), Qt::CaseInsensitive ) == 0 )
  {
    if ( hash.isEmpty() )
      return QVariant();

    return QVariant( hash.keys( minValue ).first() );
  }
  else if ( option.compare( QLatin1String( "median" ), Qt::CaseInsensitive ) == 0 )
  {
    return fcnArrayMedian( QVariantList() << convertToSameType( hash.keys( minValue ), values.at( 0 ).type() ), context, parent, node );
  }
  else if ( option.compare( QLatin1String( "real_minority" ), Qt::CaseInsensitive ) == 0 )
  {
    // The emptiness check must precede std::max_element() below.
    if ( hash.isEmpty() )
      return QVariant();

    // Remove the majority, all others are minority
    const int maxValue = *std::max_element( occurrences.constBegin(), occurrences.constEnd() );
    if ( maxValue * 2 > list.size() )
      hash.remove( hash.key( maxValue ) );

    return convertToSameType( hash.keys(), values.at( 0 ).type() );
  }
  else
  {
    parent->setEvalErrorString( QObject::tr( "No such option '%1'" ).arg( option ) );
    return QVariant();
  }
}

static QVariant fcnArrayAppend( const QVariantList &values, const QgsExpressionContext *, QgsExpression *parent, const QgsExpressionNodeFunction * )
{
QVariantList list = QgsExpressionUtils::getListValue( values.at( 0 ), parent );
@@ -7068,7 +7142,8 @@ const QList<QgsExpressionFunction *> &QgsExpression::Functions()
<< new QgsStaticExpressionFunction( QStringLiteral( "array_max" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "array" ) ), fcnArrayMaximum, QStringLiteral( "Arrays" ) )
<< new QgsStaticExpressionFunction( QStringLiteral( "array_mean" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "array" ) ), fcnArrayMean, QStringLiteral( "Arrays" ) )
<< new QgsStaticExpressionFunction( QStringLiteral( "array_median" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "array" ) ), fcnArrayMedian, QStringLiteral( "Arrays" ) )
<< new QgsStaticExpressionFunction( QStringLiteral( "array_majority" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "array" ) ), fcnArrayMajority, QStringLiteral( "Arrays" ) )
<< new QgsStaticExpressionFunction( QStringLiteral( "array_majority" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "array" ) ) << QgsExpressionFunction::Parameter( QStringLiteral( "option" ), true, QVariant( "all" ) ), fcnArrayMajority, QStringLiteral( "Arrays" ) )
<< new QgsStaticExpressionFunction( QStringLiteral( "array_minority" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "array" ) ) << QgsExpressionFunction::Parameter( QStringLiteral( "option" ), true, QVariant( "all" ) ), fcnArrayMinority, QStringLiteral( "Arrays" ) )
<< new QgsStaticExpressionFunction( QStringLiteral( "array_sum" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "array" ) ), fcnArraySum, QStringLiteral( "Arrays" ) )
<< new QgsStaticExpressionFunction( QStringLiteral( "array_append" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "array" ) ) << QgsExpressionFunction::Parameter( QStringLiteral( "value" ) ), fcnArrayAppend, QStringLiteral( "Arrays" ) )
<< new QgsStaticExpressionFunction( QStringLiteral( "array_prepend" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "array" ) ) << QgsExpressionFunction::Parameter( QStringLiteral( "value" ) ), fcnArrayPrepend, QStringLiteral( "Arrays" ) )
@@ -1664,7 +1664,7 @@ class TestQgsExpression: public QObject
QTest::newRow( "array_last(array('a', 'b', 'c'))" ) << QStringLiteral( "array_last(array('a', 'b', 'c'))" ) << false << QVariant( "c" );
QTest::newRow( "array_last(array())" ) << QStringLiteral( "array_last(array())" ) << false << QVariant();

// array_min, array_max, array_mean, array_median, array_majority, array_sum
// array_min, array_max, array_mean, array_median, array_majority, array_minority, array_sum
QTest::newRow( "array_min('forty two')" ) << QStringLiteral( "array_min('forty two')" ) << true << QVariant();
QTest::newRow( "array_min(42)" ) << QStringLiteral( "array_min(42)" ) << true << QVariant();
QTest::newRow( "array_min(array())" ) << QStringLiteral( "array_min(array())" ) << false << QVariant();
@@ -1687,8 +1687,30 @@ class TestQgsExpression: public QObject
QTest::newRow( "array_median(array(0,0,1,2,2,42,'a','b'))" ) << QStringLiteral( "array_median(array(0,0,1,2,2,42,'a','b'))" ) << false << QVariant( 1.5 );
QTest::newRow( "array_majority('forty two')" ) << QStringLiteral( "array_majority('forty two')" ) << true << QVariant();
QTest::newRow( "array_majority(42)" ) << QStringLiteral( "array_majority(42)" ) << true << QVariant();
QTest::newRow( "array_majority(array())" ) << QStringLiteral( "array_majority(array())" ) << false << QVariant();
QTest::newRow( "array_majority(array(1,2,42,42,'a','b'))" ) << QStringLiteral( "array_majority(array(1,2,42,42,'a','b'))" ) << false << QVariant( QVariantList() << 42 );
QTest::newRow( "array_majority(array())" ) << QStringLiteral( "array_majority(array())" ) << false << QVariant( QVariantList() );
QTest::newRow( "array_majority(array(0,1,42,42,43), 'all')" ) << QStringLiteral( "array_majority(array(0,1,42,42,43), 'all')" ) << false << QVariant( QVariantList() << 42 );
QTest::newRow( "array_majority(array(0,1,43,'a','a','b'), 'all')" ) << QStringLiteral( "array_majority(array(0,1,43,'a','a','b'), 'all')" ) << false << QVariant( QVariantList() << "a" );
QTest::newRow( "array_majority(array(0,1,42,42,43,1)" ) << QStringLiteral( "array_sort(array_majority(array(0,1,42,42,43,1)))" ) << false << QVariant( QVariantList() << 1 << 42 );
QTest::newRow( "array_majority(array(0,1,42,42,43), 'any')" ) << QStringLiteral( "array_majority(array(0,1,42,42,43), 'any')" ) << false << QVariant( 42 );
QTest::newRow( "array_majority(array(0,1,1,2,2,42), 'median')" ) << QStringLiteral( "array_majority(array(0,1,1,2,2,42), 'median')" ) << false << QVariant( 1.5 );
QTest::newRow( "array_majority(array(0,1,1,2,2,42,'a','b'), 'median')" ) << QStringLiteral( "array_majority(array(0,1,1,2,2,42,'a','b'), 'median')" ) << false << QVariant( 1.5 );
QTest::newRow( "array_majority(array(0,1,2,42,'a','b','a'), 'median')" ) << QStringLiteral( "array_majority(array(0,1,2,42,'a','b','a'), 'median')" ) << false << QVariant();
QTest::newRow( "array_majority(array(0,1,42,42,43), 'real_majority')" ) << QStringLiteral( "array_majority(array(0,1,42,42,43), 'real_majority')" ) << false << QVariant();
QTest::newRow( "array_majority(array(0,1,42,42,43,42), 'real_majority')" ) << QStringLiteral( "array_majority(array(0,1,42,42,43,42), 'real_majority')" ) << false << QVariant();
QTest::newRow( "array_majority(array(0,1,42,42,43,42,42), 'real_majority')" ) << QStringLiteral( "array_majority(array(0,1,42,42,43,42,42), 'real_majority')" ) << false << QVariant( 42 );
QTest::newRow( "array_minority('forty two')" ) << QStringLiteral( "array_minority('forty two')" ) << true << QVariant();
QTest::newRow( "array_minority(42)" ) << QStringLiteral( "array_minority(42)" ) << true << QVariant();
QTest::newRow( "array_minority(array())" ) << QStringLiteral( "array_minority(array())" ) << false << QVariant( QVariantList() );
QTest::newRow( "array_minority(array(0,42,42), 'all')" ) << QStringLiteral( "array_minority(array(0,42,42), 'all')" ) << false << QVariant( QVariantList() << 0 );
QTest::newRow( "array_minority(array(42,42,'a'), 'all')" ) << QStringLiteral( "array_minority(array(42,42,'a'), 'all')" ) << false << QVariant( QVariantList() << "a" );
QTest::newRow( "array_minority(array(0,1,42,42))" ) << QStringLiteral( "array_sort(array_minority(array(0,1,42,42)))" ) << false << QVariant( QVariantList() << 0 << 1 );
QTest::newRow( "array_minority(array(0,42,42), 'any')" ) << QStringLiteral( "array_minority(array(0,42,42), 'any')" ) << false << QVariant( 0 );
QTest::newRow( "array_minority(array(1,2,3,3), 'median')" ) << QStringLiteral( "array_minority(array(1,2,3,3), 'median')" ) << false << QVariant( 1.5 );
QTest::newRow( "array_minority(array(1,2,3,3,'a'), 'median')" ) << QStringLiteral( "array_minority(array(1,2,3,3,'a'), 'median')" ) << false << QVariant( 1.5 );
QTest::newRow( "array_minority(array(1,1,3,3,'a'), 'median')" ) << QStringLiteral( "array_minority(array(1,1,3,3,'a'), 'median')" ) << false << QVariant();
QTest::newRow( "array_minority(array(0,1,42,42,43), 'real_minority')" ) << QStringLiteral( "array_sort(array_minority(array(0,1,42,42,43), 'real_minority'))" ) << false << QVariant( QVariantList() << 0 << 1 << 42 << 43 );
QTest::newRow( "array_minority(array(0,1,42,42,43,42), 'real_minority')" ) << QStringLiteral( "array_sort(array_minority(array(0,1,42,42,43,42), 'real_minority'))" ) << false << QVariant( QVariantList() << 0 << 1 << 42 << 43 );
QTest::newRow( "array_minority(array(0,1,42,42,43,42,42), 'real_minority')" ) << QStringLiteral( "array_sort(array_minority(array(0,1,42,42,43,42,42), 'real_minority'))" ) << false << QVariant( QVariantList() << 0 << 1 << 43 );
QTest::newRow( "array_sum('forty two')" ) << QStringLiteral( "array_sum('forty two')" ) << true << QVariant();
QTest::newRow( "array_sum(42)" ) << QStringLiteral( "array_sum(42)" ) << true << QVariant();
QTest::newRow( "array_sum(array())" ) << QStringLiteral( "array_sum(array())" ) << false << QVariant();

0 comments on commit 14e5c60

Please sign in to comment.