Skip to content
Permalink
Browse files

Use QRegularExpression::UseUnicodePropertiesOption in regular expressions (#41507)

Should fix #41453, in which values containing Unicode diacritics were not handled.
Since these expressions are used for (international) text values, I think
they should be usable with Unicode (instead of ASCII only).

See:
https://doc.qt.io/qt-5/qregularexpression.html
and:
https://doc.qt.io/qt-5/qregularexpression.html#PatternOption-enum
  • Loading branch information
rduivenvoorde committed Feb 12, 2021
1 parent e923edc commit b47c6a9c1eac9a8ff67901de2aeaaaa569061fcf
@@ -2,9 +2,9 @@
"name": "regexp_match",
"type": "function",
"groups": ["Conditionals", "String"],
"description": "Return the first matching position matching a regular expression within a string, or 0 if the substring is not found.",
"description": "Return the first matching position matching a regular expression within a Unicode string, or 0 if the substring is not found.",
"arguments": [ {"arg":"input_string","description":"the string to test against the regular expression"},
{"arg":"regex","description":"The regular expression to test against. Backslash characters must be double escaped (e.g., \"\\\\\\\\s\" to match a white space character)."}
{"arg":"regex","description":"The regular expression to test against. Backslash characters must be double escaped (e.g., \"\\\\\\\\s\" to match a white space character or \"\\\\\\\\b\" to match a word boundary)."}
],
"examples": [ { "expression":"regexp_match('QGIS ROCKS','\\\\\\\\sROCKS')", "returns":"5"}]
"examples": [ { "expression":"regexp_match('QGIS ROCKS','\\\\\\\\sROCKS')", "returns":"5"}, { "expression":"regexp_match('Budač','udač\\\\\\\\b')", "returns":"2"}]
}
@@ -1421,7 +1421,7 @@ static QVariant fcnRegexpReplace( const QVariantList &values, const QgsExpressio
QString regexp = QgsExpressionUtils::getStringValue( values.at( 1 ), parent );
QString after = QgsExpressionUtils::getStringValue( values.at( 2 ), parent );

QRegularExpression re( regexp );
QRegularExpression re( regexp, QRegularExpression::UseUnicodePropertiesOption );
if ( !re.isValid() )
{
parent->setEvalErrorString( QObject::tr( "Invalid regular expression '%1': %2" ).arg( regexp, re.errorString() ) );
@@ -1435,7 +1435,7 @@ static QVariant fcnRegexpMatch( const QVariantList &values, const QgsExpressionC
QString str = QgsExpressionUtils::getStringValue( values.at( 0 ), parent );
QString regexp = QgsExpressionUtils::getStringValue( values.at( 1 ), parent );

QRegularExpression re( regexp );
QRegularExpression re( regexp, QRegularExpression::UseUnicodePropertiesOption );
if ( !re.isValid() )
{
parent->setEvalErrorString( QObject::tr( "Invalid regular expression '%1': %2" ).arg( regexp, re.errorString() ) );
@@ -1450,7 +1450,7 @@ static QVariant fcnRegexpMatches( const QVariantList &values, const QgsExpressio
QString regexp = QgsExpressionUtils::getStringValue( values.at( 1 ), parent );
QString empty = QgsExpressionUtils::getStringValue( values.at( 2 ), parent );

QRegularExpression re( regexp );
QRegularExpression re( regexp, QRegularExpression::UseUnicodePropertiesOption );
if ( !re.isValid() )
{
parent->setEvalErrorString( QObject::tr( "Invalid regular expression '%1': %2" ).arg( regexp, re.errorString() ) );
@@ -1482,7 +1482,7 @@ static QVariant fcnRegexpSubstr( const QVariantList &values, const QgsExpression
QString str = QgsExpressionUtils::getStringValue( values.at( 0 ), parent );
QString regexp = QgsExpressionUtils::getStringValue( values.at( 1 ), parent );

QRegularExpression re( regexp );
QRegularExpression re( regexp, QRegularExpression::UseUnicodePropertiesOption );
if ( !re.isValid() )
{
parent->setEvalErrorString( QObject::tr( "Invalid regular expression '%1': %2" ).arg( regexp, re.errorString() ) );
@@ -1405,6 +1405,11 @@ class TestQgsExpression: public QObject
QTest::newRow( "nullif substitute double" ) << "nullif(3.3, 3.3)" << false << QVariant();
QTest::newRow( "nullif substitute int" ) << "nullif(0, 0)" << false << QVariant();
QTest::newRow( "regexp match" ) << "regexp_match('abc','.b.')" << false << QVariant( 1 );
// testing unicode and \b, see #41453. \b tests for a 'word boundary'
QTest::newRow( "regexp match unicode" ) << "regexp_match('Budač','Buda\\\\b')" << false << QVariant( 0 );
QTest::newRow( "regexp match unicode 2" ) << "regexp_match('Buda','Buda\\\\b')" << false << QVariant( 1 );
QTest::newRow( "regexp match unicode 3" ) << "regexp_match('Budač','Budač\\\\b')" << false << QVariant( 1 );

QTest::newRow( "regexp match invalid" ) << "regexp_match('abc DEF','[[[')" << true << QVariant();
QTest::newRow( "regexp match escaped" ) << "regexp_match('abc DEF','\\\\s[A-Z]+')" << false << QVariant( 4 );
QTest::newRow( "regexp match false" ) << "regexp_match('abc DEF','\\\\s[a-z]+')" << false << QVariant( 0 );

0 comments on commit b47c6a9

Please sign in to comment.