Skip to content
Permalink
Browse files
Merge pull request #44138 from nirvn/core_regexp_more_more
[qt6] Move string utils and {vector,raster} layer class away from QRegExp
  • Loading branch information
nirvn committed Jul 13, 2021
2 parents db74bf5 + 846e0aa commit 557752ff73866c7d9aad7b5576be4ef83cdf2796
@@ -10,6 +10,7 @@




class QgsStringReplacement
{
%Docstring(signature="appended")
@@ -16,7 +16,6 @@
#include "qgsstringutils.h"
#include "qgslogger.h"
#include <QVector>
#include <QRegExp>
#include <QStringList>
#include <QTextBoundaryFinder>
#include <QRegularExpression>
@@ -524,33 +523,38 @@ QString QgsStringUtils::insertLinks( const QString &string, bool *foundLinks )

// http://alanstorm.com/url_regex_explained
// note - there are more robust implementations available, but we need one which works within the limitations of QRegExp
static QRegExp urlRegEx( "(\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/))))" );
static QRegExp protoRegEx( "^(?:f|ht)tps?://|file://" );
static QRegExp emailRegEx( "([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)" );
static thread_local QRegularExpression urlRegEx( "(\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/))))" );
static thread_local QRegularExpression protoRegEx( "^(?:f|ht)tps?://|file://" );
static thread_local QRegularExpression emailRegEx( "([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)" );

int offset = 0;
bool found = false;
while ( urlRegEx.indexIn( converted, offset ) != -1 )
QRegularExpressionMatch match = urlRegEx.match( converted );
while ( match.hasMatch() )
{
found = true;
QString url = urlRegEx.cap( 1 );
QString url = match.captured( 1 );
QString protoUrl = url;
if ( protoRegEx.indexIn( protoUrl ) == -1 )
if ( !protoRegEx.match( protoUrl ).hasMatch() )
{
protoUrl.prepend( "http://" );
}
QString anchor = QStringLiteral( "<a href=\"%1\">%2</a>" ).arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
converted.replace( urlRegEx.pos( 1 ), url.length(), anchor );
offset = urlRegEx.pos( 1 ) + anchor.length();
converted.replace( match.capturedStart( 1 ), url.length(), anchor );
offset = match.capturedStart( 1 ) + anchor.length();
match = urlRegEx.match( converted, offset );
}

offset = 0;
while ( emailRegEx.indexIn( converted, offset ) != -1 )
match = emailRegEx.match( converted );
while ( match.hasMatch() )
{
found = true;
QString email = emailRegEx.cap( 1 );
QString email = match.captured( 1 );
QString anchor = QStringLiteral( "<a href=\"mailto:%1\">%1</a>" ).arg( email.toHtmlEscaped() );
converted.replace( emailRegEx.pos( 1 ), email.length(), anchor );
offset = emailRegEx.pos( 1 ) + anchor.length();
converted.replace( match.capturedStart( 1 ), email.length(), anchor );
offset = match.capturedStart( 1 ) + anchor.length();
match = emailRegEx.match( converted, offset );
}

if ( foundLinks )
@@ -567,16 +571,19 @@ QString QgsStringUtils::htmlToMarkdown( const QString &html )
converted.replace( QLatin1String( "<b>" ), QLatin1String( "**" ) );
converted.replace( QLatin1String( "</b>" ), QLatin1String( "**" ) );

static QRegExp hrefRegEx( "<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>" );
static thread_local QRegularExpression hrefRegEx( "<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>" );

int offset = 0;
while ( hrefRegEx.indexIn( converted, offset ) != -1 )
QRegularExpressionMatch match = hrefRegEx.match( converted );
while ( match.hasMatch() )
{
QString url = hrefRegEx.cap( 1 ).replace( QLatin1String( "\"" ), QString() );
QString url = match.captured( 1 ).replace( QLatin1String( "\"" ), QString() );
url.replace( '\'', QString() );
QString name = hrefRegEx.cap( 2 );
QString name = match.captured( 2 );
QString anchor = QStringLiteral( "[%1](%2)" ).arg( name, url );
converted.replace( hrefRegEx, anchor );
offset = hrefRegEx.pos( 1 ) + anchor.length();
converted.replace( match.capturedStart(), match.capturedLength(), anchor );
offset = match.capturedStart() + anchor.length();
match = hrefRegEx.match( converted, offset );
}

return converted;
@@ -588,19 +595,18 @@ QString QgsStringUtils::wordWrap( const QString &string, const int length, const
return string;

QString newstr;
QRegExp rx;
QRegularExpression rx;
int delimiterLength = 0;

if ( !customDelimiter.isEmpty() )
{
rx.setPatternSyntax( QRegExp::FixedString );
rx.setPattern( customDelimiter );
rx.setPattern( QRegularExpression::escape( customDelimiter ) );
delimiterLength = customDelimiter.length();
}
else
{
// \x200B is a ZERO-WIDTH SPACE, needed for word wrap to support a number of complex scripts (Indic, Arabic, etc.)
rx.setPattern( QStringLiteral( "[\\s\\x200B]" ) );
// \x{200B} is a ZERO-WIDTH SPACE, needed for word wrap to support a number of complex scripts (Indic, Arabic, etc.)
rx.setPattern( QStringLiteral( "[\\x{200B}\\s]" ) );
delimiterLength = 1;
}

@@ -689,8 +695,10 @@ QgsStringReplacement::QgsStringReplacement( const QString &match, const QString
, mWholeWordOnly( wholeWordOnly )
{
if ( mWholeWordOnly )
mRx = QRegExp( QString( "\\b%1\\b" ).arg( mMatch ),
mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
{
mRx.setPattern( QString( "\\b%1\\b" ).arg( mMatch ) );
mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
}
}

QString QgsStringReplacement::process( const QString &input ) const
@@ -14,12 +14,13 @@
***************************************************************************/

#include "qgis_core.h"
#include "qgis.h"

#include <QString>
#include <QRegExp>
#include <QRegularExpression>
#include <QList>
#include <QDomDocument>
#include <QFont> // for enum values
#include "qgis.h"

#ifndef QGSSTRINGUTILS_H
#define QGSSTRINGUTILS_H
@@ -101,7 +102,7 @@ class CORE_EXPORT QgsStringReplacement

bool mWholeWordOnly;

QRegExp mRx;
QRegularExpression mRx;
};


@@ -79,7 +79,7 @@ email : tim at linfiniti.com
#include <QList>
#include <QPainter>
#include <QPixmap>
#include <QRegExp>
#include <QRegularExpression>
#include <QSlider>
#include <QUrl>

@@ -2321,13 +2321,14 @@ QString QgsRasterLayer::encodedSource( const QString &source, const QgsReadWrite
{
// NETCDF:filename:variable
// filename can be quoted with " as it can contain colons
QRegExp r( "NETCDF:(.+):([^:]+)" );
if ( r.exactMatch( src ) )
const QRegularExpression netcdfEncodedRegExp( QRegularExpression::anchoredPattern( "NETCDF:(.+):([^:]+)" ) );
const QRegularExpressionMatch match = netcdfEncodedRegExp.match( src );
if ( match.hasMatch() )
{
QString filename = r.cap( 1 );
QString filename = match.captured( 1 );
if ( filename.startsWith( '"' ) && filename.endsWith( '"' ) )
filename = filename.mid( 1, filename.length() - 2 );
src = "NETCDF:\"" + context.pathResolver().writePath( filename ) + "\":" + r.cap( 2 );
src = "NETCDF:\"" + context.pathResolver().writePath( filename ) + "\":" + match.captured( 2 );
handled = true;
}
}
@@ -2346,38 +2347,41 @@ QString QgsRasterLayer::encodedSource( const QString &source, const QgsReadWrite
{
// HDF4_SDS:subdataset_type:file_name:subdataset_index
// filename can be quoted with " as it can contain colons
QRegExp r( "HDF4_SDS:([^:]+):(.+):([^:]+)" );
if ( r.exactMatch( src ) )
const QRegularExpression hdf4EncodedRegExp( QRegularExpression::anchoredPattern( "HDF4_SDS:([^:]+):(.+):([^:]+)" ) );
const QRegularExpressionMatch match = hdf4EncodedRegExp.match( src );
if ( match.hasMatch() )
{
QString filename = r.cap( 2 );
QString filename = match.captured( 2 );
if ( filename.startsWith( '"' ) && filename.endsWith( '"' ) )
filename = filename.mid( 1, filename.length() - 2 );
src = "HDF4_SDS:" + r.cap( 1 ) + ":\"" + context.pathResolver().writePath( filename ) + "\":" + r.cap( 3 );
src = "HDF4_SDS:" + match.captured( 1 ) + ":\"" + context.pathResolver().writePath( filename ) + "\":" + match.captured( 3 );
handled = true;
}
}
else if ( src.startsWith( QLatin1String( "HDF5:" ) ) )
{
// HDF5:file_name:subdataset
// filename can be quoted with " as it can contain colons
QRegExp r( "HDF5:(.+):([^:]+)" );
if ( r.exactMatch( src ) )
const QRegularExpression hdf5EncodedRegExp( QRegularExpression::anchoredPattern( "HDF5:(.+):([^:]+)" ) );
const QRegularExpressionMatch match = hdf5EncodedRegExp.match( src );
if ( match.hasMatch() )
{
QString filename = r.cap( 1 );
QString filename = match.captured( 1 );
if ( filename.startsWith( '"' ) && filename.endsWith( '"' ) )
filename = filename.mid( 1, filename.length() - 2 );
src = "HDF5:\"" + context.pathResolver().writePath( filename ) + "\":" + r.cap( 2 );
src = "HDF5:\"" + context.pathResolver().writePath( filename ) + "\":" + match.captured( 2 );
handled = true;
}
}
else if ( src.contains( QRegExp( "^(NITF_IM|RADARSAT_2_CALIB):" ) ) )
else if ( src.contains( QRegularExpression( "^(NITF_IM|RADARSAT_2_CALIB):" ) ) )
{
// NITF_IM:0:filename
// RADARSAT_2_CALIB:?:filename
QRegExp r( "([^:]+):([^:]+):(.+)" );
if ( r.exactMatch( src ) )
const QRegularExpression nitfRadarsatEncodedRegExp( QRegularExpression::anchoredPattern( "([^:]+):([^:]+):(.+)" ) );
const QRegularExpressionMatch match = nitfRadarsatEncodedRegExp.match( src );
if ( match.hasMatch() )
{
src = r.cap( 1 ) + ':' + r.cap( 2 ) + ':' + context.pathResolver().writePath( r.cap( 3 ) );
src = match.captured( 1 ) + ':' + match.captured( 2 ) + ':' + context.pathResolver().writePath( match.captured( 3 ) );
handled = true;
}
}
@@ -2510,13 +2514,14 @@ QString QgsRasterLayer::decodedSource( const QString &source, const QString &pro
{
// NETCDF:filename:variable
// filename can be quoted with " as it can contain colons
QRegExp r( "NETCDF:(.+):([^:]+)" );
if ( r.exactMatch( src ) )
const QRegularExpression netcdfDecodedRegExp( QRegularExpression::anchoredPattern( "NETCDF:(.+):([^:]+)" ) );
const QRegularExpressionMatch match = netcdfDecodedRegExp.match( src );
if ( match.hasMatch() )
{
QString filename = r.cap( 1 );
QString filename = match.captured( 1 );
if ( filename.startsWith( '"' ) && filename.endsWith( '"' ) )
filename = filename.mid( 1, filename.length() - 2 );
src = "NETCDF:\"" + context.pathResolver().readPath( filename ) + "\":" + r.cap( 2 );
src = "NETCDF:\"" + context.pathResolver().readPath( filename ) + "\":" + match.captured( 2 );
handled = true;
}
}
@@ -2535,38 +2540,41 @@ QString QgsRasterLayer::decodedSource( const QString &source, const QString &pro
{
// HDF4_SDS:subdataset_type:file_name:subdataset_index
// filename can be quoted with " as it can contain colons
QRegExp r( "HDF4_SDS:([^:]+):(.+):([^:]+)" );
if ( r.exactMatch( src ) )
const QRegularExpression hdf4DecodedRegExp( QRegularExpression::anchoredPattern( "HDF4_SDS:([^:]+):(.+):([^:]+)" ) );
const QRegularExpressionMatch match = hdf4DecodedRegExp.match( src );
if ( match.hasMatch() )
{
QString filename = r.cap( 2 );
QString filename = match.captured( 2 );
if ( filename.startsWith( '"' ) && filename.endsWith( '"' ) )
filename = filename.mid( 1, filename.length() - 2 );
src = "HDF4_SDS:" + r.cap( 1 ) + ":\"" + context.pathResolver().readPath( filename ) + "\":" + r.cap( 3 );
src = "HDF4_SDS:" + match.captured( 1 ) + ":\"" + context.pathResolver().readPath( filename ) + "\":" + match.captured( 3 );
handled = true;
}
}
else if ( src.startsWith( QLatin1String( "HDF5:" ) ) )
{
// HDF5:file_name:subdataset
// filename can be quoted with " as it can contain colons
QRegExp r( "HDF5:(.+):([^:]+)" );
if ( r.exactMatch( src ) )
const QRegularExpression hdf5DecodedRegExp( QRegularExpression::anchoredPattern( "HDF5:(.+):([^:]+)" ) );
const QRegularExpressionMatch match = hdf5DecodedRegExp.match( src );
if ( match.hasMatch() )
{
QString filename = r.cap( 1 );
QString filename = match.captured( 1 );
if ( filename.startsWith( '"' ) && filename.endsWith( '"' ) )
filename = filename.mid( 1, filename.length() - 2 );
src = "HDF5:\"" + context.pathResolver().readPath( filename ) + "\":" + r.cap( 2 );
src = "HDF5:\"" + context.pathResolver().readPath( filename ) + "\":" + match.captured( 2 );
handled = true;
}
}
else if ( src.contains( QRegExp( "^(NITF_IM|RADARSAT_2_CALIB):" ) ) )
else if ( src.contains( QRegularExpression( "^(NITF_IM|RADARSAT_2_CALIB):" ) ) )
{
// NITF_IM:0:filename
// RADARSAT_2_CALIB:?:filename
QRegExp r( "([^:]+):([^:]+):(.+)" );
if ( r.exactMatch( src ) )
const QRegularExpression niftRadarsatDecodedRegExp( QRegularExpression::anchoredPattern( "([^:]+):([^:]+):(.+)" ) );
const QRegularExpressionMatch match = niftRadarsatDecodedRegExp.match( src );
if ( match.hasMatch() )
{
src = r.cap( 1 ) + ':' + r.cap( 2 ) + ':' + context.pathResolver().readPath( r.cap( 3 ) );
src = match.captured( 1 ) + ':' + match.captured( 2 ) + ':' + context.pathResolver().readPath( match.captured( 3 ) );
handled = true;
}
}
@@ -21,28 +21,10 @@
* *
***************************************************************************/

#include <limits>

#include <QDir>
#include <QFile>
#include <QImage>
#include <QPainter>
#include <QPainterPath>
#include <QPolygonF>
#include <QProgressDialog>
#include <QString>
#include <QDomNode>
#include <QVector>
#include <QStringBuilder>
#include <QUrl>
#include <QUndoCommand>
#include <QUrlQuery>
#include <QUuid>

#include "qgis.h" //for globals
#include "qgssettings.h"
#include "qgsvectorlayer.h"
#include "qgsactionmanager.h"
#include "qgis.h" //for globals
#include "qgsapplication.h"
#include "qgsclipper.h"
#include "qgsconditionalstyle.h"
@@ -108,6 +90,25 @@

#include "diagram/qgsdiagram.h"

#include <QDir>
#include <QFile>
#include <QImage>
#include <QPainter>
#include <QPainterPath>
#include <QPolygonF>
#include <QProgressDialog>
#include <QString>
#include <QDomNode>
#include <QVector>
#include <QStringBuilder>
#include <QUrl>
#include <QUndoCommand>
#include <QUrlQuery>
#include <QUuid>
#include <QRegularExpression>

#include <limits>

#ifdef TESTPROVIDERLIB
#include <dlfcn.h>
#endif
@@ -1835,10 +1836,11 @@ bool QgsVectorLayer::setDataProvider( QString const &provider, const QgsDataProv
QgsDebugMsgLevel( QStringLiteral( "Beautifying layer name %1" ).arg( name() ), 3 );

// adjust the display name for postgres layers
QRegExp reg( R"lit("[^"]+"\."([^"] + )"( \([^)]+\))?)lit" );
if ( reg.indexIn( name() ) >= 0 )
const QRegularExpression reg( R"lit("[^"]+"\."([^"] + )"( \([^)]+\))?)lit" );
const QRegularExpressionMatch match = reg.match( name() );
if ( match.hasMatch() )
{
QStringList stuff = reg.capturedTexts();
QStringList stuff = match.capturedTexts();
QString lName = stuff[1];

const QMap<QString, QgsMapLayer *> &layers = QgsProject::instance()->mapLayers();
@@ -85,7 +85,7 @@ void TestQgsPalLabeling::wrapChar()
QCOMPARE( QgsPalLabeling::splitToLines( "with auto wrap", QString(), 6, false ), QStringList() << "with auto" << "wrap" );

// manual wrap character should take precedence
QCOMPARE( QgsPalLabeling::splitToLines( QStringLiteral( "with auto-wrap and manual-wrap" ), QStringLiteral( "-" ), 12, true ), QStringList() << "with auto" << "wrap and" << "manual" << "wrap" );
QCOMPARE( QgsPalLabeling::splitToLines( QStringLiteral( "with auto-wrap and manual-wrap" ), QStringLiteral( "-" ), 12, true ), QStringList() << "with" << "auto" << "wrap and" << "manual" << "wrap" );
QCOMPARE( QgsPalLabeling::splitToLines( QStringLiteral( "with auto-wrap and manual-wrap" ), QStringLiteral( "-" ), 6, false ), QStringList() << "with auto" << "wrap and" << "manual" << "wrap" );
}

0 comments on commit 557752f

Please sign in to comment.