From d872b07e6e0443c045ee3365f8ad54811397e579 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 14 Sep 2020 20:49:22 +0000 Subject: [PATCH] [DelimitedText provider] Fix performance issue with files with Unix end-of-line characters --- .../delimitedtext/qgsdelimitedtextfile.cpp | 73 +++++++++++-------- .../delimitedtext/qgsdelimitedtextfile.h | 1 + 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/src/providers/delimitedtext/qgsdelimitedtextfile.cpp b/src/providers/delimitedtext/qgsdelimitedtextfile.cpp index 21ea840cbc37..2356525faf8c 100644 --- a/src/providers/delimitedtext/qgsdelimitedtextfile.cpp +++ b/src/providers/delimitedtext/qgsdelimitedtextfile.cpp @@ -583,40 +583,63 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::nextLine( QString &buffer, bo // Identify position of \r , \n or \r\n // We should rather use mStream->readLine(), but it fails to detect \r // line endings. - int eolPos = mBuffer.indexOf( '\r', mPosInBuffer ); - int nextPos = 0; - if ( eolPos >= 0 ) + int eolPos = -1; { - nextPos = eolPos + 1; - // Check if there is a \n just afterwards - if ( eolPos + 1 < mBuffer.size() ) + if ( mLineNumber == 0 ) { - if ( mBuffer[eolPos + 1] == '\n' ) + // For the first line we don't know yet the end of line character, so + // manually scan for the first we find + const QChar *charBuffer = mBuffer.constData(); + const int bufferSize = mBuffer.size(); + for ( int pos = mPosInBuffer; pos < bufferSize; ++pos ) { - nextPos = eolPos + 2; + if ( charBuffer[pos] == '\r' || charBuffer[pos] == '\n' ) + { + mFirstEOLChar = charBuffer[pos]; + eolPos = pos; + break; + } } } else { - // If we are just at the end of the buffer, read an extra character - // from the stream - QString newChar = mStream->read( 1 ); - mBuffer += newChar; - if ( newChar == '\n' ) - { - nextPos = eolPos + 2; - } + // Once we know the end of line character, use optimized indexOf() + eolPos = mBuffer.indexOf( mFirstEOLChar, mPosInBuffer ); } } - else + if ( eolPos >= 0 ) { - eolPos = mBuffer.indexOf( '\n', mPosInBuffer ); - if ( eolPos >= 0 ) + int nextPos = eolPos + 1; + if ( mBuffer[eolPos] == '\r' ) { - nextPos = eolPos + 1; + // Check if there is a \n just afterwards + if ( eolPos + 1 < mBuffer.size() ) + { + if ( mBuffer[eolPos + 1] == '\n' ) + { + nextPos = eolPos + 2; + } + } + else + { + // If we are just at the end of the buffer, read an extra character + // from the stream + QString newChar = mStream->read( 1 ); + mBuffer += newChar; + if ( newChar == '\n' ) + { + nextPos = eolPos + 2; + } + } } + + // Extract the current line from the buffer + buffer = mBuffer.mid( mPosInBuffer, eolPos - mPosInBuffer ); + // Update current position in buffer to be the one next to the end of + // line character(s) + mPosInBuffer = nextPos; } - if ( eolPos < 0 ) + else { if ( mPosInBuffer == 0 ) { @@ -637,14 +660,6 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::nextLine( QString &buffer, bo continue; } } - else - { - // Extract the current line from the buffer - buffer = mBuffer.mid( mPosInBuffer, eolPos - mPosInBuffer ); - // Update current position in buffer to be the one next to the end of - // line character(s) - mPosInBuffer = nextPos; - } mLineNumber++; if ( skipBlank && buffer.isEmpty() ) continue; return RecordOk; diff --git a/src/providers/delimitedtext/qgsdelimitedtextfile.h b/src/providers/delimitedtext/qgsdelimitedtextfile.h index 786972780769..10887c0f0895 100644 --- a/src/providers/delimitedtext/qgsdelimitedtextfile.h +++ b/src/providers/delimitedtext/qgsdelimitedtextfile.h @@ -431,6 +431,7 @@ class QgsDelimitedTextFile : public QObject QString mBuffer; int mPosInBuffer = 0; int mMaxBufferSize = 0; + QChar mFirstEOLChar = 0; // '\r' if EOL is "\r" or "\r\n", or `\n' if EOL is "\n" QStringList mCurrentRecord; bool mHoldCurrentRecord = false; // Maximum number of record (ie maximum record number visited)