Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport release-3_10] [DelimitedText provider] Fix performance issue with files with Unix end-of-line characters #38745

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 44 additions & 29 deletions src/providers/delimitedtext/qgsdelimitedtextfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,40 +583,63 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::nextLine( QString &buffer, bo
// Identify the position of \r, \n or \r\n
// Ideally we would use mStream->readLine(), but it fails to detect \r
// line endings.
int eolPos = mBuffer.indexOf( '\r', mPosInBuffer );
int nextPos = 0;
if ( eolPos >= 0 )
int eolPos = -1;
{
nextPos = eolPos + 1;
// Check if there is a \n just afterwards
if ( eolPos + 1 < mBuffer.size() )
if ( mLineNumber == 0 )
{
if ( mBuffer[eolPos + 1] == '\n' )
// For the first line we don't yet know the end-of-line character, so
// manually scan for the first \r or \n we find
const QChar *charBuffer = mBuffer.constData();
const int bufferSize = mBuffer.size();
for ( int pos = mPosInBuffer; pos < bufferSize; ++pos )
{
nextPos = eolPos + 2;
if ( charBuffer[pos] == '\r' || charBuffer[pos] == '\n' )
{
mFirstEOLChar = charBuffer[pos];
eolPos = pos;
break;
}
}
}
else
{
// If we are just at the end of the buffer, read an extra character
// from the stream
QString newChar = mStream->read( 1 );
mBuffer += newChar;
if ( newChar == '\n' )
{
nextPos = eolPos + 2;
}
// Once we know the end of line character, use optimized indexOf()
eolPos = mBuffer.indexOf( mFirstEOLChar, mPosInBuffer );
}
}
else
if ( eolPos >= 0 )
{
eolPos = mBuffer.indexOf( '\n', mPosInBuffer );
if ( eolPos >= 0 )
int nextPos = eolPos + 1;
if ( mBuffer[eolPos] == '\r' )
{
nextPos = eolPos + 1;
// Check if there is a \n just afterwards
if ( eolPos + 1 < mBuffer.size() )
{
if ( mBuffer[eolPos + 1] == '\n' )
{
nextPos = eolPos + 2;
}
}
else
{
// If we are just at the end of the buffer, read an extra character
// from the stream
QString newChar = mStream->read( 1 );
mBuffer += newChar;
if ( newChar == '\n' )
{
nextPos = eolPos + 2;
}
}
}

// Extract the current line from the buffer
buffer = mBuffer.mid( mPosInBuffer, eolPos - mPosInBuffer );
// Update current position in buffer to be the one next to the end of
// line character(s)
mPosInBuffer = nextPos;
}
if ( eolPos < 0 )
else
{
if ( mPosInBuffer == 0 )
{
Expand All @@ -637,14 +660,6 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::nextLine( QString &buffer, bo
continue;
}
}
else
{
// Extract the current line from the buffer
buffer = mBuffer.mid( mPosInBuffer, eolPos - mPosInBuffer );
// Update current position in buffer to be the one next to the end of
// line character(s)
mPosInBuffer = nextPos;
}
mLineNumber++;
if ( skipBlank && buffer.isEmpty() ) continue;
return RecordOk;
Expand Down
1 change: 1 addition & 0 deletions src/providers/delimitedtext/qgsdelimitedtextfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ class QgsDelimitedTextFile : public QObject
QString mBuffer;
int mPosInBuffer = 0;
int mMaxBufferSize = 0;
QChar mFirstEOLChar = 0; // '\r' if EOL is "\r" or "\r\n", or '\n' if EOL is "\n"
QStringList mCurrentRecord;
bool mHoldCurrentRecord = false;
// Maximum number of record (ie maximum record number visited)
Expand Down