217 changes: 193 additions & 24 deletions src/providers/delimitedtext/qgsdelimitedtextfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@
#include <QRegExp>
#include <QUrl>

// Template for generated field names; %1 is replaced with the 1-based field number
static QString DefaultFieldName( "field_%1" );
// Matches proposed names that are not usable as field names: empty strings
// and strings made only of digits with an optional '.' and further digits
static QRegExp InvalidFieldRegexp( "^\\d*(\\.\\d*)?$" );
// Matches default style field names (case insensitive), capturing the number.
// field_ is optional in following regexp to simplify QgsDelimitedTextFile::fieldIndex()
static QRegExp DefaultFieldRegexp( "^(?:field_)?(\\d+)$", Qt::CaseInsensitive );

QgsDelimitedTextFile::QgsDelimitedTextFile( QString url ) :
mFileName( QString() ),
mEncoding( "UTF-8" ),
Expand All @@ -39,7 +44,8 @@ QgsDelimitedTextFile::QgsDelimitedTextFile( QString url ) :
mSkipLines( 0 ),
mMaxFields( 0 ),
mLineNumber( 0 ),
mRecordLineNumber( 0 )
mRecordLineNumber( 0 ),
mMaxFieldCount( 0 )
{
// The default type is CSV
setTypeCSV();
Expand Down Expand Up @@ -80,7 +86,7 @@ bool QgsDelimitedTextFile::open()
return false;
}
mStream = new QTextStream( mFile );
if ( mEncoding.isEmpty() && mEncoding != "System" )
if ( ! mEncoding.isEmpty() )
{
QTextCodec *codec = QTextCodec::codecForName( mEncoding.toAscii() );
mStream->setCodec( codec );
Expand All @@ -93,8 +99,8 @@ bool QgsDelimitedTextFile::open()
// Reset the state derived from the file definition: calls close() and then
// discards the stored names and field counts.
// NOTE(review): both mColumnNames/mMaxFields and mFieldNames/mMaxFieldCount
// are reset here - this looks like superseded and current lines side by
// side; confirm which pair is intended (resetting mMaxFields here would
// discard a limit set via setMaxFields() whenever another setter runs).
void QgsDelimitedTextFile::resetDefinition()
{
close();
mColumnNames.clear();
mMaxFields = 0;
mFieldNames.clear();
mMaxFieldCount = 0;
}

// Extract the provider definition from the url
Expand Down Expand Up @@ -145,6 +151,12 @@ bool QgsDelimitedTextFile::setFromUrl( QUrl &url )
quote = "'\"";
escape = "";
}
else if ( type == "regexp " )
{
delimiter = "";
quote = "";
escape = "";
}
}
if ( url.hasQueryItem( "delimiter" ) )
{
Expand Down Expand Up @@ -174,6 +186,10 @@ bool QgsDelimitedTextFile::setFromUrl( QUrl &url )
{
mTrimFields = ! url.queryItemValue( "trimFields" ).toUpper().startsWith( 'N' );;
}
if ( url.hasQueryItem( "maxFields" ) )
{
mMaxFields = url.queryItemValue( "maxFields" ).toInt();
}

QgsDebugMsg( "Delimited text file is: " + mFileName );
QgsDebugMsg( "Encoding is: " + mEncoding );
Expand All @@ -182,6 +198,7 @@ bool QgsDelimitedTextFile::setFromUrl( QUrl &url )
QgsDebugMsg( "Quote character is: [" + quote + "]" );
QgsDebugMsg( "Escape character is: [" + escape + "]" );
QgsDebugMsg( "Skip lines: " + QString::number( mSkipLines ) );
QgsDebugMsg( "Maximum number of fields in record: " + QString::number( mMaxFields ) );
QgsDebugMsg( "Use headers: " + QString( mUseHeader ? "Yes" : "No" ) );
QgsDebugMsg( "Discard empty fields: " + QString( mDiscardEmptyFields ? "Yes" : "No" ) );
QgsDebugMsg( "Trim fields: " + QString( mTrimFields ? "Yes" : "No" ) );
Expand Down Expand Up @@ -240,6 +257,10 @@ QUrl QgsDelimitedTextFile::url()
{
url.addQueryItem( "skipEmptyFields", "Yes" );
}
if ( mMaxFields > 0 )
{
url.addQueryItem( "maxFields", QString::number( mMaxFields ) );
}
return url;
}

Expand Down Expand Up @@ -275,12 +296,18 @@ void QgsDelimitedTextFile::setTypeRegexp( QString regexp )
resetDefinition();
mType = DelimTypeRegexp;
mDelimRegexp.setPattern( regexp );
mAnchoredRegexp = regexp.startsWith( "^" );
mParser = &QgsDelimitedTextFile::parseRegexp;
mDefinitionValid = regexp.size() > 0 && mDelimRegexp.isValid();
if ( ! mDefinitionValid )
{
QgsDebugMsg( "Invalid regular expression in delimited text file delimiter: " + regexp );
}
else if ( mAnchoredRegexp && mDelimRegexp.captureCount() == 0 )
{
mDefinitionValid = false;
QgsDebugMsg( "Invalid anchored regular expression - must have capture groups: " + regexp );
}
}

QString QgsDelimitedTextFile::decodeChars( QString chars )
Expand Down Expand Up @@ -329,18 +356,101 @@ void QgsDelimitedTextFile::setTrimFields( bool trimFields )
mTrimFields = trimFields;
}

// Set the maximum number of fields that will be read from a record.
// The definition is reset before the new limit is stored, so the order of
// the two statements matters. appendField() only enforces the limit when
// it is greater than zero.
void QgsDelimitedTextFile::setMaxFields( int maxFields )
{
resetDefinition();
mMaxFields = maxFields;
}

// Set whether empty fields are dropped from records.
// The definition is reset first, then the option is stored. appendField()
// applies the option to unquoted fields only.
void QgsDelimitedTextFile::setDiscardEmptyFields( bool discardEmptyFields )
{
resetDefinition();
mDiscardEmptyFields = discardEmptyFields;
}

QStringList &QgsDelimitedTextFile::columnNames()

// Set the field names from a list of proposed names.
//
// Each proposed name is trimmed and then adjusted where necessary so that
// the stored names are usable and unique (case insensitive):
//  - empty or purely numeric names are replaced by the default name for
//    their position (field_<n>, n being the 1-based field number)
//  - names that look like a default name (field_<m>) are only kept as is
//    when <m> matches their own position
//  - any other name must not duplicate a name already stored
// Names failing these checks get a numeric suffix (_1, _2, ...) chosen so
// that the result clashes neither with the names already stored nor with
// any of the proposed names.
//
// @param names  The proposed field names, in field order
void QgsDelimitedTextFile::setFieldNames( const QStringList &names )
{
  // Discard any previously stored names. Note that QStringList::empty()
  // only tests for emptiness, it does not remove anything.
  mFieldNames.clear();
  foreach ( QString name, names )
  {
    bool nameOk = true;
    int fieldNo = mFieldNames.size() + 1;
    name = name.trimmed();

    // If the name is invalid then reset it to default name
    if ( InvalidFieldRegexp.exactMatch( name ) )
    {
      name = DefaultFieldName.arg( fieldNo );
    }
    // If the name looks like a default field name (field_##), then it is
    // valid if the number matches its column number..
    else if ( DefaultFieldRegexp.indexIn( name ) == 0 )
    {
      int col = DefaultFieldRegexp.capturedTexts()[1].toInt();
      nameOk = col == fieldNo;
    }
    // Otherwise it is valid if isn't the name of an existing field...
    else
    {
      nameOk = ! mFieldNames.contains( name, Qt::CaseInsensitive );
    }
    // If it is not a valid name then try appending a number to generate
    // a valid name.
    if ( ! nameOk )
    {
      int suffix = 0;
      QString basename = name + "_%1";
      while ( true )
      {
        suffix++;
        name = basename.arg( suffix );
        // Not ok if it is already in the name list
        if ( mFieldNames.contains( name, Qt::CaseInsensitive ) ) continue;
        // Not ok if it is already in proposed names
        if ( names.contains( name, Qt::CaseInsensitive ) ) continue;
        break;
      }
    }
    mFieldNames.append( name );
  }
}


// Return the list of field names.
// When a header record is in use and the file has not been opened yet, the
// file is reset so that the header is read. The list is then padded with
// default names (field_<n>) up to the largest field count recorded so far.
QStringList &QgsDelimitedTextFile::fieldNames()
{
  // Header requested but file not opened: reset() reads the field names
  if ( mUseHeader && ! mFile )
    reset();

  // Records parsed so far may have had more fields than there are names;
  // give every extra field its positional default name
  while ( mFieldNames.size() < mMaxFieldCount )
  {
    mFieldNames.append( DefaultFieldName.arg( mFieldNames.size() + 1 ) );
  }
  return mFieldNames;
}

// Return the zero based index of a field given its name.
//
// A name of the form field_<n>, or a plain integer string <n>, is taken as
// a reference to the n'th field ("1" = first field); its index is inferred
// from the number alone and is not checked against the current field list.
// Any other name is looked up (case insensitive) in the field names.
//
// @param name  The name of the field to find
// @return      The zero based index of the field, or -1 if the name is not
//              found (also the result for "0" / "field_0")
int QgsDelimitedTextFile::fieldIndex( QString name )
{
  // If not yet opened then reset file to read column headers
  if ( mUseHeader && ! mFile ) reset();

  // Try to determine the field based on a default field name, includes
  // Field_### and simple integer fields.
  if ( DefaultFieldRegexp.indexIn( name ) == 0 )
  {
    return DefaultFieldRegexp.capturedTexts()[1].toInt() - 1;
  }
  for ( int i = 0; i < mFieldNames.size(); i++ )
  {
    if ( mFieldNames[i].compare( name, Qt::CaseInsensitive ) == 0 ) return i;
  }
  return -1;
}

QgsDelimitedTextFile::Status QgsDelimitedTextFile::nextRecord( QStringList &record )
Expand Down Expand Up @@ -368,8 +478,9 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::reset()
// Read the column names
if ( mUseHeader )
{
QgsDelimitedTextFile::Status result = ( this->*mParser )( mColumnNames );
mMaxFields = mColumnNames.size();
QStringList names;
QgsDelimitedTextFile::Status result = nextRecord( names );
setFieldNames( names );
return result;
}
return RecordOk;
Expand All @@ -396,6 +507,22 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::nextLine( QString &buffer, bo
return RecordEOF;
}

// Add a field to a record, applying the field limit, trimming and
// discarding options.
//
// @param record  The record being built
// @param field   The text of the field
// @param quoted  True if the field was quoted: quoted fields are neither
//                trimmed nor discarded when empty
void QgsDelimitedTextFile::appendField( QStringList &record, QString field, bool quoted )
{
  // Nothing more is accepted once the record has reached the field limit
  const bool limitReached = mMaxFields > 0 && record.size() >= mMaxFields;
  if ( limitReached )
    return;

  if ( ! quoted && mTrimFields )
    field = field.trimmed();

  const bool discarded = ! quoted && mDiscardEmptyFields && field.isEmpty();
  if ( ! discarded )
    record.append( field );

  // Keep track of maximum number of non-empty fields in a record
  if ( ! field.isEmpty() && record.size() > mMaxFieldCount )
    mMaxFieldCount = record.size();
}

QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseRegexp( QStringList &fields )
{
fields.clear();
Expand All @@ -404,12 +531,54 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseRegexp( QStringList &fie
if ( status != RecordOk ) return status;
mRecordLineNumber = mLineNumber;

QStringList parts = buffer.split( mDelimRegexp );
foreach ( QString f, parts )
// If match is anchored, then only interested in records which actually match
// and extract capture groups
if ( mAnchoredRegexp )
{
if ( mDelimRegexp.indexIn( buffer ) < 0 ) return RecordInvalid;
QStringList groups = mDelimRegexp.capturedTexts();
for ( int i = 1; i < groups.size(); i++ )
{
appendField( fields, groups[i] );
}
return RecordOk;
}

int pos = 0;
int size = buffer.size();
while ( true )
{
if ( mTrimFields ) f = f.trimmed();
if ( mDiscardEmptyFields && f.isEmpty() ) continue;
fields.append( f );
if ( pos >= size ) break;
int matchPos = mDelimRegexp.indexIn( buffer, pos );
// If match won't advance cursor, then need to force it along one place
// to avoid infinite loop.
int matchLen = mDelimRegexp.matchedLength();
if ( matchPos == pos && matchLen == 0 )
{
matchPos = mDelimRegexp.indexIn( buffer, pos + 1 );
matchLen = mDelimRegexp.matchedLength();
}
// If no match, then field is to end of record
if ( matchPos < 0 )
{
appendField( fields, buffer.mid( pos ) );
break;
}
// Else append up to matched string, then any capture
// groups from match
appendField( fields, buffer.mid( pos, matchPos - pos ) );
if ( mDelimRegexp.captureCount() > 0 )
{
QStringList groups = mDelimRegexp.capturedTexts();
for ( int i = 1; i < groups.size(); i++ )
{
appendField( fields, groups[i] );
}
}
// Advance the buffer pointer
pos = matchPos + matchLen;

// Quit loop if we have enough fields.
if ( mMaxFields > 0 && fields.size() >= mMaxFields ) break;
}
return RecordOk;
Expand Down Expand Up @@ -445,7 +614,11 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseQuoted( QStringList &fie
if ( quoted || escaped )
{
status = nextLine( buffer, false );
if ( status != RecordOk ) return status;
if ( status != RecordOk )
{
status = RecordInvalid;
break;
}
field.append( '\n' );
cp = 0;
cpmax = buffer.size();
Expand Down Expand Up @@ -530,12 +703,8 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseQuoted( QStringList &fie
// If it is a delimiter, then end of field...
else if ( isDelim )
{
if ( mMaxFields <= 0 || fields.size() < mMaxFields )
{
// If wasn't quoted, then trim..
if ( mTrimFields && ! ended ) field = field.trimmed();
if ( ! field.isEmpty() || ended || ! mDiscardEmptyFields ) fields.append( field );
}
appendField( fields, field, ended );

// Clear the field
field.clear();
started = false;
Expand All @@ -560,12 +729,12 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseQuoted( QStringList &fie
}
}
// If reached the end of the record, then add the last field...
if ( started && ( mMaxFields <= 0 || fields.size() < mMaxFields ) )
if ( started )
{
if ( mTrimFields && ! ended ) field = field.trimmed();
if ( ! field.isEmpty() || ended || ! mDiscardEmptyFields ) fields.append( field );
appendField( fields, field, ended );

}
return RecordOk;
return status;
}

bool QgsDelimitedTextFile::isValid()
Expand Down
73 changes: 54 additions & 19 deletions src/providers/delimitedtext/qgsdelimitedtextfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,59 +142,89 @@ class QgsDelimitedTextFile
*/
void setTypeCSV( QString delim = QString( "," ), QString quote = QString( "\"" ), QString escape = QString( "\"" ) );

/* Set the number of header lines to skip
/** Set the number of header lines to skip
* @param skiplines The maximum lines to skip
*/
void setSkipLines( int skiplines );
/* Return the number of header lines to skip
/** Return the number of header lines to skip
* @return skiplines The maximum lines to skip
*/
int skipLines()
{
return mSkipLines;
}

/* Set reading column names from the first record
* @param useheaders Column names will be read if true
/** Set reading field names from the first record
* @param useheaders Field names will be read if true
*/
void setUseHeader( bool useheader = true );
/* Return the option for reading column names from the first record
* @return useheaders Column names will be read if true
/** Return the option for reading field names from the first record
* @return useheaders Field names will be read if true
*/
bool useHeader()
{
return mUseHeader;
}

/* Set the option for discarding empty fields
/** Set the option for discarding empty fields
* @param discardEmptyFields Empty fields will be discarded if true
*/
void setDiscardEmptyFields( bool discardEmptyFields = true );
/* Return the option for discarding empty fields
/** Return the option for discarding empty fields
* @return discardEmptyFields Empty fields will be discarded if true
*/
bool discardEmptyFields()
{
return mDiscardEmptyFields;
}

/* Set the option for trimming whitespace from fields
/** Set the option for trimming whitespace from fields
* @param trimFields Fields will be trimmed if true
*/
void setTrimFields( bool trimFields = true );
/* Return the option for trimming whitespace from fields
/** Return the option for trimming whitespace from fields
* @return trimFields Fields will be trimmed if true
*/
bool trimFields()
{
return mTrimFields;
}

/** Return the column names read from the header, or default names
* Col## if none defined. Will open and read the head of the file
* if required, then reset..
/** Set the maximum number of fields that will be read from a record
* By default the maximum number is unlimited (0)
* @param maxFields The maximum number of fields that will be read
*/
QStringList &columnNames();
void setMaxFields( int maxFields );
/** Return the maximum number of fields that will be read
* @return maxFields The maximum number of fields that will be read
*/
int maxFields() { return mMaxFields; }

/** Set the field names
* Field names are set from QStringList. Names may be modified
* to ensure that they are unique, not empty, and do not conflict
* with default field name (field_##)
* @param names A list of proposed field names
*/
void setFieldNames( const QStringList &names );

/** Return the field names read from the header, or default names
* field_## if none defined. Will open and read the head of the file
* if required, then reset. Note that if the header record has
* not been read then the field names are empty until records have
* been read. The list may be expanded as the file is read and records
* with more fields are loaded.
* @return names A list of field names in the file
*/
QStringList &fieldNames();

/** Return the index of a named field
* @param name The name of the field to find. This will also accept an
* integer string ("1" = first field).
* @return index The zero based index of the field name, or -1 if the field
* name does not exist or cannot be inferred
*/
int fieldIndex( QString name );

/** Reads the next record from the stream splits into string fields.
* @param fields The string list to populate with the fields
Expand Down Expand Up @@ -242,9 +272,6 @@ class QgsDelimitedTextFile
*/
static QString decodeChars( QString string );




private:

/** Open the file
Expand All @@ -258,7 +285,7 @@ class QgsDelimitedTextFile
void close();

/** Reset the status if the definition is changing (eg clear
* existing column names, etc...
* existing field names, etc...
*/
void resetDefinition();

Expand All @@ -273,6 +300,12 @@ class QgsDelimitedTextFile
*/
Status nextLine( QString &buffer, bool skipBlank = false );

/** Utility routine to add a field to a record, accounting for trimming
* and discarding, and maximum field count
*/

void appendField( QStringList &record, QString field, bool quoted = false );

// Pointer to the currently selected parser
Status( QgsDelimitedTextFile::*mParser )( QStringList &fields );

Expand All @@ -292,12 +325,14 @@ class QgsDelimitedTextFile

// Parameters used by parsers
QRegExp mDelimRegexp;
bool mAnchoredRegexp;
QString mDelimChars;
QString mQuoteChar;
QString mEscapeChar;

// Information extracted from file
QStringList mColumnNames;
QStringList mFieldNames;
int mLineNumber;
int mRecordLineNumber;
int mMaxFieldCount;
};
414 changes: 211 additions & 203 deletions src/providers/delimitedtext/qgsdelimitedtextprovider.cpp

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion src/providers/delimitedtext/qgsdelimitedtextprovider.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,9 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider

void clearInvalidLines();
void recordInvalidLine( QString message );
void handleInvalidLines();
void reportErrors( QStringList messages = QStringList() );
void resetStream();
bool recordIsEmpty( QStringList &record );

QgsGeometry *geomFromWkt( QString &sWkt );
bool pointFromXY( QString &sX, QString &sY, QgsPoint &point );
Expand Down
134 changes: 70 additions & 64 deletions src/providers/delimitedtext/qgsdelimitedtextsourceselect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,8 @@ QgsDelimitedTextSourceSelect::QgsDelimitedTextSourceSelect( QWidget * parent, Qt
QDialog( parent, fl ),
mFile( new QgsDelimitedTextFile() ),
mExampleRowCount( 20 ),
mColumnNamePrefix( "Column_" ),
mPluginKey( "/Plugin-DelimitedText" ),
mLastFileType("")
mLastFileType( "" )
{

setupUi( this );
Expand All @@ -50,18 +49,25 @@ QgsDelimitedTextSourceSelect::QgsDelimitedTextSourceSelect( QWidget * parent, Qt
buttonBox->button( QDialogButtonBox::Ok )->hide();
}

cbxEncoding->clear();
cmbEncoding->clear();
QStringList codecs;
foreach ( QByteArray codec, QTextCodec::availableCodecs() )
{
cbxEncoding->addItem( codec );
codecs.append( codec );
}
cbxEncoding->setCurrentIndex( cbxEncoding->findText( "UTF-8" ) );
codecs.sort();
foreach ( QString codec, codecs )
{
cmbEncoding->addItem( codec );
}
cmbEncoding->setCurrentIndex( cmbEncoding->findText( "UTF-8" ) );
loadSettings();

updateFieldsAndEnable();

connect( txtFilePath, SIGNAL( textChanged( QString ) ), this, SLOT( updateFileName() ) );
connect( txtLayerName, SIGNAL( textChanged( QString ) ), this, SLOT( enableAccept() ) );
connect( cmbEncoding, SIGNAL( currentIndexChanged( int ) ), this, SLOT( updateFieldsAndEnable() ) );

connect( delimiterCSV, SIGNAL( toggled( bool ) ), this, SLOT( updateFieldsAndEnable() ) );
connect( delimiterChars, SIGNAL( toggled( bool ) ), this, SLOT( updateFieldsAndEnable() ) );
Expand Down Expand Up @@ -137,8 +143,6 @@ void QgsDelimitedTextSourceSelect::on_buttonBox_accepted()

QUrl url = mFile->url();

bool useHeader = mFile->useHeader();

if ( cbxPointIsComma->isChecked() )
{
url.addQueryItem( "decimalPoint", "," );
Expand All @@ -153,10 +157,8 @@ void QgsDelimitedTextSourceSelect::on_buttonBox_accepted()
if ( !cmbXField->currentText().isEmpty() && !cmbYField->currentText().isEmpty() )
{
QString field = cmbXField->currentText();
if ( ! useHeader ) field.remove( mColumnNamePrefix );
url.addQueryItem( "xField", field );
field = cmbYField->currentText();
if ( ! useHeader ) field.remove( mColumnNamePrefix );
url.addQueryItem( "yField", field );
}
}
Expand All @@ -165,7 +167,6 @@ void QgsDelimitedTextSourceSelect::on_buttonBox_accepted()
if ( ! cmbWktField->currentText().isEmpty() )
{
QString field = cmbWktField->currentText();
if ( ! useHeader ) field.remove( mColumnNamePrefix );
url.addQueryItem( "wktField", field );
}
if ( cmbGeometryType->currentIndex() > 0 )
Expand Down Expand Up @@ -249,7 +250,7 @@ void QgsDelimitedTextSourceSelect::loadSettings( QString subkey, bool loadGeomSe
}

QString encoding = settings.value( key + "/encoding", "" ).toString();
if ( ! encoding.isEmpty() ) cbxEncoding->setCurrentIndex( cbxEncoding->findText( encoding ) );
if ( ! encoding.isEmpty() ) cmbEncoding->setCurrentIndex( cmbEncoding->findText( encoding ) );
QString delimiters = settings.value( key + "/delimiters", "" ).toString();
if ( ! delimiters.isEmpty() ) setSelectedChars( delimiters );

Expand Down Expand Up @@ -281,7 +282,7 @@ void QgsDelimitedTextSourceSelect::saveSettings( QString subkey, bool saveGeomSe
QSettings settings;
QString key = mPluginKey;
if ( ! subkey.isEmpty() ) key.append( "/" ).append( subkey );
settings.setValue( key + "/encoding", cbxEncoding->currentText() );
settings.setValue( key + "/encoding", cmbEncoding->currentText() );
settings.setValue( key + "/geometry", saveGeometry() );

if ( delimiterCSV->isChecked() )
Expand Down Expand Up @@ -314,9 +315,9 @@ void QgsDelimitedTextSourceSelect::loadSettingsForFile( QString filename )
{
if ( filename.isEmpty() ) return;
QFileInfo fi( filename );
QString filetype=fi.suffix();
QString filetype = fi.suffix();
// Don't expect to change settings if not changing file type
if( filetype != mLastFileType ) loadSettings( fi.suffix(), true );
if ( filetype != mLastFileType ) loadSettings( fi.suffix(), true );
mLastFileType = filetype;
}

Expand All @@ -331,7 +332,7 @@ void QgsDelimitedTextSourceSelect::saveSettingsForFile( QString filename )
bool QgsDelimitedTextSourceSelect::loadDelimitedFileDefinition()
{
mFile->setFileName( txtFilePath->text() );
mFile->setEncoding( cbxEncoding->currentText() );
mFile->setEncoding( cmbEncoding->currentText() );
if ( delimiterChars->isChecked() )
{
mFile->setTypeCSV( selectedChars(), txtQuoteChars->text(), txtEscapeChars->text() );
Expand Down Expand Up @@ -383,27 +384,13 @@ void QgsDelimitedTextSourceSelect::updateFieldLists()
if ( ! loadDelimitedFileDefinition() )
return;

bool useHeader = mFile->useHeader();
QStringList fieldList;
QList<bool> isValidNumber;
QList<bool> isValidWkt;
QList<bool> isEmpty;

if ( useHeader )
{
fieldList = mFile->columnNames();
tblSample->setColumnCount( fieldList.size() );
tblSample->resizeColumnsToContents();
for ( int i = 0; i < fieldList.size(); i++ )
{
isValidNumber.append( false );
isValidWkt.append( false );
isEmpty.append( true );
}
}
// Put a sample set of records into the sample box. Also while scanning assess suitability of
// fields for use as coordinate and WKT fields

// put a lines into the sample box

QList<bool> isValidCoordinate;
QList<bool> isValidWkt;
QList<bool> isEmpty;
int counter = 0;
QStringList values;
QRegExp wktre( "^\\s*(?:MULTI)?(?:POINT|LINESTRING|POLYGON)\\s*Z?\\s*M?\\(", Qt::CaseInsensitive );
Expand All @@ -415,24 +402,20 @@ void QgsDelimitedTextSourceSelect::updateFieldLists()
if ( status != QgsDelimitedTextFile::RecordOk ) continue;
counter++;

// If don't have headers, then check column count and expand if necessary
// Don't count blank columns
// Look at count of non-blank fields

int nv = values.size();
while ( nv > 0 && values[nv-1].isEmpty() ) nv--;

if ( nv > fieldList.size() )
if ( isEmpty.size() < nv )
{
while ( fieldList.size() < nv )
while ( isEmpty.size() < nv )
{
int nc = fieldList.size();
QString column = mColumnNamePrefix + QString::number( nc + 1 );
fieldList.append( column );
isEmpty.append( true );
isValidNumber.append( false );
isValidCoordinate.append( false );
isValidWkt.append( false );
}
tblSample->setColumnCount( fieldList.size() );
tblSample->setColumnCount( nv );
}

tblSample->setRowCount( counter );
Expand All @@ -449,10 +432,10 @@ void QgsDelimitedTextSourceSelect::updateFieldLists()
if ( isEmpty[i] )
{
isEmpty[i] = false;
isValidNumber[i] = true;
isValidCoordinate[i] = true;
isValidWkt[i] = true;
}
if ( isValidNumber[i] )
if ( isValidCoordinate[i] )
{
bool ok = true;
if ( cbxPointIsComma->isChecked() )
Expand All @@ -467,7 +450,7 @@ void QgsDelimitedTextSourceSelect::updateFieldLists()
{
value.toDouble( &ok );
}
isValidNumber[i] = ok;
isValidCoordinate[i] = ok;
}
if ( isValidWkt[i] )
{
Expand All @@ -478,6 +461,19 @@ void QgsDelimitedTextSourceSelect::updateFieldLists()
}
}

QStringList fieldList = mFile->fieldNames();

if ( isEmpty.size() < fieldList.size() )
{
while ( isEmpty.size() < fieldList.size() )
{
isEmpty.append( true );
isValidCoordinate.append( false );
isValidWkt.append( false );
}
tblSample->setColumnCount( fieldList.size() );
}

tblSample->setHorizontalHeaderLabels( fieldList );
tblSample->resizeColumnsToContents();
tblSample->resizeRowsToContents();
Expand Down Expand Up @@ -508,11 +504,11 @@ void QgsDelimitedTextSourceSelect::updateFieldLists()
// Now try setting optional X,Y fields - will only reset the fields if
// not already set.

trySetXYField( fieldList, isValidNumber, "longitude", "latitude" );
trySetXYField( fieldList, isValidNumber, "lon", "lat" );
trySetXYField( fieldList, isValidNumber, "east", "north" );
trySetXYField( fieldList, isValidNumber, "x", "y" );
trySetXYField( fieldList, isValidNumber, "e", "n" );
trySetXYField( fieldList, isValidCoordinate, "longitude", "latitude" );
trySetXYField( fieldList, isValidCoordinate, "lon", "lat" );
trySetXYField( fieldList, isValidCoordinate, "east", "north" );
trySetXYField( fieldList, isValidCoordinate, "x", "y" );
trySetXYField( fieldList, isValidCoordinate, "e", "n" );

// And also a WKT field if there is one

Expand Down Expand Up @@ -581,7 +577,7 @@ bool QgsDelimitedTextSourceSelect::trySetXYField( QStringList &fields, QList<boo
if ( ! fields.contains( yfield, Qt::CaseInsensitive ) ) continue;
for ( int iy = 0; iy < fields.size(); iy++ )
{
if ( ! isValidNumber[i] ) continue;
if ( ! isValidNumber[iy] ) continue;
if ( iy == i ) continue;
if ( fields[iy].compare( yfield, Qt::CaseInsensitive ) == 0 )
{
Expand Down Expand Up @@ -639,19 +635,11 @@ void QgsDelimitedTextSourceSelect::updateFileName()

// Refresh the dialog after a change to the file definition: clears the
// regular expression error label, flags an invalid delimiter expression
// when the regexp delimiter option is selected, rebuilds the field lists
// and finally updates the state of the accept button.
void QgsDelimitedTextSourceSelect::updateFieldsAndEnable()
{
// Check the regular expression is valid
lblRegexpError->setText( "" );
if ( delimiterRegexp->isChecked() )
{
QRegExp re( txtDelimiterRegexp->text() );
if ( ! re.isValid() ) lblRegexpError->setText( tr( "Expression is not valid" ) );
}

updateFieldLists();
enableAccept();
}

void QgsDelimitedTextSourceSelect::enableAccept()
bool QgsDelimitedTextSourceSelect::validate()
{
// Check that input data is valid - provide a status message if not..

Expand All @@ -674,9 +662,23 @@ void QgsDelimitedTextSourceSelect::enableAccept()
{
message = tr( "At least one delimiter character must be specified" );
}
else if ( delimiterRegexp->isChecked() && ! QRegExp( txtDelimiterRegexp->text() ).isValid() )

if ( message.isEmpty() && delimiterRegexp->isChecked() )
{
QRegExp re( txtDelimiterRegexp->text() );
if ( ! re.isValid() )
{
message = tr( "Regular expression is not valid" );
}
else if ( re.pattern().startsWith( "^" ) && re.captureCount() == 0 )
{
message = tr( "^.. expression needs capture groups" );
}
lblRegexpError->setText( message );
}
if ( ! message.isEmpty() )
{
message = tr( "Regular expression is not valid" );
// continue...
}
// Hopefully won't hit this none-specific message, but just in case ...
else if ( ! mFile->isValid() )
Expand Down Expand Up @@ -704,8 +706,12 @@ void QgsDelimitedTextSourceSelect::enableAccept()
{
enabled = true;
}

lblStatus->setText( message );
return enabled;
}

void QgsDelimitedTextSourceSelect::enableAccept()
{
bool enabled = validate();
buttonBox->button( QDialogButtonBox::Ok )->setEnabled( enabled );
}
2 changes: 1 addition & 1 deletion src/providers/delimitedtext/qgsdelimitedtextsourceselect.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ class QgsDelimitedTextSourceSelect : public QDialog, private Ui::QgsDelimitedTex
private:
QgsDelimitedTextFile *mFile;
int mExampleRowCount;
QString mColumnNamePrefix;
QString mPluginKey;
QString mLastFileType;

Expand All @@ -67,6 +66,7 @@ class QgsDelimitedTextSourceSelect : public QDialog, private Ui::QgsDelimitedTex
void updateFileName();
void updateFieldsAndEnable();
void enableAccept();
bool validate();

signals:
void addVectorLayer( QString, QString, QString );
Expand Down
97 changes: 76 additions & 21 deletions src/ui/qgsdelimitedtextsourceselectbase.ui
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
</iconset>
</property>
<property name="toolTip">
<string>Number fields use comma for a decimal separator</string>
<string/>
</property>
<property name="statusTip">
<string>Number fields use comma for a decimal separator</string>
<string/>
</property>
<property name="whatsThis">
<string>Number fields use comma for a decimal separator</string>
<string/>
</property>
<layout class="QGridLayout" name="gridLayout_3">
<item row="1" column="0">
Expand All @@ -52,7 +52,7 @@
</sizepolicy>
</property>
<property name="toolTip">
<string>Each line in the file is split using a regular expression to define the end of each field</string>
<string>Each line in the file is split using a regular expression to define the end of each field</string>
</property>
<property name="statusTip">
<string>Each line in the file is split using a regular expression to define the end of each field</string>
Expand All @@ -68,13 +68,13 @@
<item row="2" column="0">
<widget class="QRadioButton" name="delimiterChars">
<property name="toolTip">
<string>Fields are delimited by specified delimiters with quote and escape characters</string>
<string>Fields are defined by the specified delimiter, quote, and escape characters</string>
</property>
<property name="statusTip">
<string>Fields are delimited by specified delimiters with quote and escape characters</string>
<string>Fields are defined by the specified delimiter, quote, and escape characters</string>
</property>
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Fields are delimited by specified delimiters with quote and escape characters&lt;/p&gt;&lt;p&gt;Fields quoted using a quote characters may contain delimiters and new lines&lt;/p&gt;&lt;p&gt;The escape character can escape new lines and quotes within quoted fields&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
<string>Fields are defined by the specified delimiter, quote, and escape characters</string>
</property>
<property name="text">
<string>Selected delimiters</string>
Expand Down Expand Up @@ -122,6 +122,15 @@
</property>
<item>
<widget class="QCheckBox" name="cbxDelimComma">
<property name="toolTip">
<string>Comma character is one of the delimiters</string>
</property>
<property name="statusTip">
<string>Comma character is one of the delimiters</string>
</property>
<property name="whatsThis">
<string>Comma character is one of the delimiters</string>
</property>
<property name="text">
<string>Comma</string>
</property>
Expand All @@ -132,6 +141,12 @@
<property name="toolTip">
<string>Tab character is one of the delimiters</string>
</property>
<property name="statusTip">
<string>Tab character is one of the delimiters</string>
</property>
<property name="whatsThis">
<string>Tab character is one of the delimiters</string>
</property>
<property name="text">
<string>Tab</string>
</property>
Expand All @@ -145,6 +160,12 @@
<property name="toolTip">
<string>Space character is one of the delimiters</string>
</property>
<property name="statusTip">
<string>Space character is one of the delimiters</string>
</property>
<property name="whatsThis">
<string>Space character is one of the delimiters</string>
</property>
<property name="text">
<string>Space</string>
</property>
Expand All @@ -155,13 +176,31 @@
</item>
<item>
<widget class="QCheckBox" name="cbxDelimColon">
<property name="toolTip">
<string>Colon character is one of the delimiters</string>
</property>
<property name="statusTip">
<string>Colon character is one of the delimiters</string>
</property>
<property name="whatsThis">
<string>Colon character is one of the delimiters</string>
</property>
<property name="text">
<string>Colon</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cbxDelimSemicolon">
<property name="toolTip">
<string>Semicolon character is one of the delimiters</string>
</property>
<property name="statusTip">
<string>Semicolon character is one of the delimiters</string>
</property>
<property name="whatsThis">
<string>Semicolon character is one of the delimiters</string>
</property>
<property name="text">
<string>Semicolon</string>
</property>
Expand Down Expand Up @@ -545,10 +584,13 @@
</size>
</property>
<property name="toolTip">
<string>Name of the field containing y values</string>
<string>Name of the field containing well known text value</string>
</property>
<property name="statusTip">
<string>Name of the field containing well known text value</string>
</property>
<property name="whatsThis">
<string>Name of the field containing y values. Choose a field from the list. The list is generated by parsing the header row of the delimited text file.</string>
<string>Name of the field containing well known text value</string>
</property>
<property name="editable">
<bool>false</bool>
Expand Down Expand Up @@ -742,7 +784,20 @@
</widget>
</item>
<item>
<widget class="QComboBox" name="cbxEncoding"/>
<widget class="QComboBox" name="cmbEncoding">
<property name="toolTip">
<string>Select the file encoding</string>
</property>
<property name="statusTip">
<string>Select the file encoding</string>
</property>
<property name="whatsThis">
<string>Select the file encoding</string>
</property>
<property name="insertPolicy">
<enum>QComboBox::InsertAtTop</enum>
</property>
</widget>
</item>
</layout>
</item>
Expand Down Expand Up @@ -829,13 +884,13 @@
</size>
</property>
<property name="toolTip">
<string>The number of lines at the beginning of the file to ignore</string>
<string>The number of lines to discard from the beginning of the file</string>
</property>
<property name="statusTip">
<string>The number of lines at the beginning of the file to ignore</string>
<string>The number of lines to discard from the beginning of the file</string>
</property>
<property name="whatsThis">
<string>The number of lines at the beginning of the file to ignore</string>
<string>The number of lines to discard from the beginning of the file</string>
</property>
<property name="wrapping">
<bool>false</bool>
Expand All @@ -851,13 +906,13 @@
<item>
<widget class="QCheckBox" name="cbxUseHeader">
<property name="toolTip">
<string>Field names are read from the first line. If not selected then fields are numbered</string>
<string>Field names are read from the first record. If not selected then fields are numbered</string>
</property>
<property name="statusTip">
<string>Field names are read from the first line. If not selected then fields are numbered</string>
<string>Field names are read from the first record. If not selected then fields are numbered</string>
</property>
<property name="whatsThis">
<string>Field names are read from the first line. If not selected then fields are numbered</string>
<string>Field names are read from the first record. If not selected then fields are numbered</string>
</property>
<property name="text">
<string>First record has field names</string>
Expand Down Expand Up @@ -929,16 +984,16 @@
<bool>true</bool>
</property>
<property name="toolTip">
<string>X and Y coordinates have comma for decimal separator</string>
<string>Number fields use comma for a decimal separator</string>
</property>
<property name="statusTip">
<string>X and Y coordinates have comma for decimal separator</string>
<string>Number fields use comma for a decimal separator</string>
</property>
<property name="whatsThis">
<string>X and Y coordinates have comma for decimal separator</string>
<string>Number fields use comma for a decimal separator</string>
</property>
<property name="text">
<string>Decimal point is comma</string>
<string>Decimal separator is comma</string>
</property>
</widget>
</item>
Expand Down Expand Up @@ -966,7 +1021,7 @@
<tabstop>txtFilePath</tabstop>
<tabstop>btnBrowseForFile</tabstop>
<tabstop>txtLayerName</tabstop>
<tabstop>cbxEncoding</tabstop>
<tabstop>cmbEncoding</tabstop>
<tabstop>delimiterCSV</tabstop>
<tabstop>delimiterChars</tabstop>
<tabstop>cbxDelimComma</tabstop>
Expand Down
453 changes: 418 additions & 35 deletions tests/src/python/test_qgsdelimitedtextprovider.py

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions tests/testdata/delimitedtext/test.badquote
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id,description,data,info
1,Unclosed quotes 1,"Quoted,data1
2,Unclosed quotes 2,"Quoted,data2",info2
3,Recovered after unclosed quore,"Data ok",inf3
4,Unclosed quotes to end of file,"Never ending field ...
3 changes: 3 additions & 0 deletions tests/testdata/delimitedtext/testfields.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,"description",data,,data,28,24.5,field_3,data_1
1,Generation of field names,Some data,Some info,,,,,,,,last data

2 changes: 2 additions & 0 deletions tests/testdata/delimitedtext/testlatin1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id|description|name
1|Correctly read latin1 encoding|This test is �
3 changes: 3 additions & 0 deletions tests/testdata/delimitedtext/testre.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
idREGEXPdescriptionREGEXPdataREGEXPinfo
1REGEXP Basic regular expression test REGEXP data1 REGEXP info
2REGEXP Basic regular expression test 2 RE data2 RE info2
4 changes: 4 additions & 0 deletions tests/testdata/delimitedtext/testre2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
id description information
1 Anchored regexp Some data
2 Anchored regexp invalid
3 Anchored regexp recovered Some data
2 changes: 2 additions & 0 deletions tests/testdata/delimitedtext/testre3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
small
fi..ile
2 changes: 2 additions & 0 deletions tests/testdata/delimitedtext/testutf8.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id|description|name
1|Correctly read UTF8 encoding|Field has āccèntéd text