Skip to content

Commit

Permalink
Merge branch 'carolinux-5605-types'
Browse files Browse the repository at this point in the history
  • Loading branch information
ccrook committed Mar 21, 2015
2 parents 297a94f + 99e6a62 commit 60fcc4d
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 33 deletions.
10 changes: 7 additions & 3 deletions resources/context_help/QgsDelimitedTextSourceSelect
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ be double, otherwise the type will be text.
QGIS can also read the types from an OGR CSV driver compatible "csvt" file.
This is a file alongside the data file, but with a "t" appended to the file name.
The file should just contain one line which lists the type of each field.
Valid types are "integer", "real", "string", "date", "time", and "datetime". The date, time, and datetime types are treated as strings in QGIS.
Valid types are "integer", "long", "longlong", "real",
"string", "date", "time", and "datetime".
The date, time, and datetime types are treated as strings by the delimited text provider.
Each type may be followed by a width and precision, for example "real(10.4)".
The types in the list are separated by commas, regardless of the delimiter used in the data file. An
example of a valid format file would be:
Expand Down Expand Up @@ -215,9 +217,11 @@ feature id to each record which is the line number in the source file on which
the record starts.
</p>
<p>
Each attribute also has a data type, one of string (text), integer, or real number.
Each attribute also has a data type, one of string (text), integer, longlong,
or real number.
The data type is inferred from the content of the fields - if every non blank value
is a valid integer then the type is integer, otherwise if it is a valid real
is a valid integer then the type is integer, otherwise if it is a valid long long
number then the type is longlong, otherwise if it is a valid real
number then the type is real, otherwise the type is string. Note that this is
based on the content of the fields - quoting fields does not change the way they
are interpreted.
Expand Down
45 changes: 38 additions & 7 deletions src/providers/delimitedtext/qgsdelimitedtextprovider.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ QStringList QgsDelimitedTextProvider::readCsvtFieldTypes( QString filename, QStr
// not allowed in OGR CSVT files. Also doesn't care if int and string fields have

strTypeList = strTypeList.toLower();
QRegExp reTypeList( "^(?:\\s*(\\\"?)(?:integer|real|string|date|datetime|time)(?:\\(\\d+(?:\\.\\d+)?\\))?\\1\\s*(?:,|$))+" );
QRegExp reTypeList( "^(?:\\s*(\\\"?)(?:integer|real|long|longlong|string|date|datetime|time)(?:\\(\\d+(?:\\.\\d+)?\\))?\\1\\s*(?:,|$))+" );
if ( ! reTypeList.exactMatch( strTypeList ) )
{
// Looks like this was supposed to be a CSVT file, so report bad formatted string
Expand Down Expand Up @@ -407,6 +407,7 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )

QList<bool> isEmpty;
QList<bool> couldBeInt;
QList<bool> couldBeLongLong;
QList<bool> couldBeDouble;

while ( true )
Expand Down Expand Up @@ -561,28 +562,46 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
{

QString &value = parts[i];
// Ignore empty fields - spreadsheet generated CSV files often
// have random empty fields at the end of a row
if ( value.isEmpty() )
continue;

// try to convert attribute values to integer and double
// Expand the columns to include this non empty field if necessary

while ( couldBeInt.size() <= i )
{
isEmpty.append( true );
couldBeInt.append( false );
couldBeLongLong.append( false );
couldBeDouble.append( false );
}

// If this column has been empty so far then initialize it
// for possible types

if ( isEmpty[i] )
{
isEmpty[i] = false;
couldBeInt[i] = true;
couldBeLongLong[i] = true;
couldBeDouble[i] = true;
}

// Now test for still valid possible types for the field
// Types are possible until first record which cannot be parsed

if ( couldBeInt[i] )
{
value.toInt( &couldBeInt[i] );
}
if ( couldBeDouble[i] )

if ( couldBeLongLong[i] && ! couldBeInt[i] )
{
value.toLongLong( &couldBeLongLong[i] );
}

if ( couldBeDouble[i] && ! couldBeLongLong[i] )
{
if ( ! mDecimalPoint.isEmpty() )
{
Expand Down Expand Up @@ -620,7 +639,12 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
fieldType = QVariant::Int;
typeName = "integer";
}
else if ( csvtTypes[i] == "real" )
else if ( csvtTypes[i] == "long" || csvtTypes[i]== "longlong" )
{
fieldType = QVariant::LongLong; //QVariant doesn't support long
typeName = "longlong";
}
else if ( csvtTypes[i] == "real" || csvtTypes[i] == "double" )
{
fieldType = QVariant::Double;
typeName = "double";
Expand All @@ -633,6 +657,11 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
fieldType = QVariant::Int;
typeName = "integer";
}
else if ( couldBeLongLong[i] )
{
fieldType = QVariant::LongLong;
typeName = "longlong";
}
else if ( couldBeDouble[i] )
{
fieldType = QVariant::Double;
Expand Down Expand Up @@ -997,12 +1026,14 @@ bool QgsDelimitedTextProvider::setSubsetString( QString subset, bool updateFeatu

if ( valid )
{

if ( mSubsetExpression ) delete mSubsetExpression;
QgsExpression * tmpSubsetExpression = mSubsetExpression;
// using a tmp pointer to avoid the pointer being dereferenced by
// a friend class after it has been freed but before it has been
// reassigned
QString previousSubset = mSubsetString;
mSubsetString = subset;
mSubsetExpression = expression;

if ( tmpSubsetExpression ) delete tmpSubsetExpression;
// Update the feature count and extents if requested

// Usage of updateFeatureCount is a bit painful, basically expect that it
Expand Down
10 changes: 8 additions & 2 deletions tests/src/python/test_qgsdelimitedtextprovider.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# This will get replaced with a git SHA1 when you do a git archive
__revision__ = '$Format:%H$'

# This module provides unit test for the delimtied text provider. It uses data files in
# This module provides unit test for the delimited text provider. It uses data files in
# the testdata/delimitedtext directory.
#
# New tests can be created (or existing ones updated), but incorporating a createTest
Expand Down Expand Up @@ -227,7 +227,7 @@ def recordDifference( record1, record2 ):
return "Field {0} differs: {1:.50} versus {2:.50}".format(k,repr(r1k),repr(r2k))
for k in record2.keys():
if k not in record1:
return "Output contains extra field {0} is missing".format(k)
return "Output contains extra field {0}".format(k)
return ''

def runTest( file, requests, **params ):
Expand Down Expand Up @@ -650,6 +650,12 @@ def test_037_csvt_file_invalid_file(self):
requests=None
runTest(filename,requests,**params)

def test_038_type_inference(self):
# Field types inferred from the file content (integer, longlong, double, text)
filename='testtypes.csv'
params={'yField': 'lat', 'xField': 'lon', 'type': 'csv'}
requests=None
runTest(filename,requests,**params)

if __name__ == '__main__':
unittest.main()
136 changes: 120 additions & 16 deletions tests/src/python/test_qgsdelimitedtextprovider_wanted.py
Original file line number Diff line number Diff line change
Expand Up @@ -2035,36 +2035,46 @@ def test_033_reset_subset_string():
wanted['log']=[]
return wanted


def test_034_csvt_file():
wanted={}
wanted['uri']=u'file://testcsvt.csv?geomType=none&type=csv'
wanted['fieldTypes']=['integer', 'text', 'integer', 'double', 'text', 'text', 'text']
wanted['fieldTypes']=['integer', 'text', 'integer', 'double', 'text', 'text', 'text', 'text', 'text', 'text', 'longlong', 'longlong']
wanted['data']={
2L: {
'id': u'1',
'description': u'Test csvt 1',
'f1': u'1',
'f2': u'1.2',
'f3': u'01',
'f4': u'text',
'f5': u'times',
'fint': u'1',
'freal': u'1.2',
'fstr': u'1',
'fstr_1': u'text',
'fdatetime': u'2015-03-02T12:30:00',
'fdate': u'2014-12-30',
'ftime': u'23:55',
'flong': u'-456',
'flonglong': u'-678',
'field_12': u'NULL',
'#fid': 2L,
'#geometry': 'None',
},
},
3L: {
'id': u'2',
'description': u'Test csvt 2',
'f1': u'3',
'f2': u'1.5',
'f3': u'99',
'f4': u'23.5',
'f5': u'80',
'fint': u'3',
'freal': u'1.5',
'fstr': u'99',
'fstr_1': u'23.5',
'fdatetime': u'80',
'fdate': u'2015-03-28',
'ftime': u'2014-12-30',
'flong': u'01:55',
'flonglong': u'9189304972279762602',
'field_12': u'-3123724580211819352',
'#fid': 3L,
'#geometry': 'None',
},
}
wanted['log']=[]
},
}
wanted['log']=[
]
return wanted


Expand Down Expand Up @@ -2165,3 +2175,97 @@ def test_037_csvt_file_invalid_file():
}
wanted['log']=[]
return wanted

def test_038_type_inference():
wanted={}
wanted['uri']=u'file://testtypes.csv?yField=lat&xField=lon&type=csv'
wanted['fieldTypes']=['text', 'double', 'double', 'text', 'text', 'integer', 'longlong', 'double', 'text']
wanted['data']={
2L: {
'id': u'line1',
'description': u'1.0',
'lon': u'1.0',
'lat': u'1.0',
'empty': u'NULL',
'text': u'NULL',
'int': u'0',
'longlong': u'0',
'real': u'NULL',
'text2': u'1',
'#fid': 2L,
'#geometry': 'POINT(1 1)',
},
3L: {
'id': u'line2',
'description': u'1.0',
'lon': u'1.0',
'lat': u'5.0',
'empty': u'NULL',
'text': u'1',
'int': u'NULL',
'longlong': u'9189304972279762602',
'real': u'1.3',
'text2': u'-4',
'#fid': 3L,
'#geometry': 'POINT(1 5)',
},
4L: {
'id': u'line3',
'description': u'5.0',
'lon': u'5.0',
'lat': u'5.0',
'empty': u'NULL',
'text': u'1xx',
'int': u'2',
'longlong': u'345',
'real': u'2.0',
'text2': u'1x',
'#fid': 4L,
'#geometry': 'POINT(5 5)',
},
5L: {
'id': u'line4',
'description': u'5.0',
'lon': u'5.0',
'lat': u'1.0',
'empty': u'NULL',
'text': u'A string',
'int': u'-3456',
'longlong': u'-3123724580211819352',
'real': u'-123.56',
'text2': u'NULL',
'#fid': 5L,
'#geometry': 'POINT(5 1)',
},
6L: {
'id': u'line5',
'description': u'3.0',
'lon': u'3.0',
'lat': u'1.0',
'empty': u'NULL',
'text': u'NULL',
'int': u'NULL',
'longlong': u'NULL',
'real': u'0.00023',
'text2': u'23',
'#fid': 6L,
'#geometry': 'POINT(3 1)',
},
7L: {
'id': u'line6',
'description': u'1.0',
'lon': u'1.0',
'lat': u'3.0',
'empty': u'NULL',
'text': u'1.5',
'int': u'9',
'longlong': u'42',
'real': u'99.0',
'text2': u'0',
'#fid': 7L,
'#geometry': 'POINT(1 3)',
},
}
wanted['log']=[
]
return wanted
7 changes: 3 additions & 4 deletions tests/testdata/delimitedtext/testcsvt.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
id,description,f1,f2,f3,f4,f5
1,Test csvt 1,1,1.2,01,text,times
2,Test csvt 2,3,1.5,99,23.5,80

id,description,fint,freal,fstr,fstr,fdatetime,fdate,ftime,flong,flonglong
1,Test csvt 1,1,1.2,1,text,2015-03-02T12:30:00,2014-12-30,23:55,-456,-678
2,Test csvt 2,3,1.5,99,23.5,80,2015-03-28,2014-12-30,01:55,9189304972279762602,-3123724580211819352
2 changes: 1 addition & 1 deletion tests/testdata/delimitedtext/testcsvt.csvt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
integer,string,integer,real,string,string,datetime
integer,string,integer,real,string,string,datetime,date,time,long,longlong
7 changes: 7 additions & 0 deletions tests/testdata/delimitedtext/testtypes.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
id,lon,lat,empty,text,int,longlong,real,text2
line1,1.0,1.0,,,0,0,,1
line2,1.0,5.0,,1,,9189304972279762602,1.3,-4
line3,5.0,5.0,,1xx,2,345,2,1x
line4,5.0,1.0,,A string,-3456,-3123724580211819352,-123.56,,
line5,3.0,1.0,,,,,23e-5,23
line6,1.0,3.0,,1.5,9,42,99,0

0 comments on commit 60fcc4d

Please sign in to comment.