Skip to content
Permalink
Browse files

Merge branch 'carolinux-5605-types'

  • Loading branch information
ccrook committed Mar 21, 2015
2 parents 297a94f + 99e6a62 commit 60fcc4d876f57b4f7f2f404a4c3ef7f3910e0a9f
@@ -35,7 +35,9 @@ be double, otherwise the type will be text.
QGIS can also read the types from an OGR CSV driver compatible "csvt" file.
This is a file alongside the data file, but with a "t" appended to the file name.
The file should just contain one line which lists the type of each field.
Valid types are "integer", "real", "string", "date", "time", and "datetime". The date, time, and datetime types are treated as strings in QGIS.
Valid types are "integer", "long", "longlong", "real",
"string", "date", "time", and "datetime".
The date, time, and datetime types are treated as strings by the delimited text provider.
Each type may be followed by a width and precision, for example "real(10.4)".
The types in the list are separated by commas, regardless of the delimiter used in the data file. An
example of a valid format file would be:
@@ -215,9 +217,11 @@ feature id to each record which is the line number in the source file on which
the record starts.
</p>
<p>
Each attribute also has a data type, one of string (text), integer, or real number.
Each attribute also has a data type, one of string (text), integer, longlong,
or real number.
The data type is inferred from the content of the fields - if every non blank value
is a valid integer then the type is integer, otherwise if it is a valid real
is a valid integer then the type is integer, otherwise if it is a valid long long
number then the type is longlong, otherwise if it is a valid real
number then the type is real, otherwise the type is string. Note that this is
based on the content of the fields - quoting fields does not change the way they
are interpreted.
@@ -245,7 +245,7 @@ QStringList QgsDelimitedTextProvider::readCsvtFieldTypes( QString filename, QStr
// not allowed in OGR CSVT files. Also doesn't care if int and string fields have

strTypeList = strTypeList.toLower();
QRegExp reTypeList( "^(?:\\s*(\\\"?)(?:integer|real|string|date|datetime|time)(?:\\(\\d+(?:\\.\\d+)?\\))?\\1\\s*(?:,|$))+" );
QRegExp reTypeList( "^(?:\\s*(\\\"?)(?:integer|real|long|longlong|string|date|datetime|time)(?:\\(\\d+(?:\\.\\d+)?\\))?\\1\\s*(?:,|$))+" );
if ( ! reTypeList.exactMatch( strTypeList ) )
{
// Looks like this was supposed to be a CSVT file, so report bad formatted string
@@ -407,6 +407,7 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )

QList<bool> isEmpty;
QList<bool> couldBeInt;
QList<bool> couldBeLongLong;
QList<bool> couldBeDouble;

while ( true )
@@ -561,28 +562,46 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
{

QString &value = parts[i];
// Ignore empty fields - spreadsheet generated CSV files often
// have random empty fields at the end of a row
if ( value.isEmpty() )
continue;

// try to convert attribute values to integer and double
// Expand the columns to include this non empty field if necessary

while ( couldBeInt.size() <= i )
{
isEmpty.append( true );
couldBeInt.append( false );
couldBeLongLong.append( false );
couldBeDouble.append( false );
}

// If this column has been empty so far then initialize it
// for possible types

if ( isEmpty[i] )
{
isEmpty[i] = false;
couldBeInt[i] = true;
couldBeLongLong[i] = true;
couldBeDouble[i] = true;
}

// Now test for still valid possible types for the field
// Types are possible until first record which cannot be parsed

if ( couldBeInt[i] )
{
value.toInt( &couldBeInt[i] );
}
if ( couldBeDouble[i] )

if ( couldBeLongLong[i] && ! couldBeInt[i] )
{
value.toLongLong( &couldBeLongLong[i] );
}

if ( couldBeDouble[i] && ! couldBeLongLong[i] )
{
if ( ! mDecimalPoint.isEmpty() )
{
@@ -620,7 +639,12 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
fieldType = QVariant::Int;
typeName = "integer";
}
else if ( csvtTypes[i] == "real" )
else if ( csvtTypes[i] == "long" || csvtTypes[i]== "longlong" )
{
fieldType = QVariant::LongLong; //QVariant doesn't support long
typeName = "longlong";
}
else if ( csvtTypes[i] == "real" || csvtTypes[i] == "double" )
{
fieldType = QVariant::Double;
typeName = "double";
@@ -633,6 +657,11 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
fieldType = QVariant::Int;
typeName = "integer";
}
else if ( couldBeLongLong[i] )
{
fieldType = QVariant::LongLong;
typeName = "longlong";
}
else if ( couldBeDouble[i] )
{
fieldType = QVariant::Double;
@@ -997,12 +1026,14 @@ bool QgsDelimitedTextProvider::setSubsetString( QString subset, bool updateFeatu

if ( valid )
{

if ( mSubsetExpression ) delete mSubsetExpression;
QgsExpression * tmpSubsetExpression = mSubsetExpression;
// using a tmp pointer to avoid the pointer being dereferenced by
// a friend class after it has been freed but before it has been
// reassigned
QString previousSubset = mSubsetString;
mSubsetString = subset;
mSubsetExpression = expression;

if ( tmpSubsetExpression ) delete tmpSubsetExpression;
// Update the feature count and extents if requested

// Usage of updateFeatureCount is a bit painful, basically expect that it
@@ -12,7 +12,7 @@
# This will get replaced with a git SHA1 when you do a git archive
__revision__ = '$Format:%H$'

# This module provides unit test for the delimtied text provider. It uses data files in
# This module provides unit test for the delimited text provider. It uses data files in
# the testdata/delimitedtext directory.
#
# New tests can be created (or existing ones updated), by incorporating a createTest
@@ -227,7 +227,7 @@ def recordDifference( record1, record2 ):
return "Field {0} differs: {1:.50} versus {2:.50}".format(k,repr(r1k),repr(r2k))
for k in record2.keys():
if k not in record1:
return "Output contains extra field {0} is missing".format(k)
return "Output contains extra field {0}".format(k)
return ''

def runTest( file, requests, **params ):
@@ -650,6 +650,12 @@ def test_037_csvt_file_invalid_file(self):
requests=None
runTest(filename,requests,**params)

def test_038_type_inference(self):
# Field types inferred from the field content (integer, longlong, double, text)
filename='testtypes.csv'
params={'yField': 'lat', 'xField': 'lon', 'type': 'csv'}
requests=None
runTest(filename,requests,**params)

if __name__ == '__main__':
unittest.main()
@@ -2035,36 +2035,46 @@ def test_033_reset_subset_string():
wanted['log']=[]
return wanted


def test_034_csvt_file():
wanted={}
wanted['uri']=u'file://testcsvt.csv?geomType=none&type=csv'
wanted['fieldTypes']=['integer', 'text', 'integer', 'double', 'text', 'text', 'text']
wanted['fieldTypes']=['integer', 'text', 'integer', 'double', 'text', 'text', 'text', 'text', 'text', 'text', 'longlong', 'longlong']
wanted['data']={
2L: {
'id': u'1',
'description': u'Test csvt 1',
'f1': u'1',
'f2': u'1.2',
'f3': u'01',
'f4': u'text',
'f5': u'times',
'fint': u'1',
'freal': u'1.2',
'fstr': u'1',
'fstr_1': u'text',
'fdatetime': u'2015-03-02T12:30:00',
'fdate': u'2014-12-30',
'ftime': u'23:55',
'flong': u'-456',
'flonglong': u'-678',
'field_12': u'NULL',
'#fid': 2L,
'#geometry': 'None',
},
},
3L: {
'id': u'2',
'description': u'Test csvt 2',
'f1': u'3',
'f2': u'1.5',
'f3': u'99',
'f4': u'23.5',
'f5': u'80',
'fint': u'3',
'freal': u'1.5',
'fstr': u'99',
'fstr_1': u'23.5',
'fdatetime': u'80',
'fdate': u'2015-03-28',
'ftime': u'2014-12-30',
'flong': u'01:55',
'flonglong': u'9189304972279762602',
'field_12': u'-3123724580211819352',
'#fid': 3L,
'#geometry': 'None',
},
}
wanted['log']=[]
},
}
wanted['log']=[
]
return wanted


@@ -2165,3 +2175,97 @@ def test_037_csvt_file_invalid_file():
}
wanted['log']=[]
return wanted

def test_038_type_inference():
wanted={}
wanted['uri']=u'file://testtypes.csv?yField=lat&xField=lon&type=csv'
wanted['fieldTypes']=['text', 'double', 'double', 'text', 'text', 'integer', 'longlong', 'double', 'text']
wanted['data']={
2L: {
'id': u'line1',
'description': u'1.0',
'lon': u'1.0',
'lat': u'1.0',
'empty': u'NULL',
'text': u'NULL',
'int': u'0',
'longlong': u'0',
'real': u'NULL',
'text2': u'1',
'#fid': 2L,
'#geometry': 'POINT(1 1)',
},
3L: {
'id': u'line2',
'description': u'1.0',
'lon': u'1.0',
'lat': u'5.0',
'empty': u'NULL',
'text': u'1',
'int': u'NULL',
'longlong': u'9189304972279762602',
'real': u'1.3',
'text2': u'-4',
'#fid': 3L,
'#geometry': 'POINT(1 5)',
},
4L: {
'id': u'line3',
'description': u'5.0',
'lon': u'5.0',
'lat': u'5.0',
'empty': u'NULL',
'text': u'1xx',
'int': u'2',
'longlong': u'345',
'real': u'2.0',
'text2': u'1x',
'#fid': 4L,
'#geometry': 'POINT(5 5)',
},
5L: {
'id': u'line4',
'description': u'5.0',
'lon': u'5.0',
'lat': u'1.0',
'empty': u'NULL',
'text': u'A string',
'int': u'-3456',
'longlong': u'-3123724580211819352',
'real': u'-123.56',
'text2': u'NULL',
'#fid': 5L,
'#geometry': 'POINT(5 1)',
},
6L: {
'id': u'line5',
'description': u'3.0',
'lon': u'3.0',
'lat': u'1.0',
'empty': u'NULL',
'text': u'NULL',
'int': u'NULL',
'longlong': u'NULL',
'real': u'0.00023',
'text2': u'23',
'#fid': 6L,
'#geometry': 'POINT(3 1)',
},
7L: {
'id': u'line6',
'description': u'1.0',
'lon': u'1.0',
'lat': u'3.0',
'empty': u'NULL',
'text': u'1.5',
'int': u'9',
'longlong': u'42',
'real': u'99.0',
'text2': u'0',
'#fid': 7L,
'#geometry': 'POINT(1 3)',
},
}
wanted['log']=[
]
return wanted
@@ -1,4 +1,3 @@
id,description,f1,f2,f3,f4,f5
1,Test csvt 1,1,1.2,01,text,times
2,Test csvt 2,3,1.5,99,23.5,80

id,description,fint,freal,fstr,fstr,fdatetime,fdate,ftime,flong,flonglong
1,Test csvt 1,1,1.2,1,text,2015-03-02T12:30:00,2014-12-30,23:55,-456,-678
2,Test csvt 2,3,1.5,99,23.5,80,2015-03-28,2014-12-30,01:55,9189304972279762602,-3123724580211819352
@@ -1 +1 @@
integer,string,integer,real,string,string,datetime
integer,string,integer,real,string,string,datetime,date,time,long,longlong
@@ -0,0 +1,7 @@
id,lon,lat,empty,text,int,longlong,real,text2
line1,1.0,1.0,,,0,0,,1
line2,1.0,5.0,,1,,9189304972279762602,1.3,-4
line3,5.0,5.0,,1xx,2,345,2,1x
line4,5.0,1.0,,A string,-3456,-3123724580211819352,-123.56,,
line5,3.0,1.0,,,,,23e-5,23
line6,1.0,3.0,,1.5,9,42,99,0

0 comments on commit 60fcc4d

Please sign in to comment.
You can’t perform that action at this time.