Skip to content

Commit 60fcc4d

Browse files
committed
Merge branch 'carolinux-5605-types'
2 parents 297a94f + 99e6a62 commit 60fcc4d

File tree

7 files changed

+184
-33
lines changed

7 files changed

+184
-33
lines changed

resources/context_help/QgsDelimitedTextSourceSelect

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@ be double, otherwise the type will be text.
3535
QGIS can also read the types from an OGR CSV driver compatible "csvt" file.
3636
This is a file alongside the data file, but with a "t" appended to the file name.
3737
The file should just contain one line which lists the type of each field.
38-
Valid types are "integer", "real", "string", "date", "time", and "datetime". The date, time, and datetime types are treated as strings in QGIS.
38+
Valid types are "integer", "long", "longlong", "real",
39+
"string", "date", "time", and "datetime".
40+
The date, time, and datetime types are treated as strings by the delimited text provider.
3941
Each type may be followed by a width and precision, for example "real(10.4)".
4042
The types in the list are separated by commas, regardless of the delimiter used in the data file. An
4143
example of a valid format file would be:
@@ -215,9 +217,11 @@ feature id to each record which is the line number in the source file on which
215217
the record starts.
216218
</p>
217219
<p>
218-
Each attribute also has a data type, one of string (text), integer, or real number.
220+
Each attribute also has a data type, one of string (text), integer, longlong,
221+
or real number.
219222
The data type is inferred from the content of the fields - if every non blank value
220-
is a valid integer then the type is integer, otherwise if it is a valid real
223+
is a valid integer then the type is integer, otherwise if it is a valid long long
224+
number then the type is longlong, otherwise if it is a valid real
221225
number then the type is real, otherwise the type is string. Note that this is
222226
based on the content of the fields - quoting fields does not change the way they
223227
are interpreted.

src/providers/delimitedtext/qgsdelimitedtextprovider.cpp

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ QStringList QgsDelimitedTextProvider::readCsvtFieldTypes( QString filename, QStr
245245
// not allowed in OGR CSVT files. Also doesn't care if int and string fields have
246246

247247
strTypeList = strTypeList.toLower();
248-
QRegExp reTypeList( "^(?:\\s*(\\\"?)(?:integer|real|string|date|datetime|time)(?:\\(\\d+(?:\\.\\d+)?\\))?\\1\\s*(?:,|$))+" );
248+
QRegExp reTypeList( "^(?:\\s*(\\\"?)(?:integer|real|long|longlong|string|date|datetime|time)(?:\\(\\d+(?:\\.\\d+)?\\))?\\1\\s*(?:,|$))+" );
249249
if ( ! reTypeList.exactMatch( strTypeList ) )
250250
{
251251
// Looks like this was supposed to be a CSVT file, so report bad formatted string
@@ -407,6 +407,7 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
407407

408408
QList<bool> isEmpty;
409409
QList<bool> couldBeInt;
410+
QList<bool> couldBeLongLong;
410411
QList<bool> couldBeDouble;
411412

412413
while ( true )
@@ -561,28 +562,46 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
561562
{
562563

563564
QString &value = parts[i];
565+
// Ignore empty fields - spreadsheet generated CSV files often
566+
// have random empty fields at the end of a row
564567
if ( value.isEmpty() )
565568
continue;
566569

567-
// try to convert attribute values to integer and double
570+
// Expand the columns to include this non empty field if necessary
568571

569572
while ( couldBeInt.size() <= i )
570573
{
571574
isEmpty.append( true );
572575
couldBeInt.append( false );
576+
couldBeLongLong.append( false );
573577
couldBeDouble.append( false );
574578
}
579+
580+
// If this column has been empty so far then initialize it
581+
// for possible types
582+
575583
if ( isEmpty[i] )
576584
{
577585
isEmpty[i] = false;
578586
couldBeInt[i] = true;
587+
couldBeLongLong[i] = true;
579588
couldBeDouble[i] = true;
580589
}
590+
591+
// Now test for still valid possible types for the field
592+
// Types are possible until first record which cannot be parsed
593+
581594
if ( couldBeInt[i] )
582595
{
583596
value.toInt( &couldBeInt[i] );
584597
}
585-
if ( couldBeDouble[i] )
598+
599+
if ( couldBeLongLong[i] && ! couldBeInt[i] )
600+
{
601+
value.toLongLong( &couldBeLongLong[i] );
602+
}
603+
604+
if ( couldBeDouble[i] && ! couldBeLongLong[i] )
586605
{
587606
if ( ! mDecimalPoint.isEmpty() )
588607
{
@@ -620,7 +639,12 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
620639
fieldType = QVariant::Int;
621640
typeName = "integer";
622641
}
623-
else if ( csvtTypes[i] == "real" )
642+
else if ( csvtTypes[i] == "long" || csvtTypes[i]== "longlong" )
643+
{
644+
fieldType = QVariant::LongLong; //QVariant doesn't support long
645+
typeName = "longlong";
646+
}
647+
else if ( csvtTypes[i] == "real" || csvtTypes[i] == "double" )
624648
{
625649
fieldType = QVariant::Double;
626650
typeName = "double";
@@ -633,6 +657,11 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
633657
fieldType = QVariant::Int;
634658
typeName = "integer";
635659
}
660+
else if ( couldBeLongLong[i] )
661+
{
662+
fieldType = QVariant::LongLong;
663+
typeName = "longlong";
664+
}
636665
else if ( couldBeDouble[i] )
637666
{
638667
fieldType = QVariant::Double;
@@ -997,12 +1026,14 @@ bool QgsDelimitedTextProvider::setSubsetString( QString subset, bool updateFeatu
9971026

9981027
if ( valid )
9991028
{
1000-
1001-
if ( mSubsetExpression ) delete mSubsetExpression;
1029+
QgsExpression * tmpSubsetExpression = mSubsetExpression;
1030+
// using a tmp pointer to avoid the pointer being dereferenced by
1031+
// a friend class after it has been freed but before it has been
1032+
// reassigned
10021033
QString previousSubset = mSubsetString;
10031034
mSubsetString = subset;
10041035
mSubsetExpression = expression;
1005-
1036+
if ( tmpSubsetExpression ) delete tmpSubsetExpression;
10061037
// Update the feature count and extents if requested
10071038

10081039
// Usage of updateFeatureCount is a bit painful, basically expect that it

tests/src/python/test_qgsdelimitedtextprovider.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# This will get replaced with a git SHA1 when you do a git archive
1313
__revision__ = '$Format:%H$'
1414

15-
# This module provides unit test for the delimtied text provider. It uses data files in
15+
# This module provides unit test for the delimited text provider. It uses data files in
1616
# the testdata/delimitedtext directory.
1717
#
1818
# New tests can be created (or existing ones updated), by incorporating a createTest
@@ -227,7 +227,7 @@ def recordDifference( record1, record2 ):
227227
return "Field {0} differs: {1:.50} versus {2:.50}".format(k,repr(r1k),repr(r2k))
228228
for k in record2.keys():
229229
if k not in record1:
230-
return "Output contains extra field {0} is missing".format(k)
230+
return "Output contains extra field {0}".format(k)
231231
return ''
232232

233233
def runTest( file, requests, **params ):
@@ -650,6 +650,12 @@ def test_037_csvt_file_invalid_file(self):
650650
requests=None
651651
runTest(filename,requests,**params)
652652

653+
def test_038_type_inference(self):
654+
# Infer field types (integer, longlong, double, text) from the file content
655+
filename='testtypes.csv'
656+
params={'yField': 'lat', 'xField': 'lon', 'type': 'csv'}
657+
requests=None
658+
runTest(filename,requests,**params)
653659

654660
if __name__ == '__main__':
655661
unittest.main()

tests/src/python/test_qgsdelimitedtextprovider_wanted.py

Lines changed: 120 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2035,36 +2035,46 @@ def test_033_reset_subset_string():
20352035
wanted['log']=[]
20362036
return wanted
20372037

2038-
20392038
def test_034_csvt_file():
20402039
wanted={}
20412040
wanted['uri']=u'file://testcsvt.csv?geomType=none&type=csv'
2042-
wanted['fieldTypes']=['integer', 'text', 'integer', 'double', 'text', 'text', 'text']
2041+
wanted['fieldTypes']=['integer', 'text', 'integer', 'double', 'text', 'text', 'text', 'text', 'text', 'text', 'longlong', 'longlong']
20432042
wanted['data']={
20442043
2L: {
20452044
'id': u'1',
20462045
'description': u'Test csvt 1',
2047-
'f1': u'1',
2048-
'f2': u'1.2',
2049-
'f3': u'01',
2050-
'f4': u'text',
2051-
'f5': u'times',
2046+
'fint': u'1',
2047+
'freal': u'1.2',
2048+
'fstr': u'1',
2049+
'fstr_1': u'text',
2050+
'fdatetime': u'2015-03-02T12:30:00',
2051+
'fdate': u'2014-12-30',
2052+
'ftime': u'23:55',
2053+
'flong': u'-456',
2054+
'flonglong': u'-678',
2055+
'field_12': u'NULL',
20522056
'#fid': 2L,
20532057
'#geometry': 'None',
2054-
},
2058+
},
20552059
3L: {
20562060
'id': u'2',
20572061
'description': u'Test csvt 2',
2058-
'f1': u'3',
2059-
'f2': u'1.5',
2060-
'f3': u'99',
2061-
'f4': u'23.5',
2062-
'f5': u'80',
2062+
'fint': u'3',
2063+
'freal': u'1.5',
2064+
'fstr': u'99',
2065+
'fstr_1': u'23.5',
2066+
'fdatetime': u'80',
2067+
'fdate': u'2015-03-28',
2068+
'ftime': u'2014-12-30',
2069+
'flong': u'01:55',
2070+
'flonglong': u'9189304972279762602',
2071+
'field_12': u'-3123724580211819352',
20632072
'#fid': 3L,
20642073
'#geometry': 'None',
2065-
},
2066-
}
2067-
wanted['log']=[]
2074+
},
2075+
}
2076+
wanted['log']=[
2077+
]
20682078
return wanted
20692079

20702080

@@ -2165,3 +2175,97 @@ def test_037_csvt_file_invalid_file():
21652175
}
21662176
wanted['log']=[]
21672177
return wanted
2178+
2179+
def test_038_type_inference():
2180+
wanted={}
2181+
wanted['uri']=u'file://testtypes.csv?yField=lat&xField=lon&type=csv'
2182+
wanted['fieldTypes']=['text', 'double', 'double', 'text', 'text', 'integer', 'longlong', 'double', 'text']
2183+
wanted['data']={
2184+
2L: {
2185+
'id': u'line1',
2186+
'description': u'1.0',
2187+
'lon': u'1.0',
2188+
'lat': u'1.0',
2189+
'empty': u'NULL',
2190+
'text': u'NULL',
2191+
'int': u'0',
2192+
'longlong': u'0',
2193+
'real': u'NULL',
2194+
'text2': u'1',
2195+
'#fid': 2L,
2196+
'#geometry': 'POINT(1 1)',
2197+
},
2198+
3L: {
2199+
'id': u'line2',
2200+
'description': u'1.0',
2201+
'lon': u'1.0',
2202+
'lat': u'5.0',
2203+
'empty': u'NULL',
2204+
'text': u'1',
2205+
'int': u'NULL',
2206+
'longlong': u'9189304972279762602',
2207+
'real': u'1.3',
2208+
'text2': u'-4',
2209+
'#fid': 3L,
2210+
'#geometry': 'POINT(1 5)',
2211+
},
2212+
4L: {
2213+
'id': u'line3',
2214+
'description': u'5.0',
2215+
'lon': u'5.0',
2216+
'lat': u'5.0',
2217+
'empty': u'NULL',
2218+
'text': u'1xx',
2219+
'int': u'2',
2220+
'longlong': u'345',
2221+
'real': u'2.0',
2222+
'text2': u'1x',
2223+
'#fid': 4L,
2224+
'#geometry': 'POINT(5 5)',
2225+
},
2226+
5L: {
2227+
'id': u'line4',
2228+
'description': u'5.0',
2229+
'lon': u'5.0',
2230+
'lat': u'1.0',
2231+
'empty': u'NULL',
2232+
'text': u'A string',
2233+
'int': u'-3456',
2234+
'longlong': u'-3123724580211819352',
2235+
'real': u'-123.56',
2236+
'text2': u'NULL',
2237+
'#fid': 5L,
2238+
'#geometry': 'POINT(5 1)',
2239+
},
2240+
6L: {
2241+
'id': u'line5',
2242+
'description': u'3.0',
2243+
'lon': u'3.0',
2244+
'lat': u'1.0',
2245+
'empty': u'NULL',
2246+
'text': u'NULL',
2247+
'int': u'NULL',
2248+
'longlong': u'NULL',
2249+
'real': u'0.00023',
2250+
'text2': u'23',
2251+
'#fid': 6L,
2252+
'#geometry': 'POINT(3 1)',
2253+
},
2254+
7L: {
2255+
'id': u'line6',
2256+
'description': u'1.0',
2257+
'lon': u'1.0',
2258+
'lat': u'3.0',
2259+
'empty': u'NULL',
2260+
'text': u'1.5',
2261+
'int': u'9',
2262+
'longlong': u'42',
2263+
'real': u'99.0',
2264+
'text2': u'0',
2265+
'#fid': 7L,
2266+
'#geometry': 'POINT(1 3)',
2267+
},
2268+
}
2269+
wanted['log']=[
2270+
]
2271+
return wanted
Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
id,description,f1,f2,f3,f4,f5
2-
1,Test csvt 1,1,1.2,01,text,times
3-
2,Test csvt 2,3,1.5,99,23.5,80
4-
1+
id,description,fint,freal,fstr,fstr,fdatetime,fdate,ftime,flong,flonglong
2+
1,Test csvt 1,1,1.2,1,text,2015-03-02T12:30:00,2014-12-30,23:55,-456,-678
3+
2,Test csvt 2,3,1.5,99,23.5,80,2015-03-28,2014-12-30,01:55,9189304972279762602,-3123724580211819352
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
integer,string,integer,real,string,string,datetime
1+
integer,string,integer,real,string,string,datetime,date,time,long,longlong
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
id,lon,lat,empty,text,int,longlong,real,text2
2+
line1,1.0,1.0,,,0,0,,1
3+
line2,1.0,5.0,,1,,9189304972279762602,1.3,-4
4+
line3,5.0,5.0,,1xx,2,345,2,1x
5+
line4,5.0,1.0,,A string,-3456,-3123724580211819352,-123.56,,
6+
line5,3.0,1.0,,,,,23e-5,23
7+
line6,1.0,3.0,,1.5,9,42,99,0

0 commit comments

Comments
 (0)