Skip to content

Commit 4f53135

Browse files
committed
[OGR provider] Performance improvements on huge GeoPackage database (fixes #18402)
- Introduce an approximate feature count for GeoPackage in case there are at least 100,000 rows in a table. - Add a super fast implementation of GetExtent() for GeoPackage when there is an RTree - Do not require feature count when enumerating layers from the browser
1 parent e5e966b commit 4f53135

File tree

4 files changed

+237
-7
lines changed

4 files changed

+237
-7
lines changed

src/providers/ogr/qgsogrdataitems.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,8 @@ QList<QgsOgrDbLayerInfo *> QgsOgrLayerItem::subLayers( const QString &path, cons
172172
{
173173
// Collect mixed-geom layers
174174
QMultiMap<int, QStringList> subLayersMap;
175-
const QStringList subLayersList( layer.dataProvider()->subLayers( ) );
175+
QgsOgrProvider *ogrProvider = qobject_cast<QgsOgrProvider *>( layer.dataProvider() );
176+
const QStringList subLayersList( ogrProvider->subLayersWithoutFeatureCount( ) );
176177
QMap< QString, int > mapLayerNameToCount;
177178
bool uniqueNames = true;
178179
int prevIdx = -1;

src/providers/ogr/qgsogrprovider.cpp

+194-5
Original file line numberDiff line numberDiff line change
@@ -705,7 +705,7 @@ static OGRwkbGeometryType ogrWkbGeometryTypeFromName( const QString &typeName )
705705
return wkbUnknown;
706706
}
707707

708-
void QgsOgrProvider::addSubLayerDetailsToSubLayerList( int i, QgsOgrLayer *layer ) const
708+
void QgsOgrProvider::addSubLayerDetailsToSubLayerList( int i, QgsOgrLayer *layer, bool withFeatureCount ) const
709709
{
710710
QgsOgrFeatureDefn &fdef = layer->GetLayerDefn();
711711
// Get first column name,
@@ -731,7 +731,7 @@ void QgsOgrProvider::addSubLayerDetailsToSubLayerList( int i, QgsOgrLayer *layer
731731

732732
if ( wkbFlatten( layerGeomType ) != wkbUnknown )
733733
{
734-
int layerFeatureCount = layer->GetFeatureCount();
734+
int layerFeatureCount = withFeatureCount ? layer->GetApproxFeatureCount() : -1;
735735

736736
QString geom = ogrWkbGeometryTypeName( layerGeomType );
737737

@@ -822,6 +822,16 @@ void QgsOgrProvider::addSubLayerDetailsToSubLayerList( int i, QgsOgrLayer *layer
822822
}
823823

824824
QStringList QgsOgrProvider::subLayers() const
825+
{
826+
return _subLayers( true );
827+
}
828+
829+
QStringList QgsOgrProvider::subLayersWithoutFeatureCount() const
830+
{
831+
return _subLayers( false );
832+
}
833+
834+
QStringList QgsOgrProvider::_subLayers( bool withFeatureCount ) const
825835
{
826836
if ( !mValid )
827837
{
@@ -833,7 +843,7 @@ QStringList QgsOgrProvider::subLayers() const
833843

834844
if ( mOgrLayer && ( mIsSubLayer || layerCount() == 1 ) )
835845
{
836-
addSubLayerDetailsToSubLayerList( mLayerIndex, mOgrLayer );
846+
addSubLayerDetailsToSubLayerList( mLayerIndex, mOgrLayer, withFeatureCount );
837847
}
838848
else
839849
{
@@ -856,7 +866,7 @@ QStringList QgsOgrProvider::subLayers() const
856866
if ( !layer )
857867
continue;
858868

859-
addSubLayerDetailsToSubLayerList( i, layer.get() );
869+
addSubLayerDetailsToSubLayerList( i, layer.get(), withFeatureCount );
860870
if ( firstLayer == nullptr )
861871
{
862872
firstLayer = std::move( layer );
@@ -3838,7 +3848,7 @@ void QgsOgrProvider::recalculateFeatureCount()
38383848
// so we remove it if there's any and then put it back
38393849
if ( mOgrGeometryTypeFilter == wkbUnknown )
38403850
{
3841-
mFeaturesCounted = mOgrLayer->GetFeatureCount( true );
3851+
mFeaturesCounted = mOgrLayer->GetApproxFeatureCount();
38423852
if ( mFeaturesCounted == -1 )
38433853
{
38443854
mFeaturesCounted = QgsVectorDataProvider::UnknownCount;
@@ -5131,9 +5141,188 @@ GIntBig QgsOgrLayer::GetFeatureCount( bool force )
51315141
return OGR_L_GetFeatureCount( hLayer, force );
51325142
}
51335143

5144+
GIntBig QgsOgrLayer::GetApproxFeatureCount()
5145+
{
5146+
QMutexLocker locker( &ds->mutex );
5147+
5148+
// OGR_L_GetFeatureCount() can be super slow on huge geopackage files
5149+
// so implement some approximation strategy that has reasonable runtime.
5150+
QString driverName = GDALGetDriverShortName( GDALGetDatasetDriver( ds->hDS ) );
5151+
if ( driverName == QLatin1String( "GPKG" ) )
5152+
{
5153+
CPLPushErrorHandler( CPLQuietErrorHandler );
5154+
OGRLayerH hSqlLayer = GDALDatasetExecuteSQL(
5155+
ds->hDS, "SELECT 1 FROM gpkg_ogr_contents LIMIT 0", nullptr, nullptr );
5156+
CPLPopErrorHandler();
5157+
if ( hSqlLayer )
5158+
{
5159+
GDALDatasetReleaseResultSet( ds->hDS, hSqlLayer );
5160+
return OGR_L_GetFeatureCount( hLayer, TRUE );
5161+
}
5162+
5163+
// Enumerate features up to a limit of 100000.
5164+
const GIntBig nLimit = CPLAtoGIntBig(
5165+
CPLGetConfigOption( "QGIS_GPKG_FC_THRESHOLD", "100000" ) );
5166+
QByteArray layerName = OGR_L_GetName( hLayer );
5167+
QByteArray sql( "SELECT COUNT(*) FROM (SELECT 1 FROM " );
5168+
sql += QgsOgrProviderUtils::quotedIdentifier( layerName, driverName );
5169+
sql += " LIMIT ";
5170+
sql += CPLSPrintf( CPL_FRMT_GIB, nLimit );
5171+
sql += ")";
5172+
hSqlLayer = GDALDatasetExecuteSQL( ds->hDS, sql, nullptr, nullptr );
5173+
GIntBig res = -1;
5174+
if ( hSqlLayer )
5175+
{
5176+
gdal::ogr_feature_unique_ptr fet( OGR_L_GetNextFeature( hSqlLayer ) );
5177+
if ( fet )
5178+
{
5179+
res = OGR_F_GetFieldAsInteger64( fet.get(), 0 );
5180+
}
5181+
GDALDatasetReleaseResultSet( ds->hDS, hSqlLayer );
5182+
}
5183+
if ( res >= 0 && res < nLimit )
5184+
{
5185+
// Less than 100000 features ? This is the final count
5186+
return res;
5187+
}
5188+
if ( res == nLimit )
5189+
{
5190+
// If we reach the threshold, then use the min and max values of the rowid
5191+
// hoping there are not a lot of holes.
5192+
// Do it in 2 separate SQL queries otherwise SQLite apparently does a
5193+
// full table scan...
5194+
sql = "SELECT MAX(ROWID) FROM ";
5195+
sql += QgsOgrProviderUtils::quotedIdentifier( layerName, driverName );
5196+
hSqlLayer = GDALDatasetExecuteSQL( ds->hDS, sql, nullptr, nullptr );
5197+
GIntBig maxrowid = -1;
5198+
if ( hSqlLayer )
5199+
{
5200+
gdal::ogr_feature_unique_ptr fet( OGR_L_GetNextFeature( hSqlLayer ) );
5201+
if ( fet )
5202+
{
5203+
maxrowid = OGR_F_GetFieldAsInteger64( fet.get(), 0 );
5204+
}
5205+
GDALDatasetReleaseResultSet( ds->hDS, hSqlLayer );
5206+
}
5207+
5208+
sql = "SELECT MIN(ROWID) FROM ";
5209+
sql += QgsOgrProviderUtils::quotedIdentifier( layerName, driverName );
5210+
hSqlLayer = GDALDatasetExecuteSQL( ds->hDS, sql, nullptr, nullptr );
5211+
GIntBig minrowid = 0;
5212+
if ( hSqlLayer )
5213+
{
5214+
gdal::ogr_feature_unique_ptr fet( OGR_L_GetNextFeature( hSqlLayer ) );
5215+
if ( fet )
5216+
{
5217+
minrowid = OGR_F_GetFieldAsInteger64( fet.get(), 0 );
5218+
}
5219+
GDALDatasetReleaseResultSet( ds->hDS, hSqlLayer );
5220+
}
5221+
5222+
if ( maxrowid >= minrowid )
5223+
{
5224+
return maxrowid - minrowid + 1;
5225+
}
5226+
}
5227+
}
5228+
5229+
return OGR_L_GetFeatureCount( hLayer, TRUE );
5230+
}
5231+
5232+
static bool findMinOrMax( GDALDatasetH hDS, const QByteArray &rtreeName,
5233+
const char *varName, bool isMin, double &val )
5234+
{
5235+
// We proceed by dichotomic search since unfortunately SELECT MIN(minx)
5236+
// in a RTree is a slow operation
5237+
double minval = -1e10;
5238+
double maxval = 1e10;
5239+
val = 0.0;
5240+
double oldval = 0.0;
5241+
for ( int i = 0; i < 100 && maxval - minval > 1e-15; i++ )
5242+
{
5243+
val = ( minval + maxval ) / 2;
5244+
if ( i > 0 && val == oldval )
5245+
{
5246+
break;
5247+
}
5248+
oldval = val;
5249+
QByteArray sql = "SELECT 1 FROM ";
5250+
sql += rtreeName;
5251+
sql += " WHERE ";
5252+
sql += varName;
5253+
sql += isMin ? " < " : " > ";
5254+
sql += CPLSPrintf( "%.18g", val );
5255+
sql += " LIMIT 1";
5256+
auto hSqlLayer = GDALDatasetExecuteSQL(
5257+
hDS, sql, nullptr, nullptr );
5258+
GIntBig count = -1;
5259+
if ( hSqlLayer )
5260+
{
5261+
count = OGR_L_GetFeatureCount( hSqlLayer, true );
5262+
GDALDatasetReleaseResultSet( hDS, hSqlLayer );
5263+
}
5264+
if ( count < 0 )
5265+
{
5266+
return false;
5267+
}
5268+
if ( ( isMin && count == 0 ) || ( !isMin && count == 1 ) )
5269+
{
5270+
minval = val;
5271+
}
5272+
else
5273+
{
5274+
maxval = val;
5275+
}
5276+
}
5277+
return true;
5278+
}
5279+
51345280
OGRErr QgsOgrLayer::GetExtent( OGREnvelope *psExtent, bool bForce )
51355281
{
51365282
QMutexLocker locker( &ds->mutex );
5283+
5284+
// OGR_L_GetExtent() can be super slow on huge geopackage files
5285+
// so implement some approximation strategy that has reasonable runtime.
5286+
// Actually this should return a rather accurante answer.
5287+
QString driverName = GDALGetDriverShortName( GDALGetDatasetDriver( ds->hDS ) );
5288+
if ( driverName == QLatin1String( "GPKG" ) )
5289+
{
5290+
QByteArray layerName = OGR_L_GetName( hLayer );
5291+
QByteArray rtreeName =
5292+
QgsOgrProviderUtils::quotedIdentifier( "rtree_" + layerName + "_" + OGR_L_GetGeometryColumn( hLayer ), driverName );
5293+
5294+
// Check if there is a non-empty RTree
5295+
QByteArray sql( "SELECT 1 FROM " );
5296+
sql += rtreeName;
5297+
sql += " LIMIT 1";
5298+
CPLPushErrorHandler( CPLQuietErrorHandler );
5299+
OGRLayerH hSqlLayer = GDALDatasetExecuteSQL(
5300+
ds->hDS, sql, nullptr, nullptr );
5301+
CPLPopErrorHandler();
5302+
if ( !hSqlLayer )
5303+
{
5304+
return OGR_L_GetExtent( hLayer, psExtent, bForce );
5305+
}
5306+
bool hasFeatures = OGR_L_GetFeatureCount( hSqlLayer, true ) > 0;
5307+
GDALDatasetReleaseResultSet( ds->hDS, hSqlLayer );
5308+
if ( !hasFeatures )
5309+
{
5310+
return OGRERR_FAILURE;
5311+
}
5312+
5313+
double minx, miny, maxx, maxy;
5314+
if ( findMinOrMax( ds->hDS, rtreeName, "MINX", true, minx ) &&
5315+
findMinOrMax( ds->hDS, rtreeName, "MINY", true, miny ) &&
5316+
findMinOrMax( ds->hDS, rtreeName, "MAXX", false, maxx ) &&
5317+
findMinOrMax( ds->hDS, rtreeName, "MAXY", false, maxy ) )
5318+
{
5319+
psExtent->MinX = minx;
5320+
psExtent->MinY = miny;
5321+
psExtent->MaxX = maxx;
5322+
psExtent->MaxY = maxy;
5323+
return OGRERR_NONE;
5324+
}
5325+
}
51375326
return OGR_L_GetExtent( hLayer, psExtent, bForce );
51385327
}
51395328

src/providers/ogr/qgsogrprovider.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ class QgsOgrProvider : public QgsVectorDataProvider
100100

101101
QgsCoordinateReferenceSystem crs() const override;
102102
QStringList subLayers() const override;
103+
QStringList subLayersWithoutFeatureCount() const;
103104
QString storageType() const override;
104105
QgsFeatureIterator getFeatures( const QgsFeatureRequest &request ) const override;
105106
QString subsetString() const override;
@@ -215,7 +216,9 @@ class QgsOgrProvider : public QgsVectorDataProvider
215216
//! Does the real job of settings the subset string and adds an argument to disable update capabilities
216217
bool _setSubsetString( const QString &theSQL, bool updateFeatureCount = true, bool updateCapabilities = true, bool hasExistingRef = true );
217218

218-
void addSubLayerDetailsToSubLayerList( int i, QgsOgrLayer *layer ) const;
219+
void addSubLayerDetailsToSubLayerList( int i, QgsOgrLayer *layer, bool withFeatureCount ) const;
220+
221+
QStringList _subLayers( bool withFeatureCount ) const;
219222

220223
QgsFields mAttributeFields;
221224

@@ -565,6 +568,9 @@ class QgsOgrLayer
565568
//! Wrapper of OGR_L_GetLayerCount
566569
GIntBig GetFeatureCount( bool force = false );
567570

571+
//! Return an approximate feature count
572+
GIntBig GetApproxFeatureCount();
573+
568574
//! Wrapper of OGR_L_GetLayerCount
569575
OGRErr GetExtent( OGREnvelope *psExtent, bool bForce );
570576

tests/src/python/test_provider_ogr_gpkg.py

+34
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,40 @@ def testAddingTwoIntFieldsWithWidth(self):
949949
self.assertTrue(vl.addAttribute(QgsField("c", QVariant.Int, "integer", 10)))
950950
self.assertTrue(vl.commitChanges())
951951

952+
def testApproxFeatureCountAndExtent(self):
953+
""" Test perf improvement for for https://issues.qgis.org/issues/18402 """
954+
955+
tmpfile = os.path.join(self.basetestpath, 'testApproxFeatureCountAndExtent.gpkg')
956+
ds = ogr.GetDriverByName('GPKG').CreateDataSource(tmpfile)
957+
lyr = ds.CreateLayer('test', geom_type=ogr.wkbPoint)
958+
f = ogr.Feature(lyr.GetLayerDefn())
959+
f.SetGeometry(ogr.CreateGeometryFromWkt('POINT(0 1)'))
960+
lyr.CreateFeature(f)
961+
f = ogr.Feature(lyr.GetLayerDefn())
962+
f.SetGeometry(ogr.CreateGeometryFromWkt('POINT(2 3)'))
963+
lyr.CreateFeature(f)
964+
fid = f.GetFID()
965+
f = ogr.Feature(lyr.GetLayerDefn())
966+
f.SetGeometry(ogr.CreateGeometryFromWkt('POINT(4 5)'))
967+
lyr.CreateFeature(f)
968+
lyr.DeleteFeature(fid)
969+
ds = None
970+
ds = ogr.Open(tmpfile, update=1)
971+
ds.ExecuteSQL('DROP TABLE gpkg_ogr_contents')
972+
ds = None
973+
974+
os.environ['QGIS_GPKG_FC_THRESHOLD'] = '1'
975+
vl = QgsVectorLayer(u'{}'.format(tmpfile) + "|layername=" + "test", 'test', u'ogr')
976+
self.assertTrue(vl.isValid())
977+
fc = vl.featureCount()
978+
del os.environ['QGIS_GPKG_FC_THRESHOLD']
979+
self.assertEqual(fc, 3) # didn't notice the hole
980+
981+
reference = QgsGeometry.fromRect(QgsRectangle(0, 1, 4, 5))
982+
provider_extent = QgsGeometry.fromRect(vl.extent())
983+
self.assertTrue(QgsGeometry.compare(provider_extent.asPolygon()[0], reference.asPolygon()[0], 0.00001),
984+
provider_extent.asPolygon()[0])
985+
952986

953987
if __name__ == '__main__':
954988
unittest.main()

0 commit comments

Comments
 (0)