Skip to content

Commit

Permalink
[OGR provider] GPKG/SQLite: issue a 'UPDATE ... SET col_name = consta…
Browse files Browse the repository at this point in the history
…nt' when updating all features (fixes qgis#53043)

On the test case provided in qgis#53043:

Before:
```
Start layer.getFeatures() without update
layer.getFeatures() found 591904 features and took 0:00:06.561681
Start loop to update features
Loop took 0:00:07.583642
Commitchanges took 0:00:33.213708
```

After:
```
Start layer.getFeatures() without update
layer.getFeatures() found 591904 features and took 0:00:06.572171
Start loop to update features
Loop took 0:00:07.807320
Commitchanges took 0:00:03.322783
```

So a 10x improvement
  • Loading branch information
rouault committed Sep 18, 2023
1 parent 4d80e94 commit 93b6e78
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 1 deletion.
86 changes: 85 additions & 1 deletion src/core/providers/ogr/qgsogrprovider.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2235,7 +2235,91 @@ bool QgsOgrProvider::changeAttributeValues( const QgsChangedAttributesMap &attr_
mayNeedResetReadingAfterGetFeature = false;
}

for ( QgsChangedAttributesMap::const_iterator it = attr_map.begin(); it != attr_map.end(); ++it )
/* Optimization to update a single field of all layer's feature with a
* constant value */
bool useUpdate = false;
// If changing the below value, update it into test_provider_ogr_gpkg.py
// as well
constexpr size_t THRESHOLD_UPDATE_OPTIM = 100;
if ( static_cast<size_t>( attr_map.size() ) >= THRESHOLD_UPDATE_OPTIM &&
( mGDALDriverName == QLatin1String( "GPKG" ) ||
mGDALDriverName == QLatin1String( "SQLite" ) ) &&
mOgrLayer->TestCapability( OLCFastFeatureCount ) &&
attr_map.size() == mOgrLayer->GetFeatureCount() )
{
std::set<QgsFeatureId> fids;
OGRFieldDefnH fd = nullptr;
int fieldIdx = -1;
QVariant val;
OGRFieldType type = OFTMaxType;
useUpdate = true;
for ( QgsChangedAttributesMap::const_iterator it = attr_map.begin(); it != attr_map.end(); ++it )
{
QgsFeatureId fid = it.key();
fids.insert( fid );
const QgsAttributeMap &attr = it.value();
if ( attr.size() != 1 )
{
useUpdate = false;
break;
}
QgsAttributeMap::const_iterator it2 = attr.begin();
if ( fieldIdx < 0 )
{
fieldIdx = it2.key();
if ( fieldIdx == 0 && mFirstFieldIsFid )
{
useUpdate = false;
break;
}
fd = mOgrLayer->GetLayerDefn().GetFieldDefn(
( mFirstFieldIsFid && fieldIdx > 0 ) ? fieldIdx - 1 : fieldIdx );
if ( !fd )
{
useUpdate = false;
break;
}
type = OGR_Fld_GetType( fd );
if ( type != OFTInteger && type != OFTInteger64 && type != OFTString && type != OFTReal )
{
useUpdate = false;
break;
}
val = *it2;
}
else if ( fieldIdx != it2.key() || val != *it2 )
{
useUpdate = false;
break;
}
}
if ( useUpdate && fids.size() != static_cast<size_t>( attr_map.size() ) )
{
useUpdate = false;
}
if ( useUpdate )
{
QString sql = QStringLiteral( "UPDATE %1 SET %2 = %3" )
.arg( QString::fromUtf8( QgsOgrProviderUtils::quotedIdentifier( mOgrLayer->name(), mGDALDriverName ) ) )
.arg( QString::fromUtf8( QgsOgrProviderUtils::quotedIdentifier( QByteArray( OGR_Fld_GetNameRef( fd ) ), mGDALDriverName ) ) )
.arg( QgsOgrProviderUtils::quotedValue( val ) );
QgsDebugMsgLevel( QStringLiteral( "Using optimized changeAttributeValues(): %1" ).arg( sql ), 3 );
CPLErrorReset();
mOgrOrigLayer->ExecuteSQLNoReturn( sql.toUtf8() );
if ( CPLGetLastErrorType() != CE_None )
{
useUpdate = false;
returnValue = false;
}
}
}

// General case: let's iterate over all features and attributes to update
QgsChangedAttributesMap::const_iterator it = attr_map.begin();
if ( useUpdate )
it = attr_map.end();

for ( ; it != attr_map.end(); ++it )
{
QgsFeatureId fid = it.key();

Expand Down
81 changes: 81 additions & 0 deletions tests/src/python/test_provider_ogr_gpkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -2760,6 +2760,87 @@ def testTransactionModeAutoWithFilter(self):
attrs = [f['name'] for f in vl2.getFeatures()]
self.assertEqual(attrs, ['a', 'b'])

def testChangeAttributeValuesOptimization(self):
    """Test issuing 'UPDATE layer SET column_name = constant' when possible.

    Creates a GeoPackage layer holding THRESHOLD_UPDATE_OPTIM + 1 features
    with string, integer, 64-bit integer and real fields, then edits it in
    several ways and checks the values read back after each commit:

    * edits that must go through the per-feature path (non-constant value,
      more than one field touched, not every feature touched);
    * edits that set one field of every feature to the same value, which
      is the case the single-UPDATE shortcut targets.
    """

    # Below value comes from QgsOgrProvider::changeAttributeValues()
    THRESHOLD_UPDATE_OPTIM = 100

    tmpfile = os.path.join(self.basetestpath, 'testChangeAttributeValuesOptimization.gpkg')
    ds = ogr.GetDriverByName('GPKG').CreateDataSource(tmpfile)
    lyr = ds.CreateLayer('test', geom_type=ogr.wkbPoint)
    lyr.CreateField(ogr.FieldDefn('str_field', ogr.OFTString))
    lyr.CreateField(ogr.FieldDefn('int_field', ogr.OFTInteger))
    lyr.CreateField(ogr.FieldDefn('int64_field', ogr.OFTInteger64))
    lyr.CreateField(ogr.FieldDefn('real_field', ogr.OFTReal))
    # One feature more than the threshold, all attributes left unset
    for _ in range(THRESHOLD_UPDATE_OPTIM + 1):
        f = ogr.Feature(lyr.GetLayerDefn())
        lyr.CreateFeature(f)
    # Release the OGR dataset before opening the file through QGIS
    ds = None

    vl = QgsVectorLayer(f'{tmpfile}|layername=test', 'test', 'ogr')
    # Fail early with a clear message instead of on a later value check
    self.assertTrue(vl.isValid())

    # Does not trigger the optim: not constant value
    field_name, value, other_value = "str_field", "my_value", "other_value"
    vl.startEditing()
    fieldid = vl.fields().indexFromName(field_name)
    for idx, feature in enumerate(vl.getFeatures()):
        # Only the last feature (index == threshold) gets the other value
        if idx == THRESHOLD_UPDATE_OPTIM:
            vl.changeAttributeValue(feature.id(), fieldid, other_value)
        else:
            vl.changeAttributeValue(feature.id(), fieldid, value)
    self.assertTrue(vl.commitChanges())

    got = [feat[field_name] for feat in vl.getFeatures()]
    self.assertEqual(set(got), {value, other_value})

    # Does not trigger the optim: update of different fields
    vl.startEditing()
    fieldid = vl.fields().indexFromName("int_field")
    fieldid2 = vl.fields().indexFromName("int64_field")
    for idx, feature in enumerate(vl.getFeatures()):
        if idx == THRESHOLD_UPDATE_OPTIM:
            vl.changeAttributeValue(feature.id(), fieldid2, 1)
        else:
            vl.changeAttributeValue(feature.id(), fieldid, 1)
    self.assertTrue(vl.commitChanges())

    # Each field was set on some features only, the others stay NULL
    got = [feat["int_field"] for feat in vl.getFeatures()]
    self.assertEqual(set(got), {1, QVariant()})
    got = [feat["int64_field"] for feat in vl.getFeatures()]
    self.assertEqual(set(got), {1, QVariant()})

    # Does not trigger the optim: not all features updated
    vl.startEditing()
    fieldid = vl.fields().indexFromName("real_field")
    for idx, feature in enumerate(vl.getFeatures()):
        # Leave the last feature untouched
        if idx == THRESHOLD_UPDATE_OPTIM:
            break
        vl.changeAttributeValue(feature.id(), fieldid, 1.5)
    self.assertTrue(vl.commitChanges())

    got = [feat["real_field"] for feat in vl.getFeatures()]
    self.assertEqual(set(got), {1.5, QVariant()})

    # Triggers the optim
    for field_name, value in [("str_field", "my_value"),
                              ("int_field", 123),
                              ("int64_field", 1234567890123),
                              ("real_field", 2.5),
                              ("real_field", None),
                              ]:
        vl.startEditing()
        fieldid = vl.fields().indexFromName(field_name)
        for feature in vl.getFeatures():
            vl.changeAttributeValue(feature.id(), fieldid, value)
        self.assertTrue(vl.commitChanges())

        got = [feat[field_name] for feat in vl.getFeatures()]
        # Only None maps to NULL; an explicit None check keeps falsy values
        # such as 0 or "" from being mistaken for NULL if cases are added
        expected = QVariant() if value is None else value
        self.assertEqual(set(got), {expected})


if __name__ == '__main__':
unittest.main()

0 comments on commit 93b6e78

Please sign in to comment.