Skip to content

Commit

Permalink
bug 1400141: Improve History.getChange efficiency. (#419). r=catlee
Browse files Browse the repository at this point in the history
  • Loading branch information
bhearsum committed Oct 10, 2017
1 parent d858985 commit ecb6110
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 7 deletions.
30 changes: 23 additions & 7 deletions auslib/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,15 +741,31 @@ def getChange(self, change_id=None, column_values=None, data_version=None, trans
raise ValueError("data_version queried for non-versioned table")

where = [self.data_version == data_version]
self.log.debug("Querying for change_id by:")
self.log.debug("data_version: %s", data_version)
for col in column_names.keys():
self.log.debug("%s: %s", column_names[col], column_values[col])
where.append(column_names[col] == column_values[col])
changes = self.select(where=where,
transaction=transaction)
else:
changes = self.select(where=[self.change_id == change_id], transaction=transaction)
found = len(changes)
if found > 1 or found == 0:
self.log.debug("Found %s changes, should have been 1", found)

# To improve query efficiency we first get the change_id,
# and _then_ get the entire row. This is because we may not be able
# to query by an index depending which column_values we were given.
# If we end up querying by column_values that don't have an index,
# mysql will read many more rows than will be returned. This is
# particularly bad on the releases_history table, where the "data"
# column is often hundreds of kilobytes per row.
# Additional details in https://github.com/mozilla/balrog/pull/419#issuecomment-334851038
change_ids = self.select(columns=[self.change_id], where=where,
transaction=transaction)
if len(change_ids) != 1:
self.log.debug("Found %s changes when not querying by change_id, should have been 1", len(change_ids))
return None
change_id = change_ids[0]["change_id"]

self.log.debug("Querying for full change by change_id %s", change_id)
changes = self.select(where=[self.change_id == change_id], transaction=transaction)
if len(changes) != 1:
self.log.debug("Found %s changes when querying by change_id, should have been 1", len(changes))
return None
return changes[0]

Expand Down
2 changes: 2 additions & 0 deletions auslib/test/test_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ def testHistoryTableHasAllColumns(self):
self.assertTrue('foo' in columns)
self.assertTrue('changed_by' in columns)
self.assertTrue('timestamp' in columns)
self.assertTrue('data_version' in columns)

@mock.patch("time.time", mock.MagicMock(return_value=1.0))
def testHistoryUponInsert(self):
Expand Down Expand Up @@ -600,6 +601,7 @@ def testMultiplePrimaryHistoryTableHasAllColumns(self):
self.assertTrue('foo' in columns)
self.assertTrue('changed_by' in columns)
self.assertTrue('timestamp' in columns)
self.assertTrue('data_version' in columns)

@mock.patch("time.time", mock.MagicMock(return_value=1.0))
def testMultiplePrimaryHistoryUponInsert(self):
Expand Down

0 comments on commit ecb6110

Please sign in to comment.