Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Analyze user_ips before running deduplication #4627

Merged
merged 3 commits into from Feb 12, 2019
Merged
Changes from 2 commits
Commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.
+32 −3
Diff settings

Always

Just for now

Copy path View file
@@ -0,0 +1 @@
Improve 'user_ips' table deduplication background update
Copy path View file
@@ -65,6 +65,11 @@ def __init__(self, db_conn, hs):
columns=["last_seen"],
)

self.register_background_update_handler(
"user_ips_analyze",
self._analyze_user_ip,
)

self.register_background_update_handler(
"user_ips_remove_dupes",
self._remove_user_ip_dupes,
@@ -108,6 +113,25 @@ def f(conn):
yield self._end_background_update("user_ips_drop_nonunique_index")
defer.returnValue(1)

@defer.inlineCallbacks
def _analyze_user_ip(self, progress, batch_size):
# Background update to analyze user_ips table before we run the
# deduplication background update. The table may not have been analyzed
# for ages due to the table locks.
#
# This will lock out the naive upserts to user_ips while it happens, but
# the analyze should be quick (28GB table takes ~10s)
def user_ips_analyze(txn):
txn.execute("ANALYZE user_ips")

end_last_seen = yield self.runInteraction(
This conversation was marked as resolved by erikjohnston

This comment has been minimized.

Copy link
@richvdh

richvdh Feb 12, 2019

Member
Suggested change
end_last_seen = yield self.runInteraction(
yield self.runInteraction(
"user_ips_analyze", user_ips_analyze
)

yield self._end_background_update("user_ips_analyze")

defer.returnValue(1)

@defer.inlineCallbacks
def _remove_user_ip_dupes(self, progress, batch_size):
# This function works by scanning the user_ips table in batches
@@ -13,14 +13,18 @@
* limitations under the License.
*/

-- delete duplicates
-- analyze user_ips, to help ensure the correct indices are used
INSERT INTO background_updates (update_name, progress_json) VALUES
('user_ips_remove_dupes', '{}');
('user_ips_analyze', '{}');

-- delete duplicates
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('user_ips_remove_dupes', '{}', 'user_ips_analyze');

-- add a new unique index to user_ips table
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('user_ips_device_unique_index', '{}', 'user_ips_remove_dupes');

-- drop the old original index
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index');
('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index');
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.