Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions tests/unit/cli/test_hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ def test_no_records_to_backfill(self, cli, db_request, monkeypatch):

assert db_request.db.query(User.Event).count() == 0

result = cli.invoke(hashing.backfill_ipaddrs, obj=config)
args = ["--event-type", "user"]
result = cli.invoke(hashing.backfill_ipaddrs, args, obj=config)

assert result.exit_code == 0
assert result.output.strip() == "No rows to backfill. Done!"
Expand Down Expand Up @@ -67,7 +68,8 @@ def test_backfill_with_no_ipaddr_obj(self, cli, db_session, monkeypatch):
assert db_session.query(User.Event).count() == 3
assert db_session.query(IpAddress).count() == 0

result = cli.invoke(hashing.backfill_ipaddrs, obj=config)
args = ["--event-type", "user"]
result = cli.invoke(hashing.backfill_ipaddrs, args, obj=config)

assert result.exit_code == 0
assert db_session.query(IpAddress).count() == 3
Expand Down Expand Up @@ -96,6 +98,8 @@ def tests_backfills_records(self, cli, db_request, remote_addr, monkeypatch):
assert db_request.db.query(User.Event).count() == 3

args = [
"--event-type",
"user",
"--batch-size",
"2",
]
Expand Down Expand Up @@ -141,6 +145,8 @@ def test_continue_until_done(self, cli, db_request, remote_addr, monkeypatch):
)

args = [
"--event-type",
"user",
"--batch-size",
"1",
"--sleep-time",
Expand Down
34 changes: 27 additions & 7 deletions warehouse/cli/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ def hashing():


@hashing.command()
@click.option(
"-e",
"--event-type",
type=click.Choice(["user", "project", "file", "organization", "team"]),
required=True,
help="Type of event to backfill",
)
@click.option(
"-b",
"--batch-size",
Expand All @@ -51,6 +58,7 @@ def hashing():
@click.pass_obj
def backfill_ipaddrs(
config,
event_type: str,
batch_size: int,
sleep_time: int,
continue_until_done: bool,
Expand All @@ -68,12 +76,15 @@ def backfill_ipaddrs(

salt = config.registry.settings["warehouse.ip_salt"]

_backfill_ips(session, salt, batch_size, sleep_time, continue_until_done)
_backfill_ips(
session, salt, event_type, batch_size, sleep_time, continue_until_done
)


def _backfill_ips(
session,
salt: str,
event_type: str,
batch_size: int,
sleep_time: int,
continue_until_done: bool,
Expand All @@ -82,18 +93,26 @@ def _backfill_ips(
Create missing IPAddress objects for events that don't have them.

Broken out from the CLI command so that it can be called recursively.

TODO: Currently operates on only User events, but should be expanded to
include Project events and others.
"""
from warehouse.accounts.models import User
from warehouse.ip_addresses.models import IpAddress
from warehouse.organizations.models import Organization, Team
from warehouse.packaging.models import File, Project

has_events = {
"user": User,
"organization": Organization,
"team": Team,
"project": Project,
"file": File,
}
model = has_events[event_type]

# Get rows a batch at a time, only if the row doesn't have an `ip_address_id`
no_ip_obj_rows = session.scalars(
select(User.Event)
.where(User.Event.ip_address_id.is_(None)) # type: ignore[attr-defined]
.order_by(User.Event.time) # type: ignore[attr-defined]
select(model.Event) # type: ignore[attr-defined]
.where(model.Event.ip_address_id.is_(None)) # type: ignore[attr-defined]
.order_by(model.Event.time) # type: ignore[attr-defined]
.limit(batch_size)
).all()

Expand Down Expand Up @@ -137,6 +156,7 @@ def _backfill_ips(
_backfill_ips(
session,
salt,
event_type,
batch_size,
sleep_time,
continue_until_done,
Expand Down