Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements/tests.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ freezegun
pretend
pytest>=3.0.0
pytest-icdiff
pytest-mock
pytest-postgresql>=3.1.3,<8.0.0
pytest-randomly
pytest-socket
Expand Down
5 changes: 5 additions & 0 deletions requirements/tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ pytest==8.3.5 \
# via
# -r requirements/tests.in
# pytest-icdiff
# pytest-mock
# pytest-postgresql
# pytest-randomly
# pytest-socket
Expand All @@ -261,6 +262,10 @@ pytest-icdiff==0.9 \
--hash=sha256:13aede616202e57fcc882568b64589002ef85438046f012ac30a8d959dac8b75 \
--hash=sha256:efee0da3bd1b24ef2d923751c5c547fbb8df0a46795553fba08ef57c3ca03d82
# via -r requirements/tests.in
pytest-mock==3.14.0 \
--hash=sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f \
--hash=sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0
# via -r requirements/tests.in
pytest-postgresql==7.0.1 \
--hash=sha256:7723dfbfc57ea6f6f9876c2828e7b36f8b0e60b6cb040b1ddd444a60eed06e0a \
--hash=sha256:cbc6a67bbad5128b1f00def8cca5cf597020acc79893723f7a9cb60981b6840f
Expand Down
6 changes: 4 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,8 +550,10 @@ def search_service():


@pytest.fixture
def domain_status_service():
return account_services.NullDomainStatusService()
def domain_status_service(mocker):
service = account_services.NullDomainStatusService()
mocker.spy(service, "get_domain_status")
return service


class QueryRecorder:
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/accounts/test_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -1691,7 +1691,7 @@ def __init__(self):
session=session, client_id="some_client_id"
)

assert svc.get_domain_status("example.com") == []
assert svc.get_domain_status("example.com") is None
assert session.get.calls == [
pretend.call(
"https://api.domainr.com/v2/status",
Expand Down
58 changes: 57 additions & 1 deletion tests/unit/accounts/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@

from warehouse.accounts import tasks
from warehouse.accounts.models import TermsOfServiceEngagement
from warehouse.accounts.tasks import compute_user_metrics, notify_users_of_tos_update
from warehouse.accounts.tasks import (
batch_update_email_domain_status,
compute_user_metrics,
notify_users_of_tos_update,
)

from ...common.db.accounts import EmailFactory, UserFactory
from ...common.db.packaging import ProjectFactory, ReleaseFactory
Expand Down Expand Up @@ -192,3 +196,55 @@ def test_compute_user_metrics(db_request, metrics):
],
),
]


def test_update_email_domain_status(db_request, domain_status_service, mocker):
    """
    The batch task looks up, in order, the domains of emails that were never
    checked or were last checked 30 or more days ago, and stores the result.
    An email checked one day ago is left alone.
    """
    now = datetime.now(tz=timezone.utc)

    never_checked = EmailFactory.create(
        email="me@never-checked.com",
        domain_last_checked=None,
    )
    over_threshold = EmailFactory.create(
        email="me@over-threshold.com",
        domain_last_checked=now - timedelta(days=90),
    )
    on_threshold = EmailFactory.create(
        email="me@on-threshold.com",
        domain_last_checked=now - timedelta(days=30),
    )
    under_threshold = EmailFactory.create(
        email="me@under-threshold.com",
        domain_last_checked=now - timedelta(days=1),
    )

    batch_update_email_domain_status(db_request)

    # Listed in the order the task is expected to visit them.
    refreshed = (never_checked, over_threshold, on_threshold)
    lookup = domain_status_service.get_domain_status

    assert lookup.call_count == len(refreshed)
    lookup.assert_has_calls([mocker.call(email.domain) for email in refreshed])

    for email in refreshed:
        assert email.domain_last_status == ["active"]
    assert under_threshold.domain_last_status is None  # no default, not updated


def test_update_email_domain_status_does_not_update_if_not_needed(
    db_request, domain_status_service, mocker
):
    """
    When the status service returns None, the email's domain fields stay unset.
    """
    lookup = mocker.patch.object(
        domain_status_service, "get_domain_status", return_value=None
    )
    unchecked_email = EmailFactory.create()

    batch_update_email_domain_status(db_request)

    lookup.assert_called_once_with(unchecked_email.domain)

    assert unchecked_email.domain_last_checked is None
    assert unchecked_email.domain_last_status is None
8 changes: 7 additions & 1 deletion warehouse/accounts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@
TokenServiceFactory,
database_login_factory,
)
from warehouse.accounts.tasks import compute_user_metrics, notify_users_of_tos_update
from warehouse.accounts.tasks import (
batch_update_email_domain_status,
compute_user_metrics,
notify_users_of_tos_update,
)
from warehouse.accounts.utils import UserContext
from warehouse.admin.flags import AdminFlagValue
from warehouse.macaroons.security_policy import MacaroonSecurityPolicy
Expand Down Expand Up @@ -215,3 +219,5 @@ def includeme(config):
# Add a periodic task to generate Account metrics
config.add_periodic_task(crontab(minute="*/20"), compute_user_metrics)
config.add_periodic_task(crontab(minute="*"), notify_users_of_tos_update)
# TODO: After initial backfill, this can be done less frequently
config.add_periodic_task(crontab(minute="*/5"), batch_update_email_domain_status)
2 changes: 1 addition & 1 deletion warehouse/accounts/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def get_email_breach_count(email: str) -> int | None:


class IDomainStatusService(Interface):
def get_domain_status(domain: str) -> list[str]:
def get_domain_status(domain: str) -> list[str] | None:
"""
Returns a list of status strings for the given domain.
"""
1 change: 1 addition & 0 deletions warehouse/accounts/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ class Email(db.ModelBase):
# Domain validation information
domain_last_checked: Mapped[datetime.datetime | None] = mapped_column(
comment="Last time domain was checked with the domain validation service.",
index=True,
)
domain_last_status: Mapped[list[str] | None] = mapped_column(
ARRAY(String),
Expand Down
4 changes: 2 additions & 2 deletions warehouse/accounts/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,7 +992,7 @@ def create_service(cls, _context, request: Request) -> DomainrDomainStatusServic
domainr_client_id = request.registry.settings.get("domain_status.client_id")
return cls(session=request.http, client_id=domainr_client_id)

def get_domain_status(self, domain: str) -> list[str]:
def get_domain_status(self, domain: str) -> list[str] | None:
"""
Check if a domain is available or not.
See https://domainr.com/docs/api/v2/status
Expand All @@ -1006,6 +1006,6 @@ def get_domain_status(self, domain: str) -> list[str]:
resp.raise_for_status()
except requests.RequestException as exc:
logger.warning("Error contacting Domainr: %r", exc)
return []
return None

return resp.json()["status"][0]["status"].split()
41 changes: 39 additions & 2 deletions warehouse/accounts/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import datetime, timedelta, timezone
from __future__ import annotations

from sqlalchemy import func
import typing

from datetime import UTC, datetime, timedelta, timezone

from sqlalchemy import func, nullsfirst, or_, select

from warehouse import tasks
from warehouse.accounts.models import (
Expand All @@ -22,10 +26,14 @@
UserTermsOfServiceEngagement,
)
from warehouse.accounts.services import IUserService
from warehouse.accounts.utils import update_email_domain_status
from warehouse.email import send_user_terms_of_service_updated
from warehouse.metrics import IMetricsService
from warehouse.packaging.models import Release

if typing.TYPE_CHECKING:
from pyramid.request import Request


@tasks.task(ignore_result=True, acks_late=True)
def notify_users_of_tos_update(request):
Expand Down Expand Up @@ -136,3 +144,32 @@ def compute_user_metrics(request):
"primary:true",
],
)


@tasks.task(ignore_result=True, acks_late=True)
def batch_update_email_domain_status(request: Request) -> None:
    """
    Refresh the domain status of emails whose last domain check is stale.

    An email is selected when its domain was never checked, or was last
    checked over 30 days ago. 30 days is roughly the time between a domain's
    expiration and when it enters a renewal grace period. Each TLD may express
    their own grace period; 30 days is an estimate of time before the
    registrar is likely to sell it.

    At most 10,000 emails are handled per run: never-checked ones first, then
    the ones checked longest ago.
    """
    stale_before = datetime.now(tz=UTC) - timedelta(days=30)

    # TODO: After completely backfilled, remove the `or_` for None
    needs_check = or_(
        Email.domain_last_checked.is_(None),
        Email.domain_last_checked < stale_before,
    )

    stmt = (
        select(Email)
        .where(needs_check)
        .order_by(nullsfirst(Email.domain_last_checked.asc()))
        .limit(10_000)
        # Run in batches to avoid too much memory usage, API rate limits
        .execution_options(yield_per=1_000)
    )

    for email in request.db.scalars(stmt):
        update_email_domain_status(email, request)
21 changes: 21 additions & 0 deletions warehouse/accounts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,17 @@

from __future__ import annotations

import datetime

from dataclasses import dataclass
from typing import TYPE_CHECKING

from warehouse.accounts.models import Email
from warehouse.accounts.services import IDomainStatusService

if TYPE_CHECKING:
from pyramid.request import Request

from warehouse.accounts.models import User
from warehouse.macaroons.models import Macaroon

Expand Down Expand Up @@ -44,3 +51,17 @@ class UserContext:

def __principals__(self) -> list[str]:
return self.user.__principals__()


def update_email_domain_status(email: Email, request: Request) -> None:
    """
    Update the domain status of the given email address.

    Asks the request's ``IDomainStatusService`` for the status of
    ``email.domain``. When the service returns a result, it is stored on the
    email together with the current UTC time, and the email is added to the
    request's database session. When the service returns ``None``, the email
    is left untouched.
    """
    domain_status_service = request.find_service(IDomainStatusService)
    domain_status = domain_status_service.get_domain_status(email.domain)

    # Only `None` means "no answer". Compare against it explicitly so that a
    # successful lookup yielding an empty list of statuses is still recorded
    # instead of being mistaken for a failed lookup.
    if domain_status is None:
        return

    email.domain_last_checked = datetime.datetime.now(datetime.UTC)
    email.domain_last_status = domain_status
    request.db.add(email)
10 changes: 2 additions & 8 deletions warehouse/admin/views/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

from warehouse.accounts.interfaces import (
BurnedRecoveryCode,
IDomainStatusService,
IEmailBreachedService,
InvalidRecoveryCode,
IUserService,
Expand All @@ -40,6 +39,7 @@
ProhibitedUserName,
User,
)
from warehouse.accounts.utils import update_email_domain_status
from warehouse.authnz import Permissions
from warehouse.email import (
send_account_recovery_initiated_email,
Expand Down Expand Up @@ -683,13 +683,7 @@ def user_email_domain_check(user, request):
email_address = request.params.get("email_address")
email = request.db.scalar(select(Email).where(Email.email == email_address))

domain_status_service = request.find_service(IDomainStatusService)
domain_status = domain_status_service.get_domain_status(email.domain)

# set the domain status to the email address
email.domain_last_checked = datetime.datetime.now(datetime.UTC)
email.domain_last_status = domain_status
request.db.add(email)
update_email_domain_status(email, request)

request.session.flash(
f"Domain status check for {email.domain!r} completed", queue="success"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Add Index to Email.domain_last_checked

Revision ID: c8384ca429fc
Revises: f609b35e981b
Create Date: 2025-04-22 18:36:03.844860
"""

from alembic import op

revision = "c8384ca429fc"
down_revision = "f609b35e981b"


def upgrade():
    """Create a non-unique index on ``user_emails.domain_last_checked``."""
    index_name = op.f("ix_user_emails_domain_last_checked")
    op.create_index(
        index_name,
        "user_emails",
        ["domain_last_checked"],
        unique=False,
    )


def downgrade():
    """Drop the index on ``user_emails.domain_last_checked``."""
    index_name = op.f("ix_user_emails_domain_last_checked")
    op.drop_index(index_name, table_name="user_emails")