Skip to content
Merged
3 changes: 3 additions & 0 deletions backend/code_review_backend/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@
# Use production Phabricator instance by default
PHABRICATOR_HOST = "https://phabricator.services.mozilla.com"

# Limit the automatic creation of reositories to allowed hosts
ALLOWED_REPOSITORY_HOSTS = ["hg.mozilla.org"]

DYNO = env("DYNO")
# Heroku settings override to run the web app through dyno
if DYNO:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@
from django.core.management.base import BaseCommand
from django.utils import timezone

from code_review_backend.issues.models import Diff, Issue, IssueLink, Revision
from code_review_backend.issues.models import (
Diff,
Issue,
IssueLink,
Repository,
Revision,
)

logger = logging.getLogger(__name__)

Expand All @@ -28,7 +34,19 @@ def add_arguments(self, parser):
default=30,
)

def cleanup_repositories(self):
unused_repositories = Repository.objects.filter(
base_revisions__isnull=True,
head_revisions__isnull=True,
diffs__isnull=True,
)
delete_count = unused_repositories._raw_delete(unused_repositories.db)
if delete_count:
logger.info(f"Deleted {delete_count} unused Repository.")

def handle(self, *args, **options):
self.cleanup_repositories()

clean_until = timezone.now() - timedelta(days=options["nb_days"])

rev_to_delete = Revision.objects.filter(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
import logging
import os
import tempfile
from urllib.parse import urlparse

import taskcluster
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import transaction
from requests.exceptions import HTTPError
Expand Down Expand Up @@ -179,22 +181,35 @@ def load_local_reports(self):
report = json.load(open(os.path.join(self.cache_dir, task_id)))
yield task_id, report

def get_or_create_repository(self, url):
"""Retrieve a repository or create it if its URL must match allowed hosts"""
parsed = urlparse(url)
if parsed.netloc not in settings.ALLOWED_REPOSITORY_HOSTS:
try:
return Repository.objects.get(url=url)
except Repository.DoesNotExist:
logger.warning(
f"No repository exists with URL {url} "
"(must be in ALLOWED_REPOSITORY_HOST for automatic creation), skipping."
)
raise ValueError(url)
repo, created = Repository.objects.get_or_create(
url=url, defaults={"slug": parsed.path.lstrip("/")}
)
if created:
logger.info(f"Created missing repository from URL {url}")
return repo

def build_revision_and_diff(self, data, task_id):
"""Build or retrieve a revision and diff in current repo from report's data"""
try:
head_repository = Repository.objects.get(url=data["repository"])
except Repository.DoesNotExist:
logger.warning(
f"No repository found with URL {data['repository']}, skipping."
)
head_repository = self.get_or_create_repository(data["repository"])
except ValueError:
return None, None

try:
base_repository = Repository.objects.get(url=data["target_repository"])
except Repository.DoesNotExist:
logger.warning(
f"No repository found with URL {data['target_repository']}, skipping."
)
base_repository = self.get_or_create_repository(data["target_repository"])
except ValueError:
return None, None

revision, _ = head_repository.head_revisions.get_or_create(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Generated by Django 5.1.2 on 2024-11-18 15:53

from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("issues", "0014_unique_hash"),
]

operations = [
migrations.RemoveField(
model_name="repository",
name="phid",
),
migrations.AlterField(
Comment thread
vrigal marked this conversation as resolved.
model_name="repository",
name="id",
field=models.AutoField(primary_key=True, serialize=False),
),
migrations.AlterModelOptions(
name="repository",
options={"ordering": ("id",), "verbose_name_plural": "repositories"},
),
]
if "postgresql" in settings.DATABASES["default"]["ENGINE"]:
print(
"Adding sequence initialization for Repository PK to issues.0015 with PostgreSQL backend"
)
operations.append(
migrations.RunSQL(
"""
SELECT setval(
pg_get_serial_sequence('issues_repository', 'id'),
coalesce(max(id)+1, 0),
false
) FROM issues_repository;
""",
reverse_sql=migrations.RunSQL.noop,
)
)
26 changes: 13 additions & 13 deletions backend/code_review_backend/issues/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,18 @@
ISSUE_LEVELS = ((LEVEL_WARNING, "Warning"), (LEVEL_ERROR, "Error"))


class PhabricatorModel(models.Model):
id = models.PositiveIntegerField(primary_key=True)
phid = models.CharField(max_length=40, unique=True)
class Repository(models.Model):
id = models.AutoField(primary_key=True)

created = models.DateTimeField(auto_now_add=True)
updated = models.DateTimeField(auto_now=True)

class Meta:
abstract = True
ordering = ("id",)


class Repository(PhabricatorModel):
# Not all repositories are available on Phabricator (Try ones)
phid = models.CharField(max_length=40, unique=False, null=True, blank=True)

slug = models.SlugField(unique=True)
url = models.URLField(unique=True)

class Meta:
verbose_name_plural = "repositories"
ordering = ("id",)

def __str__(self):
return self.slug
Expand Down Expand Up @@ -110,11 +101,17 @@ def phabricator_url(self):
return f"{parser.scheme}://{parser.netloc}/D{self.phabricator_id}"


class Diff(PhabricatorModel):
class Diff(models.Model):
"""Reference of a specific code patch (diff) in Phabricator.
A revision can be linked to multiple successive diffs, or none in case of a repository push.
"""

# Phabricator's attributes
id = models.PositiveIntegerField(primary_key=True)
phid = models.CharField(max_length=40, unique=True)
created = models.DateTimeField(auto_now_add=True)
updated = models.DateTimeField(auto_now=True)

revision = models.ForeignKey(
Revision, related_name="diffs", on_delete=models.CASCADE
)
Expand All @@ -131,6 +128,9 @@ class Diff(PhabricatorModel):
def __str__(self):
return f"Diff {self.id}"

class Meta:
ordering = ("id",)


class IssueLink(models.Model):
"""Many-to-many relationship between an Issue and a Revision.
Expand Down
52 changes: 39 additions & 13 deletions backend/code_review_backend/issues/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from collections import defaultdict
from urllib.parse import urlparse

from django.conf import settings
from django.db import transaction
from rest_framework import serializers

Expand All @@ -24,20 +26,48 @@ class RepositorySerializer(serializers.ModelSerializer):

class Meta:
model = Repository
fields = ("id", "phid", "slug", "url")
fields = ("id", "slug", "url")


class RepositoryGetOrCreateField(serializers.SlugRelatedField):
help_text = (
"Get or create a repository. URL must match allowed hosts from settings."
)
default_error_messages = {
**serializers.SlugRelatedField.default_error_messages,
"invalid_url": "Repository URL must match hg.mozilla.org.",
}
queryset = Repository.objects.all()

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs, slug_field="url")

def to_internal_value(self, url):
parsed = urlparse(url)
if parsed.netloc not in settings.ALLOWED_REPOSITORY_HOSTS:
try:
return self.get_queryset().get(url=url)
except Repository.DoesNotExist:
self.fail("invalid_url")
Comment thread
vrigal marked this conversation as resolved.
except (TypeError, ValueError):
self.fail("invalid")
return
try:
repo, _ = self.get_queryset().get_or_create(
url=url, defaults={"slug": parsed.path.lstrip("/")}
)
return repo
except (TypeError, ValueError):
self.fail("invalid")


class RevisionSerializer(serializers.ModelSerializer):
"""
Serialize a Revision in a Repository
"""

base_repository = serializers.SlugRelatedField(
queryset=Repository.objects.all(), slug_field="url"
)
head_repository = serializers.SlugRelatedField(
queryset=Repository.objects.all(), slug_field="url"
)
base_repository = RepositoryGetOrCreateField()
head_repository = RepositoryGetOrCreateField()
diffs_url = serializers.HyperlinkedIdentityField(
view_name="revision-diffs-list", lookup_url_kwarg="revision_id"
)
Expand Down Expand Up @@ -75,12 +105,8 @@ class RevisionLightSerializer(serializers.ModelSerializer):
Serialize a Revision in a Diff light serializer
"""

base_repository = serializers.SlugRelatedField(
queryset=Repository.objects.all(), slug_field="url"
)
head_repository = serializers.SlugRelatedField(
queryset=Repository.objects.all(), slug_field="url"
)
base_repository = RepositoryGetOrCreateField()
head_repository = RepositoryGetOrCreateField()
phabricator_url = serializers.URLField(read_only=True)

class Meta:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from datetime import timedelta

from django.core.management import call_command
from django.db.models import Count
from django.test import TestCase
from django.utils import timezone

Expand Down Expand Up @@ -90,8 +91,11 @@ def setUp(self):
build_issue("path6", [rev_1, rev_3])

def test_cleanup_issues_no_issue(self):
Repository.objects.exclude(
id__in=(self.moz_central.id, self.autoland.id, self.test_repo.id)
).delete()
with self.assertLogs() as mock_log:
with self.assertNumQueries(1):
with self.assertNumQueries(2):
call_command("cleanup_issues", "--nb-days", "40")

self.assertEqual(
Expand All @@ -102,12 +106,13 @@ def test_cleanup_issues_no_issue(self):
)

def test_cleanup_issues_default_nb_days(self):
Repository.objects.exclude(
id__in=(self.moz_central.id, self.autoland.id, self.test_repo.id)
).delete()
self.assertEqual(Issue.objects.count(), 6)
with self.assertLogs() as mock_log:
with self.assertNumQueries(6):
call_command(
"cleanup_issues",
)
with self.assertNumQueries(7):
call_command("cleanup_issues")

self.assertEqual(Issue.objects.count(), 4)
self.assertListEqual(
Expand All @@ -129,9 +134,12 @@ def test_cleanup_issues_default_nb_days(self):
)

def test_cleanup_issues_custom_nb_days(self):
Repository.objects.exclude(
id__in=(self.moz_central.id, self.autoland.id, self.test_repo.id)
).delete()
self.assertEqual(Issue.objects.count(), 6)
with self.assertLogs() as mock_log:
with self.assertNumQueries(6):
with self.assertNumQueries(7):
call_command("cleanup_issues", "--nb-days", "4")

self.assertEqual(Issue.objects.count(), 2)
Expand All @@ -148,3 +156,46 @@ def test_cleanup_issues_custom_nb_days(self):
f"{LOG_PREFIX}Deleted 6 IssueLink, 1 Diff, 4 Issue, 2 Revision.",
],
)

def test_cleanup_issues_removes_unused_repositories(self):
self.assertListEqual(
list(
Repository.objects.annotate(rev_count=Count("base_revisions"))
.order_by("slug")
.values_list("slug", "rev_count")
),
[
("autoland", 1),
("mozilla-central", 1),
("nss-try", 0),
("test", 1),
("try", 0),
],
)

self.assertEqual(Issue.objects.count(), 6)
with self.assertLogs() as mock_log:
with self.assertNumQueries(7):
call_command("cleanup_issues", "--nb-days", "4")
self.assertEqual(Issue.objects.count(), 2)
self.assertEqual(
mock_log.output,
[
f"{LOG_PREFIX}Deleted 2 unused Repository.",
f"{LOG_PREFIX}Retrieved 2 old revisions to be deleted.",
f"{LOG_PREFIX}Page 1/1.",
f"{LOG_PREFIX}Deleted 6 IssueLink, 1 Diff, 4 Issue, 2 Revision.",
],
)
self.assertListEqual(
list(
Repository.objects.annotate(rev_count=Count("base_revisions"))
.order_by("slug")
.values_list("slug", "rev_count")
),
[
("autoland", 0),
("mozilla-central", 0),
("test", 1),
],
)
Loading