From a786a1d653c6dc1e723aeef4aee0bc482f881032 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Mon, 18 Nov 2024 16:54:12 +0100 Subject: [PATCH 01/13] Remove repository Phabricator refs --- ...ove_repository_phid_alter_repository_id.py | 21 +++++++++++++++ backend/code_review_backend/issues/models.py | 26 +++++++++---------- 2 files changed, 34 insertions(+), 13 deletions(-) create mode 100644 backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py diff --git a/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py b/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py new file mode 100644 index 000000000..5ae5c091d --- /dev/null +++ b/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py @@ -0,0 +1,21 @@ +# Generated by Django 5.1.2 on 2024-11-18 15:53 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("issues", "0014_unique_hash"), + ] + + operations = [ + migrations.RemoveField( + model_name="repository", + name="phid", + ), + migrations.AlterField( + model_name="repository", + name="id", + field=models.AutoField(primary_key=True, serialize=False), + ), + ] diff --git a/backend/code_review_backend/issues/models.py b/backend/code_review_backend/issues/models.py index 38ed6a264..da457656b 100644 --- a/backend/code_review_backend/issues/models.py +++ b/backend/code_review_backend/issues/models.py @@ -14,27 +14,18 @@ ISSUE_LEVELS = ((LEVEL_WARNING, "Warning"), (LEVEL_ERROR, "Error")) -class PhabricatorModel(models.Model): - id = models.PositiveIntegerField(primary_key=True) - phid = models.CharField(max_length=40, unique=True) +class Repository(models.Model): + id = models.AutoField(primary_key=True) created = models.DateTimeField(auto_now_add=True) updated = models.DateTimeField(auto_now=True) - class Meta: - abstract = True - ordering = ("id",) - - -class Repository(PhabricatorModel): - # Not all repositories are available on Phabricator (Try ones) - phid = models.CharField(max_length=40, unique=False, null=True, blank=True) - slug = models.SlugField(unique=True) url = models.URLField(unique=True) class Meta: verbose_name_plural = "repositories" + ordering = ("id",) def __str__(self): return self.slug @@ -110,11 +101,17 @@ def phabricator_url(self): return f"{parser.scheme}://{parser.netloc}/D{self.phabricator_id}" -class Diff(PhabricatorModel): +class Diff(models.Model): """Reference of a specific code patch (diff) in Phabricator. A revision can be linked to multiple successive diffs, or none in case of a repository push. """ + # Phabricator's attributes + id = models.PositiveIntegerField(primary_key=True) + phid = models.CharField(max_length=40, unique=True) + created = models.DateTimeField(auto_now_add=True) + updated = models.DateTimeField(auto_now=True) + revision = models.ForeignKey( Revision, related_name="diffs", on_delete=models.CASCADE ) @@ -131,6 +128,9 @@ class Diff(PhabricatorModel): def __str__(self): return f"Diff {self.id}" + class Meta: + ordering = ("id",) + class IssueLink(models.Model): """Many-to-many relationship between an Issue and a Revision. From 8089520cbedc4f2ac2638773a3ac5880faab57ce Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Mon, 18 Nov 2024 17:03:43 +0100 Subject: [PATCH 02/13] Update tests --- .../migrations/0016_alter_repository_options.py | 16 ++++++++++++++++ .../code_review_backend/issues/serializers.py | 2 +- .../code_review_backend/issues/tests/test_api.py | 2 +- .../issues/tests/test_compare.py | 2 +- .../issues/tests/test_diff.py | 5 +---- .../issues/tests/test_repository.py | 5 ----- .../issues/tests/test_revision.py | 1 - .../issues/tests/test_stats.py | 2 +- backend/fixtures/repositories.json | 5 ----- 9 files changed, 21 insertions(+), 19 deletions(-) create mode 100644 backend/code_review_backend/issues/migrations/0016_alter_repository_options.py diff --git a/backend/code_review_backend/issues/migrations/0016_alter_repository_options.py b/backend/code_review_backend/issues/migrations/0016_alter_repository_options.py new file mode 100644 index 000000000..e3641dc7e --- /dev/null +++ b/backend/code_review_backend/issues/migrations/0016_alter_repository_options.py @@ -0,0 +1,16 @@ +# Generated by Django 5.1.2 on 2024-11-18 15:55 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("issues", "0015_remove_repository_phid_alter_repository_id"), + ] + + operations = [ + migrations.AlterModelOptions( + name="repository", + options={"ordering": ("id",), "verbose_name_plural": "repositories"}, + ), + ] diff --git a/backend/code_review_backend/issues/serializers.py b/backend/code_review_backend/issues/serializers.py index 24e50782f..8a4ddf55d 100644 --- a/backend/code_review_backend/issues/serializers.py +++ b/backend/code_review_backend/issues/serializers.py @@ -24,7 +24,7 @@ class RepositorySerializer(serializers.ModelSerializer): class Meta: model = Repository - fields = ("id", "phid", "slug", "url") + fields = ("id", "slug", "url") class RevisionSerializer(serializers.ModelSerializer): diff --git a/backend/code_review_backend/issues/tests/test_api.py b/backend/code_review_backend/issues/tests/test_api.py index 42307507a..575804690 100644 --- a/backend/code_review_backend/issues/tests/test_api.py +++ b/backend/code_review_backend/issues/tests/test_api.py @@ -24,7 +24,7 @@ def setUp(self): # Create a repo & its try counterpart self.repo = Repository.objects.create( - id=1, phid="PHID-REPO-xxx", slug="myrepo", url="http://repo.test/myrepo" + id=1, slug="myrepo", url="http://repo.test/myrepo" ) self.repo_try = Repository.objects.create( id=2, slug="myrepo-try", url="http://repo.test/try" diff --git a/backend/code_review_backend/issues/tests/test_compare.py b/backend/code_review_backend/issues/tests/test_compare.py index 9614be4bc..78c05ef0b 100644 --- a/backend/code_review_backend/issues/tests/test_compare.py +++ b/backend/code_review_backend/issues/tests/test_compare.py @@ -19,7 +19,7 @@ def setUp(self): # Create a repo & its try counterpart self.repo = Repository.objects.create( - id=1, phid="PHID-REPO-xxx", slug="myrepo", url="http://repo.test/myrepo" + id=1, slug="myrepo", url="http://repo.test/myrepo" ) self.repo_try = Repository.objects.create( id=2, slug="myrepo-try", url="http://repo.test/try" diff --git a/backend/code_review_backend/issues/tests/test_diff.py b/backend/code_review_backend/issues/tests/test_diff.py index 3712ce2ab..6359ec73b 100644 --- a/backend/code_review_backend/issues/tests/test_diff.py +++ b/backend/code_review_backend/issues/tests/test_diff.py @@ -20,7 +20,7 @@ def setUpTestData(cls): # Create a repo & its try counterpart cls.repo = Repository.objects.create( - id=1, phid="PHID-REPO-xxx", slug="myrepo", url="http://repo.test/myrepo" + id=1, slug="myrepo", url="http://repo.test/myrepo" ) cls.repo_try = Repository.objects.create( id=2, slug="myrepo-try", url="http://repo.test/try" @@ -83,7 +83,6 @@ def test_list_diffs(self): }, "repository": { "id": 2, - "phid": None, "slug": "myrepo-try", "url": "http://repo.test/try", }, @@ -115,7 +114,6 @@ def test_list_diffs(self): }, "repository": { "id": 2, - "phid": None, "slug": "myrepo-try", "url": "http://repo.test/try", }, @@ -147,7 +145,6 @@ def test_list_diffs(self): }, "repository": { "id": 2, - "phid": None, "slug": "myrepo-try", "url": "http://repo.test/try", }, diff --git a/backend/code_review_backend/issues/tests/test_repository.py b/backend/code_review_backend/issues/tests/test_repository.py index a33d11df2..8b20ae56b 100644 --- a/backend/code_review_backend/issues/tests/test_repository.py +++ b/backend/code_review_backend/issues/tests/test_repository.py @@ -27,31 +27,26 @@ def test_list_repositories(self): "results": [ { "id": 102, - "phid": None, "slug": "autoland", "url": "https://hg.mozilla.org/integration/autoland", }, { "id": 1, - "phid": "PHID-REPO-saax4qdxlbbhahhp2kg5", "slug": "mozilla-central", "url": "https://hg.mozilla.org/mozilla-central", }, { "id": 8, - "phid": "PHID-REPO-3lrloqw4qf6fluy2a5ni", "slug": "nss", "url": "https://hg.mozilla.org/projects/nss", }, { "id": 101, - "phid": None, "slug": "nss-try", "url": "https://hg.mozilla.org/projects/nss-try", }, { "id": 100, - "phid": None, "slug": "try", "url": "https://hg.mozilla.org/try", }, diff --git a/backend/code_review_backend/issues/tests/test_revision.py b/backend/code_review_backend/issues/tests/test_revision.py index 193da4cc4..fb1f93668 100644 --- a/backend/code_review_backend/issues/tests/test_revision.py +++ b/backend/code_review_backend/issues/tests/test_revision.py @@ -12,7 +12,6 @@ class RevisionAPITestCase(APITestCase): def setUp(self): self.repo = Repository.objects.create( id=1, - phid="PHID-REPO-xxx", slug="myrepo", url="http://repo.test/myrepo", ) diff --git a/backend/code_review_backend/issues/tests/test_stats.py b/backend/code_review_backend/issues/tests/test_stats.py index 9beb1e1e6..1746b64ca 100644 --- a/backend/code_review_backend/issues/tests/test_stats.py +++ b/backend/code_review_backend/issues/tests/test_stats.py @@ -21,7 +21,7 @@ def setUp(self): # Create a repo & its try self.repo = Repository.objects.create( - id=1, phid="PHID-REPO-xxx", slug="myrepo", url="http://repo.test/myrepo" + id=1, slug="myrepo", url="http://repo.test/myrepo" ) self.repo_try = Repository.objects.create( id=2, slug="myrepo-try", url="http://repo.test/myrepo-try" diff --git a/backend/fixtures/repositories.json b/backend/fixtures/repositories.json index a34869420..3e12ae241 100644 --- a/backend/fixtures/repositories.json +++ b/backend/fixtures/repositories.json @@ -5,7 +5,6 @@ "fields": { "created": "2019-10-17T07:17:06.396Z", "updated": "2019-10-17T07:17:06.396Z", - "phid": "PHID-REPO-saax4qdxlbbhahhp2kg5", "slug": "mozilla-central", "url": "https://hg.mozilla.org/mozilla-central" } @@ -16,7 +15,6 @@ "fields": { "created": "2019-10-17T07:17:32.970Z", "updated": "2019-10-17T07:17:32.970Z", - "phid": "PHID-REPO-3lrloqw4qf6fluy2a5ni", "slug": "nss", "url": "https://hg.mozilla.org/projects/nss" } @@ -27,7 +25,6 @@ "fields": { "created": "2019-11-27T10:42:35.034Z", "updated": "2019-11-27T10:42:35.034Z", - "phid": null, "slug": "try", "url": "https://hg.mozilla.org/try" } @@ -38,7 +35,6 @@ "fields": { "created": "2019-11-27T10:42:35.035Z", "updated": "2019-11-27T10:42:35.035Z", - "phid": null, "slug": "nss-try", "url": "https://hg.mozilla.org/projects/nss-try" } @@ -49,7 +45,6 @@ "fields": { "created": "2019-12-02T16:01:23.231Z", "updated": "2019-12-02T16:01:23.231Z", - "phid": null, "slug": "autoland", "url": "https://hg.mozilla.org/integration/autoland" } From 75fd091a258df5f7472c3a6d29348b8d091ef406 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Mon, 18 Nov 2024 18:20:32 +0100 Subject: [PATCH 03/13] Automatically create repositories referenced on a revision --- .../code_review_backend/issues/serializers.py | 51 ++++++++++++++----- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/backend/code_review_backend/issues/serializers.py b/backend/code_review_backend/issues/serializers.py index 8a4ddf55d..590579124 100644 --- a/backend/code_review_backend/issues/serializers.py +++ b/backend/code_review_backend/issues/serializers.py @@ -3,6 +3,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. from collections import defaultdict +from urllib.parse import urlparse from django.db import transaction from rest_framework import serializers @@ -27,17 +28,42 @@ class Meta: fields = ("id", "slug", "url") +class RepositoryGetOrCreateField(serializers.SlugRelatedField): + help_text = "Get or create a repository. URL must match " + default_error_messages = { + **serializers.SlugRelatedField.default_error_messages, + "invalid_url": "Repository URL must match hg.mozilla.org.", + } + queryset = Repository.objects.all() + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs, slug_field="url") + + def to_internal_value(self, url): + parsed = urlparse(url) + if parsed.netloc != "hg.mozilla.org": + try: + return self.get_queryset().get(url=url) + except Repository.DoesNotExist: + self.fail("invalid_url") + except (TypeError, ValueError): + self.fail("invalid") + try: + repo, _ = self.get_queryset().get_or_create( + url=url, defaults={"slug": parsed.path} + ) + return repo + except (TypeError, ValueError): + self.fail("invalid") + + class RevisionSerializer(serializers.ModelSerializer): """ Serialize a Revision in a Repository """ - base_repository = serializers.SlugRelatedField( - queryset=Repository.objects.all(), slug_field="url" - ) - head_repository = serializers.SlugRelatedField( - queryset=Repository.objects.all(), slug_field="url" - ) + base_repository = RepositoryGetOrCreateField() + head_repository = RepositoryGetOrCreateField() diffs_url = serializers.HyperlinkedIdentityField( view_name="revision-diffs-list", lookup_url_kwarg="revision_id" ) @@ -69,18 +95,19 @@ class Meta: "phabricator_url", ) + @transaction.atomic + def create(self, validated_data): + # Create BASE and HEAD repositories if needed + return super().create(validated_data) + class RevisionLightSerializer(serializers.ModelSerializer): """ Serialize a Revision in a Diff light serializer """ - base_repository = serializers.SlugRelatedField( - queryset=Repository.objects.all(), slug_field="url" - ) - head_repository = serializers.SlugRelatedField( - queryset=Repository.objects.all(), slug_field="url" - ) + base_repository = RepositoryGetOrCreateField() + head_repository = RepositoryGetOrCreateField() phabricator_url = serializers.URLField(read_only=True) class Meta: From 149ab698680ecdc72bf3890de1b9b91009826bb6 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Mon, 18 Nov 2024 18:42:02 +0100 Subject: [PATCH 04/13] Add tests --- .../issues/tests/test_api.py | 50 ++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/backend/code_review_backend/issues/tests/test_api.py b/backend/code_review_backend/issues/tests/test_api.py index 575804690..bd4a8af14 100644 --- a/backend/code_review_backend/issues/tests/test_api.py +++ b/backend/code_review_backend/issues/tests/test_api.py @@ -24,10 +24,10 @@ def setUp(self): # Create a repo & its try counterpart self.repo = Repository.objects.create( - id=1, slug="myrepo", url="http://repo.test/myrepo" + slug="myrepo", url="http://repo.test/myrepo" ) self.repo_try = Repository.objects.create( - id=2, slug="myrepo-try", url="http://repo.test/try" + slug="myrepo-try", url="http://repo.test/try" ) # Create revision and diff self.revision = self.repo_try.head_revisions.create( @@ -100,6 +100,52 @@ def test_create_revision(self): self.assertDictEqual(response.json(), expected_response) self.assertEqual(Revision.objects.count(), 1) + def test_create_revision_wrong_new_repo(self): + self.revision.delete() + data = { + "phabricator_id": 123, + "phabricator_phid": "PHID-REV-xxx", + "title": "Bug XXX - Some bug", + "bugzilla_id": 123456, + "base_repository": "http://notamozrepo.test/myrepo", + "head_repository": "http://notamozrepo.test/myrepo", + "base_changeset": "123456789ABCDEF", + "head_changeset": "FEDCBA987654321", + } + + self.assertEqual(Revision.objects.count(), 0) + self.client.force_authenticate(user=self.user) + response = self.client.post("/v1/revision/", data, format="json") + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual( + response.json(), + { + "base_repository": ["Repository URL must match hg.mozilla.org."], + "head_repository": ["Repository URL must match hg.mozilla.org."], + }, + ) + + def test_create_revision_creates_new_repo(self): + self.revision.delete() + data = { + "phabricator_id": 123, + "phabricator_phid": "PHID-REV-xxx", + "title": "Bug XXX - Some bug", + "bugzilla_id": 123456, + "base_repository": "http://repo.test/myrepo", + "head_repository": "http://hg.mozilla.org/a/new/repo", + "base_changeset": "123456789ABCDEF", + "head_changeset": "FEDCBA987654321", + } + + self.assertEqual(Revision.objects.count(), 0) + self.client.force_authenticate(user=self.user) + response = self.client.post("/v1/revision/", data, format="json") + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + new_repo = Repository.objects.get(url="http://hg.mozilla.org/a/new/repo") + self.assertIsNotNone(new_repo) + self.assertEqual(new_repo.slug, "/a/new/repo") + def test_create_diff(self): """ Check we can create a diff through the API From e8b64fc39c4e288184573daba7f888c3e7055166 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Tue, 19 Nov 2024 10:26:48 +0100 Subject: [PATCH 05/13] Suggestions --- backend/code_review_backend/app/settings.py | 3 +++ backend/code_review_backend/issues/serializers.py | 13 ++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/backend/code_review_backend/app/settings.py b/backend/code_review_backend/app/settings.py index 8a00d7b31..78e026789 100644 --- a/backend/code_review_backend/app/settings.py +++ b/backend/code_review_backend/app/settings.py @@ -211,6 +211,9 @@ # Use production Phabricator instance by default PHABRICATOR_HOST = "https://phabricator.services.mozilla.com" +# Limit the automatic creation of reositories to allowed hosts +ALLOWED_REPOSITORY_HOSTS = ["hg.mozilla.org"] + DYNO = env("DYNO") # Heroku settings override to run the web app through dyno if DYNO: diff --git a/backend/code_review_backend/issues/serializers.py b/backend/code_review_backend/issues/serializers.py index 590579124..560f9edfd 100644 --- a/backend/code_review_backend/issues/serializers.py +++ b/backend/code_review_backend/issues/serializers.py @@ -5,6 +5,7 @@ from collections import defaultdict from urllib.parse import urlparse +from django.conf import settings from django.db import transaction from rest_framework import serializers @@ -29,7 +30,9 @@ class Meta: class RepositoryGetOrCreateField(serializers.SlugRelatedField): - help_text = "Get or create a repository. URL must match " + help_text = ( + "Get or create a repository. URL must match allowed hosts from settings." + ) default_error_messages = { **serializers.SlugRelatedField.default_error_messages, "invalid_url": "Repository URL must match hg.mozilla.org.", @@ -41,13 +44,14 @@ def __init__(self, *args, **kwargs): def to_internal_value(self, url): parsed = urlparse(url) - if parsed.netloc != "hg.mozilla.org": + if parsed.netloc not in settings.ALLOWED_REPOSITORY_HOSTS: try: return self.get_queryset().get(url=url) except Repository.DoesNotExist: self.fail("invalid_url") except (TypeError, ValueError): self.fail("invalid") + return try: repo, _ = self.get_queryset().get_or_create( url=url, defaults={"slug": parsed.path} @@ -95,11 +99,6 @@ class Meta: "phabricator_url", ) - @transaction.atomic - def create(self, validated_data): - # Create BASE and HEAD repositories if needed - return super().create(validated_data) - class RevisionLightSerializer(serializers.ModelSerializer): """ From cf5703b68456289f7e20ea6cc28332e57df996da Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Thu, 21 Nov 2024 18:15:04 +0100 Subject: [PATCH 06/13] Merge migration files --- ...remove_repository_phid_alter_repository_id.py | 4 ++++ .../migrations/0016_alter_repository_options.py | 16 ---------------- 2 files changed, 4 insertions(+), 16 deletions(-) delete mode 100644 backend/code_review_backend/issues/migrations/0016_alter_repository_options.py diff --git a/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py b/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py index 5ae5c091d..a69bad4cd 100644 --- a/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py +++ b/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py @@ -18,4 +18,8 @@ class Migration(migrations.Migration): name="id", field=models.AutoField(primary_key=True, serialize=False), ), + migrations.AlterModelOptions( + name="repository", + options={"ordering": ("id",), "verbose_name_plural": "repositories"}, + ), ] diff --git a/backend/code_review_backend/issues/migrations/0016_alter_repository_options.py b/backend/code_review_backend/issues/migrations/0016_alter_repository_options.py deleted file mode 100644 index e3641dc7e..000000000 --- a/backend/code_review_backend/issues/migrations/0016_alter_repository_options.py +++ /dev/null @@ -1,16 +0,0 @@ -# Generated by Django 5.1.2 on 2024-11-18 15:55 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("issues", "0015_remove_repository_phid_alter_repository_id"), - ] - - operations = [ - migrations.AlterModelOptions( - name="repository", - options={"ordering": ("id",), "verbose_name_plural": "repositories"}, - ), - ] From 6b48f650325e81f3461a0a8d3a935af159c2caf4 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Fri, 22 Nov 2024 10:43:21 +0100 Subject: [PATCH 07/13] Suggestion: left strip path for creating repository slug --- backend/code_review_backend/issues/serializers.py | 2 +- backend/code_review_backend/issues/tests/test_api.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/code_review_backend/issues/serializers.py b/backend/code_review_backend/issues/serializers.py index 560f9edfd..da5fe796b 100644 --- a/backend/code_review_backend/issues/serializers.py +++ b/backend/code_review_backend/issues/serializers.py @@ -54,7 +54,7 @@ def to_internal_value(self, url): return try: repo, _ = self.get_queryset().get_or_create( - url=url, defaults={"slug": parsed.path} + url=url, defaults={"slug": parsed.path.lstrip("/")} ) return repo except (TypeError, ValueError): diff --git a/backend/code_review_backend/issues/tests/test_api.py b/backend/code_review_backend/issues/tests/test_api.py index bd4a8af14..09d3e8ff4 100644 --- a/backend/code_review_backend/issues/tests/test_api.py +++ b/backend/code_review_backend/issues/tests/test_api.py @@ -144,7 +144,7 @@ def test_create_revision_creates_new_repo(self): self.assertEqual(response.status_code, status.HTTP_201_CREATED) new_repo = Repository.objects.get(url="http://hg.mozilla.org/a/new/repo") self.assertIsNotNone(new_repo) - self.assertEqual(new_repo.slug, "/a/new/repo") + self.assertEqual(new_repo.slug, "a/new/repo") def test_create_diff(self): """ From 856208ef682c64c4d99e0b947d1978b299052cac Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Fri, 22 Nov 2024 10:44:40 +0100 Subject: [PATCH 08/13] Support creating repositories from the load_issue command --- .../issues/management/commands/load_issues.py | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/backend/code_review_backend/issues/management/commands/load_issues.py b/backend/code_review_backend/issues/management/commands/load_issues.py index 474bc8856..c8082059d 100644 --- a/backend/code_review_backend/issues/management/commands/load_issues.py +++ b/backend/code_review_backend/issues/management/commands/load_issues.py @@ -6,8 +6,10 @@ import logging import os import tempfile +from urllib.parse import urlparse import taskcluster +from django.conf import settings from django.core.management.base import BaseCommand from django.db import transaction from requests.exceptions import HTTPError @@ -179,22 +181,35 @@ def load_local_reports(self): report = json.load(open(os.path.join(self.cache_dir, task_id))) yield task_id, report + def get_or_create_repository(self, url): + """Retrieve a repository or create it if its URL must match allowed hosts""" + parsed = urlparse(url) + if parsed.netloc not in settings.ALLOWED_REPOSITORY_HOSTS: + try: + return Repository.objects.get(url=url) + except Repository.DoesNotExist: + logger.warning( + f"No repository exist with URL {url} " + "(must be in ALLOWED_REPOSITORY_HOST for automatic creation), skipping." + ) + raise ValueError(url) + repo, created = Repository.objects.get_or_create( + url=url, defaults={"slug": parsed.path.lstrip("/")} + ) + if created: + logger.info(f"Created missing repository from URL {url}") + return repo + def build_revision_and_diff(self, data, task_id): """Build or retrieve a revision and diff in current repo from report's data""" try: - head_repository = Repository.objects.get(url=data["repository"]) - except Repository.DoesNotExist: - logger.warning( - f"No repository found with URL {data['repository']}, skipping." - ) + head_repository = self.get_or_create_repository(data["repository"]) + except ValueError: return None, None try: - base_repository = Repository.objects.get(url=data["target_repository"]) - except Repository.DoesNotExist: - logger.warning( - f"No repository found with URL {data['target_repository']}, skipping." - ) + base_repository = self.get_or_create_repository(data["target_repository"]) + except ValueError: return None, None revision, _ = head_repository.head_revisions.get_or_create( From 27e11581fe0851d66b2524153107d29abddef006 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Mon, 25 Nov 2024 16:21:57 +0100 Subject: [PATCH 09/13] Remove unused repositories in the cleanup script --- .../management/commands/cleanup_issues.py | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/backend/code_review_backend/issues/management/commands/cleanup_issues.py b/backend/code_review_backend/issues/management/commands/cleanup_issues.py index a4fec58a4..e20e537f2 100644 --- a/backend/code_review_backend/issues/management/commands/cleanup_issues.py +++ b/backend/code_review_backend/issues/management/commands/cleanup_issues.py @@ -10,7 +10,13 @@ from django.core.management.base import BaseCommand from django.utils import timezone -from code_review_backend.issues.models import Diff, Issue, IssueLink, Revision +from code_review_backend.issues.models import ( + Diff, + Issue, + IssueLink, + Repository, + Revision, +) logger = logging.getLogger(__name__) @@ -28,7 +34,20 @@ def add_arguments(self, parser): default=30, ) + def cleanup_repositories(self): + unused_repositories = Repository.objects.filter( + base_revisions__isnull=True, + head_revisions__isnull=True, + diffs__isnull=True, + ) + return unused_repositories._raw_delete(unused_repositories.db) + def handle(self, *args, **options): + stats = defaultdict(int) + repo_count = self.cleanup_repositories() + if repo_count: + stats["Repository"] = repo_count + clean_until = timezone.now() - timedelta(days=options["nb_days"]) rev_to_delete = Revision.objects.filter( @@ -42,8 +61,6 @@ def handle(self, *args, **options): logger.info(f"Retrieved {total_rev_count} old revisions to be deleted.") - stats = defaultdict(int) - iterations = math.ceil(total_rev_count / DEL_CHUNK_SIZE) for i, start in enumerate(range(0, total_rev_count, DEL_CHUNK_SIZE), start=1): logger.info(f"Page {i}/{iterations}.") From fd1f17b4e85f71df031c9e183220ea53a3308dbc Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Mon, 25 Nov 2024 16:22:10 +0100 Subject: [PATCH 10/13] Update tests --- .../tests/commands/test_cleanup_issues.py | 62 +++++++++++++++++-- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/backend/code_review_backend/issues/tests/commands/test_cleanup_issues.py b/backend/code_review_backend/issues/tests/commands/test_cleanup_issues.py index 65e7b2709..e27b8e6d2 100644 --- a/backend/code_review_backend/issues/tests/commands/test_cleanup_issues.py +++ b/backend/code_review_backend/issues/tests/commands/test_cleanup_issues.py @@ -6,6 +6,7 @@ from datetime import timedelta from django.core.management import call_command +from django.db.models import Count from django.test import TestCase from django.utils import timezone @@ -90,8 +91,11 @@ def setUp(self): build_issue("path6", [rev_1, rev_3]) def test_cleanup_issues_no_issue(self): + Repository.objects.exclude( + id__in=(self.moz_central.id, self.autoland.id, self.test_repo.id) + ).delete() with self.assertLogs() as mock_log: - with self.assertNumQueries(1): + with self.assertNumQueries(2): call_command("cleanup_issues", "--nb-days", "40") self.assertEqual( @@ -102,12 +106,13 @@ def test_cleanup_issues_no_issue(self): ) def test_cleanup_issues_default_nb_days(self): + Repository.objects.exclude( + id__in=(self.moz_central.id, self.autoland.id, self.test_repo.id) + ).delete() self.assertEqual(Issue.objects.count(), 6) with self.assertLogs() as mock_log: - with self.assertNumQueries(6): - call_command( - "cleanup_issues", - ) + with self.assertNumQueries(7): + call_command("cleanup_issues") self.assertEqual(Issue.objects.count(), 4) self.assertListEqual( @@ -129,9 +134,12 @@ def test_cleanup_issues_default_nb_days(self): ) def test_cleanup_issues_custom_nb_days(self): + Repository.objects.exclude( + id__in=(self.moz_central.id, self.autoland.id, self.test_repo.id) + ).delete() self.assertEqual(Issue.objects.count(), 6) with self.assertLogs() as mock_log: - with self.assertNumQueries(6): + with self.assertNumQueries(7): call_command("cleanup_issues", "--nb-days", "4") self.assertEqual(Issue.objects.count(), 2) @@ -148,3 +156,45 @@ def test_cleanup_issues_custom_nb_days(self): f"{LOG_PREFIX}Deleted 6 IssueLink, 1 Diff, 4 Issue, 2 Revision.", ], ) + + def test_cleanup_issues_removes_unused_repositories(self): + self.assertListEqual( + list( + Repository.objects.annotate(rev_count=Count("base_revisions")) + .order_by("slug") + .values_list("slug", "rev_count") + ), + [ + ("autoland", 1), + ("mozilla-central", 1), + ("nss-try", 0), + ("test", 1), + ("try", 0), + ], + ) + + self.assertEqual(Issue.objects.count(), 6) + with self.assertLogs() as mock_log: + with self.assertNumQueries(7): + call_command("cleanup_issues", "--nb-days", "4") + self.assertEqual(Issue.objects.count(), 2) + self.assertEqual( + mock_log.output, + [ + f"{LOG_PREFIX}Retrieved 2 old revisions to be deleted.", + f"{LOG_PREFIX}Page 1/1.", + f"{LOG_PREFIX}Deleted 2 Repository, 6 IssueLink, 1 Diff, 4 Issue, 2 Revision.", + ], + ) + self.assertListEqual( + list( + Repository.objects.annotate(rev_count=Count("base_revisions")) + .order_by("slug") + .values_list("slug", "rev_count") + ), + [ + ("autoland", 0), + ("mozilla-central", 0), + ("test", 1), + ], + ) From 1548118a8544d752d957d9a5848061e6714af24c Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Mon, 25 Nov 2024 17:26:08 +0100 Subject: [PATCH 11/13] Directly log deleted repositories --- .../issues/management/commands/cleanup_issues.py | 11 ++++++----- .../issues/tests/commands/test_cleanup_issues.py | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/backend/code_review_backend/issues/management/commands/cleanup_issues.py b/backend/code_review_backend/issues/management/commands/cleanup_issues.py index e20e537f2..ab0601f24 100644 --- a/backend/code_review_backend/issues/management/commands/cleanup_issues.py +++ b/backend/code_review_backend/issues/management/commands/cleanup_issues.py @@ -40,13 +40,12 @@ def cleanup_repositories(self): head_revisions__isnull=True, diffs__isnull=True, ) - return unused_repositories._raw_delete(unused_repositories.db) + delete_count = unused_repositories._raw_delete(unused_repositories.db) + if delete_count: + logger.info(f"Deleted {delete_count} unused Repository.") def handle(self, *args, **options): - stats = defaultdict(int) - repo_count = self.cleanup_repositories() - if repo_count: - stats["Repository"] = repo_count + self.cleanup_repositories() clean_until = timezone.now() - timedelta(days=options["nb_days"]) @@ -61,6 +60,8 @@ def handle(self, *args, **options): logger.info(f"Retrieved {total_rev_count} old revisions to be deleted.") + stats = defaultdict(int) + iterations = math.ceil(total_rev_count / DEL_CHUNK_SIZE) for i, start in enumerate(range(0, total_rev_count, DEL_CHUNK_SIZE), start=1): logger.info(f"Page {i}/{iterations}.") diff --git a/backend/code_review_backend/issues/tests/commands/test_cleanup_issues.py b/backend/code_review_backend/issues/tests/commands/test_cleanup_issues.py index e27b8e6d2..b6ba5fe40 100644 --- a/backend/code_review_backend/issues/tests/commands/test_cleanup_issues.py +++ b/backend/code_review_backend/issues/tests/commands/test_cleanup_issues.py @@ -181,9 +181,10 @@ def test_cleanup_issues_removes_unused_repositories(self): self.assertEqual( mock_log.output, [ + f"{LOG_PREFIX}Deleted 2 unused Repository.", f"{LOG_PREFIX}Retrieved 2 old revisions to be deleted.", f"{LOG_PREFIX}Page 1/1.", - f"{LOG_PREFIX}Deleted 2 Repository, 6 IssueLink, 1 Diff, 4 Issue, 2 Revision.", + f"{LOG_PREFIX}Deleted 6 IssueLink, 1 Diff, 4 Issue, 2 Revision.", ], ) self.assertListEqual( From e74492112b36e46dcd4adb947fe3f1a887267d98 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Mon, 25 Nov 2024 17:33:10 +0100 Subject: [PATCH 12/13] Correctly initialize the sequence with PostgreSQL --- ..._remove_repository_phid_alter_repository_id.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py b/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py index a69bad4cd..70f785e1f 100644 --- a/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py +++ b/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py @@ -1,5 +1,6 @@ # Generated by Django 5.1.2 on 2024-11-18 15:53 +from django.conf import settings from django.db import migrations, models @@ -23,3 +24,17 @@ class Migration(migrations.Migration): options={"ordering": ("id",), "verbose_name_plural": "repositories"}, ), ] + if "postgresql" in settings.DATABASES["default"]["ENGINE"]: + print("Initializing the sequence with PostgreSQL backend") + operations.append( + migrations.RunSQL( + """ + SELECT setval( + pg_get_serial_sequence('issues_repository', 'id'), + coalesce(max(id)+1, 0), + false + ) FROM issues_repository; + """, + reverse_sql=migrations.RunSQL.noop, + ) + ) From b78af8cb8405c54570b6657adf031258eb13dd34 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Mon, 25 Nov 2024 17:34:32 +0100 Subject: [PATCH 13/13] Typo & nit --- .../issues/management/commands/load_issues.py | 2 +- .../0015_remove_repository_phid_alter_repository_id.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/code_review_backend/issues/management/commands/load_issues.py b/backend/code_review_backend/issues/management/commands/load_issues.py index c8082059d..76e2f7eaa 100644 --- a/backend/code_review_backend/issues/management/commands/load_issues.py +++ b/backend/code_review_backend/issues/management/commands/load_issues.py @@ -189,7 +189,7 @@ def get_or_create_repository(self, url): return Repository.objects.get(url=url) except Repository.DoesNotExist: logger.warning( - f"No repository exist with URL {url} " + f"No repository exists with URL {url} " "(must be in ALLOWED_REPOSITORY_HOST for automatic creation), skipping." ) raise ValueError(url) diff --git a/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py b/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py index 70f785e1f..0aecd04e2 100644 --- a/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py +++ b/backend/code_review_backend/issues/migrations/0015_remove_repository_phid_alter_repository_id.py @@ -25,7 +25,9 @@ class Migration(migrations.Migration): ), ] if "postgresql" in settings.DATABASES["default"]["ENGINE"]: - print("Initializing the sequence with PostgreSQL backend") + print( + "Adding sequence initialization for Repository PK to issues.0015 with PostgreSQL backend" + ) operations.append( migrations.RunSQL( """