Add support for Update Pinning (#2534)

Ref: https://bugzilla.mozilla.org/show_bug.cgi?id=1762986

This lets clients specify a major or major.minor version beyond which they should not update.

To support this:

* add `pinnable_release` and associated tables and permissions
* add `PinVersion` class: written as `{major}.` or `{major}.{minor}.`, and supports comparison with StrictVersion
* add `set_pinnable` API with support for scheduled changes and enacting them via the agent
* add pin support in `evaluateRules` and public API: if normal rule evaluation yields a release with greater version than allowed by the pin, look up the `pinnable_releases` table instead
* prevent deletion of releases referenced by `pinnable_releases`

Note: there is currently no API to remove an entry from the `pinnable_releases` table, or to query that table.

Co-authored-by: Julien Cristau <jcristau@mozilla.com>
Co-authored-by: bytesized <bytesized@mozilla.com>
mozilla-releng · Jun 3, 2022 · 79145f4 · 79145f4
1 parent 949bb6f
commit 79145f4
Show file tree

Hide file tree

Showing 20 changed files with 1,492 additions and 36 deletions.
diff --git a/agent/src/balrogagent/cmd.py b/agent/src/balrogagent/cmd.py
@@ -7,7 +7,15 @@
 from .changes import get_telemetry_uptake, telemetry_is_ready, time_is_ready
 from .log import configure_logging
 
-V1_SCHEDULED_CHANGE_ENDPOINTS = ["rules", "releases", "permissions", "emergency_shutoff", "required_signoffs/product", "required_signoffs/permissions"]
+V1_SCHEDULED_CHANGE_ENDPOINTS = [
+    "rules",
+    "releases",
+    "permissions",
+    "emergency_shutoff",
+    "required_signoffs/product",
+    "required_signoffs/permissions",
+    "pinnable_releases",
+]
 V2_SCHEDULED_CHANGE_ENDPOINTS = ["/v2/releases"]
 
 

diff --git a/agent/tests/test_cmd.py b/agent/tests/test_cmd.py
@@ -36,7 +36,7 @@ async def testNoChanges(self, time_is_ready, telemetry_is_ready, request):
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 0)
-        self.assertEqual(request.call_count, 7)
+        self.assertEqual(request.call_count, 8)
 
     @asynctest.patch("time.time")
     async def testTimeBasedNotReadyRules(self, time, time_is_ready, telemetry_is_ready, request):
@@ -46,7 +46,7 @@ async def testTimeBasedNotReadyRules(self, time, time_is_ready, telemetry_is_rea
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 1)
-        self.assertEqual(request.call_count, 7)
+        self.assertEqual(request.call_count, 8)
 
     @asynctest.patch("time.time")
     async def testTimeBasedNotReadyReleases(self, time, time_is_ready, telemetry_is_ready, request):
@@ -56,7 +56,7 @@ async def testTimeBasedNotReadyReleases(self, time, time_is_ready, telemetry_is_
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 1)
-        self.assertEqual(request.call_count, 7)
+        self.assertEqual(request.call_count, 8)
 
     @asynctest.patch("time.time")
     async def testTimeBasedNotReadyPermissions(self, time, time_is_ready, telemetry_is_ready, request):
@@ -66,7 +66,7 @@ async def testTimeBasedNotReadyPermissions(self, time, time_is_ready, telemetry_
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 1)
-        self.assertEqual(request.call_count, 7)
+        self.assertEqual(request.call_count, 8)
 
     @asynctest.patch("time.time")
     async def testTimeBasedIsNotReadyRequiredSignoffs(self, time, time_is_ready, telemetry_is_ready, request):
@@ -79,7 +79,7 @@ async def testTimeBasedIsNotReadyRequiredSignoffs(self, time, time_is_ready, tel
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 2)
-        self.assertEqual(request.call_count, 7)
+        self.assertEqual(request.call_count, 8)
 
     @asynctest.patch("time.time")
     async def testTimeBasedIsReadyRules(self, time, time_is_ready, telemetry_is_ready, request):
@@ -89,7 +89,7 @@ async def testTimeBasedIsReadyRules(self, time, time_is_ready, telemetry_is_read
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 1)
-        self.assertEqual(request.call_count, 8)
+        self.assertEqual(request.call_count, 9)
 
     @asynctest.patch("time.time")
     async def testTimeBasedIsReadyReleases(self, time, time_is_ready, telemetry_is_ready, request):
@@ -99,7 +99,7 @@ async def testTimeBasedIsReadyReleases(self, time, time_is_ready, telemetry_is_r
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 1)
-        self.assertEqual(request.call_count, 8)
+        self.assertEqual(request.call_count, 9)
 
     @asynctest.patch("time.time")
     async def testTimeBasedIsReadyPermissions(self, time, time_is_ready, telemetry_is_ready, request):
@@ -109,7 +109,7 @@ async def testTimeBasedIsReadyPermissions(self, time, time_is_ready, telemetry_i
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 1)
-        self.assertEqual(request.call_count, 8)
+        self.assertEqual(request.call_count, 9)
 
     @asynctest.patch("time.time")
     async def testTimeBasedIsReadyRequiredSignoffs(self, time, time_is_ready, telemetry_is_ready, request):
@@ -122,7 +122,7 @@ async def testTimeBasedIsReadyRequiredSignoffs(self, time, time_is_ready, teleme
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 2)
-        self.assertEqual(request.call_count, 9)
+        self.assertEqual(request.call_count, 10)
 
     @asynctest.patch("balrogagent.cmd.get_telemetry_uptake")
     async def testTelemetryBasedNotReady(self, get_telemetry_uptake, time_is_ready, telemetry_is_ready, request):
@@ -132,7 +132,7 @@ async def testTelemetryBasedNotReady(self, get_telemetry_uptake, time_is_ready,
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 1)
         self.assertEqual(time_is_ready.call_count, 0)
-        self.assertEqual(request.call_count, 7)
+        self.assertEqual(request.call_count, 8)
 
     @asynctest.patch("balrogagent.cmd.get_telemetry_uptake")
     async def testTelemetryBasedIsReady(self, get_telemetry_uptake, time_is_ready, telemetry_is_ready, request):
@@ -142,7 +142,7 @@ async def testTelemetryBasedIsReady(self, get_telemetry_uptake, time_is_ready, t
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 1)
         self.assertEqual(time_is_ready.call_count, 0)
-        self.assertEqual(request.call_count, 8)
+        self.assertEqual(request.call_count, 9)
 
     @asynctest.patch("time.time")
     async def testMultipleEndpointsAtOnce(self, time, time_is_ready, telemetry_is_ready, request):
@@ -156,7 +156,7 @@ async def testMultipleEndpointsAtOnce(self, time, time_is_ready, telemetry_is_re
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 3)
-        self.assertEqual(request.call_count, 10)
+        self.assertEqual(request.call_count, 11)
 
     @asynctest.patch("time.time")
     async def testMultipleChangesOneEndpoint(self, time, time_is_ready, telemetry_is_ready, request):
@@ -172,7 +172,7 @@ async def testMultipleChangesOneEndpoint(self, time, time_is_ready, telemetry_is
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 3)
-        self.assertEqual(request.call_count, 10)
+        self.assertEqual(request.call_count, 11)
         called_endpoints = [call[0][1] for call in request.call_args_list]
         self.assertIn("/scheduled_changes/releases", called_endpoints)
         self.assertIn("/scheduled_changes/permissions", called_endpoints)
@@ -202,7 +202,7 @@ async def testSignoffsPresent(self, time, time_is_ready, telemetry_is_ready, req
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 1)
-        self.assertEqual(request.call_count, 8)
+        self.assertEqual(request.call_count, 9)
 
     @asynctest.patch("time.time")
     async def testSignoffsAbsent(self, time, time_is_ready, telemetry_is_ready, request):
@@ -224,7 +224,7 @@ async def testSignoffsAbsent(self, time, time_is_ready, telemetry_is_ready, requ
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 1)
-        self.assertEqual(request.call_count, 7)
+        self.assertEqual(request.call_count, 8)
 
     @asynctest.patch("time.time")
     async def testRightEnactOrderForMultipleEndpointsAtOnce(self, time, time_is_ready, telemetry_is_ready, request):
@@ -252,7 +252,7 @@ async def testRightEnactOrderForMultipleEndpointsAtOnce(self, time, time_is_read
         await self._runAgent(sc, request)
         self.assertEqual(telemetry_is_ready.call_count, 0)
         self.assertEqual(time_is_ready.call_count, 11)
-        self.assertEqual(request.call_count, 18)
+        self.assertEqual(request.call_count, 19)
         called_endpoints = [call[0][1] for call in request.call_args_list]
         self.assertLess(called_endpoints.index("/scheduled_changes/rules"), called_endpoints.index("/scheduled_changes/releases"))
         self.assertLess(called_endpoints.index("/scheduled_changes/rules/1/enact"), called_endpoints.index("/scheduled_changes/rules/4/enact"))
@@ -282,7 +282,7 @@ async def test_v2_releases_no_changes(monkeypatch, fake_request):
     assert time_is_ready.call_count == 0
     assert verify_signoffs.call_count == 0
     # Once for each v1 endpoint, once for each v2 endpoint
-    assert fr.call_count == 7
+    assert fr.call_count == 8
 
 
 @pytest.mark.asyncio
@@ -330,7 +330,7 @@ async def test_v2_releases_one_change(monkeypatch, fake_request):
     assert time_is_ready.call_count == 1
     assert verify_signoffs.call_count == 1
     # Once for each v1 endpoint, once for each v2 endpoint, once to enact
-    assert fr.call_count == 8
+    assert fr.call_count == 9
     called_endpoints = [call[0][1] for call in fr.call_args_list]
     assert "/v2/releases/Firefox-64.0-build1/enact" in called_endpoints
 
@@ -404,7 +404,7 @@ async def test_v2_releases_multiple_changes_one_release(monkeypatch, fake_reques
     assert time_is_ready.call_count == 3
     assert verify_signoffs.call_count == 3
     # Once for each v1 endpoint, once for each v2 endpoint, once to enact
-    assert fr.call_count == 8
+    assert fr.call_count == 9
     called_endpoints = [call[0][1] for call in fr.call_args_list]
     assert "/v2/releases/Firefox-64.0-build1/enact" in called_endpoints
 
@@ -514,7 +514,7 @@ async def test_v2_releases_multiple_changes_multiple_releases(monkeypatch, fake_
     assert time_is_ready.call_count == 5
     assert verify_signoffs.call_count == 5
     # Once for each v1 endpoint, once for each v2 endpoint, once to enact each release's scheduled changes
-    assert fr.call_count == 9
+    assert fr.call_count == 10
     called_endpoints = [call[0][1] for call in fr.call_args_list]
     assert "/v2/releases/Firefox-64.0-build1/enact" in called_endpoints
     assert "/v2/releases/Firefox-66.0-build1/enact" in called_endpoints
@@ -590,7 +590,7 @@ async def test_v2_releases_multiple_changes_not_all_ready(monkeypatch, fake_requ
     # One less here, because the final call is skipped after time_is_ready return False
     assert verify_signoffs.call_count == 2
     # Once for each v1 endpoint, once for each v2 endpoint
-    assert fr.call_count == 7
+    assert fr.call_count == 8
     called_endpoints = [call[0][1] for call in fr.call_args_list]
     assert "/v2/releases/Firefox-64.0-build1/enact" not in called_endpoints
 
@@ -701,7 +701,7 @@ async def test_v2_releases_multiple_changes_one_release_one_part_not_ready(monke
     # One less here, because the final call is skipped after time_is_ready return False
     assert verify_signoffs.call_count == 4
     # Once for each v1 endpoint, once for each v2 endpoint, once to enact the one release that was ready
-    assert fr.call_count == 8
+    assert fr.call_count == 9
     called_endpoints = [call[0][1] for call in fr.call_args_list]
     assert "/v2/releases/Firefox-64.0-build1/enact" in called_endpoints
     assert "/v2/releases/Firefox-66.0-build1/enact" not in called_endpoints
@@ -776,4 +776,4 @@ async def test_v2_releases_signoff_requirements_not_met(monkeypatch, fake_reques
     assert time_is_ready.call_count == 3
     assert verify_signoffs.call_count == 3
     # Once for each v1 endpoint, once for each v2 endpoint
-    assert fr.call_count == 7
+    assert fr.call_count == 8
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -23,6 +23,7 @@ Contents:
    balrog_agent
    client_domains
    infrastructure
+   pinning
    autodocs
 
 Indices and tables

diff --git a/docs/source/pinning.rst b/docs/source/pinning.rst
@@ -0,0 +1,65 @@
+===============
+Release Pinning
+===============
+
+Firefox update supports a feature known as update pinning.
+The purpose of this feature is to allow users (primarily enterprise users) to set a version beyond which installations should not update.
+Two types of pins can be set: major version pins (e.g. ``102.``) and minor version pins (e.g. ``102.1.``).
+
+This feature is a bit more complicated than simply not installing an update if it is beyond the specified version.
+The problem with that approach is that, without the changes made to accommodate this feature, Balrog will always provide the newest version that the installation can be updated to.
+Balrog needs to be aware of what version the installation is pinned to.
+To understand why, consider an installation at version 102 that is pinned to version 103 when the newest version is 104.
+If Balrog wasn't aware of the pinned version, the installation would ask for updates and would get the update to 104 in response.
+Since 104 is newer than the pin, the installation would refuse that update and, instead of updating to 103, would not update at all.
+To accommodate update pinning, the application needs to send the pin as part of the update URL so that Balrog knows the correct version to return in cases like this.
+
+-----------------
+How Pinning Works
+-----------------
+
+~~~~~~~~~~~~~~~~~~
+Release Automation
+~~~~~~~~~~~~~~~~~~
+
+The process starts when a new release is built.
+When the build has completed, Release Automation automatically submits the release to Balrog.
+The Balrog submission jobs (in Taskcluster) specify one or more channels that the release should be pinned for (See `Bug 1762979 <https://bugzilla.mozilla.org/show_bug.cgi?id=1762979>`_ for details).
+
+These channel names are passed, along with the rest of the release submission data, to `balrogscript <https://github.com/mozilla-releng/scriptworker-scripts/tree/master/balrogscript>`_.
+Balrogscript then makes 2 requests (one for the major pin and one for the minor pin) to the Balrog REST API to associate the release with its pin for the specified channel (See `Bug 1770827 <https://bugzilla.mozilla.org/show_bug.cgi?id=1770827>`_ for details).
+
+Balrog handles each request by first ensuring that the release being pinned is not older than the release that currently has that pin (if any).
+Then it associates the pin with the release name in its ``pinnable_releases`` table.
+
+~~~~~~~~~~~~~~~~~~
+Application Update
+~~~~~~~~~~~~~~~~~~
+
+When an installation of Firefox has an update pin set, it simply includes the pin as a query parameter in the update URL in order to communicate it to Balrog.
+
+.. note::
+  When Balrog responds, Firefox does not actually check that the update returned complies with the requested pin.
+  It simply assumes that Balrog gave it the correct version.
+  One important reason why it does this is that Firefox cannot always validate the pin 100% reliably.
+  In order for a pin to be valid, it must specify an existing version of Firefox.
+  Since Firefox does not know which versions exist and which do not, it cannot always reject invalid pins.
+  And in the case that the pin is invalid, Firefox should continue updating to the newest possible version rather than obeying the invalid pin.
+
+  At first glance, it may seem like it would be better to attempt to obey pins, even if no such version exists.
+  This, however, would lead to subtle problems that are best avoided.
+  Say, for example, that an installation is pinned to ``102.15.``, but no such version is ever released.
+  When the newest version is less than the pin, everything works fine and the installation stays up-to-date.
+  But when the newest version is greater than the pin (say, ``140.0.0``), Balrog is effectively faced with the choice of whether to return the newest available version, or whether to return nothing.
+  Returning nothing does two things: it suggests that ``102.15.`` is a valid pin, because installations do not update beyond it, and it essentially prevents any installation with that pin from updating, potentially trapping it on a version much older than ``102``.
+
+~~~~~~
+Balrog
+~~~~~~
+
+When Firefox requests updates, Balrog first evaluates the :ref:`rules` normally.
+If the resulting release is not newer than the pin, that release is returned.
+This is important for ensuring that the installation doesn't skip over a watershed.
+
+If the release that the rules evaluate to is newer than the pin, the ``pinnable_releases`` table is consulted.
+If the pinning table has an entry matching the pin, and returning its release wouldn't downgrade the installation, that pinned release is returned.