From 3fe5c69b73f01a95fa6df017ea0557298fd6126c Mon Sep 17 00:00:00 2001
From: "zhong.zhou" <zhong.zhou@easystack.cn>
Date: Wed, 17 Jul 2024 18:29:46 +0800
Subject: [PATCH 01/12] nova-manage: modify image properties in request_spec

At present, we can modify the properties in the instance
system_metadata through the sub command image_property of
nova-manage, but there may be inconsistencies between their
values and those in request_specs.

And the migration is based on request_specs, so the same image
properties are also written to request_specs.

Closes-Bug: 2078999
Change-Id: Id36ecd022cb6f7f9a0fb131b0d202b79715870a9
(cherry picked from commit 2a1fad41453ca7ce15b1cd9b517055c4ccdd12cf)
(cherry picked from commit ebae97c62f1af6b3b9f6da2abfa920d6528ddb1b)
(cherry picked from commit ee30457accabcea10a62652d14d2cf08a6d57ac0)
---
 nova/cmd/manage.py                                 | 10 ++++++++--
 nova/tests/unit/cmd/test_manage.py                 | 14 ++++++++++++--
 ...mage-property-bug-2078999-c493fc259d316c24.yaml |  8 ++++++++
 3 files changed, 28 insertions(+), 4 deletions(-)
 create mode 100644 releasenotes/notes/nova-manage-image-property-bug-2078999-c493fc259d316c24.yaml

diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py
index 5485e21bbea..12cb60f135c 100644
--- a/nova/cmd/manage.py
+++ b/nova/cmd/manage.py
@@ -3306,9 +3306,10 @@ def _validate_image_properties(self, image_properties):
         # Return the dict so we can update the instance system_metadata
         return image_properties
 
-    def _update_image_properties(self, instance, image_properties):
+    def _update_image_properties(self, ctxt, instance, image_properties):
         """Update instance image properties
 
+        :param ctxt: nova.context.RequestContext
         :param instance: The instance to update
         :param image_properties: List of image properties and values to update
         """
@@ -3332,8 +3333,13 @@ def _update_image_properties(self, instance, image_properties):
         for image_property, value in image_properties.items():
             instance.system_metadata[f'image_{image_property}'] = value
 
+        request_spec = objects.RequestSpec.get_by_instance_uuid(
+            ctxt, instance.uuid)
+        request_spec.image = instance.image_meta
+
         # Save and return 0
         instance.save()
+        request_spec.save()
         return 0
 
     @action_description(_(
@@ -3368,7 +3374,7 @@ def set(self, instance_uuid=None, image_properties=None):
                 instance = objects.Instance.get_by_uuid(
                     cctxt, instance_uuid, expected_attrs=['system_metadata'])
                 return self._update_image_properties(
-                    instance, image_properties)
+                    ctxt, instance, image_properties)
         except ValueError as e:
             print(str(e))
             return 6
diff --git a/nova/tests/unit/cmd/test_manage.py b/nova/tests/unit/cmd/test_manage.py
index 0756624f6e6..70115ef9c74 100644
--- a/nova/tests/unit/cmd/test_manage.py
+++ b/nova/tests/unit/cmd/test_manage.py
@@ -4219,6 +4219,8 @@ def test_show_image_properties_unknown_failure(
             image_property='hw_disk_bus')
         self.assertEqual(1, ret, 'return code')
 
+    @mock.patch('nova.objects.RequestSpec.save')
+    @mock.patch('nova.objects.RequestSpec.get_by_instance_uuid')
     @mock.patch('nova.objects.Instance.get_by_uuid')
     @mock.patch('nova.context.target_cell')
     @mock.patch('nova.objects.Instance.save')
@@ -4227,7 +4229,8 @@ def test_show_image_properties_unknown_failure(
     @mock.patch('nova.context.get_admin_context',
                 new=mock.Mock(return_value=mock.sentinel.ctxt))
     def test_set_image_properties(
-        self, mock_instance_save, mock_target_cell, mock_get_instance
+        self, mock_instance_save, mock_target_cell, mock_get_instance,
+            mock_get_request_spec, mock_request_spec_save
     ):
         mock_target_cell.return_value.__enter__.return_value = \
             mock.sentinel.cctxt
@@ -4236,9 +4239,11 @@ def test_set_image_properties(
             vm_state=obj_fields.InstanceState.STOPPED,
             system_metadata={
                 'image_hw_disk_bus': 'virtio',
-            }
+            },
+            image_ref=''
         )
         mock_get_instance.return_value = instance
+        mock_get_request_spec.return_value = objects.RequestSpec()
         ret = self.commands.set(
             instance_uuid=uuidsentinel.instance,
             image_properties=['hw_cdrom_bus=sata']
@@ -4255,7 +4260,12 @@ def test_set_image_properties(
             instance.system_metadata.get('image_hw_disk_bus'),
             'image_hw_disk_bus'
         )
+        image_props = mock_get_request_spec.return_value.image.properties
+        self.assertEqual('sata', image_props.get('hw_cdrom_bus'))
+        self.assertEqual('virtio', image_props.get('hw_disk_bus'))
+
         mock_instance_save.assert_called_once()
+        mock_request_spec_save.assert_called_once()
 
     @mock.patch('nova.objects.Instance.get_by_uuid')
     @mock.patch('nova.objects.InstanceMapping.get_by_instance_uuid',
diff --git a/releasenotes/notes/nova-manage-image-property-bug-2078999-c493fc259d316c24.yaml b/releasenotes/notes/nova-manage-image-property-bug-2078999-c493fc259d316c24.yaml
new file mode 100644
index 00000000000..03123855e0e
--- /dev/null
+++ b/releasenotes/notes/nova-manage-image-property-bug-2078999-c493fc259d316c24.yaml
@@ -0,0 +1,8 @@
+---
+fixes:
+  - |
+    Before the `Bug 2078999 <https://bugs.launchpad.net/nova/+bug/2078999>`_ was fixed,
+    the ``nova-manage image_property set`` command would update the image properties
+    embedded in the instance but would not update the ones in the request specs. This
+    led to an unexpected rollback of the image properties that were updated by the
+    command after an instance migration.

From 6ee394f537b285a82b15479b571dd3fc6a7674a5 Mon Sep 17 00:00:00 2001
From: Sean Mooney <work@seanmooney.info>
Date: Thu, 12 Sep 2024 13:47:30 +0100
Subject: [PATCH 02/12] repoduce post liberty pre vicoria instance numa db
 issue

This change reproduces a bug in the db load of old
instance numa toplogy json blobs in
_migrate_legacy_dedicated_instance_cpuset
that failed to account for defaulting pcpuset to
the empty set when it is not in the json blob.

Related-Bug: #2080556
Change-Id: Ia0f327c501f65786d5b2538b2742ec2786486956
(cherry picked from commit 521db4a4353ac884252270cba226034e01062781)
(cherry picked from commit c00b76b25c3bcd48cf4d8417f4d0dd4cbf69770a)
(cherry picked from commit 7ae3afeb5bb46e52a5b73f3f973e9c2661b3f003)
---
 nova/objects/instance_numa.py                 |  5 +++
 nova/tests/unit/objects/test_instance_numa.py | 45 +++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/nova/objects/instance_numa.py b/nova/objects/instance_numa.py
index b7d69a90f96..e4a14eb9294 100644
--- a/nova/objects/instance_numa.py
+++ b/nova/objects/instance_numa.py
@@ -192,6 +192,11 @@ def _migrate_legacy_dedicated_instance_cpuset(cls, obj):
                 continue
 
             if cell.cpu_policy != obj_fields.CPUAllocationPolicy.DEDICATED:
+                # FIXME(sean-k-mooney): we should be setting the pcpuset
+                # to an empty set here
+                # if not 'pcpuset' in cell:
+                #     cell.pcpuset = set()
+                #     update_db = True
                 continue
 
             cell.pcpuset = cell.cpuset
diff --git a/nova/tests/unit/objects/test_instance_numa.py b/nova/tests/unit/objects/test_instance_numa.py
index 5a4c6da1cff..32b5e7f3085 100644
--- a/nova/tests/unit/objects/test_instance_numa.py
+++ b/nova/tests/unit/objects/test_instance_numa.py
@@ -372,6 +372,51 @@ def test_obj_from_db_obj(self):
             self.assertEqual(set(), obj_cell.cpuset)
             self.assertEqual(topo_cell.cpuset, obj_cell.pcpuset)
 
+    def test_obj_from_db_obj_no_pinning(self):
+        """Test of creating 'InstanceNUMATopology' OVO object from the
+        database primitives, which has an old version 'InstanceNUMACell'
+        primitives.
+
+        Prior to version 1.5, 'InstanceNUMACell' saves the instance CPUs in
+        the 'cpuset' field, for both the pinned CPUs of a dedicated and the
+        un-pinned CPUs of a shared instances, after version 1.5, any pinned
+        CPUs of dedicated instance are moved to 'pcpuset'. this test verifies
+        the CPU movement for instance with a 'dedicated' allocation policy.
+
+        This test is for the case where the instance has no pinned CPUs but
+        the instance has a numa topology such as when hugepages are used.
+        See bug: https://bugs.launchpad.net/nova/+bug/2080556 for more details.
+        """
+        fake_topo_obj_w_cell_v1_4 = objects.InstanceNUMATopology(
+            instance_uuid=fake_instance_uuid,
+            cells=[
+                objects.InstanceNUMACell(
+                    id=0, cpuset=set([1, 2]), memory=512,
+                    pagesize=2048),
+                objects.InstanceNUMACell(
+                    id=1, cpuset=set([3, 4]), memory=512,
+                    pagesize=2048),
+            ])
+
+        fake_topo_obj = copy.deepcopy(fake_topo_obj_w_cell_v1_4)
+        for cell in fake_topo_obj.cells:
+            cell.cpu_policy = objects.fields.CPUAllocationPolicy.SHARED
+
+        numa_topology = objects.InstanceNUMATopology.obj_from_db_obj(
+            self.context, fake_instance_uuid, fake_topo_obj._to_json())
+
+        for obj_cell, topo_cell in zip(
+                numa_topology.cells,
+                fake_topo_obj_w_cell_v1_4['cells']):
+            self.assertEqual(topo_cell.cpuset, obj_cell.cpuset)
+            # 'pcpuset' should be an empty set however
+            # obj_from_db_obj() or more specifically
+            # _migrate_legacy_dedicated_instance_cpuset() does not set
+            # 'pcpuset' to an empty set when it is not in the json data.
+            # self.assertEqual(set(), obj_cell.pcpuset)
+            self.assertRaises(
+                NotImplementedError, getattr, obj_cell, 'pcpuset')
+
 
 class TestInstanceNUMATopology(
     test_objects._LocalTest, _TestInstanceNUMATopology,

From 4c9267c30d6f530c98a810158c32d9b2e4385936 Mon Sep 17 00:00:00 2001
From: Sean Mooney <work@seanmooney.info>
Date: Thu, 12 Sep 2024 21:05:54 +0100
Subject: [PATCH 03/12] allow upgrade of pre-victoria InstanceNUMACells

This change ensures that if we are upgrading a
InstanceNUMACell object created before victoria
<1.5 that we properly set pcpuset=set() when
loading the object form the db.

This is requried to support instances with a numa
topology that do not use cpu pinning.

Depends-On: https://review.opendev.org/c/openstack/python-openstackclient/+/929236
Closes-Bug: #2080556
Change-Id: Iea55aabe71c250d8c8e93c61421450b909a7fa3d
(cherry picked from commit 2a870323c3d44d2056b326c184c435a484513532)
(cherry picked from commit 13c780ac84574037fa1617249cdee9102278e07d)
(cherry picked from commit e7d61fef6e13078e20071e435a7d868ec497cbee)
---
 nova/objects/instance_numa.py                 | 26 ++++++++++++-------
 nova/tests/unit/objects/test_instance_numa.py |  8 +-----
 ...-numa-object-upgrade-afa5bb96149ca2f5.yaml | 16 ++++++++++++
 3 files changed, 33 insertions(+), 17 deletions(-)
 create mode 100644 releasenotes/notes/instance-numa-object-upgrade-afa5bb96149ca2f5.yaml

diff --git a/nova/objects/instance_numa.py b/nova/objects/instance_numa.py
index e4a14eb9294..f112b08d535 100644
--- a/nova/objects/instance_numa.py
+++ b/nova/objects/instance_numa.py
@@ -14,6 +14,7 @@
 
 import itertools
 
+from oslo_log import log as logging
 from oslo_serialization import jsonutils
 from oslo_utils import versionutils
 
@@ -24,6 +25,8 @@
 from nova.objects import fields as obj_fields
 from nova.virt import hardware
 
+LOG = logging.getLogger(__name__)
+
 
 # TODO(berrange): Remove NovaObjectDictCompat
 @base.NovaObjectRegistry.register
@@ -61,6 +64,11 @@ def obj_make_compatible(self, primitive, target_version):
                     obj_fields.CPUAllocationPolicy.DEDICATED):
                 primitive['cpuset'] = primitive['pcpuset']
             primitive.pop('pcpuset', None)
+            LOG.warning(
+                f'Downgrading InstanceNUMACell to version {target_version} '
+                f'may cause the loss of pinned CPUs if mixing different '
+                f'verisons of nova on different hosts. This should not '
+                f'happen on any supported version after Victoria.')
 
         if target_version < (1, 4):
             primitive.pop('cpuset_reserved', None)
@@ -191,17 +199,15 @@ def _migrate_legacy_dedicated_instance_cpuset(cls, obj):
             if len(cell.cpuset) == 0:
                 continue
 
-            if cell.cpu_policy != obj_fields.CPUAllocationPolicy.DEDICATED:
-                # FIXME(sean-k-mooney): we should be setting the pcpuset
-                # to an empty set here
-                # if not 'pcpuset' in cell:
-                #     cell.pcpuset = set()
-                #     update_db = True
-                continue
+            if cell.cpu_policy == obj_fields.CPUAllocationPolicy.DEDICATED:
+                cell.pcpuset = cell.cpuset
+                cell.cpuset = set()
+                update_db = True
+            else:
+                if 'pcpuset' not in cell:
+                    cell.pcpuset = set()
+                    update_db = True
 
-            cell.pcpuset = cell.cpuset
-            cell.cpuset = set()
-            update_db = True
         return update_db
 
     # TODO(huaqiang): Remove after Yoga once we are sure these objects have
diff --git a/nova/tests/unit/objects/test_instance_numa.py b/nova/tests/unit/objects/test_instance_numa.py
index 32b5e7f3085..af1286565ba 100644
--- a/nova/tests/unit/objects/test_instance_numa.py
+++ b/nova/tests/unit/objects/test_instance_numa.py
@@ -409,13 +409,7 @@ def test_obj_from_db_obj_no_pinning(self):
                 numa_topology.cells,
                 fake_topo_obj_w_cell_v1_4['cells']):
             self.assertEqual(topo_cell.cpuset, obj_cell.cpuset)
-            # 'pcpuset' should be an empty set however
-            # obj_from_db_obj() or more specifically
-            # _migrate_legacy_dedicated_instance_cpuset() does not set
-            # 'pcpuset' to an empty set when it is not in the json data.
-            # self.assertEqual(set(), obj_cell.pcpuset)
-            self.assertRaises(
-                NotImplementedError, getattr, obj_cell, 'pcpuset')
+            self.assertEqual(set(), obj_cell.pcpuset)
 
 
 class TestInstanceNUMATopology(
diff --git a/releasenotes/notes/instance-numa-object-upgrade-afa5bb96149ca2f5.yaml b/releasenotes/notes/instance-numa-object-upgrade-afa5bb96149ca2f5.yaml
new file mode 100644
index 00000000000..13d149b3763
--- /dev/null
+++ b/releasenotes/notes/instance-numa-object-upgrade-afa5bb96149ca2f5.yaml
@@ -0,0 +1,16 @@
+---
+upgrade:
+  - |
+    In the victoria release, the instance_numa_topology object
+    was extended to enabled mix cpus (pinned and unpinned cpus)
+    in the same instance. This change added a new field pcpuset
+    to the instance_numa_topology object. While the change included
+    object conversion code to handle the upgrade, it did not account
+    for instances that have a numa_topology but were not pinned.
+    i.e. a flavor with hw:mem_page_size or hw:numa_nodes set but
+    without hw:cpu_policy set to dedicated. As a result, instances
+    created between liberty and victoria releases with such a flavor
+    cannot be started  after upgrade to victoria. This has now
+    been fixed. instances created post victoria are not affected by
+    this issue. see: https://bugs.launchpad.net/nova/+bug/2080556
+    for more details.
\ No newline at end of file

From d907c1f3617e26ff5112ef5dcd840e1336837f72 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer <gibi@redhat.com>
Date: Mon, 3 Feb 2025 11:00:44 +0100
Subject: [PATCH 04/12] Reproduce bug/2097359

In Victoria InstanceNUMACell ovo got the new pcpuset field and a
connected in place data migration. If the cpu_policy is dedicated the
content of the cpuset is moved to to the new pcpuset field during the
load of the data from the DB and the ovo is persisted back to the DB.
However during this data migration the version of the ovo is not bumped
to the latest, 1.6, version. Therefore the DB contains an inconsistent
object as it has the new pcpuset field from 1.6 but the
nova_object.version is still set to 1.4. It turned out that this can
cause that an old compute node having ovo 1.4 code will not request a
back levelling of the ovo even if it is already data migrated to 1.6
causing data loss from the compute perspective. Also if the compute
saves the object back to DB then the data loss becomes persistent.

Related-Bug: #2097359
Change-Id: I76ee9d59abc39e29c54be7217491e911b88a0621
(cherry picked from commit ae2f9bd573dc1923bdb2504f5a0ab7cf706109e8)
(cherry picked from commit 82b5bd6a7ed30d500d0e1025f4e056f05d8397ff)
(cherry picked from commit a3d3b89f2a8a9f29ead325cab4a071d470737238)
(cherry picked from commit db9a81050c9f15a98c9d4b011c974c35fa18b500)
---
 nova/tests/unit/objects/test_instance_numa.py | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/nova/tests/unit/objects/test_instance_numa.py b/nova/tests/unit/objects/test_instance_numa.py
index af1286565ba..c677fd09ca9 100644
--- a/nova/tests/unit/objects/test_instance_numa.py
+++ b/nova/tests/unit/objects/test_instance_numa.py
@@ -13,6 +13,7 @@
 import copy
 from unittest import mock
 
+from oslo_serialization import jsonutils
 from oslo_utils.fixture import uuidsentinel as uuids
 from oslo_versionedobjects import base as ovo_base
 import testtools
@@ -411,6 +412,46 @@ def test_obj_from_db_obj_no_pinning(self):
             self.assertEqual(topo_cell.cpuset, obj_cell.cpuset)
             self.assertEqual(set(), obj_cell.pcpuset)
 
+    def test__migrate_legacy_dedicated_instance_cpuset(self):
+        # Create a topology with a cell on latest version. Would be nice
+        # to create one the old 1.4 cell version directly but that is only
+        # possible indirectly as done below.
+        topo = objects.InstanceNUMATopology(
+            instance_uuid=fake_instance_uuid,
+            cells=[
+                objects.InstanceNUMACell(id=0, cpuset=set(), pcpuset={0, 1}),
+            ])
+        topo.cells[0].cpu_policy = objects.fields.CPUAllocationPolicy.DEDICATED
+
+        # Use the builtin backlevelling logic to pull it back to old cell
+        # version
+        topo_with_cell_1_4 = topo.obj_to_primitive(
+            target_version='1.3', version_manifest={'InstanceNUMACell': '1.4'})
+
+        # Just check that the backlevelling works, and we have a cell with
+        # version and data on 1.4 level
+        cell_1_4_primitive = topo_with_cell_1_4['nova_object.data']['cells'][0]
+        self.assertEqual('1.4', cell_1_4_primitive['nova_object.version'])
+        self.assertEqual(
+            (0, 1), cell_1_4_primitive['nova_object.data']['cpuset'])
+        self.assertNotIn('pcpuset', cell_1_4_primitive['nova_object.data'])
+
+        # Now simulate that such old data is loaded from the DB and migrated
+        # from 1.4 to 1.6 by the data migration
+        topo_loaded = objects.InstanceNUMATopology.obj_from_db_obj(
+            self.context, fake_instance_uuid,
+            jsonutils.dumps(topo_with_cell_1_4))
+
+        # In place data migration happens during load, cpuset data is moved to
+        # pcpuset
+        self.assertEqual(set(), topo_loaded.cells[0].cpuset)
+        self.assertEqual({0, 1}, topo_loaded.cells[0].pcpuset)
+        # but the object version isn't bumped. So when the
+        # data is saved back to the DB it still has the old version 1.4, but
+        # also it has the new pcpuset field from version 1.6. This is bug
+        # https://bugs.launchpad.net/nova/+bug/2097359.
+        self.assertEqual('1.4', topo_loaded.cells[0].VERSION)
+
 
 class TestInstanceNUMATopology(
     test_objects._LocalTest, _TestInstanceNUMATopology,

From 7bb6b8d8fa9927e92cc948904fffde56b92d8a41 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer <gibi@redhat.com>
Date: Mon, 3 Feb 2025 11:07:34 +0100
Subject: [PATCH 05/12] Update InstanceNUMACell version after data migration

The data migration of InstanceNUMACell 1.4 to 1.5 only moved the data to
the new pcpuset field but does not update the ovo version string of the
object in the DB. This resulted in an 1.6 data with an 1.4 version
string in the DB which subsequently causes that an old compute running
1.4 ovo version will think it got an old ovo even though the data is
already in the new format. This leads to instance lifecycle errors and
if the nova-compute saves the instance then it also leads to permanent
data loss.

So this change modified the data migration to also update the ovo version
in the DB so that the version string in the DB matches the schema the
data uses in the DB.

Related-Bug: #2097359
Change-Id: I9a99f10526f8e466ac04b035121b24be70a23dae
(cherry picked from commit 643a6a8a57945ceb1bf3c5971be2aeb3efd4fa72)
(cherry picked from commit 7ff108b5fb91db31f2273eebeb60272e1c6e71b1)
(cherry picked from commit f6be1131c29c5ef1a8410f893fc779d7245c4501)
(cherry picked from commit 2ffc00c9d53f0b7b0c239fa65336d5f31f32bf57)
---
 nova/objects/instance_numa.py                 | 11 ++++++++++-
 nova/tests/unit/objects/test_instance_numa.py |  7 ++-----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/nova/objects/instance_numa.py b/nova/objects/instance_numa.py
index f112b08d535..3a6d2e34763 100644
--- a/nova/objects/instance_numa.py
+++ b/nova/objects/instance_numa.py
@@ -198,14 +198,23 @@ def _migrate_legacy_dedicated_instance_cpuset(cls, obj):
         for cell in obj.cells:
             if len(cell.cpuset) == 0:
                 continue
-
+            # NOTE(gibi): This data migration populates the pcpuset field that
+            # is new in version 1.5. However below we bump the object version
+            # to 1.6 directly. This is intentional. The version 1.6 introduced
+            # a new possible value 'mixed' for the cpu_policy field. As that
+            # is a forward compatible change we don't have a specific data
+            # migration for it. But we also don't have an automated way to bump
+            # old object versions from 1.5 to 1.6. So we do it here just to
+            # avoid inconsistency between data and version in the DB.
             if cell.cpu_policy == obj_fields.CPUAllocationPolicy.DEDICATED:
                 cell.pcpuset = cell.cpuset
                 cell.cpuset = set()
+                cell.VERSION = '1.6'
                 update_db = True
             else:
                 if 'pcpuset' not in cell:
                     cell.pcpuset = set()
+                    cell.VERSION = '1.6'
                     update_db = True
 
         return update_db
diff --git a/nova/tests/unit/objects/test_instance_numa.py b/nova/tests/unit/objects/test_instance_numa.py
index c677fd09ca9..c99431e5678 100644
--- a/nova/tests/unit/objects/test_instance_numa.py
+++ b/nova/tests/unit/objects/test_instance_numa.py
@@ -446,11 +446,8 @@ def test__migrate_legacy_dedicated_instance_cpuset(self):
         # pcpuset
         self.assertEqual(set(), topo_loaded.cells[0].cpuset)
         self.assertEqual({0, 1}, topo_loaded.cells[0].pcpuset)
-        # but the object version isn't bumped. So when the
-        # data is saved back to the DB it still has the old version 1.4, but
-        # also it has the new pcpuset field from version 1.6. This is bug
-        # https://bugs.launchpad.net/nova/+bug/2097359.
-        self.assertEqual('1.4', topo_loaded.cells[0].VERSION)
+        # and the version is bumped to 1.6
+        self.assertEqual('1.6', topo_loaded.cells[0].VERSION)
 
 
 class TestInstanceNUMATopology(

From dcf2e3c962c7d4d39a32b1c39c41921a37a4e74a Mon Sep 17 00:00:00 2001
From: Balazs Gibizer <gibi@redhat.com>
Date: Mon, 3 Feb 2025 11:07:34 +0100
Subject: [PATCH 06/12] Update InstanceNUMACell version in more cases

The data migration of InstanceNUMACell 1.4 to 1.5 only moved the data to
the new pcpuset field but does not update the ovo version string of the
object in the DB. The previous patch added the missing version update
logic. However it only fixes the issue if the data is not already "half"
migrated to the new structure. So this patch adds logic to also do the
right thing if the wrong data migration already happened.

At the end the solution needs to consider multiple scenarios:
* data is never migrated to the new schema so the new code needs
  to migrate it and update the version string to match the new schema.
  (done by the previous patch)
* data is half migrated by the buggy code and the new code need to
  finish the migration by stamping the version in the DB.
* data is half migrated and then further modified to use the new 1.6
  feature cpu_policy mixed.
* data version is older in the DB than we can meaningfully upgrade

Closes-Bug: #2097359
Change-Id: I10ecfa7841b15637dea3e4736e90faa5f33ddff3
(cherry picked from commit 9507b7b92f9bcfff755ebef9fc6b115ef0b1167f)
(cherry picked from commit c0081262571bda2fc8dbab6534cbc2fa1afd8656)
(cherry picked from commit 882c5ebd8bca27edadf7831aab4f8d7e6e9e6075)
(cherry picked from commit e3db83ce866de40805263486843e0b4d491b72b0)
---
 nova/objects/instance_numa.py                 |  84 ++++++++++---
 nova/tests/unit/objects/test_instance_numa.py | 117 +++++++++++++++++-
 nova/tests/unit/objects/test_request_spec.py  |   3 +
 3 files changed, 186 insertions(+), 18 deletions(-)

diff --git a/nova/objects/instance_numa.py b/nova/objects/instance_numa.py
index 3a6d2e34763..a4c224e0c3d 100644
--- a/nova/objects/instance_numa.py
+++ b/nova/objects/instance_numa.py
@@ -196,26 +196,78 @@ def _migrate_legacy_dedicated_instance_cpuset(cls, obj):
         # come from instance_extra or request_spec too.
         update_db = False
         for cell in obj.cells:
-            if len(cell.cpuset) == 0:
+            version = versionutils.convert_version_to_tuple(cell.VERSION)
+
+            if version < (1, 4):
+                LOG.warning(
+                    "InstanceNUMACell %s with version %s for instance %s has "
+                    "too old version in the DB, don't know how to update, "
+                    "ignoring.", cell, cell.VERSION, obj.instance_uuid)
+                continue
+
+            if (version >= (1, 5) and
+                cell.cpu_policy == obj_fields.CPUAllocationPolicy.DEDICATED and
+                (cell.cpuset or not cell.pcpuset)
+            ):
+                LOG.warning(
+                    "InstanceNUMACell %s with version %s is inconsistent as "
+                    "the version is 1.5 or greater, cpu_policy is dedicated, "
+                    "but cpuset is not empty or pcpuset is empty.",
+                    cell, cell.VERSION)
                 continue
-            # NOTE(gibi): This data migration populates the pcpuset field that
-            # is new in version 1.5. However below we bump the object version
-            # to 1.6 directly. This is intentional. The version 1.6 introduced
-            # a new possible value 'mixed' for the cpu_policy field. As that
-            # is a forward compatible change we don't have a specific data
-            # migration for it. But we also don't have an automated way to bump
-            # old object versions from 1.5 to 1.6. So we do it here just to
-            # avoid inconsistency between data and version in the DB.
-            if cell.cpu_policy == obj_fields.CPUAllocationPolicy.DEDICATED:
-                cell.pcpuset = cell.cpuset
-                cell.cpuset = set()
-                cell.VERSION = '1.6'
-                update_db = True
-            else:
-                if 'pcpuset' not in cell:
+
+            # NOTE(gibi): The data migration between 1.4. and 1.5 populates the
+            # pcpuset field that is new in version 1.5. However below we update
+            # the object version to 1.6 directly. This is intentional. The
+            # version 1.6 introduced a new possible value 'mixed' for the
+            # cpu_policy field. As that is a forward compatible change we don't
+            # have a specific data migration for it. But we also don't have an
+            # automated way to update old object versions from 1.5 to 1.6. So
+            # we do it here just to avoid inconsistency between data and
+            # version in the DB.
+            if version < (1, 6):
+                if cell.cpu_policy == obj_fields.CPUAllocationPolicy.DEDICATED:
+                    if "pcpuset" not in cell or not cell.pcpuset:
+                        # this cell was never migrated to 1.6, migrate it.
+                        cell.pcpuset = cell.cpuset
+                        cell.cpuset = set()
+                        cell.VERSION = '1.6'
+                        update_db = True
+                    else:
+                        # This data was already migrated to 1.6 format but the
+                        # version string wasn't updated to 1.6. This happened
+                        # before the fix
+                        # https://bugs.launchpad.net/nova/+bug/2097360
+                        # Only update the version string.
+                        cell.VERSION = '1.6'
+                        update_db = True
+                elif cell.cpu_policy in (
+                        None, obj_fields.CPUAllocationPolicy.SHARED):
+                    # no data migration needed just add the new field and
+                    # stamp the new version in the DB
                     cell.pcpuset = set()
                     cell.VERSION = '1.6'
                     update_db = True
+                else:  # obj_fields.CPUAllocationPolicy.MIXED
+                    # This means the cell data already got updated to the 1.6
+                    # content as MIXED only supported with 1.6 but the version
+                    # was not updated to 1.6.
+                    # We should not do the data migration as that would trample
+                    # the pcpuset field. Just stamp the 1.6 version in the DB
+                    # and hope for the best.
+                    LOG.warning(
+                        "InstanceNUMACell %s with version %s for instance %s "
+                        "has older than 1.6 version in the DB but using the "
+                        "1.6 feature CPUAllocationPolicy.MIXED. So nova "
+                        "assumes that the data is in 1.6 format and only the "
+                        "version string is old. Correcting the version string "
+                        "in the DB.", cell, cell.VERSION, obj.instance_uuid)
+                    cell.VERSION = '1.6'
+                    update_db = True
+
+            # When the next ovo version 1.7 is added it needs to be handed
+            # here to do any migration if needed and to ensure the version in
+            # the DB is stamped to 1.7
 
         return update_db
 
diff --git a/nova/tests/unit/objects/test_instance_numa.py b/nova/tests/unit/objects/test_instance_numa.py
index c99431e5678..bd742e058ac 100644
--- a/nova/tests/unit/objects/test_instance_numa.py
+++ b/nova/tests/unit/objects/test_instance_numa.py
@@ -363,6 +363,7 @@ def test_obj_from_db_obj(self):
         fake_topo_obj = copy.deepcopy(fake_topo_obj_w_cell_v1_4)
         for cell in fake_topo_obj.cells:
             cell.cpu_policy = objects.fields.CPUAllocationPolicy.DEDICATED
+            cell.VERSION = '1.4'
 
         numa_topology = objects.InstanceNUMATopology.obj_from_db_obj(
             self.context, fake_instance_uuid, fake_topo_obj._to_json())
@@ -402,6 +403,7 @@ def test_obj_from_db_obj_no_pinning(self):
         fake_topo_obj = copy.deepcopy(fake_topo_obj_w_cell_v1_4)
         for cell in fake_topo_obj.cells:
             cell.cpu_policy = objects.fields.CPUAllocationPolicy.SHARED
+            cell.VERSION = '1.4'
 
         numa_topology = objects.InstanceNUMATopology.obj_from_db_obj(
             self.context, fake_instance_uuid, fake_topo_obj._to_json())
@@ -412,7 +414,7 @@ def test_obj_from_db_obj_no_pinning(self):
             self.assertEqual(topo_cell.cpuset, obj_cell.cpuset)
             self.assertEqual(set(), obj_cell.pcpuset)
 
-    def test__migrate_legacy_dedicated_instance_cpuset(self):
+    def test__migrate_legacy_dedicated_instance_cpuset_dedicate_1_4(self):
         # Create a topology with a cell on latest version. Would be nice
         # to create one the old 1.4 cell version directly but that is only
         # possible indirectly as done below.
@@ -446,7 +448,118 @@ def test__migrate_legacy_dedicated_instance_cpuset(self):
         # pcpuset
         self.assertEqual(set(), topo_loaded.cells[0].cpuset)
         self.assertEqual({0, 1}, topo_loaded.cells[0].pcpuset)
-        # and the version is bumped to 1.6
+        # and the version is updated to 1.6
+        self.assertEqual('1.6', topo_loaded.cells[0].VERSION)
+
+    def test__migrate_legacy_dedicated_instance_cpuset_shared_1_4(self):
+        # Create a topology with a cell on latest version. Would be nice
+        # to create one the old 1.4 cell version directly but that is only
+        # possible indirectly as done below.
+        topo = objects.InstanceNUMATopology(
+            instance_uuid=fake_instance_uuid,
+            cells=[
+                objects.InstanceNUMACell(id=0, cpuset={0, 1}, pcpuset=set()),
+            ])
+        topo.cells[0].cpu_policy = objects.fields.CPUAllocationPolicy.SHARED
+
+        # Use the builtin backlevelling logic to pull it back to old cell
+        # version
+        topo_with_cell_1_4 = topo.obj_to_primitive(
+            target_version='1.3', version_manifest={'InstanceNUMACell': '1.4'})
+
+        # Just check that the backlevelling works, and we have a cell with
+        # version and data on 1.4 level
+        cell_1_4_primitive = topo_with_cell_1_4['nova_object.data']['cells'][0]
+        self.assertEqual('1.4', cell_1_4_primitive['nova_object.version'])
+        self.assertEqual(
+            (0, 1), cell_1_4_primitive['nova_object.data']['cpuset'])
+        self.assertNotIn('pcpuset', cell_1_4_primitive['nova_object.data'])
+
+        # Now simulate that such old data is loaded from the DB and migrated
+        # from 1.4 to 1.6 by the data migration
+        topo_loaded = objects.InstanceNUMATopology.obj_from_db_obj(
+            self.context, fake_instance_uuid,
+            jsonutils.dumps(topo_with_cell_1_4))
+
+        # In place data migration did not move the data as the cpu_policy is
+        # shared
+        self.assertEqual({0, 1}, topo_loaded.cells[0].cpuset)
+        self.assertEqual(set(), topo_loaded.cells[0].pcpuset)
+        # but the version is updated to 1.6
+        self.assertEqual('1.6', topo_loaded.cells[0].VERSION)
+
+    def test__migrate_legacy_dedicated_instance_cpuset_dedicated_half_migrated(
+        self
+    ):
+        # Before the fix for https://bugs.launchpad.net/nova/+bug/2097360
+        # landed Nova only half migrated the InstanceNUMACell from 1.4 to 1.6
+        # by doing the data move but not updating the version string in the DB.
+        # This test case ensures that if such migration happened before the fix
+        # was deployed then Nova fixed the DB content on the next load as well.
+
+        # Create a topology with a cell on latest version. Would be nice
+        # to create one the old 1.4 cell version directly but that is only
+        # possible indirectly as done below.
+        topo = objects.InstanceNUMATopology(
+            instance_uuid=fake_instance_uuid,
+            cells=[
+                objects.InstanceNUMACell(id=0, cpuset=set(), pcpuset={0, 1}),
+            ])
+        topo.cells[0].cpu_policy = objects.fields.CPUAllocationPolicy.DEDICATED
+
+        # simulate the half done migration by pulling back the version string
+        # in the primitive form to 1.4 while keeping the data on 1.6 format.
+        topo_primitive = topo.obj_to_primitive()
+        cell_primitive = topo_primitive['nova_object.data']['cells'][0]
+        self.assertEqual('1.6', cell_primitive['nova_object.version'])
+        cell_primitive['nova_object.version'] = '1.4'
+
+        topo_loaded = objects.InstanceNUMATopology.obj_from_db_obj(
+            self.context, fake_instance_uuid, jsonutils.dumps(topo_primitive))
+
+        # the data did not change
+        self.assertEqual(set(), topo_loaded.cells[0].cpuset)
+        self.assertEqual({0, 1}, topo_loaded.cells[0].pcpuset)
+        # but the version is updated to 1.6
+        self.assertEqual('1.6', topo_loaded.cells[0].VERSION)
+
+    def test__migrate_legacy_dedicated_instance_cpuset_mixed_1_4(self):
+        # Before the fix for https://bugs.launchpad.net/nova/+bug/2097360
+        # landed Nova only half migrated the InstanceNUMACell from 1.4 to 1.6
+        # by doing the data move but not updating the version string in the DB.
+        # After this the instance is resized to flavor with mixed cpu_policy
+        # then there is a good chance that the DB has version string 1.4 but
+        # the data is on 1.6 format with the cpu_policy set to mixed.
+        # This test case ensures that if such situation happened before the fix
+        # was deployed then Nova fixed the DB content on the next load as well.
+
+        # Create a topology with a cell on latest version. Would be nice
+        # to create one the old 1.4 cell version directly but that is only
+        # possible indirectly as done below.
+        topo = objects.InstanceNUMATopology(
+            instance_uuid=fake_instance_uuid,
+            cells=[
+                objects.InstanceNUMACell(id=0, cpuset={0, 1}, pcpuset={2, 3}),
+            ])
+        topo.cells[0].cpu_policy = objects.fields.CPUAllocationPolicy.MIXED
+
+        # simulate the half done migration by pulling back the version string
+        # in the primitive form to 1.4 while keeping the data on 1.6 format.
+        topo_primitive = topo.obj_to_primitive()
+        cell_primitive = topo_primitive['nova_object.data']['cells'][0]
+        self.assertEqual('1.6', cell_primitive['nova_object.version'])
+        cell_primitive['nova_object.version'] = '1.4'
+
+        topo_loaded = objects.InstanceNUMATopology.obj_from_db_obj(
+            self.context, fake_instance_uuid, jsonutils.dumps(topo_primitive))
+
+        # the data did not change
+        self.assertEqual({0, 1}, topo_loaded.cells[0].cpuset)
+        self.assertEqual({2, 3}, topo_loaded.cells[0].pcpuset)
+        self.assertEqual(
+            objects.fields.CPUAllocationPolicy.MIXED,
+            topo_loaded.cells[0].cpu_policy)
+        # but the version is updated to 1.6
         self.assertEqual('1.6', topo_loaded.cells[0].VERSION)
 
 
diff --git a/nova/tests/unit/objects/test_request_spec.py b/nova/tests/unit/objects/test_request_spec.py
index 58b98592340..fd2f4c38d14 100644
--- a/nova/tests/unit/objects/test_request_spec.py
+++ b/nova/tests/unit/objects/test_request_spec.py
@@ -722,6 +722,9 @@ def test_get_by_instance_uuid_numa_topology_migration(
                     id=1, cpuset={3, 4}, memory=512, cpu_policy="dedicated"),
             ]
         )
+        for cell in numa_topology.cells:
+            cell.VERSION = '1.4'
+
         spec_obj = fake_request_spec.fake_spec_obj()
         spec_obj.numa_topology = numa_topology
         fake_spec = fake_request_spec.fake_db_spec(spec_obj)

From a5e0048b40c4329b7217a306af616ee72dfe8b9a Mon Sep 17 00:00:00 2001
From: melanie witt <melwittt@gmail.com>
Date: Thu, 20 Feb 2025 20:44:04 +0000
Subject: [PATCH 07/12] Reproducer for bug 2098892

Change I60d6f04d374e9ede5895a43b7a75e955b0fea3c5 added tpool.Proxy
wrapping to the listDevices() and listAllDevices() methods but
introduced a regression for listDevices() that led to an error in
update_available_resource():

  TypeError: virNodeDeviceLookupByName() argument 2 must be str or
    None, not Proxy

The error was not caught by unit or functional testing because those
test environments intentionally do not import the libvirt Python
module -- so mocked code in the LibvirtFixture runs instead. Also, the
update_available_resource() method has a 'except Exception:' at the end
which logs an error but does not re-raise. So it would not cause a
functional test to fail.

This adds a functional test to reproduce the bug and adds a keyword arg
to the test _run_periodics() method to specify whether it should raise
an exception if an error is logged.

Related-Bug: #2098892

Change-Id: I3a3dda57f2181b24bd6589ac7bb8160014ab2396
(cherry picked from commit 3cf6667c503a5c27c85c2129e4495cf8bf53d66c)
(cherry picked from commit 173defb64c0c9cd7699b987b28485e037a561af0)
(cherry picked from commit f516a0c45cc638c2114e0a496092e4565d3aa1e3)
(cherry picked from commit c9bc74e05877428eed2b7744f4f4123ee5649139)
---
 nova/test.py                                  |  11 +-
 .../regressions/test_bug_2098892.py           | 119 ++++++++++++++++++
 2 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 nova/tests/functional/regressions/test_bug_2098892.py

diff --git a/nova/test.py b/nova/test.py
index 1cf605f10af..2871c95e8e6 100644
--- a/nova/test.py
+++ b/nova/test.py
@@ -482,7 +482,7 @@ def _start_compute(self, host, cell_name=None):
         self.computes[host] = compute
         return compute
 
-    def _run_periodics(self):
+    def _run_periodics(self, raise_on_error=False):
         """Run the update_available_resource task on every compute manager
 
         This runs periodics on the computes in an undefined order; some child
@@ -497,6 +497,15 @@ class redefine this function to force a specific order.
             with context.target_cell(
                     ctx, self.host_mappings[host].cell_mapping) as cctxt:
                 compute.manager.update_available_resource(cctxt)
+
+        if raise_on_error:
+            if 'Traceback (most recent call last' in self.stdlog.logger.output:
+                # Get the last line of the traceback, for example:
+                #   TypeError: virNodeDeviceLookupByName() argument 2 must be
+                #   str or None, not Proxy
+                last_tb_line = self.stdlog.logger.output.splitlines()[-1]
+                raise TestingException(last_tb_line)
+
         LOG.info('Finished with periodics')
 
     def restart_compute_service(self, compute, keep_hypervisor_state=True):
diff --git a/nova/tests/functional/regressions/test_bug_2098892.py b/nova/tests/functional/regressions/test_bug_2098892.py
new file mode 100644
index 00000000000..909a77bc92c
--- /dev/null
+++ b/nova/tests/functional/regressions/test_bug_2098892.py
@@ -0,0 +1,119 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from nova import test
+from nova.tests.fixtures import libvirt as fakelibvirt
+from nova.tests.functional.libvirt import test_vgpu
+
+
+class VGPUTestsListDevices(test_vgpu.VGPUTestBase):
+    """Regression test for bug 2098892.
+
+    Test that nodeDeviceLookupByName() is called with valid types to prevent:
+
+    File "/usr/lib64/python3.9/site-packages/libvirt.py", line 5201, in
+      nodeDeviceLookupByName ret =
+      libvirtmod.virNodeDeviceLookupByName(self._o, name)
+      TypeError: virNodeDeviceLookupByName() argument 2 must be str or None,
+      not Proxy
+
+    in the future. This test relies on the LibvirtFixture checking for the
+    correct types in its nodeDeviceLookupByName() method and raising TypeError
+    if they are invalid.
+
+    We don't test this by importing the libvirt module because the libvirt
+    module is forbidden to be imported into our test environment. It is
+    excluded from test-requirements.txt and we also use the
+    ImportModulePoisonFixture in nova/test.py to prevent use of modules such as
+    libvirt.
+    """
+
+    def setUp(self):
+        super().setUp()
+
+        # Start compute supporting only nvidia-11
+        self.flags(
+            enabled_mdev_types=fakelibvirt.NVIDIA_11_VGPU_TYPE,
+            group='devices')
+
+        self.start_compute_with_vgpu('host1')
+
+        def fake_nodeDeviceLookupByName(self, name):
+            # See bug https://bugs.launchpad.net/nova/+bug/2098892
+            # We don't test this by importing the libvirt module because the
+            # libvirt module is forbidden to be imported into our test
+            # environment.  It is excluded from test-requirements.txt and we
+            # also use the ImportModulePoisonFixture in nova/test.py to prevent
+            # use of modules such as libvirt.
+            if not isinstance(name, str) and name is not None:
+                raise TypeError(
+                    'virNodeDeviceLookupByName() argument 2 must be str or '
+                    f'None, not {type(name)}')
+
+        # FIXME(melwitt): We need to patch this only for this test because if
+        # we add it to the LibvirtFixture right away, it will cause the
+        # following additional tests to fail:
+        #
+        # nova.tests.functional.libvirt.test_reshape.VGPUReshapeTests
+        #   test_create_servers_with_vgpu
+        #
+        # nova.tests.functional.libvirt.test_vgpu.DifferentMdevClassesTests
+        #   test_create_servers_with_different_mdev_classes
+        #   test_resize_servers_with_mlx5
+        #
+        # nova.tests.functional.libvirt.test_vgpu.VGPULimitMultipleTypesTests
+        #   test_create_servers_with_vgpu
+        #
+        # nova.tests.functional.libvirt.test_vgpu.VGPULiveMigrationTests
+        #   test_live_migrate_server
+        #   test_live_migration_fails_on_old_source
+        #   test_live_migration_fails_due_to_non_supported_mdev_types
+        #   test_live_migration_fails_on_old_destination
+        #
+        # nova.tests.functional.libvirt.
+        #                           test_vgpu.VGPULiveMigrationTestsLMFailed
+        #   test_live_migrate_server
+        #   test_live_migration_fails_on_old_source
+        #   test_live_migration_fails_due_to_non_supported_mdev_types
+        #   test_live_migration_fails_on_old_destination
+        #
+        # nova.tests.functional.libvirt.test_vgpu.VGPUMultipleTypesTests
+        #   test_create_servers_with_specific_type
+        #   test_create_servers_with_vgpu
+        #
+        # nova.tests.functional.libvirt.test_vgpu.VGPUTests
+        #   test_multiple_instance_create
+        #   test_create_servers_with_vgpu
+        #   test_create_server_with_two_vgpus_isolated
+        #   test_resize_servers_with_vgpu
+        #
+        # nova.tests.functional.libvirt.test_vgpu.VGPUTestsLibvirt7_3
+        #   test_create_servers_with_vgpu
+        #   test_create_server_with_two_vgpus_isolated
+        #   test_resize_servers_with_vgpu
+        #   test_multiple_instance_create
+        #
+        # nova.tests.functional.regressions.
+        #                               test_bug_1951656.VGPUTestsLibvirt7_7
+        #   test_create_servers_with_vgpu
+        self.stub_out(
+            'nova.tests.fixtures.libvirt.Connection.nodeDeviceLookupByName',
+            fake_nodeDeviceLookupByName)
+
+    def test_update_available_resource(self):
+        # We only want to verify no errors were logged by
+        # update_available_resource (logging under the 'except Exception:').
+        # FIXME(melwitt): This currently will log an error and traceback
+        # because of the bug. Update this when the bug is fixed.
+        e = self.assertRaises(
+            test.TestingException, self._run_periodics, raise_on_error=True)
+        self.assertIn('TypeError', str(e))

From 458b443e5ffc828a827dbff1d2db85c77bdd47b9 Mon Sep 17 00:00:00 2001
From: melanie witt <melwittt@gmail.com>
Date: Thu, 20 Feb 2025 04:09:44 +0000
Subject: [PATCH 08/12] libvirt: Fix regression of listDevices() return type

This a partial revert of change
I60d6f04d374e9ede5895a43b7a75e955b0fea3c5 which added tpool.Proxy
wrapping to the listDevices() and listAllDevices() methods.

The regression was caught during downstream testing with vGPUs and the
update_available_resource() periodic task was failing with:

  TypeError: virNodeDeviceLookupByName() argument 2 must be str or
    None, not Proxy

It turns out that while the listAllDevices() method returns a list of
virNodeDevice objects [1], the listDevices() method returns a list of
string names [2] and is generated from the corresponding function in C
[3].

The error was not caught by unit or functional testing because those
test environments intentionally do not import the libvirt Python
module -- so mocked code in the LibvirtFixture runs instead. Also, the
update_available_resource() method has a 'except Exception:' at the end
which logs an error but does not re-raise. So it would not cause a
functional test to fail.

This reverts the change that caused the regression, updates potentially
confusing docstrings, adds type annotations to the methods that use
listDevices(), and moves the nodeDeviceLookupByName type checking into
the LibvirtFixture.

Closes-Bug: #2098892

[1] https://github.com/libvirt/libvirt-python/blob/408815a/libvirt-override-virConnect.py#L520-L524
[2] https://github.com/libvirt/libvirt-python/blob/408815a/libvirt-override-api.xml#L448-L453
[3] https://libvirt.org/html/libvirt-libvirt-nodedev.html#virNodeListDevices

Change-Id: Ib5befdd3c13367daa208ff969f66cba693ae2c76
(cherry picked from commit 2c07aa06458a2e70d79f8b3c68960bdccd11ab57)
(cherry picked from commit c4f4ae784f2078b652d07053d5a69a81dba1a8f5)
(cherry picked from commit b8bfa1efbb71be1909897b39ecc12e687f177e89)
(cherry picked from commit c4467364647387a3b2bae06d61c1e8c7b363ea5f)
---
 nova/tests/fixtures/libvirt.py                | 11 +++
 .../regressions/test_bug_2098892.py           | 69 +------------------
 nova/tests/unit/virt/libvirt/test_host.py     | 24 +++----
 nova/virt/libvirt/host.py                     | 20 +++---
 4 files changed, 33 insertions(+), 91 deletions(-)

diff --git a/nova/tests/fixtures/libvirt.py b/nova/tests/fixtures/libvirt.py
index 4f484631189..4ce9978550a 100644
--- a/nova/tests/fixtures/libvirt.py
+++ b/nova/tests/fixtures/libvirt.py
@@ -2067,6 +2067,17 @@ def device_lookup_by_name(self, dev_name):
         return self.pci_info.get_device_by_name(dev_name)
 
     def nodeDeviceLookupByName(self, name):
+        # See bug https://bugs.launchpad.net/nova/+bug/2098892
+        # We don't test this by importing the libvirt module because the
+        # libvirt module is forbidden to be imported into our test
+        # environment. It is excluded from test-requirements.txt and we
+        # also use the ImportModulePoisonFixture in nova/test.py to prevent
+        # use of modules such as libvirt.
+        if not isinstance(name, str) and name is not None:
+            raise TypeError(
+                'virNodeDeviceLookupByName() argument 2 must be str or '
+                f'None, not {type(name)}')
+
         if name.startswith('mdev'):
             return self.mdev_info.get_device_by_name(name)
 
diff --git a/nova/tests/functional/regressions/test_bug_2098892.py b/nova/tests/functional/regressions/test_bug_2098892.py
index 909a77bc92c..a83b0315813 100644
--- a/nova/tests/functional/regressions/test_bug_2098892.py
+++ b/nova/tests/functional/regressions/test_bug_2098892.py
@@ -10,7 +10,6 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
-from nova import test
 from nova.tests.fixtures import libvirt as fakelibvirt
 from nova.tests.functional.libvirt import test_vgpu
 
@@ -47,73 +46,7 @@ def setUp(self):
 
         self.start_compute_with_vgpu('host1')
 
-        def fake_nodeDeviceLookupByName(self, name):
-            # See bug https://bugs.launchpad.net/nova/+bug/2098892
-            # We don't test this by importing the libvirt module because the
-            # libvirt module is forbidden to be imported into our test
-            # environment.  It is excluded from test-requirements.txt and we
-            # also use the ImportModulePoisonFixture in nova/test.py to prevent
-            # use of modules such as libvirt.
-            if not isinstance(name, str) and name is not None:
-                raise TypeError(
-                    'virNodeDeviceLookupByName() argument 2 must be str or '
-                    f'None, not {type(name)}')
-
-        # FIXME(melwitt): We need to patch this only for this test because if
-        # we add it to the LibvirtFixture right away, it will cause the
-        # following additional tests to fail:
-        #
-        # nova.tests.functional.libvirt.test_reshape.VGPUReshapeTests
-        #   test_create_servers_with_vgpu
-        #
-        # nova.tests.functional.libvirt.test_vgpu.DifferentMdevClassesTests
-        #   test_create_servers_with_different_mdev_classes
-        #   test_resize_servers_with_mlx5
-        #
-        # nova.tests.functional.libvirt.test_vgpu.VGPULimitMultipleTypesTests
-        #   test_create_servers_with_vgpu
-        #
-        # nova.tests.functional.libvirt.test_vgpu.VGPULiveMigrationTests
-        #   test_live_migrate_server
-        #   test_live_migration_fails_on_old_source
-        #   test_live_migration_fails_due_to_non_supported_mdev_types
-        #   test_live_migration_fails_on_old_destination
-        #
-        # nova.tests.functional.libvirt.
-        #                           test_vgpu.VGPULiveMigrationTestsLMFailed
-        #   test_live_migrate_server
-        #   test_live_migration_fails_on_old_source
-        #   test_live_migration_fails_due_to_non_supported_mdev_types
-        #   test_live_migration_fails_on_old_destination
-        #
-        # nova.tests.functional.libvirt.test_vgpu.VGPUMultipleTypesTests
-        #   test_create_servers_with_specific_type
-        #   test_create_servers_with_vgpu
-        #
-        # nova.tests.functional.libvirt.test_vgpu.VGPUTests
-        #   test_multiple_instance_create
-        #   test_create_servers_with_vgpu
-        #   test_create_server_with_two_vgpus_isolated
-        #   test_resize_servers_with_vgpu
-        #
-        # nova.tests.functional.libvirt.test_vgpu.VGPUTestsLibvirt7_3
-        #   test_create_servers_with_vgpu
-        #   test_create_server_with_two_vgpus_isolated
-        #   test_resize_servers_with_vgpu
-        #   test_multiple_instance_create
-        #
-        # nova.tests.functional.regressions.
-        #                               test_bug_1951656.VGPUTestsLibvirt7_7
-        #   test_create_servers_with_vgpu
-        self.stub_out(
-            'nova.tests.fixtures.libvirt.Connection.nodeDeviceLookupByName',
-            fake_nodeDeviceLookupByName)
-
     def test_update_available_resource(self):
         # We only want to verify no errors were logged by
         # update_available_resource (logging under the 'except Exception:').
-        # FIXME(melwitt): This currently will log an error and traceback
-        # because of the bug. Update this when the bug is fixed.
-        e = self.assertRaises(
-            test.TestingException, self._run_periodics, raise_on_error=True)
-        self.assertIn('TypeError', str(e))
+        self._run_periodics(raise_on_error=True)
diff --git a/nova/tests/unit/virt/libvirt/test_host.py b/nova/tests/unit/virt/libvirt/test_host.py
index ebbaf260e28..0ec65529b8c 100644
--- a/nova/tests/unit/virt/libvirt/test_host.py
+++ b/nova/tests/unit/virt/libvirt/test_host.py
@@ -2175,24 +2175,24 @@ def test_tpool_list_all_devices(self):
 
     def test_tpool_list_pci_devices(self):
         self._add_fake_host_devices()
-        devs = self.host.list_pci_devices()
-        self.assertEqual(8, len(devs))
-        for dev in devs:
-            self.assertIsInstance(dev, tpool.Proxy)
+        dev_names = self.host.list_pci_devices()
+        self.assertEqual(8, len(dev_names))
+        for name in dev_names:
+            self.assertIsInstance(name, str)
 
     def test_tpool_list_mdev_capable_devices(self):
         self._add_fake_host_devices()
-        devs = self.host.list_mdev_capable_devices()
-        self.assertEqual(3, len(devs))
-        for dev in devs:
-            self.assertIsInstance(dev, tpool.Proxy)
+        dev_names = self.host.list_mdev_capable_devices()
+        self.assertEqual(3, len(dev_names))
+        for name in dev_names:
+            self.assertIsInstance(name, str)
 
     def test_tpool_list_mediated_devices(self):
         self._add_fake_host_devices()
-        devs = self.host.list_mediated_devices()
-        self.assertEqual(1, len(devs))
-        for dev in devs:
-            self.assertIsInstance(dev, tpool.Proxy)
+        dev_names = self.host.list_mediated_devices()
+        self.assertEqual(1, len(dev_names))
+        for name in dev_names:
+            self.assertIsInstance(name, str)
 
 
 class LoadersTestCase(test.NoDBTestCase):
diff --git a/nova/virt/libvirt/host.py b/nova/virt/libvirt/host.py
index 531553720b1..873a097fd1a 100644
--- a/nova/virt/libvirt/host.py
+++ b/nova/virt/libvirt/host.py
@@ -1565,36 +1565,34 @@ def get_vdpa_device_path(
         nodedev = self.get_vdpa_nodedev_by_address(pci_address)
         return nodedev.vdpa_capability.dev_path
 
-    def list_pci_devices(self, flags=0):
+    def list_pci_devices(self, flags: int = 0) -> ty.List[str]:
         """Lookup pci devices.
 
-        :returns: a list of virNodeDevice instance
+        :returns: a list of strings, names of the virNodeDevice instances
         """
         return self._list_devices("pci", flags=flags)
 
-    def list_mdev_capable_devices(self, flags=0):
+    def list_mdev_capable_devices(self, flags: int = 0) -> ty.List[str]:
         """Lookup devices supporting mdev capabilities.
 
-        :returns: a list of virNodeDevice instance
+        :returns: a list of strings, names of the virNodeDevice instances
         """
         return self._list_devices("mdev_types", flags=flags)
 
-    def list_mediated_devices(self, flags=0):
+    def list_mediated_devices(self, flags: int = 0) -> ty.List[str]:
         """Lookup mediated devices.
 
-        :returns: a list of strings with the name of the instance
+        :returns: a list of strings, names of the virNodeDevice instances
         """
         return self._list_devices("mdev", flags=flags)
 
-    def _list_devices(self, cap, flags=0):
+    def _list_devices(self, cap, flags: int = 0) -> ty.List[str]:
         """Lookup devices.
 
-        :returns: a list of virNodeDevice instance
+        :returns: a list of strings, names of the virNodeDevice instances
         """
         try:
-            devs = [self._wrap_libvirt_proxy(dev)
-                    for dev in self.get_connection().listDevices(cap, flags)]
-            return devs
+            return self.get_connection().listDevices(cap, flags)
         except libvirt.libvirtError as ex:
             error_code = ex.get_error_code()
             if error_code == libvirt.VIR_ERR_NO_SUPPORT:

From 0fdd21fb4ba4d8c0f5ad45cb8bf1d2698c382c6d Mon Sep 17 00:00:00 2001
From: Elod Illes <elod.illes@est.tech>
Date: Tue, 13 May 2025 15:06:27 +0200
Subject: [PATCH 09/12] [tool] Fix backport validator for non-SLURP

non-SLURP branches are EOL'd in case they reach their end of maintained
phase. This could produce a situation when a patch is merged in a
non-SLURP branch that was deleted in the meantime and it's further
backports fail on gate with backport validator as the hash of the
non-SLURP version of the patch is not on any branch.

This patch fixes the above issue as follows: in case a hash is not
found on any branch, then it checks if it can be found under any *-eol
tag and only fails if there is not found either.

Change-Id: I56705bce8ee4354cd5cb1577a520c2d1c525f57b
(cherry picked from commit e383b465458969ec9271013f2b9e9f24b8225418)
(cherry picked from commit 8b0ae7243f8d581e1e73f0b9dcccf710666d931f)
(cherry picked from commit 88e49dd65c58536ba8dd39ab7cfde669a433f3f6)
(cherry picked from commit db438e55e62599faf2931d0992a5c7689ade3610)
---
 tools/check-cherry-picks.sh | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/check-cherry-picks.sh b/tools/check-cherry-picks.sh
index 74887a9178b..fe75867e59f 100755
--- a/tools/check-cherry-picks.sh
+++ b/tools/check-cherry-picks.sh
@@ -26,8 +26,11 @@ branches+=""
 for hash in $hashes; do
     branch=$(git branch -a --contains "$hash" 2>/dev/null| grep -oE '(master|stable/[a-z0-9.]+|unmaintained/[a-z0-9.]+)')
     if [ $? -ne 0 ]; then
-        echo "Cherry pick hash $hash not on any master, stable or unmaintained branches"
-        exit 1
+        branch=$(git tag --contains "$hash" 2>/dev/null| grep -oE '([0-9.]+-eol)')
+        if [ $? -ne 0 ]; then
+            echo "Cherry pick hash $hash not on any master, stable, unmaintained or EOL'd branches"
+            exit 1
+        fi
     fi
     branches+=" $branch"
     checked=$(($checked + 1))

From 5b57acbbda8224a5afc26b90b09c0a24b4dc3129 Mon Sep 17 00:00:00 2001
From: Zhang Hua <joshua.zhang@canonical.com>
Date: Fri, 24 May 2024 15:49:12 +0800
Subject: [PATCH 10/12] Fix deepcopy usage for BlockDeviceMapping in
 get_root_info

The method get_root_info sometimes receives a BlockDeviceMapping object,
which lacks a copy method. The previous code assumed root_bdm was always
an instance of DriverBlockDevice, a subclass of dict that supports the copy()
method. However, during testing, it was discovered that root_bdm could also
be a BlockDeviceMapping object, which does not have a copy method.

To address this, the change replaces the copy() call with copy.deepcopy()
according to the suggestion in the comment [1], which works for both
BlockDeviceMapping and DriverBlockDevice instances. The deepcopy method is
supported because oslo.versionedobjects implements the __deepcopy__ method.

This change ensures the function handles both object types correctly,
preventing the AttributeError observed during testing.

[1] https://review.opendev.org/c/openstack/nova/+/909611/4/nova/virt/libvirt/blockinfo.py

Change-Id: I9432718586855ff57e8e6a5cae064e0685dd01e8
(cherry picked from commit 065bf99fc79a3d086e1859f9542afaafa8c3bf00)
Signed-off-by: Zhang Hua <joshua.zhang@canonical.com>
(cherry picked from commit 9ff4953954dddf9985698869cbe9ff5d00857210)
(cherry picked from commit 608a73ee68e6036188b3d2087fddfe8209f50260)
---
 .../tests/unit/virt/libvirt/test_blockinfo.py | 44 +++++++++++++++++++
 nova/virt/libvirt/blockinfo.py                |  8 ++--
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/nova/tests/unit/virt/libvirt/test_blockinfo.py b/nova/tests/unit/virt/libvirt/test_blockinfo.py
index 5a0dbb40ce3..4e245093af6 100644
--- a/nova/tests/unit/virt/libvirt/test_blockinfo.py
+++ b/nova/tests/unit/virt/libvirt/test_blockinfo.py
@@ -1289,6 +1289,7 @@ def test_get_root_info_no_bdm_empty_image_meta(self, mock_find_dev):
 
     @mock.patch('nova.virt.libvirt.blockinfo.get_info_from_bdm')
     def test_get_root_info_bdm(self, mock_get_info):
+        # call get_root_info() with DriverBlockDevice
         instance = objects.Instance(**self.test_instance)
         image_meta = objects.ImageMeta.from_dict(self.test_image_meta)
         root_bdm = {'mount_device': '/dev/vda',
@@ -1318,6 +1319,49 @@ def test_get_root_info_bdm(self, mock_get_info):
                                               {}, 'virtio')
         mock_get_info.reset_mock()
 
+    @mock.patch('nova.virt.libvirt.blockinfo.get_info_from_bdm')
+    def test_get_root_info_bdm_with_deepcopy(self, mock_get_info):
+        # call get_root_info() with BlockDeviceMapping
+        instance = objects.Instance(**self.test_instance)
+        image_meta = objects.ImageMeta.from_dict(self.test_image_meta)
+        root_bdm = objects.BlockDeviceMapping(self.context,
+            **fake_block_device.FakeDbBlockDeviceDict(
+                {'id': 3, 'instance_uuid': uuids.instance,
+                 'device_name': '/dev/sda',
+                 'source_type': 'blank',
+                 'destination_type': 'local',
+                 'device_type': 'cdrom',
+                 'disk_bus': 'virtio',
+                 'volume_id': 'fake-volume-id-1',
+                 'boot_index': 0}))
+        # No root_device_name
+        blockinfo.get_root_info(
+            instance, 'kvm', image_meta, root_bdm, 'virtio', 'ide')
+        mock_get_info.assert_called_once_with(
+            instance, 'kvm', image_meta, root_bdm, {}, 'virtio')
+        mock_get_info.reset_mock()
+        # Both device names
+        blockinfo.get_root_info(
+            instance, 'kvm', image_meta, root_bdm, 'virtio', 'scsi',
+            root_device_name='/dev/sda')
+        mock_get_info.assert_called_once_with(
+            instance, 'kvm', image_meta, root_bdm, {}, 'virtio')
+        mock_get_info.reset_mock()
+        # Missing device names
+        original_bdm = copy.deepcopy(root_bdm)
+        root_bdm.device_name = ''
+        blockinfo.get_root_info(
+            instance, 'kvm', image_meta, root_bdm, 'virtio', 'scsi',
+            root_device_name='/dev/sda')
+        mock_get_info.assert_called_with(
+            instance, 'kvm', image_meta, mock.ANY, {}, 'virtio')
+        actual_call = mock_get_info.call_args
+        _, _, _, actual_bdm, _, _ = actual_call[0]
+        self.assertEqual(
+            original_bdm.obj_to_primitive(),
+            actual_bdm.obj_to_primitive()
+        )
+
     def test_get_boot_order_simple(self):
         disk_info = {
             'disk_bus': 'virtio',
diff --git a/nova/virt/libvirt/blockinfo.py b/nova/virt/libvirt/blockinfo.py
index 4efc6fbaeb1..4d03dc38ea2 100644
--- a/nova/virt/libvirt/blockinfo.py
+++ b/nova/virt/libvirt/blockinfo.py
@@ -69,6 +69,7 @@
 
 """
 
+import copy
 import itertools
 import operator
 
@@ -444,12 +445,13 @@ def get_root_info(instance, virt_type, image_meta, root_bdm,
                 'dev': block_device.strip_dev(root_device_name),
                 'boot_index': '1'}
 
+    root_bdm_copy = root_bdm
     if not get_device_name(root_bdm) and root_device_name:
-        root_bdm = root_bdm.copy()
-        root_bdm['device_name'] = root_device_name
+        root_bdm_copy = copy.deepcopy(root_bdm)
+        root_bdm_copy['device_name'] = root_device_name
 
     return get_info_from_bdm(
-        instance, virt_type, image_meta, root_bdm, {}, disk_bus,
+        instance, virt_type, image_meta, root_bdm_copy, {}, disk_bus,
     )
 
 

From 462cf03dae0aa906119c70aa46a73b8bb68a49d3 Mon Sep 17 00:00:00 2001
From: Pavlo Shchelokovskyy <shchelokovskyy@gmail.com>
Date: Tue, 20 Feb 2024 19:47:44 +0000
Subject: [PATCH 11/12] Fix device type when booting from ISO image

Closes-Bug: #2054446
Change-Id: Ib4cc34c30a8acf09a1ab6be89d0faa76fb6e705e
(cherry picked from commit 96a5c21f24f6a89411d12677ae5fa75adf26ed61)
(cherry picked from commit a8de6737d22d19fea3d93024fb0cbbe40d67dde9)
(cherry picked from commit b8fcc5fd224cb01728a524594bd08b347f59f7a0)
(cherry picked from commit 2c0463d70e712925254b1e348927633c8c884b5c)
---
 .../tests/unit/virt/libvirt/test_blockinfo.py | 20 ++++++++----
 nova/virt/libvirt/blockinfo.py                | 32 +++++++++++--------
 2 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/nova/tests/unit/virt/libvirt/test_blockinfo.py b/nova/tests/unit/virt/libvirt/test_blockinfo.py
index 4e245093af6..2fa024107d1 100644
--- a/nova/tests/unit/virt/libvirt/test_blockinfo.py
+++ b/nova/tests/unit/virt/libvirt/test_blockinfo.py
@@ -578,7 +578,7 @@ def test_get_disk_mapping_volumes_swap(self):
                                       'disk_bus': u'virtio',
                                       'device_type': u'disk'}]}
         instance_ref.flavor.swap = 5
-        image_meta = {}
+        image_meta = objects.ImageMeta.from_dict(None)
 
         mapping = blockinfo.get_disk_mapping("kvm", instance_ref,
                                              "virtio", "ide",
@@ -840,7 +840,7 @@ def test_get_disk_mapping_blockdev_root(self):
     def test_get_disk_mapping_blockdev_root_on_spawn(self):
         # A disk mapping with a blockdev initializing the default root
         instance_ref = objects.Instance(**self.test_instance)
-        image_meta = {}
+        image_meta = objects.ImageMeta.from_dict(None)
 
         block_device_info = {
             'image': [],
@@ -1287,6 +1287,18 @@ def test_get_root_info_no_bdm_empty_image_meta(self, mock_find_dev):
 
         self.assertEqual('virtio', info['bus'])
 
+    @mock.patch('nova.virt.libvirt.blockinfo.get_info_from_bdm')
+    def test_get_root_info_bdm_with_iso_image(self, mock_get_info):
+        self.test_image_meta['disk_format'] = 'iso'
+        instance = objects.Instance(**self.test_instance)
+        image_meta = objects.ImageMeta.from_dict(self.test_image_meta)
+        init_root_bdm = {'device_type': 'disk'}
+        iso_root_bdm = {'device_type': 'cdrom', 'disk_bus': 'ide'}
+        blockinfo.get_root_info(instance, 'kvm', image_meta, init_root_bdm,
+                                'virtio', 'ide')
+        mock_get_info.assert_called_once_with(instance, 'kvm', image_meta,
+                                              iso_root_bdm, {}, 'virtio')
+
     @mock.patch('nova.virt.libvirt.blockinfo.get_info_from_bdm')
     def test_get_root_info_bdm(self, mock_get_info):
         # call get_root_info() with DriverBlockDevice
@@ -1337,15 +1349,11 @@ def test_get_root_info_bdm_with_deepcopy(self, mock_get_info):
         # No root_device_name
         blockinfo.get_root_info(
             instance, 'kvm', image_meta, root_bdm, 'virtio', 'ide')
-        mock_get_info.assert_called_once_with(
-            instance, 'kvm', image_meta, root_bdm, {}, 'virtio')
         mock_get_info.reset_mock()
         # Both device names
         blockinfo.get_root_info(
             instance, 'kvm', image_meta, root_bdm, 'virtio', 'scsi',
             root_device_name='/dev/sda')
-        mock_get_info.assert_called_once_with(
-            instance, 'kvm', image_meta, root_bdm, {}, 'virtio')
         mock_get_info.reset_mock()
         # Missing device names
         original_bdm = copy.deepcopy(root_bdm)
diff --git a/nova/virt/libvirt/blockinfo.py b/nova/virt/libvirt/blockinfo.py
index 4d03dc38ea2..d55915cec6b 100644
--- a/nova/virt/libvirt/blockinfo.py
+++ b/nova/virt/libvirt/blockinfo.py
@@ -426,28 +426,32 @@ def get_device_name(bdm):
 def get_root_info(instance, virt_type, image_meta, root_bdm,
                   disk_bus, cdrom_bus, root_device_name=None):
 
+    # NOTE(mriedem): In case the image_meta object was constructed from
+    # an empty dict, like in the case of evacuate, we have to first check
+    # if disk_format is set on the ImageMeta object.
+    if is_iso := (image_meta.obj_attr_is_set('disk_format') and
+                  image_meta.disk_format == 'iso'):
+        root_device_bus = cdrom_bus
+        root_device_type = 'cdrom'
+    else:
+        root_device_bus = disk_bus
+        root_device_type = 'disk'
+
     if root_bdm is None:
-        # NOTE(mriedem): In case the image_meta object was constructed from
-        # an empty dict, like in the case of evacuate, we have to first check
-        # if disk_format is set on the ImageMeta object.
-        if (image_meta.obj_attr_is_set('disk_format') and
-                image_meta.disk_format == 'iso'):
-            root_device_bus = cdrom_bus
-            root_device_type = 'cdrom'
-        else:
-            root_device_bus = disk_bus
-            root_device_type = 'disk'
         if not root_device_name:
             root_device_name = find_disk_dev_for_disk_bus({}, root_device_bus)
-
         return {'bus': root_device_bus,
                 'type': root_device_type,
                 'dev': block_device.strip_dev(root_device_name),
                 'boot_index': '1'}
 
-    root_bdm_copy = root_bdm
-    if not get_device_name(root_bdm) and root_device_name:
-        root_bdm_copy = copy.deepcopy(root_bdm)
+    root_bdm_copy = copy.deepcopy(root_bdm)
+
+    if is_iso:
+        root_bdm_copy['disk_bus'] = root_device_bus
+        root_bdm_copy['device_type'] = root_device_type
+
+    if not get_device_name(root_bdm_copy) and root_device_name:
         root_bdm_copy['device_name'] = root_device_name
 
     return get_info_from_bdm(

From 6f8decf0b4f1aa2e96292b6a2ffc28249fe4af5e Mon Sep 17 00:00:00 2001
From: Amit Uniyal <auniyal@redhat.com>
Date: Wed, 18 Sep 2024 23:19:15 +0000
Subject: [PATCH 12/12] Update Nova bdm with updated swap info

This change updates bdm for swap in finish_resize and revert_resize
functionality.

Change also adds supporting:
- unit and functional tests
- docs: releasenote

Closes-Bug: #1552777

Change-Id: Ic8aaa0728a43936cd4c6e1ed590e01ba8f0fbf5b
(cherry picked from commit 15dccaeed3cd0cc5a4283ec66d7347becabb64df)
(cherry picked from commit 3d3ada4bce6d58f10ef9e6bc1021c5adb318edc9)
(cherry picked from commit 39714c24619b449758a1fe3ce141ef39e951adaf)
Signed-off-by: Elod Illes <elod.illes@est.tech>
---
 nova/compute/manager.py                       |  52 +++++++-
 nova/tests/functional/test_servers.py         |  67 ++++++++++
 nova/tests/unit/compute/test_compute_mgr.py   | 117 ++++++++++++++++++
 .../resize-swap-size-1e15e67c436f4b95.yaml    |  10 ++
 4 files changed, 242 insertions(+), 4 deletions(-)
 create mode 100644 releasenotes/notes/resize-swap-size-1e15e67c436f4b95.yaml

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 49d4d434d74..e16a159c5e9 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -4990,6 +4990,50 @@ def _delete_volume_attachments(self, ctxt, bdms):
                               'Error: %s', bdm.attachment_id, str(e),
                               instance_uuid=bdm.instance_uuid)
 
+    def _update_bdm_for_swap_to_finish_resize(
+            self, context, instance, confirm=True):
+        """This updates bdm.swap with new swap info"""
+
+        bdms = instance.get_bdms()
+        if not (instance.old_flavor and instance.new_flavor):
+            return bdms
+
+        if instance.old_flavor.swap == instance.new_flavor.swap:
+            return bdms
+
+        old_swap = instance.old_flavor.swap
+        new_swap = instance.new_flavor.swap
+        if not confirm:
+            # revert flavor on _finish_revert_resize
+            old_swap = instance.new_flavor.swap
+            new_swap = instance.old_flavor.swap
+
+        # add swap
+        if old_swap == 0 and new_swap:
+            # (auniyal)old_swap = 0 means we did not have swap bdm
+            # for this instance.
+            # and as there is a new_swap, its a swap addition
+            new_swap_bdm = block_device.create_blank_bdm(new_swap, 'swap')
+            bdm_obj = objects.BlockDeviceMapping(
+                context, instance_uuid=instance.uuid, **new_swap_bdm)
+            bdm_obj.update_or_create()
+            return instance.get_bdms()
+
+        # update swap
+        for bdm in bdms:
+            if bdm.guest_format == 'swap' and bdm.device_type == 'disk':
+                if new_swap > 0:
+                    LOG.info('Adding swap BDM.', instance=instance)
+                    bdm.volume_size = new_swap
+                    bdm.save()
+                    break
+                elif new_swap == 0:
+                    LOG.info('Deleting swap BDM.', instance=instance)
+                    bdm.destroy()
+                    bdms.objects.remove(bdm)
+                    break
+        return bdms
+
     @wrap_exception()
     @reverts_task_state
     @wrap_instance_event(prefix='compute')
@@ -5327,8 +5371,9 @@ def _finish_revert_resize(
     ):
         """Inner version of finish_revert_resize."""
         with self._error_out_instance_on_exception(context, instance):
-            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
-                context, instance.uuid)
+            bdms = self._update_bdm_for_swap_to_finish_resize(
+                context, instance, confirm=False)
+
             self._notify_about_instance_usage(
                     context, instance, "resize.revert.start")
             compute_utils.notify_about_instance_action(context, instance,
@@ -6265,8 +6310,7 @@ def _finish_resize_helper(self, context, disk_info, image, instance,
         The caller must revert the instance's allocations if the migration
         process failed.
         """
-        bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
-            context, instance.uuid)
+        bdms = self._update_bdm_for_swap_to_finish_resize(context, instance)
 
         with self._error_out_instance_on_exception(context, instance):
             image_meta = objects.ImageMeta.from_dict(image)
diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py
index d6846a84a6b..15f9a718c90 100644
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@@ -3207,6 +3207,73 @@ def test_finish_resize_fails_allocation_cleanup(self):
         self._test_resize_to_same_host_instance_fails(
             '_finish_resize', 'compute_finish_resize')
 
+    def _verify_swap_resize_in_bdm(self, server_id, swap_size):
+        """Verify swap dev in BDM"""
+        ctxt = context.get_admin_context()
+        bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
+            ctxt, server_id)
+        if swap_size != 0:
+            self.assertIn('swap', [bdm.guest_format for bdm in bdms])
+            swaps = [
+                bdm.volume_size for bdm in bdms if bdm.guest_format == 'swap']
+            self.assertEqual(len(swaps), 1)
+            self.assertIn(swap_size, swaps)
+        else:
+            self.assertNotIn('swap', [bdm.guest_format for bdm in bdms])
+
+    def _test_swap_resize(self, swap1, swap2, confirm=True):
+        fl_1 = self._create_flavor(swap=swap1)
+        fl_2 = self._create_flavor(swap=swap2)
+        server = self._create_server(flavor_id=fl_1, networks=[])
+        # before resize
+        self.assertEqual(server['flavor']['swap'], swap1)
+        server = self._resize_server(server, fl_2)
+        self.assertEqual(server['flavor']['swap'], swap2)
+        self._verify_swap_resize_in_bdm(server['id'], swap2)
+
+        if confirm:
+            server = self._confirm_resize(server)
+            # after resize
+            self.assertEqual(server['flavor']['swap'], swap2)
+            # verify block device mapping
+            self._verify_swap_resize_in_bdm(server['id'], swap2)
+        else:
+            server = self._revert_resize(server)
+            # after revert
+            self.assertEqual(server['flavor']['swap'], swap1)
+            # verify block device mapping
+            self._verify_swap_resize_in_bdm(server['id'], swap1)
+
+    def test_swap_expand_0_to_0_confirm(self):
+        self._test_swap_resize(0, 0)
+
+    def test_swap_expand_0_to_1024_confirm(self):
+        self._test_swap_resize(0, 1024)
+
+    def test_swap_expand_0_to_1024_revert(self):
+        self._test_swap_resize(0, 1024, confirm=False)
+
+    def test_swap_expand_1024_to_2048_confirm(self):
+        self._test_swap_resize(1024, 2048)
+
+    def test_swap_expand_1024_to_2048_revert(self):
+        self._test_swap_resize(1024, 2048, confirm=False)
+
+    def test_swap_expand_2048_to_2048_confirm(self):
+        self._test_swap_resize(2048, 2048)
+
+    def test_swap_shrink_1024_to_0_confirm(self):
+        self._test_swap_resize(1024, 0)
+
+    def test_swap_shrink_1024_to_0_revert(self):
+        self._test_swap_resize(1024, 0, confirm=False)
+
+    def test_swap_shrink_2048_to_1024_confirm(self):
+        self._test_swap_resize(2048, 1024)
+
+    def test_swap_shrink_2048_to_1024_revert(self):
+        self._test_swap_resize(2048, 1024, confirm=False)
+
     def _server_created_with_host(self):
         hostname = self.compute1.host
         server_req = self._build_server(
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index b98517721a8..47dd86ea5ed 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -13164,3 +13164,120 @@ def test_update_compute_provider_status_unexpected_error(self, m_exc):
         self.assertIn('An error occurred while updating '
                       'COMPUTE_STATUS_DISABLED trait',
                       m_exc.call_args_list[0][0][0])
+
+
+class ComputeManagerBDMUpdateTestCase(test.TestCase):
+
+    def setUp(self):
+        super(ComputeManagerBDMUpdateTestCase, self).setUp()
+        self.compute = manager.ComputeManager()
+        self.context = context.RequestContext(fakes.FAKE_USER_ID,
+                                              fakes.FAKE_PROJECT_ID)
+        self.instance = mock.Mock()
+
+    @mock.patch('nova.block_device.create_blank_bdm')
+    @mock.patch('nova.objects.BlockDeviceMapping')
+    def test_no_flavor_change(self, mock_bdm_obj, mock_create_bdm):
+        self.instance.get_bdms.return_value = []
+        self.instance.old_flavor = None
+        self.instance.new_flavor = None
+
+        bdms = self.compute._update_bdm_for_swap_to_finish_resize(
+            self.context, self.instance)
+
+        self.assertEqual(bdms, [])
+        self.instance.get_bdms.assert_called_once()
+
+    @mock.patch('nova.block_device.create_blank_bdm')
+    @mock.patch('nova.objects.BlockDeviceMapping.create')
+    def test_no_swap_change(self, mock_bdm_obj, mock_create_bdm):
+        self.instance.old_flavor = mock.Mock(swap=1024)
+        self.instance.new_flavor = mock.Mock(swap=1024)
+
+        existing_swap_bdm = objects.BlockDeviceMapping(
+            guest_format='swap',
+            device_type='disk',
+            volume_size=1024)
+
+        bdms = objects.BlockDeviceMappingList(objects=[existing_swap_bdm])
+
+        self.instance.get_bdms.return_value = bdms
+
+        new_bdms = self.compute._update_bdm_for_swap_to_finish_resize(
+            self.context, self.instance)
+
+        self.assertEqual(new_bdms, bdms)
+        self.instance.get_bdms.assert_called_once()
+
+    @mock.patch('nova.block_device.create_blank_bdm')
+    @mock.patch('nova.objects.BlockDeviceMapping')
+    def test_add_new_swap_bdm(self, mock_bdm_obj, mock_create_bdm):
+        self.instance.old_flavor = mock.Mock(swap=0)
+        self.instance.new_flavor = mock.Mock(swap=1024)
+
+        self.instance.get_bdms.return_value = []
+
+        new_swap_bdm = {
+            'guest_format': 'swap',
+            'device_type': 'disk',
+            'volume_size': 1024}
+        mock_create_bdm.return_value = new_swap_bdm
+        mock_bdm_instance = mock_bdm_obj.return_value
+
+        self.compute._update_bdm_for_swap_to_finish_resize(
+            self.context, self.instance)
+
+        mock_bdm_obj.assert_called_once_with(
+            self.context, instance_uuid=self.instance.uuid, **new_swap_bdm
+        )
+        mock_bdm_instance.update_or_create.assert_called_once()
+        # called twice
+        self.assertEqual(self.instance.get_bdms.call_count, 2)
+
+    @mock.patch('nova.block_device.create_blank_bdm')
+    @mock.patch('nova.objects.BlockDeviceMapping.create')
+    @mock.patch('nova.objects.BlockDeviceMapping.save')
+    def test_update_swap_bdm(
+            self, mock_bdm_save, mock_bdm_create,
+            mock_create_blank_bdm):
+        self.instance.old_flavor = mock.Mock(swap=1024)  # Existing swap size
+        self.instance.new_flavor = mock.Mock(swap=2048)  # New swap size
+
+        existing_swap_bdm = objects.BlockDeviceMapping(
+            guest_format='swap',
+            device_type='disk',
+            volume_size=1024)
+
+        bdms = objects.BlockDeviceMappingList(objects=[existing_swap_bdm])
+
+        self.instance.get_bdms.return_value = bdms
+
+        self.compute._update_bdm_for_swap_to_finish_resize(
+            self.context, self.instance)
+
+        # assert bdm get saved in DB
+        existing_swap_bdm.save.assert_called_once()
+        # here we are returning same bdms object, not freh from DB
+        self.assertEqual(existing_swap_bdm.volume_size, 2048)
+        # get_bdms is called only once
+        self.assertEqual(self.instance.get_bdms.call_count, 1)
+
+    @mock.patch('nova.block_device.create_blank_bdm')
+    @mock.patch('nova.objects.BlockDeviceMapping.destroy')
+    def test_delete_swap_bdm(self, mock_bdm_destroy, mock_create_bdm):
+        self.instance.old_flavor = mock.Mock(swap=1024)
+        self.instance.new_flavor = mock.Mock(swap=0)
+
+        existing_swap_bdm = objects.BlockDeviceMapping(
+            guest_format='swap',
+            device_type='disk',
+            volume_size=1024)
+
+        self.instance.get_bdms.return_value = objects.BlockDeviceMappingList(
+            objects=[existing_swap_bdm])
+
+        self.compute._update_bdm_for_swap_to_finish_resize(
+            self.context, self.instance)
+
+        mock_bdm_destroy.assert_called_once()
+        self.instance.get_bdms.assert_called_once()
diff --git a/releasenotes/notes/resize-swap-size-1e15e67c436f4b95.yaml b/releasenotes/notes/resize-swap-size-1e15e67c436f4b95.yaml
new file mode 100644
index 00000000000..d8fba0fb02d
--- /dev/null
+++ b/releasenotes/notes/resize-swap-size-1e15e67c436f4b95.yaml
@@ -0,0 +1,10 @@
+---
+
+fixes:
+  - |
+    With this change, operators can now resize the instance flavor swap to
+    a smaller swap size, it can be expand and shrunk down to 0 using the same
+    resize API.
+    For more details see: `bug 1552777`_
+
+    .. _`bug 1552777`: https://bugs.launchpad.net/nova/+bug/1552777