From 608105a7c508ed48055dca6e2d327c944ecb18ea Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Thu, 8 Dec 2016 12:09:59 -0500 Subject: [PATCH] Provide an online data migration to cleanup orphaned build requests This exhibits the failure reported in bug 1633734 when upgrading from mitaka to newton with some bad build request records that weren't cleaned up, and were created before API DB migration 013_build_request_extended_attrs when we didn't have the instance_uuid or instance records in the database. After 013_build_request_extended_attrs and the object change to BuildRequest in a5d3b57c3d4fb785c5f5eebf2559e495595a6b34 if we try loading up a 'dirty' build request DB record without the instance_uuid it fails with a ValueError, as shown in the functional test in this change. This also provides an online data migration (which will be backported to Newton for upgrades from Mitaka) that will query the API DB for build requests where instance_uuid=NULL and delete them. Change-Id: I8a05ee01ec7f6a6f88b896f78414fb5487e0071e Related-Bug: #1633734 (cherry picked from commit ab05b90295c64ff7e6925591dd87a467ef85c00f) --- nova/cmd/manage.py | 5 ++ nova/objects/build_request.py | 50 +++++++++++ .../tests/functional/db/test_build_request.py | 82 +++++++++++++++++++ 3 files changed, 137 insertions(+) diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py index 83e52393510..d21b3d9243e 100644 --- a/nova/cmd/manage.py +++ b/nova/cmd/manage.py @@ -81,6 +81,7 @@ from nova.i18n import _ from nova import objects from nova.objects import aggregate as aggregate_obj +from nova.objects import build_request as build_request_obj from nova.objects import flavor as flavor_obj from nova.objects import instance as instance_obj from nova.objects import instance_group as instance_group_obj @@ -793,6 +794,10 @@ class DbCommands(object): aggregate_obj.migrate_aggregates, aggregate_obj.migrate_aggregate_reset_autoincrement, instance_group_obj.migrate_instance_groups_to_api_db, + # Added in Ocata + # NOTE(mriedem): This online migration is going to be backported to + # Newton also since it's an upgrade issue when upgrading from Mitaka. + build_request_obj.delete_build_requests_with_no_instance_uuid, ) def __init__(self): diff --git a/nova/objects/build_request.py b/nova/objects/build_request.py index 37cb113ab4f..e0f2c6a81a3 100644 --- a/nova/objects/build_request.py +++ b/nova/objects/build_request.py @@ -19,6 +19,7 @@ from oslo_utils import versionutils from oslo_versionedobjects import exception as ovoo_exc import six +from sqlalchemy.sql import null from nova.db.sqlalchemy import api as db from nova.db.sqlalchemy import api_models @@ -391,3 +392,52 @@ def get_by_filters(cls, context, filters, limit=None, marker=None, return cls(context, objects=sorted_build_reqs[marker_index:limit_index]) + + +@db.api_context_manager.reader +def _get_build_requests_with_no_instance_uuid(context, limit): + """Returns up to $limit build_requests where instance_uuid is null""" + # build_requests don't use the SoftDeleteMixin so we don't have to filter + # on the deleted column. + return context.session.query(api_models.BuildRequest).\ + filter_by(instance_uuid=null()).\ + limit(limit).\ + all() + + +@db.api_context_manager.writer +def _destroy_in_db(context, id): + return context.session.query(api_models.BuildRequest).filter_by( + id=id).delete() + + +def delete_build_requests_with_no_instance_uuid(context, count): + """Online data migration which cleans up failed build requests from Mitaka + + build_requests were initially a mirror of instances and had similar fields + to satisfy listing/showing instances while they were building. In Mitaka + if an instance failed to build we'd delete the instance but didn't delete + the associated BuildRequest. In the Newton release we changed the schema + on the build_requests table to just store a serialized Instance object and + added an instance_uuid field which is expected to not be None as seen how + it's used in _from_db_object. However, failed build requests created before + that schema migration won't have the instance_uuid set and fail to load + as an object when calling BuildRequestList.get_all(). So we need to perform + a cleanup routine here where we search for build requests which do not have + the instance_uuid field set and delete them. + + :param context: The auth context used to query the database. + :type context: nova.context.RequestContext + :param count: The max number of build requests to delete. + :type count: int + :returns: 2-item tuple of + (number of orphaned build requests read from DB, number deleted) + """ + orphaned_build_requests = ( + _get_build_requests_with_no_instance_uuid(context, count)) + done = 0 + for orphan_buildreq in orphaned_build_requests: + result = _destroy_in_db(context, orphan_buildreq.id) + if result: + done += 1 + return len(orphaned_build_requests), done diff --git a/nova/tests/functional/db/test_build_request.py b/nova/tests/functional/db/test_build_request.py index a22192fa98d..c3a60f0e1bf 100644 --- a/nova/tests/functional/db/test_build_request.py +++ b/nova/tests/functional/db/test_build_request.py @@ -10,8 +10,11 @@ # License for the specific language governing permissions and limitations # under the License. +import datetime + from oslo_serialization import jsonutils from oslo_utils import uuidutils +import six from nova import context from nova import exception @@ -103,6 +106,85 @@ def test_save_not_found(self): db_req.destroy() self.assertRaises(exception.BuildRequestNotFound, db_req.save) + def _get_mitaka_db_build_request_no_instance_uuid(self): + fake_info_cache = objects.InstanceInfoCache(network_model=[]) + fake_secgroups = objects.SecurityGroupList() + # This is more or less taken straight from bug 1633734. + db_req = { + 'created_at': datetime.datetime(2016, 8, 2, 20, 26, 20), + 'updated_at': None, + 'project_id': self.context.project_id, + 'user_id': self.context.user_id, + 'display_name': 'admin-auth', + 'instance_metadata': None, + 'progress': 0, + 'vm_state': 'building', + 'task_state': 'scheduling', + 'image_ref': None, + 'access_ip_v4': None, + 'access_ip_v6': None, + 'info_cache': jsonutils.dumps(fake_info_cache.obj_to_primitive()), + 'security_groups': + jsonutils.dumps(fake_secgroups.obj_to_primitive()), + 'config_drive': 1, + 'key_name': 'Turbo_Fredriksson', + 'locked_by': None, + 'instance_uuid': None, + 'instance': None, + 'block_device_mappings': None, + } + return db_req + + def test_load_from_broken_mitaka_build_request_with_no_instance(self): + db_req = self._get_mitaka_db_build_request_no_instance_uuid() + db_req = self.build_req_obj._create_in_db(self.context, db_req) + # This should fail because the build request in the database does not + # have instance_uuid set, and BuildRequest.instance_uuid is not + # nullable so trying to set build_request.instance_uuid = None is going + # to raise the ValueError. + ex = self.assertRaises(ValueError, + build_request.BuildRequest._from_db_object, + self.context, self.build_req_obj, db_req) + self.assertIn('instance_uuid', six.text_type(ex)) + + def test_delete_build_requests_with_no_instance_uuid(self): + """Tests the online data migration used to cleanup failed Mitaka-era + build requests that have no instance_uuid set. + """ + # First let's create 2 of these busted build requests so we can test + # paging. + for x in range(2): + db_req = self._get_mitaka_db_build_request_no_instance_uuid() + self.build_req_obj._create_in_db(self.context, db_req) + # nova-manage uses an admin contet + ctxt = context.get_admin_context() + + # Make sure we can get 0 back without deleting any. + total, deleted = ( + build_request.delete_build_requests_with_no_instance_uuid(ctxt, 0)) + self.assertEqual(0, total) + self.assertEqual(0, deleted) + + # Delete only 1. + total, deleted = ( + build_request.delete_build_requests_with_no_instance_uuid(ctxt, 1)) + self.assertEqual(1, total) + self.assertEqual(1, deleted) + + # Delete 50 (default in nova-manage online_data_migrations). + total, deleted = ( + build_request.delete_build_requests_with_no_instance_uuid( + ctxt, 50)) + self.assertEqual(1, total) + self.assertEqual(1, deleted) + + # Do it again, nothing should come back. + total, deleted = ( + build_request.delete_build_requests_with_no_instance_uuid( + ctxt, 50)) + self.assertEqual(0, total) + self.assertEqual(0, deleted) + class BuildRequestListTestCase(test.NoDBTestCase): USES_DB_SELF = True