Skip to content

Commit

Permalink
Check hosts have no instances for AZ rename
Browse files Browse the repository at this point in the history
Update aggregate and update aggregate metadata API calls have the
ability to update availability zone name for the aggregate. If the
aggregate is not empty (has hosts with instances on it)
the update leads to discrepancy for objects saving availability zone as a
string but not reference.

From devstack DB they are:
- cinder.backups.availability_zone
- cinder.consistencygroups.availability_zone
- cinder.groups.availability_zone
- cinder.services.availability_zone
- cinder.volumes.availability_zone
- neutron.agents.availability_zone
- neutron.networks.availability_zone_hints
- neutron.router_extra_attributes.availability_zone_hints
- nova.dns_domains.availability_zone
- nova.instances.availability_zone
- nova.volume_usage_cache.availability_zone
- nova.shadow_dns_domains.availability_zone
- nova.shadow_instances.availability_zone
- nova.shadow_volume_usage_cache.availability_zone

Why that's bad?
First, API and Horizon show different values for host and instance for
example. Second, migration for instances with changed availability
zone fails with "No valid host found" for old AZ.

This change adds an additional check to aggregate an Update Aggregate API call.
With the check, it's not possible to rename AZ if the corresponding
aggregate has instances in any hosts.

PUT /os-aggregates/{aggregate_id} and
POST /os-aggregates/{aggregate_id}/action return HTTP 400 for
availability zone renaming if the hosts of the aggregate have any instances.
It's similar to conflicting AZ names error already available.

Change-Id: Ic27195e46502067c87ee9c71a811a3ca3f610b73
Closes-Bug: #1378904
  • Loading branch information
amadev authored and mriedem committed Mar 1, 2019
1 parent d5bde60 commit 8e19ef4
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 4 deletions.
25 changes: 21 additions & 4 deletions nova/compute/api.py
Expand Up @@ -5267,7 +5267,8 @@ def update_aggregate(self, context, aggregate_id, values):
aggregate.name = values.pop('name')
aggregate.save()
self.is_safe_to_update_az(context, values, aggregate=aggregate,
action_name=AGGREGATE_ACTION_UPDATE)
action_name=AGGREGATE_ACTION_UPDATE,
check_no_instances_in_az=True)
if values:
aggregate.update_metadata(values)
aggregate.updated_at = timeutils.utcnow()
Expand All @@ -5283,7 +5284,8 @@ def update_aggregate_metadata(self, context, aggregate_id, metadata):
"""Updates the aggregate metadata."""
aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
self.is_safe_to_update_az(context, metadata, aggregate=aggregate,
action_name=AGGREGATE_ACTION_UPDATE_META)
action_name=AGGREGATE_ACTION_UPDATE_META,
check_no_instances_in_az=True)
aggregate.update_metadata(metadata)
self.query_client.update_aggregates(context, [aggregate])
# If updated metadata include availability_zones, then the cache
Expand Down Expand Up @@ -5325,15 +5327,18 @@ def delete_aggregate(self, context, aggregate_id):

def is_safe_to_update_az(self, context, metadata, aggregate,
hosts=None,
action_name=AGGREGATE_ACTION_ADD):
action_name=AGGREGATE_ACTION_ADD,
check_no_instances_in_az=False):
"""Determine if updates alter an aggregate's availability zone.
:param context: local context
:param metadata: Target metadata for updating aggregate
:param aggregate: Aggregate to update
:param hosts: Hosts to check. If None, aggregate.hosts is used
:type hosts: list
:action_name: Calling method for logging purposes
:param action_name: Calling method for logging purposes
:param check_no_instances_in_az: if True, it checks
there is no instances on any hosts of the aggregate
"""
if 'availability_zone' in metadata:
Expand All @@ -5354,6 +5359,18 @@ def is_safe_to_update_az(self, context, metadata, aggregate,
"%s") % conflicting_azs
self._raise_invalid_aggregate_exc(action_name, aggregate.id,
msg)
same_az_name = (aggregate.availability_zone ==
metadata['availability_zone'])
if check_no_instances_in_az and not same_az_name:
instance_count_by_cell = (
nova_context.scatter_gather_skip_cell0(
context,
objects.InstanceList.get_count_by_hosts,
_hosts))
if any(cnt for cnt in instance_count_by_cell.values()):
msg = _("One or more hosts contain instances in this zone")
self._raise_invalid_aggregate_exc(
action_name, aggregate.id, msg)

def _raise_invalid_aggregate_exc(self, action_name, aggregate_id, reason):
if action_name == AGGREGATE_ACTION_ADD:
Expand Down
11 changes: 11 additions & 0 deletions nova/objects/instance.py
Expand Up @@ -1570,3 +1570,14 @@ def get_counts(cls, context, project_id, user_id=None):
'ram': <count across user>}}
"""
return cls._get_counts_in_db(context, project_id, user_id=user_id)

@staticmethod
@db_api.pick_context_manager_reader
def _get_count_by_hosts(context, hosts):
return context.session.query(models.Instance).\
filter_by(deleted=0).\
filter(models.Instance.host.in_(hosts)).count()

@classmethod
def get_count_by_hosts(cls, context, hosts):
return cls._get_count_by_hosts(context, hosts)
14 changes: 14 additions & 0 deletions nova/tests/functional/db/test_instance.py
Expand Up @@ -122,3 +122,17 @@ def test_populate_missing_availability_zones(self):
self.assertEqual(1, count_hit)
inst3 = objects.Instance.get_by_uuid(self.context, uuid3)
self.assertEqual('nova-test', inst3.availability_zone)

def test_get_count_by_hosts(self):
self._create_instance(host='fake_host1')
self._create_instance(host='fake_host1')
self._create_instance(host='fake_host2')
count = objects.InstanceList.get_count_by_hosts(
self.context, hosts=['fake_host1'])
self.assertEqual(2, count)
count = objects.InstanceList.get_count_by_hosts(
self.context, hosts=['fake_host2'])
self.assertEqual(1, count)
count = objects.InstanceList.get_count_by_hosts(
self.context, hosts=['fake_host1', 'fake_host2'])
self.assertEqual(3, count)
98 changes: 98 additions & 0 deletions nova/tests/functional/test_aggregates.py
Expand Up @@ -21,6 +21,7 @@
from nova.scheduler import weights
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional.api import client
from nova.tests.functional import fixtures as func_fixtures
from nova.tests.functional import integrated_helpers
import nova.tests.unit.image.fake
Expand Down Expand Up @@ -191,6 +192,22 @@ def _set_az_aggregate(self, agg, az):
}
self.admin_api.post_aggregate_action(agg['id'], action)

def _set_metadata(self, agg, metadata):
"""POST /os-aggregates/{aggregate_id}/action (set_metadata)
:param agg: Name of the nova aggregate
:param metadata: dict of aggregate metadata key/value pairs to add,
update, or remove if value=None (note "availability_zone" cannot be
nulled out once set).
"""
agg = self.aggregates[agg]
action = {
'set_metadata': {
'metadata': metadata
},
}
self.admin_api.post_aggregate_action(agg['id'], action)

def _grant_tenant_aggregate(self, agg, tenants):
"""Grant a set of tenants access to use an aggregate.
Expand All @@ -209,6 +226,87 @@ def _grant_tenant_aggregate(self, agg, tenants):
self.admin_api.post_aggregate_action(agg['id'], action)


class AggregatePostTest(AggregateRequestFiltersTest):

def test_set_az_for_aggreate_no_instances(self):
"""Should be possible to update AZ for an empty aggregate.
Check you can change the AZ name of an aggregate when it does
not contain any servers.
"""
self._set_az_aggregate('only-host1', 'fake-az')

def test_rename_to_same_az(self):
"""AZ rename should pass successfully if AZ name is not changed"""
az = 'fake-az'
self._set_az_aggregate('only-host1', az)
self._boot_server(az=az)
self._set_az_aggregate('only-host1', az)

def test_fail_set_az(self):
"""Check it is not possible to update a non-empty aggregate.
Check you cannot change the AZ name of an aggregate when it
contains any servers.
"""
az = 'fake-az'
self._set_az_aggregate('only-host1', az)
server = self._boot_server(az=az)
self.assertRaisesRegex(
client.OpenStackApiException,
'One or more hosts contain instances in this zone.',
self._set_az_aggregate, 'only-host1', 'new' + az)

# Configure for the SOFT_DELETED scenario.
self.flags(reclaim_instance_interval=300)
self.api.delete_server(server['id'])
server = self._wait_for_state_change(server, from_status='ACTIVE')
self.assertEqual('SOFT_DELETED', server['status'])
self.assertRaisesRegex(
client.OpenStackApiException,
'One or more hosts contain instances in this zone.',
self._set_az_aggregate, 'only-host1', 'new' + az)
# Force delete the SOFT_DELETED server.
self.api.api_post(
'/servers/%s/action' % server['id'], {'forceDelete': None})
# Wait for it to be deleted since forceDelete is asynchronous.
self._wait_until_deleted(server)
# Now we can rename the AZ since the server is gone.
self._set_az_aggregate('only-host1', 'new' + az)

def test_cannot_delete_az(self):
az = 'fake-az'
# Assign the AZ to the aggregate.
self._set_az_aggregate('only-host1', az)
# Set some metadata on the aggregate; note the "availability_zone"
# metadata key is not specified.
self._set_metadata('only-host1', {'foo': 'bar'})
# Verify the AZ was retained.
agg = self.admin_api.api_get(
'/os-aggregates/%s' %
self.aggregates['only-host1']['id']).body['aggregate']
self.assertEqual(az, agg['availability_zone'])


# NOTE: this test case has the same test methods as AggregatePostTest
# but for the AZ update it uses PUT /os-aggregates/{aggregate_id} method
class AggregatePutTest(AggregatePostTest):

def _set_az_aggregate(self, agg, az):
"""Set the availability_zone of an aggregate via PUT
:param agg: Name of the nova aggregate
:param az: Availability zone name
"""
agg = self.aggregates[agg]
body = {
'aggregate': {
'availability_zone': az,
},
}
self.admin_api.put_aggregate(agg['id'], body)


class TenantAggregateFilterTest(AggregateRequestFiltersTest):
def setUp(self):
super(TenantAggregateFilterTest, self).setUp()
Expand Down
@@ -0,0 +1,7 @@
---
fixes:
- |
``PUT /os-aggregates/{aggregate_id}`` and
``POST /os-aggregates/{aggregate_id}/action`` (for set_metadata action) will
now return HTTP 400 for availability zone renaming if the hosts of
the aggregate have any instances.

0 comments on commit 8e19ef4

Please sign in to comment.