Skip to content

Commit

Permalink
Introduce API for aborting introspection
Browse files Browse the repository at this point in the history
It is not currently possible to stop a running introspection.  This
may be annoying for the operator, considering the amount of time it
takes for the bare metal node to call the continue API request,
especially knowing the introspection will fail/time-out eventually
(such as when debugging).

This patch introduces a REST API endpoint "POST
/v1/introspection/<node-UUID>/abort" in order to fill the gap.  Upon the
abort method call, following preconditions are checked:

* there's a bare metal node matching the UUID
* introspection was not finished for the node
* introspection process is waiting for the node to give the continue call

Following Responses are returned to the caller in case the
preconditions were not met:

* 404 Response in case node wasn't found
* 409 Response (resource busy) in case the introspection process is not
      waiting for the Continue call

Otherwise, a 202 Response is returned.

When the abort method is processed, the node is powered off and it is
black-listed in inspector's firewall to prevent it from booting the
introspection image. This happens asynchronously.

To prevent interference with the continue call processing, the
processing method was updated to give a 400 Response to the
introspection client in case continuing a finished or canceled
introspection.

Limitations:
* IMPI credentials are never updated in case introspection was canceled
* 202 response is returned even if the introspection was already finished
* the endpoint differs from requested "DELETE
  /v1/introspection/<node-UUID>"

Links:
[1] https://bugs.launchpad.net/ironic-inspector/+bug/1525235

Change-Id: If043171f0d292ae2775dc1f26233dd4911599247
Closes-Bug: #1525235
  • Loading branch information
dparalen committed Feb 9, 2016
1 parent dd369ea commit 7a3d937
Show file tree
Hide file tree
Showing 9 changed files with 283 additions and 2 deletions.
21 changes: 20 additions & 1 deletion doc/source/http-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,25 @@ Response body: JSON dictionary with keys:

* ``finished`` (boolean) whether introspection is finished
(``true`` on introspection completion or if it ends because of an error)
* ``error`` error string or ``null``
* ``error`` error string or ``null``; ``Canceled by operator`` in
case introspection was aborted


Abort Running Introspection
~~~~~~~~~~~~~~~~~~~~~~~~~~~

``POST /v1/introspection/<UUID>/abort`` abort running introspection.

Requires X-Auth-Token header with Keystone token for authentication.

Response:

* 202 - accepted
* 400 - bad request
* 401, 403 - missing or invalid authentication
* 404 - node cannot be found
* 409 - inspector has locked this node for processing


Get Introspection Data
~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -295,3 +313,4 @@ Version History
* **1.0** version of API at the moment of introducing versioning.
* **1.1** adds endpoint to retrieve stored introspection data.
* **1.2** endpoints for manipulating introspection rules.
* **1.3** endpoint for canceling running introspection
51 changes: 51 additions & 0 deletions ironic_inspector/introspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,54 @@ def _background_introspect_locked(ironic, node_info):
LOG.info(_LI('Introspection environment is ready, manual power on is '
'required within %d seconds'), CONF.timeout,
node_info=node_info)


def abort(uuid, token=None):
"""Abort running introspection.
:param uuid: node uuid
:param token: authentication token
:raises: Error
"""
LOG.debug('Aborting introspection for node %s', uuid)
ironic = utils.get_client(token)
node_info = node_cache.get_node(uuid, ironic=ironic, locked=False)

# check pending operations
locked = node_info.acquire_lock(blocking=False)
if not locked:
# Node busy --- cannot abort atm
raise utils.Error(_('Node is locked, please, retry later'),
node_info=node_info, code=409)

utils.spawn_n(_abort, node_info, ironic)


def _abort(node_info, ironic):
# runs in background
if node_info.finished_at is not None:
# introspection already finished; nothing to do
LOG.info(_LI('Cannot abort introspection as it is already '
'finished'), node_info=node_info)
node_info.release_lock()
return

# block this node from PXE Booting the introspection image
try:
firewall.update_filters(ironic)
except Exception as exc:
# Note(mkovacik): this will be retried in firewall update
# periodic task; we continue aborting
LOG.warning(_LW('Failed to update firewall filters: %s'), exc,
node_info=node_info)

# finish the introspection
LOG.debug('Forcing power-off', node_info=node_info)
try:
ironic.node.set_power_state(node_info.uuid, 'off')
except Exception as exc:
LOG.warning(_LW('Failed to power off node: %s'), exc,
node_info=node_info)

node_info.finished(error=_('Canceled by operator'))
LOG.info(_LI('Introspection aborted'), node_info=node_info)
14 changes: 13 additions & 1 deletion ironic_inspector/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
LOG = utils.getProcessingLogger(__name__)

MINIMUM_API_VERSION = (1, 0)
CURRENT_API_VERSION = (1, 2)
CURRENT_API_VERSION = (1, 3)
_MIN_VERSION_HEADER = 'X-OpenStack-Ironic-Inspector-API-Minimum-Version'
_MAX_VERSION_HEADER = 'X-OpenStack-Ironic-Inspector-API-Maximum-Version'
_VERSION_HEADER = 'X-OpenStack-Ironic-Inspector-API-Version'
Expand Down Expand Up @@ -209,6 +209,18 @@ def api_introspection(uuid):
error=node_info.error or None)


@app.route('/v1/introspection/<uuid>/abort', methods=['POST'])
@convert_exceptions
def api_introspection_abort(uuid):
utils.check_auth(flask.request)

if not uuidutils.is_uuid_like(uuid):
raise utils.Error(_('Invalid UUID value'), code=400)

introspect.abort(uuid, token=flask.request.headers.get('X-Auth-Token'))
return '', 202


@app.route('/v1/introspection/<uuid>/data', methods=['GET'])
@convert_exceptions
def api_introspection_data(uuid):
Expand Down
6 changes: 6 additions & 0 deletions ironic_inspector/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ def process(introspection_data):
LOG.info(_LI('Matching node is %s'), node_info.uuid,
node_info=node_info, data=introspection_data)

if node_info.finished_at is not None:
# race condition or introspection canceled
raise utils.Error(_('Node processing already finished with '
'error: %s') % node_info.error,
node_info=node_info, code=400)

try:
node = node_info.node()
except exceptions.NotFound:
Expand Down
27 changes: 27 additions & 0 deletions ironic_inspector/test/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ def call_introspect(self, uuid, new_ipmi_username=None,
def call_get_status(self, uuid):
return self.call('get', '/v1/introspection/%s' % uuid).json()

def call_abort_introspect(self, uuid):
return self.call('post', '/v1/introspection/%s/abort' % uuid)

def call_continue(self, data):
return self.call('post', '/v1/continue', data=data).json()

Expand Down Expand Up @@ -361,6 +364,30 @@ def test_root_device_hints(self):
status = self.call_get_status(self.uuid)
self.assertEqual({'finished': True, 'error': None}, status)

def test_abort_introspection(self):
self.call_introspect(self.uuid)
eventlet.greenthread.sleep(DEFAULT_SLEEP)
self.cli.node.set_power_state.assert_called_once_with(self.uuid,
'reboot')
status = self.call_get_status(self.uuid)
self.assertEqual({'finished': False, 'error': None}, status)

res = self.call_abort_introspect(self.uuid)
eventlet.greenthread.sleep(DEFAULT_SLEEP)

self.assertEqual(res.status_code, 202)
status = self.call_get_status(self.uuid)
self.assertTrue(status['finished'])
self.assertEqual('Canceled by operator', status['error'])

# Note(mkovacik): we're checking just this doesn't pass OK as
# there might be either a race condition (hard to test) that
# yields a 'Node already finished.' or an attribute-based
# look-up error from some pre-processing hooks because
# node_info.finished() deletes the look-up attributes only
# after releasing the node lock
self.call('post', '/v1/continue', self.data, expect_error=400)


@contextlib.contextmanager
def mocked_server():
Expand Down
106 changes: 106 additions & 0 deletions ironic_inspector/test/test_introspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,3 +416,109 @@ def test_wrong_state(self, client_mock, add_mock, filters_mock):

self.assertRaises(utils.Error, introspect.introspect, self.uuid,
new_ipmi_credentials=self.new_creds)


@mock.patch.object(utils, 'spawn_n',
lambda f, *a, **kw: f(*a, **kw) and None)
@mock.patch.object(firewall, 'update_filters', autospec=True)
@mock.patch.object(node_cache, 'get_node', autospec=True)
@mock.patch.object(utils, 'get_client', autospec=True)
class TestAbort(BaseTest):
def setUp(self):
super(TestAbort, self).setUp()
self.node_info.started_at = None
self.node_info.finished_at = None

def test_ok(self, client_mock, get_mock, filters_mock):
cli = self._prepare(client_mock)
get_mock.return_value = self.node_info
self.node_info.acquire_lock.return_value = True
self.node_info.started_at = time.time()
self.node_info.finished_at = None

introspect.abort(self.node.uuid)

get_mock.assert_called_once_with(self.uuid, ironic=cli,
locked=False)
self.node_info.acquire_lock.assert_called_once_with(blocking=False)
filters_mock.assert_called_once_with(cli)
cli.node.set_power_state.assert_called_once_with(self.uuid, 'off')
self.node_info.finished.assert_called_once_with(error='Canceled '
'by operator')

def test_node_not_found(self, client_mock, get_mock, filters_mock):
cli = self._prepare(client_mock)
exc = utils.Error('Not found.', code=404)
get_mock.side_effect = iter([exc])

self.assertRaisesRegexp(utils.Error, str(exc),
introspect.abort, self.uuid)

self.assertEqual(0, filters_mock.call_count)
self.assertEqual(0, cli.node.set_power_state.call_count)
self.assertEqual(0, self.node_info.finished.call_count)

def test_node_locked(self, client_mock, get_mock, filters_mock):
cli = self._prepare(client_mock)
get_mock.return_value = self.node_info
self.node_info.acquire_lock.return_value = False
self.node_info.started_at = time.time()

self.assertRaisesRegexp(utils.Error, 'Node is locked, please, '
'retry later', introspect.abort, self.uuid)

self.assertEqual(0, filters_mock.call_count)
self.assertEqual(0, cli.node.set_power_state.call_count)
self.assertEqual(0, self.node_info.finshed.call_count)

def test_introspection_already_finished(self, client_mock,
get_mock, filters_mock):
cli = self._prepare(client_mock)
get_mock.return_value = self.node_info
self.node_info.acquire_lock.return_value = True
self.node_info.started_at = time.time()
self.node_info.finished_at = time.time()

introspect.abort(self.uuid)

self.assertEqual(0, filters_mock.call_count)
self.assertEqual(0, cli.node.set_power_state.call_count)
self.assertEqual(0, self.node_info.finshed.call_count)

def test_firewall_update_exception(self, client_mock, get_mock,
filters_mock):
cli = self._prepare(client_mock)
get_mock.return_value = self.node_info
self.node_info.acquire_lock.return_value = True
self.node_info.started_at = time.time()
self.node_info.finished_at = None
filters_mock.side_effect = iter([Exception('Boom')])

introspect.abort(self.uuid)

get_mock.assert_called_once_with(self.uuid, ironic=cli,
locked=False)
self.node_info.acquire_lock.assert_called_once_with(blocking=False)
filters_mock.assert_called_once_with(cli)
cli.node.set_power_state.assert_called_once_with(self.uuid, 'off')
self.node_info.finished.assert_called_once_with(error='Canceled '
'by operator')

def test_node_power_off_exception(self, client_mock, get_mock,
filters_mock):
cli = self._prepare(client_mock)
get_mock.return_value = self.node_info
self.node_info.acquire_lock.return_value = True
self.node_info.started_at = time.time()
self.node_info.finished_at = None
cli.node.set_power_state.side_effect = iter([Exception('BadaBoom')])

introspect.abort(self.uuid)

get_mock.assert_called_once_with(self.uuid, ironic=cli,
locked=False)
self.node_info.acquire_lock.assert_called_once_with(blocking=False)
filters_mock.assert_called_once_with(cli)
cli.node.set_power_state.assert_called_once_with(self.uuid, 'off')
self.node_info.finished.assert_called_once_with(error='Canceled '
'by operator')
44 changes: 44 additions & 0 deletions ironic_inspector/test/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,50 @@ def test_continue_wrong_type(self, process_mock):
self.assertFalse(process_mock.called)


@mock.patch.object(introspect, 'abort', autospec=True)
class TestApiAbort(BaseAPITest):
def test_ok(self, abort_mock):
abort_mock.return_value = '', 202

res = self.app.post('/v1/introspection/%s/abort' % self.uuid,
headers={'X-Auth-Token': 'token'})

abort_mock.assert_called_once_with(self.uuid, token='token')
self.assertEqual(202, res.status_code)
self.assertEqual(b'', res.data)

def test_no_authentication(self, abort_mock):
abort_mock.return_value = b'', 202

res = self.app.post('/v1/introspection/%s/abort' % self.uuid)

abort_mock.assert_called_once_with(self.uuid, token=None)
self.assertEqual(202, res.status_code)
self.assertEqual(b'', res.data)

def test_node_not_found(self, abort_mock):
exc = utils.Error("Not Found.", code=404)
abort_mock.side_effect = iter([exc])

res = self.app.post('/v1/introspection/%s/abort' % self.uuid)

abort_mock.assert_called_once_with(self.uuid, token=None)
self.assertEqual(404, res.status_code)
data = json.loads(str(res.data.decode()))
self.assertEqual(str(exc), data['error']['message'])

def test_abort_failed(self, abort_mock):
exc = utils.Error("Locked.", code=409)
abort_mock.side_effect = iter([exc])

res = self.app.post('/v1/introspection/%s/abort' % self.uuid)

abort_mock.assert_called_once_with(self.uuid, token=None)
self.assertEqual(409, res.status_code)
data = json.loads(res.data.decode())
self.assertEqual(str(exc), data['error']['message'])


class TestApiGetStatus(BaseAPITest):
@mock.patch.object(node_cache, 'get_node', autospec=True)
def test_get_introspection_in_progress(self, get_mock):
Expand Down
12 changes: 12 additions & 0 deletions ironic_inspector/test/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,18 @@ def test_not_found_in_ironic(self, cli, pop_mock, process_mock):
self.assertFalse(process_mock.called)
pop_mock.return_value.finished.assert_called_once_with(error=mock.ANY)

@prepare_mocks
def test_already_finished(self, cli, pop_mock, process_mock):
old_finished_at = pop_mock.return_value.finished_at
pop_mock.return_value.finished_at = time.time()
try:
self.assertRaisesRegexp(utils.Error, 'already finished',
process.process, self.data)
self.assertFalse(process_mock.called)
self.assertFalse(pop_mock.return_value.finished.called)
finally:
pop_mock.return_value.finished_at = old_finished_at

@prepare_mocks
def test_expected_exception(self, cli, pop_mock, process_mock):
process_mock.side_effect = iter([utils.Error('boom')])
Expand Down
4 changes: 4 additions & 0 deletions releasenotes/notes/abort-introspection-ae5cb5a9fbacd2ac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
features:
- Introduced API "POST /v1/introspection/<UUID>/abort" for aborting
the introspection process.

0 comments on commit 7a3d937

Please sign in to comment.