-
Notifications
You must be signed in to change notification settings - Fork 2.5k
/
cross_cell_migrate.py
1466 lines (1288 loc) · 71.9 KB
/
cross_cell_migrate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections
import copy
from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_utils import excutils
from nova import availability_zones
from nova.compute import instance_actions
from nova.compute import power_state
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute import vm_states
from nova.conductor.tasks import base
from nova import conf
from nova import context as nova_context
from nova import exception
from nova.i18n import _
from nova.image import glance
from nova.network import constants as neutron_constants
from nova.network import neutron
from nova import objects
from nova.objects import fields
from nova.scheduler import utils as scheduler_utils
from nova.volume import cinder
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# Shortcut to the CONF object exposed by nova.conf.
CONF = conf.CONF
def clone_creatable_object(ctxt, obj, delete_fields=None):
    """Targets the object at the given context and removes its id attribute

    Dirties all of the set fields on a new copy of the object.
    This is necessary before the object is created in a new cell.

    The ``delete_fields`` argument is only read; the caller's collection is
    never modified.

    :param ctxt: cell-targeted nova auth request context to set on the clone
    :param obj: the object to re-target
    :param delete_fields: optional iterable of field names to delete from
        the new object; note that the ``id`` field is always deleted
    :returns: Cloned version of ``obj`` with all set fields marked as
        "changed" so they will be persisted on a subsequent
        ``obj.create()`` call.
    """
    # Build a private set instead of appending 'id' to the caller's list so
    # that a list reused across calls is not silently altered.
    fields_to_drop = {'id'}
    if delete_fields is not None:
        fields_to_drop.update(delete_fields)

    new_obj = obj.obj_clone()
    new_obj._context = ctxt
    for field in obj.obj_fields:
        if field not in obj:
            # Fields that are not set on the source are left untouched.
            continue
        if field in fields_to_drop:
            delattr(new_obj, field)
        else:
            # Dirty the field since obj_clone does not modify
            # _changed_fields.
            setattr(new_obj, field, getattr(obj, field))
    return new_obj
class TargetDBSetupTask(base.TaskBase):
    """Sub-task that seeds the target cell DB with the instance data.

    The instance has to exist in the target cell database before any work
    can be done with it there, like validating the selected target compute
    host.
    """

    def __init__(self, context, instance, source_migration,
                 target_cell_context):
        """Initialize this task.

        :param context: source-cell targeted auth RequestContext
        :param instance: source-cell Instance object
        :param source_migration: source-cell Migration object for this
            operation
        :param target_cell_context: target-cell targeted auth RequestContext
        """
        super(TargetDBSetupTask, self).__init__(context, instance)
        self.source_migration = source_migration
        self.target_ctx = target_cell_context
        # Populated by _execute once the hidden instance has been created;
        # rollback uses it to know whether there is anything to destroy.
        self._target_cell_instance = None

    def _copy_migrations(self, migrations):
        """Re-create the source cell migration records in the target cell.

        :param migrations: MigrationList object of source cell DB records.
        :returns: The Migration record created in the target cell database
            whose uuid matches the active source cell migration, or None if
            no copied record matched.
        """
        matched = None
        for source_record in migrations:
            cloned = clone_creatable_object(self.target_ctx, source_record)
            cloned.create()
            if cloned.uuid == self.source_migration.uuid:
                # Remember it so subsequent tasks don't need to look it up.
                matched = cloned
        return matched

    def _execute(self):
        """Creates the instance and its related records in the target cell

        Instance.pci_devices are not copied over since those records are
        tightly coupled to the compute_nodes records and are meant to track
        inventory and allocations of PCI devices on a specific compute node.
        The instance.pci_requests are what "move" with the instance to the
        target cell and will result in new PCIDevice allocations on the
        target compute node in the target cell during the resize_claim.

        The instance.services field is not copied over since that represents
        the nova-compute service mapped to the instance.host, which will not
        make sense in the target cell.

        :returns: A two-item tuple of the Instance and Migration object
            created in the target cell
        """
        LOG.debug(
            'Creating (hidden) instance and its related records in the target '
            'cell: %s', self.target_ctx.cell_uuid, instance=self.instance)
        source_ctx = self.context
        instance_uuid = self.instance.uuid

        # Read everything we need out of the source cell DB up front, before
        # anything is created in the target cell. BDMs and tags have to be
        # created separately from the instance, just like in
        # ComputeTaskManager.schedule_and_build_instances.
        # NOTE(mriedem): Console auth tokens are not copied over to the
        # target cell DB since they will be regenerated in the target cell as
        # needed. Similarly, expired console auth tokens will be
        # automatically cleaned from the source cell.
        source_bdms = self.instance.get_bdms()
        source_vifs = objects.VirtualInterfaceList.get_by_instance_uuid(
            source_ctx, instance_uuid)
        source_tags = self.instance.tags
        # Instance actions are copied to preserve the history of the instance
        # in case the resize is confirmed.
        source_actions = objects.InstanceActionList.get_by_instance_uuid(
            source_ctx, instance_uuid)
        source_migrations = objects.MigrationList.get_by_filters(
            source_ctx, filters={'instance_uuid': instance_uuid})

        # db.instance_create cannot handle some fields which might be loaded
        # on the instance object, so those are dropped from the clone and the
        # ones we care about (like tags) are created explicitly below.
        # pci_devices and services make no sense in the target DB so they are
        # dropped as well.
        # TODO(mriedem): Determine if we care about copying faults over to
        # the target cell in case people use those for auditing (remember
        # that faults are only shown in the API for ERROR/DELETED instances
        # and only the most recent fault is shown).
        target_instance = clone_creatable_object(
            self.target_ctx, self.instance,
            delete_fields=['fault', 'pci_devices', 'services', 'tags'])
        # Important: the target cell copy is created "hidden" so that while
        # two copies of the instance exist in different cells, listing
        # servers out of the API filters out the hidden one.
        target_instance.hidden = True
        target_instance.create()
        # Keep track of this for rollbacks.
        self._target_cell_instance = target_instance

        # TODO(mriedem): Consider doing all of the inserts in a single
        # transaction context. If any of the following creates fail, the
        # rollback should perform a cascading hard-delete anyway.
        # BDMs first, then VIFs: clone each record and create it in the
        # target cell.
        for source_records in (source_bdms, source_vifs):
            for record in source_records:
                clone_creatable_object(self.target_ctx, record).create()

        if source_tags:
            objects.TagList.create(
                self.target_ctx, target_instance.uuid,
                [tag.tag for tag in source_tags])

        for action in source_actions:
            clone_creatable_object(self.target_ctx, action).create()
            # Each pre-existing action also needs its events re-created in
            # the target cell; they are looked up in the source cell.
            events = objects.InstanceActionEventList.get_by_action(
                source_ctx, action.id)
            for event in events:
                target_event = clone_creatable_object(self.target_ctx, event)
                target_event.create(action.instance_uuid, action.request_id)

        return target_instance, self._copy_migrations(source_migrations)

    def rollback(self, ex):
        """Deletes the instance data from the target cell in case of failure"""
        target_instance = self._target_cell_instance
        if not target_instance:
            # The instance was never created in the target cell.
            return
        # Deleting the instance in the target cell DB should perform a
        # cascading delete of all related records, e.g. BDMs, VIFs, etc.
        LOG.debug('Destroying instance from target cell: %s',
                  self.target_ctx.cell_uuid,
                  instance=target_instance)
        # This needs to be a hard delete because if resize fails later for
        # some reason, we want to be able to retry the resize to this cell
        # again without hitting a duplicate entry unique constraint error.
        target_instance.destroy(hard_delete=True)
class PrepResizeAtDestTask(base.TaskBase):
    """Task used to verify a given target host in a target cell.

    Upon successful completion, port bindings and volume attachments
    should be created for the target host in the target cell and resources
    should be claimed on the target host for the resize. Also, the instance
    task_state should be ``resize_prep``.
    """

    def __init__(self, context, instance, flavor, target_migration,
                 request_spec, compute_rpcapi, host_selection, network_api,
                 volume_api):
        """Construct the PrepResizeAtDestTask instance

        :param context: The user request auth context. This should be
            targeted at the target cell.
        :param instance: The instance being migrated (this is the target
            cell copy of the instance record).
        :param flavor: The new flavor if performing resize and not just a
            cold migration
        :param target_migration: The Migration object from the target cell
            DB.
        :param request_spec: nova.objects.RequestSpec object for the
            operation
        :param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
        :param host_selection: nova.objects.Selection which is a possible
            target host for the cross-cell resize
        :param network_api: The neutron (client side) networking API.
        :param volume_api: The cinder (client side) block-storage API.
        """
        super(PrepResizeAtDestTask, self).__init__(context, instance)
        self.flavor = flavor
        self.target_migration = target_migration
        self.request_spec = request_spec
        self.host_selection = host_selection
        # Service API handles.
        self.compute_rpcapi = compute_rpcapi
        self.network_api = network_api
        self.volume_api = volume_api
        # Bookkeeping of what this task created, consumed by rollback().
        self._bindings_by_port_id = {}
        self._created_volume_attachment_ids = []

    def _create_port_bindings(self):
        """Creates inactive port bindings against the selected target host
        for the ports attached to the instance.

        The ``self._bindings_by_port_id`` variable will be set upon
        successful completion.

        :raises: MigrationPreCheckError if port binding failed
        """
        dest_host = self.host_selection.service_host
        LOG.debug('Creating port bindings for destination host %s',
                  dest_host)
        try:
            bindings = self.network_api.bind_ports_to_host(
                self.context, self.instance, dest_host)
        except exception.PortBindingFailed:
            reason = _('Failed to create port bindings for host %s') % (
                dest_host)
            raise exception.MigrationPreCheckError(reason=reason)
        self._bindings_by_port_id = bindings

    def _create_volume_attachments(self):
        """Create empty volume attachments for volume BDMs attached to the
        instance in the target cell.

        The BlockDeviceMapping.attachment_id field is updated for each
        volume BDM processed. Remember that these BDM records are from the
        target cell database so the changes will only go there.

        :return: BlockDeviceMappingList of volume BDMs with an updated
            attachment_id field for the newly created empty attachment for
            that BDM
        """
        LOG.debug('Creating volume attachments for destination host %s',
                  self.host_selection.service_host)
        only_volumes = [
            bdm for bdm in self.instance.get_bdms() if bdm.is_volume]
        volume_bdms = objects.BlockDeviceMappingList(objects=only_volumes)
        for volume_bdm in volume_bdms:
            # Create the empty (no host connector) attachment.
            attach_ref = self.volume_api.attachment_create(
                self.context, volume_bdm.volume_id, volume_bdm.instance_uuid)
            attachment_id = attach_ref['id']
            # Keep track of what we create for rollbacks.
            self._created_volume_attachment_ids.append(attachment_id)
            # Update the BDM in the target cell database.
            # Note that ultimately the BDMs in the target cell are either
            # pointing at attachments that we can use, or this sub-task has
            # failed in which case we will fail the main task and should
            # rollback and delete the instance and its BDMs in the target
            # cell database, so that is why we do not track the original
            # attachment IDs in order to roll them back on the BDM records.
            volume_bdm.attachment_id = attachment_id
            volume_bdm.save()
        return volume_bdms

    def _execute(self):
        """Performs pre-cross-cell resize checks/claims on the targeted host

        This ensures things like networking (ports) will continue to work on
        the target host in the other cell before we initiate the migration
        of the server.

        Resources are also claimed on the target host which in turn creates
        the MigrationContext for the instance in the target cell database.

        :returns: MigrationContext created in the target cell database
            during the resize_claim in the destination compute service.
        :raises: nova.exception.MigrationPreCheckError if the pre-check
            validation fails for the given host selection; this indicates an
            alternative host *may* work but this one does not.
        """
        selection = self.host_selection
        destination = selection.service_host
        LOG.debug('Verifying selected host %s for cross-cell resize.',
                  destination, instance=self.instance)
        # Validate networking by creating port bindings for this host.
        self._create_port_bindings()
        # Create new empty volume attachments for the volume BDMs attached
        # to the instance. Technically this is not host specific and we
        # could do this outside of the PrepResizeAtDestTask sub-task but
        # volume attachments are meant to be cheap and plentiful so it is
        # nice to keep them self-contained within each execution of this
        # task and rollback anything we created if we fail.
        self._create_volume_attachments()
        try:
            LOG.debug('Calling destination host %s to prepare for cross-cell '
                      'resize and claim resources.', destination)
            return self.compute_rpcapi.prep_snapshot_based_resize_at_dest(
                self.context, self.instance, self.flavor,
                selection.nodename, self.target_migration,
                selection.limits, self.request_spec, destination)
        except messaging.MessagingTimeout:
            # Surface the timeout as a pre-check failure for this host.
            msg = _('RPC timeout while checking if we can cross-cell migrate '
                    'to host: %s') % destination
            raise exception.MigrationPreCheckError(reason=msg)

    def rollback(self, ex):
        """Best-effort removal of the port bindings and volume attachments
        this task created for the destination host.

        Cleanup failures are logged and never re-raised.
        """
        host = self.host_selection.service_host

        # Cleanup any destination host port bindings.
        LOG.debug('Cleaning up port bindings for destination host %s', host)
        for port_id in self._bindings_by_port_id:
            try:
                self.network_api.delete_port_binding(
                    self.context, port_id, host)
            except Exception:
                # Don't raise if we fail to cleanup, just log it.
                LOG.exception('An error occurred while cleaning up binding '
                              'for port %s on host %s.', port_id, host,
                              instance=self.instance)

        # Cleanup any destination host volume attachments.
        LOG.debug(
            'Cleaning up volume attachments for destination host %s', host)
        for attachment_id in self._created_volume_attachment_ids:
            try:
                self.volume_api.attachment_delete(self.context, attachment_id)
            except Exception:
                # Don't raise if we fail to cleanup, just log it.
                LOG.exception('An error occurred while cleaning up volume '
                              'attachment %s.', attachment_id,
                              instance=self.instance)
class PrepResizeAtSourceTask(base.TaskBase):
    """Task to prepare the instance at the source host for the resize.

    Will power off the instance at the source host, create and upload a
    snapshot image for a non-volume-backed server, and disconnect volumes
    and networking from the source host.

    The vm_state is recorded with the "old_vm_state" key in the
    instance.system_metadata field prior to powering off the instance so the
    revert flow can determine if the guest should be running or stopped.

    Returns the snapshot image ID, if one was created, from the ``execute``
    method.

    Upon successful completion, the instance.task_state will be
    ``resize_migrated`` and the migration.status will be ``post-migrating``.
    """

    def __init__(self, context, instance, flavor, migration, request_spec,
                 compute_rpcapi, image_api):
        """Initializes this PrepResizeAtSourceTask instance.

        :param context: nova auth context targeted at the source cell
        :param instance: Instance object from the source cell
        :param flavor: The new flavor if performing resize and not just a
            cold migration
        :param migration: Migration object from the source cell
        :param request_spec: RequestSpec object for the resize operation
        :param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
        :param image_api: instance of nova.image.glance.API
        """
        super(PrepResizeAtSourceTask, self).__init__(context, instance)
        self.flavor = flavor
        self.migration = migration
        self.request_spec = request_spec
        self.compute_rpcapi = compute_rpcapi
        self.image_api = image_api
        # ID of the snapshot image created by _execute, if any.
        self._image_id = None

    def _execute(self):
        """Record resize state on the instance, optionally create the
        snapshot image and RPC the source host to prepare for the resize.

        :returns: ID of the snapshot image that was created, or None for a
            volume-backed server.
        """
        instance = self.instance
        # Save off the vm_state so we can use that later on the source host
        # if the resize is reverted - it is used to determine if the reverted
        # guest should be powered on.
        instance.system_metadata['old_vm_state'] = instance.vm_state
        instance.task_state = task_states.RESIZE_MIGRATING
        instance.old_flavor = instance.flavor
        instance.new_flavor = self.flavor

        # A server that is not volume-backed needs a snapshot of its root
        # disk; start by creating an empty image for it.
        if not self.request_spec.is_bfv:
            snapshot_name = '%s-resize-temp' % instance.display_name
            image_meta = compute_utils.create_image(
                self.context, instance, snapshot_name, 'snapshot',
                self.image_api)
            self._image_id = image_meta['id']
            LOG.debug('Created snapshot image %s for cross-cell resize.',
                      self._image_id, instance=instance)

        instance.save(expected_task_state=task_states.RESIZE_PREP)

        # RPC call the source host to prepare for resize.
        self.compute_rpcapi.prep_snapshot_based_resize_at_source(
            self.context, instance, self.migration,
            snapshot_id=self._image_id)

        return self._image_id

    def rollback(self, ex):
        """Delete the snapshot image, if one was created, and log the
        failure.
        """
        snapshot_id = self._image_id
        if snapshot_id:
            compute_utils.delete_image(
                self.context, self.instance, self.image_api, snapshot_id)

        # If the compute service successfully powered off the guest but
        # failed to snapshot (or timed out during the snapshot), then the
        # _sync_power_states periodic task should mark the instance as
        # stopped and the user can start/reboot it.
        # If the compute service powered off the instance, snapshot it and
        # destroyed the guest and then a failure occurred, the instance
        # should have been set to ERROR status (by the compute service) so
        # the user has to hard reboot or rebuild it.
        LOG.error('Preparing for cross-cell resize at the source host %s '
                  'failed. The instance may need to be hard rebooted.',
                  self.instance.host, instance=self.instance)
class FinishResizeAtDestTask(base.TaskBase):
    """Task to finish the resize at the destination host.

    Calls the finish_snapshot_based_resize_at_dest method on the destination
    compute service which sets up networking and block storage and spawns
    the guest on the destination host. Upon successful completion of this
    task, the migration status should be 'finished', the instance task_state
    should be None and the vm_state should be 'resized'. The instance
    host/node information should also reflect the destination compute.

    If the compute call is successful, the task will change the instance
    mapping to point at the target cell and hide the source cell instance
    thus making the confirm/revert operations act on the target cell
    instance.
    """

    def __init__(self, context, instance, migration, source_cell_instance,
                 compute_rpcapi, target_cell_mapping, snapshot_id,
                 request_spec):
        """Initialize this task.

        :param context: nova auth request context targeted at the target
            cell
        :param instance: Instance object in the target cell database
        :param migration: Migration object in the target cell database
        :param source_cell_instance: Instance object in the source cell DB
        :param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
        :param target_cell_mapping: CellMapping object for the target cell
        :param snapshot_id: ID of the image snapshot to use for a
            non-volume-backed instance.
        :param request_spec: nova.objects.RequestSpec object for the
            operation
        """
        super(FinishResizeAtDestTask, self).__init__(context, instance)
        self.migration = migration
        self.snapshot_id = snapshot_id
        self.request_spec = request_spec
        self.compute_rpcapi = compute_rpcapi
        self.source_cell_instance = source_cell_instance
        self.target_cell_mapping = target_cell_mapping

    def _finish_snapshot_based_resize_at_dest(self):
        """Synchronously RPC calls finish_snapshot_based_resize_at_dest

        If the finish_snapshot_based_resize_at_dest method fails in the
        compute service, this method will update the source cell instance
        data to reflect the error (vm_state='error', copy the fault and
        instance action events for that compute method).
        """
        LOG.debug('Finishing cross-cell resize at the destination host %s',
                  self.migration.dest_compute, instance=self.instance)
        # prep_snapshot_based_resize_at_source in the source cell would have
        # changed the source cell instance.task_state to resize_migrated and
        # we need to reflect that in the target cell instance before calling
        # the destination compute.
        self.instance.task_state = task_states.RESIZE_MIGRATED
        self.instance.save()

        source_cell_context = self.source_cell_instance._context
        event_name = 'compute_finish_snapshot_based_resize_at_dest'
        try:
            # The event is reported with the source cell context.
            reporter = compute_utils.EventReporter(
                source_cell_context, event_name,
                self.migration.dest_compute, self.instance.uuid)
            with reporter:
                self.compute_rpcapi.finish_snapshot_based_resize_at_dest(
                    self.context, self.instance, self.migration,
                    self.snapshot_id, self.request_spec)
            # finish_snapshot_based_resize_at_dest updates the target cell
            # instance so we need to refresh it here to have the latest
            # copy.
            self.instance.refresh()
        except Exception:
            # We need to mimic the error handlers on
            # finish_snapshot_based_resize_at_dest in the destination compute
            # service so those changes are reflected in the source cell
            # instance.
            with excutils.save_and_reraise_exception(logger=LOG):
                source_instance = self.source_cell_instance
                # reverts_task_state and _error_out_instance_on_exception:
                source_instance.task_state = None
                source_instance.vm_state = vm_states.ERROR
                source_instance.save()
                # wrap_instance_fault (this is best effort)
                self._copy_latest_fault(source_cell_context)

    def _copy_latest_fault(self, source_cell_context):
        """Copies the latest instance fault from the target cell to the source

        Any failure while copying is logged and swallowed.

        :param source_cell_context: nova auth request context targeted at
            the source cell
        """
        try:
            # Get the latest fault from the target cell database.
            latest_fault = objects.InstanceFault.get_latest_for_instance(
                self.context, self.instance.uuid)
            if latest_fault:
                clone_creatable_object(
                    source_cell_context, latest_fault).create()
        except Exception:
            LOG.exception(
                'Failed to copy instance fault from target cell DB',
                instance=self.instance)

    def _update_instance_mapping(self):
        """Swaps the hidden field value on the source and target cell instance
        and updates the instance mapping to point at the target cell.
        """
        LOG.debug('Marking instance in source cell as hidden and updating '
                  'instance mapping to point at target cell %s.',
                  self.target_cell_mapping.identity, instance=self.instance)
        # Get the instance mapping first to make the window of time where
        # both instances are hidden=False as small as possible.
        mapping = objects.InstanceMapping.get_by_instance_uuid(
            self.context, self.instance.uuid)

        # Step 1: unhide the target cell instance so it will show up when
        # listing servers. Note that because of how the API filters
        # duplicate instance records, even if the user is listing servers at
        # this exact moment only one copy of the instance will be returned.
        self.instance.hidden = False
        self.instance.save()

        # Step 2: point the instance mapping at the target cell. This is so
        # that the confirm/revert actions will be performed on the resized
        # instance in the target cell rather than the destroyed guest in the
        # source cell. Note that we could do this before finishing the
        # resize on the dest host, but it makes sense to defer this until
        # the instance is successfully resized in the dest host because if
        # that fails, we want to be able to rebuild in the source cell to
        # recover the instance.
        mapping.cell_mapping = self.target_cell_mapping
        # If this fails the cascading task failures should delete the
        # instance in the target cell database so we do not need to hide it
        # again.
        mapping.save()

        # Step 3: hide the source cell instance record from the user when
        # listing servers.
        self.source_cell_instance.hidden = True
        self.source_cell_instance.save()

    def _execute(self):
        """Finish the resize on the destination host, then swap the hidden
        flags and the instance mapping over to the target cell.
        """
        # Finish the resize on the destination host in the target cell.
        self._finish_snapshot_based_resize_at_dest()
        # Do the instance.hidden/instance_mapping.cell_mapping swap.
        self._update_instance_mapping()

    def rollback(self, ex):
        """No-op: the methods executed in this task are self-contained for
        rollbacks.
        """
        pass
class CrossCellMigrationTask(base.TaskBase):
"""Orchestrates a cross-cell cold migration (resize)."""
def __init__(self, context, instance, flavor,
request_spec, source_migration, compute_rpcapi,
host_selection, alternate_hosts):
"""Construct the CrossCellMigrationTask instance
:param context: The user request auth context. This should be targeted
to the source cell in which the instance is currently running.
:param instance: The instance being migrated (from the source cell)
:param flavor: The new flavor if performing resize and not just a
cold migration
:param request_spec: nova.objects.RequestSpec with scheduling details
:param source_migration: nova.objects.Migration record for this
operation (from the source cell)
:param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
:param host_selection: nova.objects.Selection of the initial
selected target host from the scheduler where the selected host
is in another cell which is different from the cell in which
the instance is currently running
:param alternate_hosts: list of 0 or more nova.objects.Selection
objects representing alternate hosts within the same target cell
as ``host_selection``.
"""
super(CrossCellMigrationTask, self).__init__(context, instance)
self.request_spec = request_spec
self.flavor = flavor
self.source_migration = source_migration
self.compute_rpcapi = compute_rpcapi
self.host_selection = host_selection
self.alternate_hosts = alternate_hosts
self._target_cell_instance = None
self._target_cell_context = None
self.network_api = neutron.API()
self.volume_api = cinder.API()
self.image_api = glance.API()
# Keep an ordered dict of the sub-tasks completed so we can call their
# rollback routines if something fails.
self._completed_tasks = collections.OrderedDict()
def _get_target_cell_mapping(self):
"""Get the target host CellMapping for the selected host
:returns: nova.objects.CellMapping for the cell of the selected target
host
:raises: nova.exception.CellMappingNotFound if the cell mapping for
the selected target host cannot be found (this should not happen
if the scheduler just selected it)
"""
return objects.CellMapping.get_by_uuid(
self.context, self.host_selection.cell_uuid)
def _setup_target_cell_db(self):
"""Creates the instance and its related records in the target cell
Upon successful completion the self._target_cell_context and
self._target_cell_instance variables are set.
:returns: A 2-item tuple of:
- The active Migration object from the target cell DB
- The CellMapping for the target cell
"""
LOG.debug('Setting up the target cell database for the instance and '
'its related records.', instance=self.instance)
target_cell_mapping = self._get_target_cell_mapping()
# Clone the context targeted at the source cell and then target the
# clone at the target cell.
self._target_cell_context = copy.copy(self.context)
nova_context.set_target_cell(
self._target_cell_context, target_cell_mapping)
task = TargetDBSetupTask(
self.context, self.instance, self.source_migration,
self._target_cell_context)
self._target_cell_instance, target_cell_migration = task.execute()
self._completed_tasks['TargetDBSetupTask'] = task
return target_cell_migration, target_cell_mapping
def _perform_external_api_checks(self):
    """Verify that external service APIs provide what this task needs.

    * Checks that the neutron port binding-extended API is available

    :raises: MigrationPreCheckError if any checks fail
    """
    LOG.debug('Making sure neutron is new enough for cross-cell resize.')
    # Fail fast: without the port binding-extended API extension in
    # neutron there is no point in going any further.
    if self.network_api.supports_port_binding_extension(self.context):
        return

    failure_reason = (
        _("Required networking service API extension '%s' "
          "not found.") %
        neutron_constants.PORT_BINDING_EXTENDED)
    raise exception.MigrationPreCheckError(reason=failure_reason)
def _prep_resize_at_dest(self, target_cell_migration):
    """Run PrepResizeAtDestTask and mirror its results to the source cell.

    :param target_cell_migration: Migration record from the target cell DB
    :returns: Refreshed Migration record from the target cell DB after the
        resize_claim on the destination host has updated the record.
    """
    # TODO(mriedem): Check alternates if the primary selected host fails;
    # note that alternates are always in the same cell as the selected host
    # so if the primary fails pre-checks, the alternates may also fail. We
    # could reschedule but the scheduler does not yet have an ignore_cells
    # capability like ignore_hosts.
    dest_instance = self._target_cell_instance

    # new_flavor has to be in place before the task runs because the
    # ResourceTracker.resize_claim on the destination host uses it.
    dest_instance.new_flavor = self.flavor
    prep_dest_task = PrepResizeAtDestTask(
        self._target_cell_context, dest_instance, self.flavor,
        target_cell_migration, self.request_spec, self.compute_rpcapi,
        self.host_selection, self.network_api, self.volume_api)
    dest_migration_context = prep_dest_task.execute()
    self._completed_tasks['PrepResizeAtDestTask'] = prep_dest_task

    # Remember the pre-resize vm_state so the resized/reverted instance
    # can be put back into the same state later, i.e. a STOPPED guest
    # should not be powered on.
    dest_instance.system_metadata['old_vm_state'] = dest_instance.vm_state

    # With the resize prepared on the destination host, record that host's
    # availability zone on the target cell instance. Doing it here in
    # conductor avoids an "up-call" from the compute service to the API
    # database.
    dest_host_az = availability_zones.get_host_availability_zone(
        self.context, self.host_selection.service_host)
    dest_instance.availability_zone = dest_host_az
    dest_instance.save()

    # The MigrationContext was created in the target cell database and
    # must be mirrored into the source cell database. It carries
    # pci_devices and a migration_id that are specific to the target cell
    # database; only migration_id is corrected for the source cell since
    # it is what routes neutron external events to the source and target
    # hosts.
    self.instance.migration_context = dest_migration_context.obj_clone()
    self.instance.migration_context.migration_id = self.source_migration.id
    self.instance.save()

    return self._update_migration_from_dest_after_claim(
        target_cell_migration)
def _update_migration_from_dest_after_claim(self, target_cell_migration):
    """Copy destination details from the target cell migration record.

    The PrepResizeAtDestTask runs a resize_claim on the target compute
    host service in the target cell, which sets fields about the
    destination on the migration record in the target cell. Those changes
    are reflected back into the migration record in the source cell here.

    :param target_cell_migration: Migration record from the target cell DB
    :returns: Refreshed Migration record from the target cell DB after the
        resize_claim on the destination host has updated the record.
    """
    # Re-read the target cell migration so that the values written by the
    # dest compute service during the resize claim are visible.
    refreshed_migration = objects.Migration.get_by_uuid(
        self._target_cell_context, target_cell_migration.uuid)

    # Mirror the destination compute details onto the source cell record.
    for field_name in ('dest_compute', 'dest_node', 'dest_host'):
        setattr(self.source_migration, field_name,
                getattr(refreshed_migration, field_name))
    self.source_migration.save()

    return refreshed_migration
def _prep_resize_at_source(self):
    """Run PrepResizeAtSourceTask and record it as completed.

    :return: The image snapshot ID if the instance is not volume-backed,
        else None.
    """
    source_host = self.source_migration.source_compute
    LOG.debug('Preparing source host %s for cross-cell resize.',
              source_host, instance=self.instance)

    source_task = PrepResizeAtSourceTask(
        self.context,
        self.instance,
        self.flavor,
        self.source_migration,
        self.request_spec,
        self.compute_rpcapi,
        self.image_api)
    image_snapshot_id = source_task.execute()
    self._completed_tasks['PrepResizeAtSourceTask'] = source_task

    return image_snapshot_id
def _finish_resize_at_dest(
        self, target_cell_migration, target_cell_mapping, snapshot_id):
    """Run FinishResizeAtDestTask and record it as completed.

    :param target_cell_migration: Migration object from the target cell DB
    :param target_cell_mapping: CellMapping object for the target cell
    :param snapshot_id: ID of the image snapshot to use for a
        non-volume-backed instance.
    """
    finish_task = FinishResizeAtDestTask(
        self._target_cell_context,
        self._target_cell_instance,
        target_cell_migration,
        self.instance,
        self.compute_rpcapi,
        target_cell_mapping,
        snapshot_id,
        self.request_spec)
    finish_task.execute()
    self._completed_tasks['FinishResizeAtDestTask'] = finish_task
def _execute(self):
    """Drive the cross-cell resize through its high-level steps in order."""
    # From here on the operation is a cross-cell move; there is no falling
    # back to MigrationTask for a same-cell migration if something fails.
    # Flag the migration record right away so an operator auditing or
    # debugging a later failure can tell which kind of move this was.
    self.source_migration.cross_cell_move = True
    self.source_migration.save()

    # Step 1: confirm the neutron APIs this flow depends on are present.
    self._perform_external_api_checks()

    # Step 2: create the instance record data in the target cell database.
    # Nothing can be done in the target cell without an instance record
    # there, and because cell-targeting on the request context is lost
    # over RPC, the source cell context and instance cannot simply be
    # passed to the target compute host with changes expected to be
    # mirrored back to the source cell database.
    dest_migration, dest_cell_mapping = self._setup_target_cell_db()

    # Step 3: claim resources and validate the selected host in the
    # target cell.
    dest_migration = self._prep_resize_at_dest(dest_migration)

    # Step 4: prepare the instance at the source host (stop it, optionally
    # snapshot it, disconnect volumes and VIFs, etc).
    image_snapshot_id = self._prep_resize_at_source()

    # Step 5: finish the resize at the destination host, swap the hidden
    # fields on the instances and update the instance mapping.
    self._finish_resize_at_dest(
        dest_migration, dest_cell_mapping, image_snapshot_id)
def rollback(self, ex):
    """Roll back the sub-tasks that completed, newest first.

    Each sub-task is expected to undo its own work; this method makes sure
    that happens for every task that finished successfully, e.g. if tasks
    A and B were successful but task C fails, the changes from B and then
    A get cleaned up here.

    A failure while rolling back one task is logged and does not stop the
    remaining tasks from being rolled back.

    :param ex: the exception handed to each sub-task's rollback method
    """
    completed = self._completed_tasks
    # Walk the recorded tasks in the reverse of their completion order.
    for name in reversed(completed):
        try:
            completed[name].rollback(ex)
        except Exception:
            LOG.exception('Rollback for task %s failed.', name)
def get_inst_and_cell_map_from_source(
        target_cell_context, source_compute, instance_uuid):
    """Fetch the instance and its cell mapping from the source cell.

    :param target_cell_context: nova auth request context targeted at the
        target cell database
    :param source_compute: name of the source compute service host
    :param instance_uuid: UUID of the instance
    :returns: 2-item tuple of:

        - Instance object from the source cell database.
        - CellMapping object of the source cell mapping
    """
    # The host mapping of the source compute host tells us which cell is
    # the source cell.
    host_mapping = objects.HostMapping.get_by_host(
        target_cell_context, source_compute)
    cell_mapping = host_mapping.cell_mapping

    # Leave the caller's target-cell context untouched: copy it and point
    # the copy at the source cell.
    source_ctxt = copy.copy(target_cell_context)
    nova_context.set_target_cell(source_ctxt, cell_mapping)

    # Loading the instance with the source cell context makes the returned
    # instance permanently targeted to the source cell database.
    wanted_attrs = ['flavor', 'info_cache', 'system_metadata']
    source_instance = objects.Instance.get_by_uuid(
        source_ctxt, instance_uuid, expected_attrs=wanted_attrs)

    return source_instance, cell_mapping
class ConfirmResizeTask(base.TaskBase):
"""Task which orchestrates a cross-cell resize confirm operation
When confirming a cross-cell resize, the instance is in both the source
and target cell databases and on the source and target compute hosts.
The API operation is performed on the target cell instance and it is the
job of this task to cleanup the source cell host and database and
update the status of the instance in the target cell.
This can be called either asynchronously from the API service during a
normal confirmResize server action or synchronously when deleting a server
in VERIFY_RESIZE status.
"""
def __init__(self, context, instance, migration, legacy_notifier,
             compute_rpcapi):
    """Set up a ConfirmResizeTask.

    :param context: nova auth request context targeted at the target cell
    :param instance: Instance object in "resized" status from the target
        cell
    :param migration: Migration object from the target cell for the resize
        operation expected to have status "confirming"
    :param legacy_notifier: LegacyValidatingNotifier for sending legacy
        unversioned notifications
    :param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
    """
    super(ConfirmResizeTask, self).__init__(context, instance)
    # Collaborators used by the confirm steps.
    self.compute_rpcapi = compute_rpcapi
    self.legacy_notifier = legacy_notifier
    # Target cell migration record for the resize being confirmed.
    self.migration = migration
def _send_resize_confirm_notification(self, instance, phase):
    """Emit the legacy and versioned resize.confirm.(phase) notifications.

    Both notifications are sent using the context attached to the given
    instance.

    :param instance: The instance whose resize is being confirmed.
    :param phase: The phase for the resize.confirm operation (either
        "start" or "end").
    """
    instance_ctxt = instance._context

    # Legacy unversioned notification first.
    legacy_event = 'resize.confirm.%s' % phase
    compute_utils.notify_about_instance_usage(
        self.legacy_notifier, instance_ctxt, instance, legacy_event)

    # Then the versioned notification.
    compute_utils.notify_about_instance_action(
        instance_ctxt,
        instance,
        CONF.host,
        action=fields.NotificationAction.RESIZE_CONFIRM,
        phase=phase)
def _cleanup_source_host(self, source_instance):
    """Clean the instance up from the source host.

    Creates a confirmResize instance action in the source cell DB.
    Destroys the guest from the source hypervisor, cleans up networking
    and storage and frees up resource usage on the source host.

    :param source_instance: Instance object from the source cell DB
    """
    source_ctxt = source_instance._context

    # To track action events properly, the confirmResize instance action
    # must exist in the source cell database before the compute service is
    # called. The API created the same action record, but on the target
    # cell instance.
    objects.InstanceAction.action_start(
        source_ctxt, source_instance.uuid, instance_actions.CONFIRM_RESIZE,
        want_result=False)

    # Load the source cell's copy of the Migration record.
    migration_in_source = objects.Migration.get_by_uuid(
        source_ctxt, self.migration.uuid)
    LOG.debug('Cleaning up source host %s for cross-cell resize confirm.',
              migration_in_source.source_compute, instance=source_instance)

    # The EventReporter context manager is given self.context (the target
    # cell), so the same event the source compute will create is also
    # created in the target cell DB and does not have to be copied over
    # from the source DB explicitly.
    event_reporter = compute_utils.EventReporter(
        self.context,
        'compute_confirm_snapshot_based_resize_at_source',
        migration_in_source.source_compute,
        source_instance.uuid)
    with event_reporter:
        self.compute_rpcapi.confirm_snapshot_based_resize_at_source(
            source_ctxt, source_instance, migration_in_source)
def _finish_confirm_in_target_cell(self):
"""Sets "terminal" states on the migration and instance in target cell.
This is similar to how ``confirm_resize`` works in the compute service
for same-cell resize.
"""
LOG.debug('Updating migration and instance status in target cell DB.',
instance=self.instance)
# Update the target cell migration.
self.migration.status = 'confirmed'
self.migration.save()
# Update the target cell instance.
# Delete stashed information for the resize.
self.instance.old_flavor = None
self.instance.new_flavor = None
self.instance.system_metadata.pop('old_vm_state', None)
self._set_vm_and_task_state()
self.instance.drop_migration_context()
# There are multiple possible task_states set on the instance because
# if we are called from the confirmResize instance action the
# task_state should be None, but if we are called from