From a44b6b2429d4282d17a91b677e81daa34b6a06e1 Mon Sep 17 00:00:00 2001
From: Harry Pidcock
Date: Fri, 6 Mar 2020 14:03:32 +0000
Subject: [PATCH] Fix 1860083 suitable availability zone for machine not found

Minor refactor to clean up the branch logic for distribution groups
with availability zones.

The actual fix was dropping the FilterZones method, which relied on
incorrect slice semantics: it filtered into a re-slice of its input,
so each append mutated the shared backing array. Building the result
from a nil slice would have been correct, because Go only allocates a
new backing array for an append when the slice's capacity is exceeded.
See https://play.golang.org/p/gUuDY0y1sqa for an example.
---
 worker/provisioner/provisioner_task.go      |  92 ++++++++--------
 worker/provisioner/provisioner_task_test.go | 109 ++++++++++++++++++++
 2 files changed, 155 insertions(+), 46 deletions(-)

diff --git a/worker/provisioner/provisioner_task.go b/worker/provisioner/provisioner_task.go
index 722d7d9b81d..b58511945ac 100644
--- a/worker/provisioner/provisioner_task.go
+++ b/worker/provisioner/provisioner_task.go
@@ -753,6 +753,20 @@ type AvailabilityZoneMachine struct {
 	ExcludedMachineIds set.Strings // Don't use these machines in the zone.
 }
 
+// MatchesConstraints reports whether this availability zone satisfies the
+// supplied constraints. If the constraints specify Zones, the AZ must match
+// one of the listed ZoneNames; otherwise any zone matches.
+func (az *AvailabilityZoneMachine) MatchesConstraints(cons constraints.Value) bool {
+	if !cons.HasZones() {
+		return true
+	}
+	for _, zone := range *cons.Zones {
+		if az.ZoneName == zone {
+			return true
+		}
+	}
+	return false
+}
+
 // populateAvailabilityZoneMachines fills in the map, availabilityZoneMachines,
 // if empty, with a current mapping of availability zone to IDs of machines
 // running in that zone. If the provider does not implement the ZonedEnviron
@@ -848,37 +862,42 @@ func (task *provisionerTask) machineAvailabilityZoneDistribution(
 	// If the machine has a distribution group, assign based on lowest zone
 	// population of the distribution group machine.
 	var machineZone string
+	zoneMap := azMachineFilterSort(task.availabilityZoneMachines)
 	if len(distGroupMachineIds) > 0 {
-		dgZoneMap := azMachineFilterSort(task.populateDistributionGroupZoneMap(distGroupMachineIds)).FilterZones(cons)
-		sort.Sort(dgZoneMap)
-		for _, dgZoneMachines := range dgZoneMap {
-			if !dgZoneMachines.FailedMachineIds.Contains(machineId) &&
-				!dgZoneMachines.ExcludedMachineIds.Contains(machineId) {
-				machineZone = dgZoneMachines.ZoneName
-				for _, azm := range task.availabilityZoneMachines {
-					if azm.ZoneName == dgZoneMachines.ZoneName {
-						azm.MachineIds.Add(machineId)
-						break
-					}
-				}
-				break
-			}
+		zoneMap = azMachineFilterSort(task.populateDistributionGroupZoneMap(distGroupMachineIds))
+	}
+
+	sort.Sort(zoneMap)
+	for _, zoneMachines := range zoneMap {
+		if !zoneMachines.MatchesConstraints(cons) {
+			task.logger.Debugf("machine %s does not match az %s: constraints do not match",
+				machineId, zoneMachines.ZoneName)
+			continue
 		}
-	} else {
-		zoneMap := azMachineFilterSort(task.availabilityZoneMachines).FilterZones(cons)
-		sort.Sort(zoneMap)
-		for _, zoneMachines := range zoneMap {
-			if !zoneMachines.FailedMachineIds.Contains(machineId) &&
-				!zoneMachines.ExcludedMachineIds.Contains(machineId) {
-				machineZone = zoneMachines.ZoneName
-				zoneMachines.MachineIds.Add(machineId)
-				break
-			}
+		if zoneMachines.FailedMachineIds.Contains(machineId) {
+			task.logger.Debugf("machine %s does not match az %s: excluded in failed machine ids",
+				machineId, zoneMachines.ZoneName)
+			continue
 		}
+		if zoneMachines.ExcludedMachineIds.Contains(machineId) {
+			task.logger.Debugf("machine %s does not match az %s: excluded machine id",
+				machineId, zoneMachines.ZoneName)
+			continue
+		}
+		machineZone = zoneMachines.ZoneName
+		break
 	}
+
 	if machineZone == "" {
 		return machineZone, errors.NotFoundf("suitable availability zone for machine %v", machineId)
 	}
+
+	for _, zoneMachines := range task.availabilityZoneMachines {
+		if zoneMachines.ZoneName == machineZone {
+			zoneMachines.MachineIds.Add(machineId)
+			break
+		}
+	}
 	return machineZone, nil
 }
 
@@ -887,26 +906,6 @@ func (task *provisionerTask) machineAvailabilityZoneDistribution(
 // and filtration based on zones expressed in constraints.
 type azMachineFilterSort []*AvailabilityZoneMachine
 
-// FilterZones returns a new instance consisting of slice members limited to
-// zones expressed in the input constraints.
-// Absence of zone constraints leaves the return unfiltered.
-func (a azMachineFilterSort) FilterZones(cons constraints.Value) azMachineFilterSort {
-	if !cons.HasZones() {
-		return a
-	}
-
-	filtered := a[:0]
-	for _, azm := range a {
-		for _, zone := range *cons.Zones {
-			if azm.ZoneName == zone {
-				filtered = append(filtered, azm)
-				break
-			}
-		}
-	}
-	return filtered
-}
-
 func (a azMachineFilterSort) Len() int {
 	return len(a)
 }
@@ -1271,9 +1270,10 @@ func (task *provisionerTask) markMachineFailedInAZ(machine apiprovisioner.Machin
 	}
 
 	// Check if there are any zones left to try (that also match constraints).
-	zoneMap := azMachineFilterSort(task.availabilityZoneMachines).FilterZones(cons)
+	zoneMap := azMachineFilterSort(task.availabilityZoneMachines)
 	for _, zoneMachines := range zoneMap {
-		if !zoneMachines.FailedMachineIds.Contains(machine.Id()) &&
+		if zoneMachines.MatchesConstraints(cons) &&
+			!zoneMachines.FailedMachineIds.Contains(machine.Id()) &&
 			!zoneMachines.ExcludedMachineIds.Contains(machine.Id()) {
 			return true, nil
 		}
diff --git a/worker/provisioner/provisioner_task_test.go b/worker/provisioner/provisioner_task_test.go
index d3ef9b095d0..3d4aec1bd5b 100644
--- a/worker/provisioner/provisioner_task_test.go
+++ b/worker/provisioner/provisioner_task_test.go
@@ -378,6 +378,115 @@ func (s *ProvisionerTaskSuite) TestZoneConstraintsWithDistributionGroup(c *gc.C)
 	workertest.CleanKill(c, task)
 }
 
+func (s *ProvisionerTaskSuite) TestZoneConstraintsWithDistributionGroupRetry(c *gc.C) {
+	ctrl := gomock.NewController(c)
+	defer ctrl.Finish()
+
+	broker := s.setUpZonedEnviron(ctrl)
+	azConstraints := newAZConstraintStartInstanceParamsMatcher("az1", "az2")
+
+	// For the call to start instance, we expect the same zone constraints to
+	// be present, but we also expect that the zone in start instance params
+	// was selected from the constraints, based on a machine from the same
+	// distribution group already being in one of the zones.
+	azConstraintsAndDerivedZone := newAZConstraintStartInstanceParamsMatcher("az1", "az2")
+	azConstraintsAndDerivedZone.addMatch("availability zone: az2", func(p environs.StartInstanceParams) bool {
+		return p.AvailabilityZone == "az2"
+	})
+
+	// Use satisfaction of this call as the synchronisation point.
+	failedErr := errors.Errorf("oh no")
+	started := make(chan struct{})
+	gomock.InOrder(
+		broker.EXPECT().DeriveAvailabilityZones(s.callCtx, azConstraints).Return([]string{}, nil),
+		broker.EXPECT().StartInstance(s.callCtx, azConstraints).Return(nil, failedErr),
+		broker.EXPECT().DeriveAvailabilityZones(s.callCtx, azConstraints).Return([]string{}, nil),
+		broker.EXPECT().StartInstance(s.callCtx, azConstraints).Return(&environs.StartInstanceResult{
+			Instance: &testInstance{id: "instance-1"},
+		}, nil).Do(func(_ ...interface{}) {
+			go func() { started <- struct{}{} }()
+		}),
+	)
+
+	// Another machine from the same distribution group is already in az1,
+	// so we expect the machine to be created in az2.
+	task := s.newProvisionerTaskWithBroker(c, broker, map[names.MachineTag][]string{
+		names.NewMachineTag("0"): {"az1"},
+	})
+
+	m0 := &testMachine{
+		id:          "0",
+		constraints: "zones=az1,az2",
+	}
+	s.machineStatusResults = []apiprovisioner.MachineStatusResult{{Machine: m0, Status: params.StatusResult{}}}
+	s.sendMachineErrorRetryChange(c)
+	s.sendMachineErrorRetryChange(c)
+
+	select {
+	case <-started:
+	case <-time.After(coretesting.LongWait):
+		c.Fatalf("no matching call to StartInstance")
+	}
+
+	workertest.CleanKill(c, task)
+}
+
+func (s *ProvisionerTaskSuite) TestZoneRestrictiveConstraintsWithDistributionGroupRetry(c *gc.C) {
+	ctrl := gomock.NewController(c)
+	defer ctrl.Finish()
+
+	broker := s.setUpZonedEnviron(ctrl)
+	azConstraints := newAZConstraintStartInstanceParamsMatcher("az2")
+
+	// For the call to start instance, we expect the same zone constraints to
+	// be present, but we also expect that the zone in start instance params
+	// was selected from the constraints, based on a machine from the same
+	// distribution group already being in one of the zones.
+	azConstraintsAndDerivedZone := newAZConstraintStartInstanceParamsMatcher("az2")
+	azConstraintsAndDerivedZone.addMatch("availability zone: az2", func(p environs.StartInstanceParams) bool {
+		return p.AvailabilityZone == "az2"
+	})
+
+	// Use satisfaction of this call as the synchronisation point.
+	failedErr := errors.Errorf("oh no")
+	started := make(chan struct{})
+	gomock.InOrder(
+		broker.EXPECT().DeriveAvailabilityZones(s.callCtx, azConstraints).Return([]string{}, nil),
+		broker.EXPECT().StartInstance(s.callCtx, azConstraints).Return(nil, failedErr),
+		broker.EXPECT().DeriveAvailabilityZones(s.callCtx, azConstraints).Return([]string{}, nil),
+		broker.EXPECT().StartInstance(s.callCtx, azConstraints).Return(&environs.StartInstanceResult{
+			Instance: &testInstance{id: "instance-2"},
+		}, nil).Do(func(_ ...interface{}) {
+			go func() { started <- struct{}{} }()
+		}),
+	)
+
+	// The distribution group has machines in az2 and az3, but the zone
+	// constraint only allows az2, so we expect the machine to be created
+	// in az2 even though az3 has a lower population.
+	task := s.newProvisionerTaskWithBroker(c, broker, map[names.MachineTag][]string{
+		names.NewMachineTag("0"): {"az2"},
+		names.NewMachineTag("1"): {"az3"},
+	})
+
+	m0 := &testMachine{
+		id:          "0",
+		constraints: "zones=az2",
+	}
+	s.machineStatusResults = []apiprovisioner.MachineStatusResult{
+		{Machine: m0, Status: params.StatusResult{}},
+	}
+	s.sendMachineErrorRetryChange(c)
+	s.sendMachineErrorRetryChange(c)
+
+	select {
+	case <-started:
+	case <-time.After(coretesting.LongWait):
+		c.Fatalf("no matching call to StartInstance")
+	}
+
+	workertest.CleanKill(c, task)
+}
+
 func (s *ProvisionerTaskSuite) TestPopulateAZMachinesErrorWorkerStopped(c *gc.C) {
 	ctrl := gomock.NewController(c)
 	defer ctrl.Finish()
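
Note on the slice behaviour behind the fix: below is a minimal standalone
sketch of the aliasing that FilterZones tripped over, mirroring the
playground link in the commit message. The helper names (filterInPlace,
filterFresh) and zone values are illustrative only, not Juju code.

    package main

    import "fmt"

    // filterInPlace mimics the dropped FilterZones pattern: it filters into
    // a re-slice of its input (zones[:0]), which shares the input's backing
    // array, so every append overwrites the caller's elements.
    func filterInPlace(zones []string, want string) []string {
    	filtered := zones[:0]
    	for _, z := range zones {
    		if z == want {
    			filtered = append(filtered, z)
    		}
    	}
    	return filtered
    }

    // filterFresh builds the result from a nil slice; the first append must
    // grow capacity from zero, so Go allocates a new backing array and the
    // input is left untouched.
    func filterFresh(zones []string, want string) []string {
    	var filtered []string
    	for _, z := range zones {
    		if z == want {
    			filtered = append(filtered, z)
    		}
    	}
    	return filtered
    }

    func main() {
    	shared := []string{"az1", "az2", "az3"}
    	fmt.Println(filterInPlace(shared, "az2")) // [az2]
    	fmt.Println(shared)                       // [az2 az2 az3] -- corrupted

    	shared = []string{"az1", "az2", "az3"}
    	fmt.Println(filterFresh(shared, "az2")) // [az2]
    	fmt.Println(shared)                     // [az1 az2 az3] -- intact
    }

In the provisioner the filtered slice aliased task.availabilityZoneMachines,
so filtering for one machine's constraints corrupted the shared zone list
used for every subsequent machine, which is why dropping FilterZones in
favour of the per-zone MatchesConstraints check fixes the bug.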