Merge pull request #11296 from hpidcock/fix-1860083

## Description of change

Minor refactor to clean up the branch logic for distribution groups for AZs.

The actual fix is dropping the FilterZones method, which relied on incorrect slice semantics: it filtered by appending to a re-slice of its input (`a[:0]`), and that append mutates the shared underlying array. The correct approach would have been to append to a nil slice. Go only allocates a new underlying array for an append when the slice's capacity is exceeded.
See https://play.golang.org/p/gUuDY0y1sqa for an example.
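
A minimal, self-contained sketch of the same pitfall (not the linked playground snippet; the zone names are made up for illustration): filtering in place via `a[:0]` overwrites the caller's backing array, while appending to a nil slice leaves it untouched.

```go
package main

import "fmt"

func main() {
	all := []string{"az1", "az2", "az3"}

	// In-place filtering, as the removed FilterZones did: the appends
	// write into the same backing array that "all" still points at.
	filtered := all[:0]
	for _, z := range all {
		if z != "az1" {
			filtered = append(filtered, z)
		}
	}
	fmt.Println(filtered) // [az2 az3]
	fmt.Println(all)      // [az2 az3 az3] -- caller's slice was mutated

	// Appending to a nil slice forces an allocation, so the original
	// slice is left intact.
	all = []string{"az1", "az2", "az3"}
	var safe []string
	for _, z := range all {
		if z != "az1" {
			safe = append(safe, z)
		}
	}
	fmt.Println(safe) // [az2 az3]
	fmt.Println(all)  // [az1 az2 az3]
}
```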

## QA steps

Follow the reproduction steps in the linked bug report.

## Documentation changes

N/A

## Bug reference

https://bugs.launchpad.net/juju/+bug/1860083
jujubot committed Mar 9, 2020
2 parents d85e7bf + a44b6b2 commit e2fd905
Showing 2 changed files with 155 additions and 46 deletions.
92 changes: 46 additions & 46 deletions worker/provisioner/provisioner_task.go
@@ -753,6 +753,20 @@ type AvailabilityZoneMachine struct {
ExcludedMachineIds set.Strings // Don't use these machines in the zone.
}

// MatchesConstraints against an AZ. If the constraints specifies Zones, make sure
// this AZ matches a listed ZoneName.
func (az *AvailabilityZoneMachine) MatchesConstraints(cons constraints.Value) bool {
if !cons.HasZones() {
return true
}
for _, zone := range *cons.Zones {
if az.ZoneName == zone {
return true
}
}
return false
}

// populateAvailabilityZoneMachines fills in the map, availabilityZoneMachines,
// if empty, with a current mapping of availability zone to IDs of machines
// running in that zone. If the provider does not implement the ZonedEnviron
@@ -848,37 +862,42 @@ func (task *provisionerTask) machineAvailabilityZoneDistribution(
// If the machine has a distribution group, assign based on lowest zone
// population of the distribution group machine.
var machineZone string
zoneMap := azMachineFilterSort(task.availabilityZoneMachines)
if len(distGroupMachineIds) > 0 {
dgZoneMap := azMachineFilterSort(task.populateDistributionGroupZoneMap(distGroupMachineIds)).FilterZones(cons)
sort.Sort(dgZoneMap)
for _, dgZoneMachines := range dgZoneMap {
if !dgZoneMachines.FailedMachineIds.Contains(machineId) &&
!dgZoneMachines.ExcludedMachineIds.Contains(machineId) {
machineZone = dgZoneMachines.ZoneName
for _, azm := range task.availabilityZoneMachines {
if azm.ZoneName == dgZoneMachines.ZoneName {
azm.MachineIds.Add(machineId)
break
}
}
break
}
zoneMap = azMachineFilterSort(task.populateDistributionGroupZoneMap(distGroupMachineIds))
}

sort.Sort(zoneMap)
for _, zoneMachines := range zoneMap {
if !zoneMachines.MatchesConstraints(cons) {
task.logger.Debugf("machine %s does not match az %s: constraints do not match",
machineId, zoneMachines.ZoneName)
continue
}
} else {
zoneMap := azMachineFilterSort(task.availabilityZoneMachines).FilterZones(cons)
sort.Sort(zoneMap)
for _, zoneMachines := range zoneMap {
if !zoneMachines.FailedMachineIds.Contains(machineId) &&
!zoneMachines.ExcludedMachineIds.Contains(machineId) {
machineZone = zoneMachines.ZoneName
zoneMachines.MachineIds.Add(machineId)
break
}
if zoneMachines.FailedMachineIds.Contains(machineId) {
task.logger.Debugf("machine %s does not match az %s: excluded in failed machine ids",
machineId, zoneMachines.ZoneName)
continue
}
if zoneMachines.ExcludedMachineIds.Contains(machineId) {
task.logger.Debugf("machine %s does not match az %s: excluded machine id",
machineId, zoneMachines.ZoneName)
continue
}
machineZone = zoneMachines.ZoneName
break
}

if machineZone == "" {
return machineZone, errors.NotFoundf("suitable availability zone for machine %v", machineId)
}

for _, zoneMachines := range task.availabilityZoneMachines {
if zoneMachines.ZoneName == machineZone {
zoneMachines.MachineIds.Add(machineId)
break
}
}
return machineZone, nil
}

@@ -887,26 +906,6 @@ func (task *provisionerTask) machineAvailabilityZoneDistribution(
// and filtration based on zones expressed in constraints.
type azMachineFilterSort []*AvailabilityZoneMachine

// FilterZones returns a new instance consisting of slice members limited to
// zones expressed in the input constraints.
// Absence of zone constraints leaves the return unfiltered.
func (a azMachineFilterSort) FilterZones(cons constraints.Value) azMachineFilterSort {
if !cons.HasZones() {
return a
}

filtered := a[:0]
for _, azm := range a {
for _, zone := range *cons.Zones {
if azm.ZoneName == zone {
filtered = append(filtered, azm)
break
}
}
}
return filtered
}

func (a azMachineFilterSort) Len() int {
return len(a)
}
@@ -1271,9 +1270,10 @@ func (task *provisionerTask) markMachineFailedInAZ(machine apiprovisioner.Machin
}

// Check if there are any zones left to try (that also match constraints).
zoneMap := azMachineFilterSort(task.availabilityZoneMachines).FilterZones(cons)
zoneMap := azMachineFilterSort(task.availabilityZoneMachines)
for _, zoneMachines := range zoneMap {
if !zoneMachines.FailedMachineIds.Contains(machine.Id()) &&
if zoneMachines.MatchesConstraints(cons) &&
!zoneMachines.FailedMachineIds.Contains(machine.Id()) &&
!zoneMachines.ExcludedMachineIds.Contains(machine.Id()) {
return true, nil
}
109 changes: 109 additions & 0 deletions worker/provisioner/provisioner_task_test.go
@@ -378,6 +378,115 @@ func (s *ProvisionerTaskSuite) TestZoneConstraintsWithDistributionGroup(c *gc.C)
workertest.CleanKill(c, task)
}

func (s *ProvisionerTaskSuite) TestZoneConstraintsWithDistributionGroupRetry(c *gc.C) {
ctrl := gomock.NewController(c)
defer ctrl.Finish()

broker := s.setUpZonedEnviron(ctrl)
azConstraints := newAZConstraintStartInstanceParamsMatcher("az1", "az2")

// For the call to start instance, we expect the same zone constraints to
// be present, but we also expect that the zone in start instance params
// was selected from the constraints, based on a machine from the same
// distribution group already being in one of the zones.
azConstraintsAndDerivedZone := newAZConstraintStartInstanceParamsMatcher("az1", "az2")
azConstraintsAndDerivedZone.addMatch("availability zone: az1", func(p environs.StartInstanceParams) bool {
return p.AvailabilityZone == "az2"
})

// Use satisfaction of this call as the synchronisation point.
failedErr := errors.Errorf("oh no")
started := make(chan struct{})
gomock.InOrder(
broker.EXPECT().DeriveAvailabilityZones(s.callCtx, azConstraints).Return([]string{}, nil),
broker.EXPECT().StartInstance(s.callCtx, azConstraints).Return(nil, failedErr),
broker.EXPECT().DeriveAvailabilityZones(s.callCtx, azConstraints).Return([]string{}, nil),
broker.EXPECT().StartInstance(s.callCtx, azConstraints).Return(&environs.StartInstanceResult{
Instance: &testInstance{id: "instance-1"},
}, nil).Do(func(_ ...interface{}) {
go func() { started <- struct{}{} }()
}),
)

// Another machine from the same distribution group is already in az1,
// so we expect the machine to be created in az2.
task := s.newProvisionerTaskWithBroker(c, broker, map[names.MachineTag][]string{
names.NewMachineTag("0"): {"az1"},
})

m0 := &testMachine{
id: "0",
constraints: "zones=az1,az2",
}
s.machineStatusResults = []apiprovisioner.MachineStatusResult{{Machine: m0, Status: params.StatusResult{}}}
s.sendMachineErrorRetryChange(c)
s.sendMachineErrorRetryChange(c)

select {
case <-started:
case <-time.After(coretesting.LongWait):
c.Fatalf("no matching call to StartInstance")
}

workertest.CleanKill(c, task)
}

func (s *ProvisionerTaskSuite) TestZoneRestrictiveConstraintsWithDistributionGroupRetry(c *gc.C) {
ctrl := gomock.NewController(c)
defer ctrl.Finish()

broker := s.setUpZonedEnviron(ctrl)
azConstraints := newAZConstraintStartInstanceParamsMatcher("az2")

// For the call to start instance, we expect the same zone constraints to
// be present, but we also expect that the zone in start instance params
// was selected from the constraints, based on a machine from the same
// distribution group already being in one of the zones.
azConstraintsAndDerivedZone := newAZConstraintStartInstanceParamsMatcher("az2")
azConstraintsAndDerivedZone.addMatch("availability zone: az2", func(p environs.StartInstanceParams) bool {
return p.AvailabilityZone == "az2"
})

// Use satisfaction of this call as the synchronisation point.
failedErr := errors.Errorf("oh no")
started := make(chan struct{})
gomock.InOrder(
broker.EXPECT().DeriveAvailabilityZones(s.callCtx, azConstraints).Return([]string{}, nil),
broker.EXPECT().StartInstance(s.callCtx, azConstraints).Return(nil, failedErr),
broker.EXPECT().DeriveAvailabilityZones(s.callCtx, azConstraints).Return([]string{}, nil),
broker.EXPECT().StartInstance(s.callCtx, azConstraints).Return(&environs.StartInstanceResult{
Instance: &testInstance{id: "instance-2"},
}, nil).Do(func(_ ...interface{}) {
go func() { started <- struct{}{} }()
}),
)

// Another machine from the same distribution group is already in az1,
// so we expect the machine to be created in az2.
task := s.newProvisionerTaskWithBroker(c, broker, map[names.MachineTag][]string{
names.NewMachineTag("0"): {"az2"},
names.NewMachineTag("1"): {"az3"},
})

m0 := &testMachine{
id: "0",
constraints: "zones=az2",
}
s.machineStatusResults = []apiprovisioner.MachineStatusResult{
{Machine: m0, Status: params.StatusResult{}},
}
s.sendMachineErrorRetryChange(c)
s.sendMachineErrorRetryChange(c)

select {
case <-started:
case <-time.After(coretesting.LongWait):
c.Fatalf("no matching call to StartInstance")
}

workertest.CleanKill(c, task)
}

func (s *ProvisionerTaskSuite) TestPopulateAZMachinesErrorWorkerStopped(c *gc.C) {
ctrl := gomock.NewController(c)
defer ctrl.Finish()
