Skip to content

Commit

Permalink
Merge pull request #210 from damdo/release-4.12-backport-OCPBUGS-7516
Browse files Browse the repository at this point in the history
[release-4.12] OCPBUGS-13943: fix double machine creation on stale cache
  • Loading branch information
openshift-merge-robot committed May 26, 2023
2 parents 94e4faf + d7c7eb1 commit 7e26923
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 9 deletions.
23 changes: 14 additions & 9 deletions pkg/controllers/controlplanemachineset/updates.go
Expand Up @@ -401,7 +401,7 @@ func (r *ControlPlaneMachineSetReconciler) createOnDeleteReplacementMachines(ctx
// Trigger a Machine creation.
logger := logger.WithValues("index", idx, "namespace", r.Namespace, "name", unknownMachineName)

result, err := r.createMachine(ctx, logger, machineProvider, idx)
_, result, err := r.createMachine(ctx, logger, machineProvider, idx)
if err != nil {
return false, result, err
}
Expand All @@ -416,7 +416,7 @@ func (r *ControlPlaneMachineSetReconciler) createOnDeleteReplacementMachines(ctx

if isDeletedMachine(machines[0]) {
// if deleted create the replacement
result, err := r.createMachine(ctx, logger, machineProvider, idx)
_, result, err := r.createMachine(ctx, logger, machineProvider, idx)
if err != nil {
return false, result, err
}
Expand Down Expand Up @@ -488,13 +488,13 @@ func deleteMachine(ctx context.Context, logger logr.Logger, machineProvider mach
return ctrl.Result{}, nil
}

// createMachine creates the Machine provided.
func (r *ControlPlaneMachineSetReconciler) createMachine(ctx context.Context, logger logr.Logger, machineProvider machineproviders.MachineProvider, idx int32) (ctrl.Result, error) {
// createMachine checks if a machine already exists and otherwise creates the Machine provided.
func (r *ControlPlaneMachineSetReconciler) createMachine(ctx context.Context, logger logr.Logger, machineProvider machineproviders.MachineProvider, idx int32) (bool, ctrl.Result, error) {
// Check if a replacement machine already exists and
// was not previously detected due to potential stale cache.
exists, err := r.checkForExistingReplacement(ctx, logger, machineProvider, idx)
if err != nil {
return ctrl.Result{}, fmt.Errorf("error checking for existing replacement: %w", err)
return false, ctrl.Result{}, fmt.Errorf("error checking for existing replacement: %w", err)
}

if exists {
Expand All @@ -503,18 +503,21 @@ func (r *ControlPlaneMachineSetReconciler) createMachine(ctx context.Context, lo
// This means the machine provider cache was stale when we previously checked.
// No need to create a replacement.
logger.V(2).Info(alreadyPresentReplacement)

// Do not error but signal the machine was not created (created=false).
return false, ctrl.Result{}, nil
}

if err := machineProvider.CreateMachine(ctx, logger, idx); err != nil {
werr := fmt.Errorf("error creating new Machine for index %d: %w", idx, err)
logger.Error(werr, errorCreatingMachine)

return ctrl.Result{}, werr
return false, ctrl.Result{}, werr
}

logger.V(2).Info(createdReplacement)

return ctrl.Result{}, nil
return true, ctrl.Result{}, nil
}

// createMachineWithSurge creates the Machine provided while observing the surge count.
Expand All @@ -531,12 +534,14 @@ func (r *ControlPlaneMachineSetReconciler) createMachineWithSurge(ctx context.Co

// There is still room to surge,
// trigger a Replacement Machine creation.
result, err := r.createMachine(ctx, logger, machineProvider, idx)
created, result, err := r.createMachine(ctx, logger, machineProvider, idx)
if err != nil {
return result, err
}

*surgeCount++
if created {
*surgeCount++
}

return result, nil
}
Expand Down
76 changes: 76 additions & 0 deletions pkg/controllers/controlplanemachineset/updates_test.go
Expand Up @@ -177,6 +177,44 @@ var _ = Describe("reconcileMachineUpdates", func() {
}
},
}),
Entry("with updates required in a single index, but actually the machine cache was stale, and so no creation needed", rollingUpdateTableInput{
cpmsBuilder: cpmsBuilder.WithReplicas(3),
machineInfos: map[int32][]machineproviders.MachineInfo{
0: {updatedMachineBuilder.WithIndex(0).WithMachineName("machine-0").WithNodeName("node-0").Build()},
1: {updatedMachineBuilder.WithIndex(1).WithMachineName("machine-1").WithNodeName("node-1").WithNeedsUpdate(true).Build()},
2: {updatedMachineBuilder.WithIndex(2).WithMachineName("machine-2").WithNodeName("node-2").Build()},
},
setupMock: func(machineInfos map[int32][]machineproviders.MachineInfo) {
mockMachineProvider.EXPECT().WithClient(gomock.Any()).Return(mockMachineProvider).AnyTimes()
mockMachineProvider.EXPECT().GetMachineInfos(gomock.Any(), gomock.Any()).Return(machineInfosMaptoSlice(
func(mI map[int32][]machineproviders.MachineInfo) map[int32][]machineproviders.MachineInfo {
mICopy := make(map[int32][]machineproviders.MachineInfo)
for k, v := range mI {
mICopy[k] = v
}

mICopy[int32(1)] = append(mICopy[int32(1)], updatedMachineBuilder.WithIndex(1).WithMachineName("machine-replacement-1").Build())

return mICopy
}(machineInfos)), nil).Times(1)
mockMachineProvider.EXPECT().CreateMachine(gomock.Any(), gomock.Any(), int32(1)).Return(nil).Times(0)
mockMachineProvider.EXPECT().DeleteMachine(gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
},
expectedLogsBuilder: func() []test.LogEntry {
return []test.LogEntry{
{
Level: 2,
KeysAndValues: []interface{}{
"updateStrategy", machinev1.RollingUpdate,
"index", int32(1),
"namespace", namespaceName,
"name", "machine-1",
},
Message: alreadyPresentReplacement,
},
}
},
}),
Entry("with updates required in a single index, and an error occurs", rollingUpdateTableInput{
cpmsBuilder: cpmsBuilder.WithReplicas(3),
expectedErrorBuilder: func() error { return fmt.Errorf("error creating new Machine for index %d: %w", 1, transientError) },
Expand Down Expand Up @@ -1130,6 +1168,44 @@ var _ = Describe("reconcileMachineUpdates", func() {
}
},
}),
Entry("with updates required in a single index, and the machine has been deleted, but actually the machine cache was stale, and so no creation needed", onDeleteUpdateTableInput{
cpmsBuilder: cpmsBuilder.WithReplicas(3),
machineInfos: map[int32][]machineproviders.MachineInfo{
0: {updatedMachineBuilder.WithIndex(0).WithMachineName("machine-0").WithNodeName("node-0").Build()},
1: {updatedMachineBuilder.WithIndex(1).WithMachineName("machine-1").WithNodeName("node-1").WithNeedsUpdate(true).WithMachineDeletionTimestamp(metav1.Now()).Build()},
2: {updatedMachineBuilder.WithIndex(2).WithMachineName("machine-2").WithNodeName("node-2").Build()},
},
setupMock: func(machineInfos map[int32][]machineproviders.MachineInfo) {
mockMachineProvider.EXPECT().WithClient(gomock.Any()).Return(mockMachineProvider).AnyTimes()
mockMachineProvider.EXPECT().GetMachineInfos(gomock.Any(), gomock.Any()).Return(machineInfosMaptoSlice(
func(mI map[int32][]machineproviders.MachineInfo) map[int32][]machineproviders.MachineInfo {
mICopy := make(map[int32][]machineproviders.MachineInfo)
for k, v := range mI {
mICopy[k] = v
}

mICopy[int32(1)] = append(mICopy[int32(1)], updatedMachineBuilder.WithIndex(1).WithMachineName("machine-replacement-1").Build())

return mICopy
}(machineInfos)), nil).Times(1)
mockMachineProvider.EXPECT().CreateMachine(gomock.Any(), gomock.Any(), int32(1)).Return(nil).Times(0)
mockMachineProvider.EXPECT().DeleteMachine(gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
},
expectedLogsBuilder: func() []test.LogEntry {
return []test.LogEntry{
{
Level: 2,
KeysAndValues: []interface{}{
"updateStrategy", machinev1.OnDelete,
"index", int32(1),
"namespace", namespaceName,
"name", "machine-1",
},
Message: alreadyPresentReplacement,
},
}
},
}),
Entry("with updates required in a single index, and the machine has been deleted", onDeleteUpdateTableInput{
cpmsBuilder: cpmsBuilder.WithReplicas(3),
machineInfos: map[int32][]machineproviders.MachineInfo{
Expand Down

0 comments on commit 7e26923

Please sign in to comment.