Skip to content

Commit

Permalink
update should not rollback when an error occurs on redis or on kubern…
Browse files Browse the repository at this point in the history
…etes
  • Loading branch information
henrod committed Dec 15, 2017
1 parent 077258d commit e4be1a1
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 45 deletions.
10 changes: 5 additions & 5 deletions controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -558,23 +558,23 @@ waitForLock:
for i, chunk := range podChunks {
l.Debugf("deleting chunk %d: %#v", i, names(chunk))

newlyCreatedPods, newlyDeletedPods, err := replacePodsAndWait(
newlyCreatedPods, newlyDeletedPods, timedout := replacePodsAndWait(
l, mr, clientset, db, redisClient.Client,
willTimeoutAt, clock, configYAML, chunk,
)
createdPods = append(createdPods, newlyCreatedPods...)
deletedPods = append(deletedPods, newlyDeletedPods...)
if err != nil {

if timedout {
l.Debug("update timed out, rolling back")
rollErr := rollback(
l, mr, db, redisClient.Client, clientset,
&oldConfig, maxSurge, 2*timeoutDur, createdPods, deletedPods,
)
&oldConfig, maxSurge, 2*timeoutDur, createdPods, deletedPods)
if rollErr != nil {
l.WithError(rollErr).Debug("error during update roll back")
err = rollErr
}
return err
return errors.New("timedout waiting rooms to be replaced, rolled back")
}
}
}
Expand Down
46 changes: 35 additions & 11 deletions controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2500,10 +2500,9 @@ cmd:
timeoutSec, lockTimeoutMS, 10,
lockKey,
mockClock,
nil,
)
nil)
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(Equal("timeout waiting for rooms to be removed"))
Expect(err.Error()).To(Equal("timedout waiting rooms to be replaced, rolled back"))
})

It("should return error if timeout when creating rooms", func() {
Expand Down Expand Up @@ -2676,7 +2675,7 @@ cmd:
nil,
)
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(Equal("timeout waiting for rooms to be created"))
Expect(err.Error()).To(Equal("timedout waiting rooms to be replaced, rolled back"))
})

It("should not return error if ClearAll fails in deleting old rooms", func() {
Expand Down Expand Up @@ -2705,7 +2704,7 @@ cmd:
*scheduler = *models.NewScheduler(configYaml1.Name, configYaml1.Game, yaml1)
}))

// Delete first room
// Delete rooms
for _, pod := range pods.Items {
calls.Add(mockRedisClient.EXPECT().TxPipeline().Return(mockPipeline))
calls.Add(mockPipeline.EXPECT().SAdd(models.FreePortsRedisKey(), gomock.Any()))
Expand All @@ -2727,10 +2726,14 @@ cmd:
calls.Add(mockPipeline.EXPECT().ZRem(models.GetRoomPingRedisKey(pod.GetNamespace()), room.ID))
calls.Add(mockPipeline.EXPECT().Del(room.GetRoomRedisKey()))
calls.Add(mockPipeline.EXPECT().Exec().Return(nil, errors.New("redis error")))
break
}

// Recreate pod that was deleted but redis failed
calls.Add(
mockClock.EXPECT().
Now().
Return(time.Unix(0, 0)))

// Create new pods
for range pods.Items {
calls.Add(mockRedisClient.EXPECT().TxPipeline().Return(mockPipeline))
calls.Add(
Expand Down Expand Up @@ -2759,8 +2762,16 @@ cmd:
SPop(models.FreePortsRedisKey()).
Return(goredis.NewStringResult("5000", nil)))
calls.Add(mockPipeline.EXPECT().Exec())
break
}

calls.Add(
mockClock.EXPECT().
Now().
Return(time.Unix(0, 0)))

mockDb.EXPECT().
Query(gomock.Any(), "UPDATE schedulers SET (name, game, yaml, state, state_last_changed_at, last_scale_op_at) = (?name, ?game, ?yaml, ?state, ?state_last_changed_at, ?last_scale_op_at) WHERE id=?id", gomock.Any())

calls.Add(
mockRedisClient.EXPECT().
Eval(gomock.Any(), []string{lockKey}, gomock.Any()).
Expand All @@ -2774,17 +2785,30 @@ cmd:
redisClient,
clientset,
&configYaml2,
timeoutSec, lockTimeoutMS, 10,
timeoutSec, lockTimeoutMS, 100,
lockKey,
mockClock,
nil,
)
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(Equal("redis error"))
Expect(err).NotTo(HaveOccurred())

pods, err = clientset.CoreV1().Pods("controller-name").List(metav1.ListOptions{})
Expect(err).NotTo(HaveOccurred())
Expect(pods.Items).To(HaveLen(3))

for _, pod := range pods.Items {
Expect(pod.GetName()).To(ContainSubstring("controller-name-"))
Expect(pod.GetName()).To(HaveLen(len("controller-name-") + 8))
Expect(pod.Spec.Containers[0].Env[0].Name).To(Equal("MY_ENV_VAR"))
Expect(pod.Spec.Containers[0].Env[0].Value).To(Equal("myvalue"))
Expect(pod.Spec.Containers[0].Env[1].Name).To(Equal("MY_NEW_ENV_VAR"))
Expect(pod.Spec.Containers[0].Env[1].Value).To(Equal("myvalue"))
Expect(pod.Spec.Containers[0].Env[2].Name).To(Equal("MAESTRO_SCHEDULER_NAME"))
Expect(pod.Spec.Containers[0].Env[2].Value).To(Equal("controller-name"))
Expect(pod.Spec.Containers[0].Env[3].Name).To(Equal("MAESTRO_ROOM_ID"))
Expect(pod.Spec.Containers[0].Env[3].Value).To(Equal(pod.GetName()))
Expect(pod.Spec.Containers[0].Env).To(HaveLen(4))
}
})

It("should update in two steps if maxSurge is 50%", func() {
Expand Down
47 changes: 20 additions & 27 deletions controller/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,46 +34,42 @@ func replacePodsAndWait(
clock clockinterfaces.Clock,
configYAML *models.ConfigYAML,
podsToDelete []v1.Pod,
) (createdPods []v1.Pod, deletedPods []v1.Pod, err error) {
) (createdPods []v1.Pod, deletedPods []v1.Pod, timedout bool) {
createdPods = []v1.Pod{}
deletedPods = []v1.Pod{}

for _, pod := range podsToDelete {
logger.Debugf("deleting pod %s", pod.GetName())

err = deletePodAndRoom(
err := deletePodAndRoom(
logger, mr, clientset, redisClient,
configYAML.Name, configYAML.Game,
pod.GetName(), reportersConstants.ReasonUpdate)
if err == nil || strings.Contains(err.Error(), "redis") {

deletedPods = append(deletedPods, pod)
}
if err != nil {
logger.WithError(err).Debugf("error deleting pod %s", pod.GetName())
return createdPods, deletedPods, err
}
}

timeout := willTimeoutAt.Sub(clock.Now())
createdPods, err = createPodsAsTheyAreDeleted(
createdPods, timedout = createPodsAsTheyAreDeleted(
logger, mr, clientset, db, redisClient, timeout, configYAML,
deletedPods,
)
if err != nil {
return createdPods, deletedPods, err
deletedPods)
if timedout {
return createdPods, deletedPods, timedout
}

timeout = willTimeoutAt.Sub(clock.Now())
err = waitCreatingPods(
timedout = waitCreatingPods(
logger, clientset, timeout, configYAML.Name,
createdPods,
)
if err != nil {
return createdPods, deletedPods, err
createdPods)
if timedout {
return createdPods, deletedPods, timedout
}

return createdPods, deletedPods, err
return createdPods, deletedPods, false
}

// In rollback, it must delete newly created pod and
Expand Down Expand Up @@ -154,13 +150,9 @@ func rollback(
}

waitTimeout := willTimeoutAt.Sub(time.Now())
err = waitCreatingPods(
waitCreatingPods(
logger, clientset, waitTimeout, configYAML.Name,
newlyCreatedPods,
)
if err != nil {
return err
}
newlyCreatedPods)
}
}

Expand All @@ -176,7 +168,7 @@ func createPodsAsTheyAreDeleted(
timeout time.Duration,
configYAML *models.ConfigYAML,
deletedPods []v1.Pod,
) (createdPods []v1.Pod, err error) {
) (createdPods []v1.Pod, timedout bool) {
logger := l.WithFields(logrus.Fields{
"operation": "controller.waitTerminatingPods",
"scheduler": configYAML.Name,
Expand Down Expand Up @@ -225,7 +217,7 @@ func createPodsAsTheyAreDeleted(
case <-timeoutTimer.C:
err := errors.New("timeout waiting for rooms to be removed")
logger.WithError(err).Error("stopping scale")
return createdPods, err
return createdPods, true
}

if exit {
Expand All @@ -234,7 +226,7 @@ func createPodsAsTheyAreDeleted(
}
}

return createdPods, err
return createdPods, false
}

func waitTerminatingPods(
Expand Down Expand Up @@ -292,7 +284,7 @@ func waitCreatingPods(
timeout time.Duration,
namespace string,
createdPods []v1.Pod,
) error {
) bool {
logger := l.WithFields(logrus.Fields{
"operation": "controller.waitCreatingPods",
"scheduler": namespace,
Expand Down Expand Up @@ -340,7 +332,8 @@ func waitCreatingPods(
}
}
case <-timeoutTimer.C:
return errors.New("timeout waiting for rooms to be created")
logger.Error("timeout waiting for rooms to be created")
return true
}

if exit {
Expand All @@ -349,7 +342,7 @@ func waitCreatingPods(
}
}

return nil
return false
}

func deletePodAndRoom(
Expand Down
4 changes: 2 additions & 2 deletions migrations/migrations.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e4be1a1

Please sign in to comment.