Skip to content

Commit

Permalink
Merge pull request #120 from topfreegames/feature/prevent-creating-po…
Browse files Browse the repository at this point in the history
…ds-with-error

Stop pods creation when they have error
  • Loading branch information
gabrielcorado authored Apr 14, 2021
2 parents 8671b9a + d074c7a commit 3f7dd6f
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 4 deletions.
4 changes: 2 additions & 2 deletions controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,12 +392,12 @@ func ScaleUp(

configYAML, _ := models.NewConfigYAML(scheduler.YAML)

existPendingPods, err := pendingPods(config, redisClient, scheduler.Name, mr)
existPendingPods, err := pendingOrFailedPods(config, redisClient, scheduler.Name, mr)
if err != nil {
return err
}
if existPendingPods {
return errors.New("there are pending pods, check if there are enough CPU and memory to allocate new rooms")
return errors.New("there are pending or failed pods")
}

amount, err = SetScalingAmount(
Expand Down
84 changes: 83 additions & 1 deletion controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1594,7 +1594,46 @@ cmd:

err = controller.ScaleUp(logger, roomManager, mr, mockDb, mockRedisClient, clientset, scheduler, amount, timeoutSec, true, config, true)
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(Equal("there are pending pods, check if there are enough CPU and memory to allocate new rooms"))
Expect(err.Error()).To(Equal("there are pending or failed pods"))
})

It("should return error and not scale up if there are failed pods", func() {
amount := 5
var configYaml1 models.ConfigYAML
err := yaml.Unmarshal([]byte(yaml1), &configYaml1)
Expect(err).NotTo(HaveOccurred())
scheduler := models.NewScheduler(configYaml1.Name, configYaml1.Game, yaml1)

pods := make([]string, 0, amount*2)
for i := 0; i < amount; i++ {
pod := &models.Pod{}
pod.Name = fmt.Sprintf("room-%d", i)
pod.Status.Phase = v1.PodRunning
if i == 0 {
pod.Status.ContainerStatuses = []v1.ContainerStatus{
{
State: v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{
Reason: "ErrImagePull",
},
},
},
}
}
jsonBytes, err := pod.MarshalToRedis()
Expect(err).NotTo(HaveOccurred())
pods = append(pods, pod.Name, string(jsonBytes))
}

mockRedisClient.EXPECT().TxPipeline().Return(mockPipeline)
mockPipeline.EXPECT().
HScan(models.GetPodMapRedisKey(configYaml1.Name), uint64(0), "*", gomock.Any()).
Return(goredis.NewScanCmdResult(pods, 0, nil))
mockPipeline.EXPECT().Exec()

err = controller.ScaleUp(logger, roomManager, mr, mockDb, mockRedisClient, clientset, scheduler, amount, timeoutSec, true, config, true)
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(Equal("there are pending or failed pods"))
})

It("should scale up to max if scaling amount is higher than max", func() {
Expand Down Expand Up @@ -6863,5 +6902,48 @@ containers:
Expect(cancelErr).ToNot(HaveOccurred())
Expect(err).ToNot(HaveOccurred())
})

It("should return error when a pod is with error", func() {
pods := []*models.Pod{
&models.Pod{Name: "room-1"},
}

scheduler := models.NewScheduler(configYaml1.Name, configYaml1.Game, string(configYaml1.ToYAML()))

mockRedisClient.EXPECT().
HGetAll(opManager.GetOperationKey()).
Return(goredis.NewStringStringMapResult(map[string]string{
"description": models.OpManagerRollingUpdate,
}, nil)).AnyTimes()

mt.MockCreateRoomsAnyTimes(mockRedisClient, mockPipeline, &configYaml1, 1)
mt.MockGetPortsFromPoolAnyTimes(&configYaml1, mockRedisClient, mockPortChooser, workerPortRange, portStart, portEnd)

runningPodJSON := `{"name":"room-1", "status":{"phase":"Pending", "containerStatuses": [{"state": {"waiting": {"reason": "ErrImagePull"}}}]}}`
mockRedisClient.EXPECT().
HGet(models.GetPodMapRedisKey(configYaml1.Name), gomock.Any()).
Return(goredis.NewStringResult(runningPodJSON, nil)).
Times(2)

timeoutErr, cancelErr, err := controller.SegmentAndReplacePods(
logger,
roomManager,
mr,
clientset,
mockDb,
mockRedisClient,
time.Now().Add(time.Minute),
&configYaml1,
pods,
scheduler,
opManager,
10*time.Second,
10,
1,
)
Expect(timeoutErr).ToNot(HaveOccurred())
Expect(cancelErr).ToNot(HaveOccurred())
Expect(err).To(HaveOccurred())
})
})
})
16 changes: 15 additions & 1 deletion controller/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,11 @@ func waitCreatingPods(
}

if createdPod.Status.Phase != v1.PodRunning {
if err := models.ValidatePodWaitingState(createdPod); err != nil {
l.WithError(err).Error("pod has error, aborting the pods creation")
return false, err
}

isPending, reason, message := models.PodPending(createdPod)
if isPending && strings.Contains(message, models.PodNotFitsHostPorts) {
l.WithFields(logrus.Fields{
Expand Down Expand Up @@ -765,6 +770,11 @@ func waitForPods(
retryNo[i] = 0

if pod.Status.Phase != v1.PodRunning {
if err := models.ValidatePodWaitingState(pod); err != nil {
l.WithError(err).Error("pod has error, aborting the pods creation")
return err
}

isPending, reason, message := models.PodPending(pod)
if isPending && strings.Contains(message, models.PodNotFitsHostPorts) {
l.WithFields(logrus.Fields{
Expand Down Expand Up @@ -801,7 +811,7 @@ func waitForPods(
return nil
}

func pendingPods(
func pendingOrFailedPods(
config *viper.Viper,
redisClient redisinterfaces.RedisClient,
namespace string,
Expand All @@ -821,6 +831,10 @@ func pendingPods(
if pod.Status.Phase == v1.PodPending {
return true, nil
}

if err := models.ValidatePodWaitingState(pod); err != nil {
return true, nil
}
}

return false, nil
Expand Down
2 changes: 2 additions & 0 deletions models/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ var InvalidPodWaitingStates = []string{
"ErrImagePullBackOff",
"ImagePullBackOff",
"ErrInvalidImageName",
"ErrImagePull",
"CrashLoopBackOff",
}

// OperationManager description constants
Expand Down

0 comments on commit 3f7dd6f

Please sign in to comment.