Skip to content

Commit

Permalink
Merge pull request #966 from johnugeorge/minorchanges
Browse files Browse the repository at this point in the history
Minor changes
  • Loading branch information
richardsliu committed Mar 27, 2019
2 parents aa322c7 + 9fe81db commit 7e5ece8
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 15 deletions.
1 change: 0 additions & 1 deletion pkg/apis/tensorflow/v1beta2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ type TFJobSpec struct {
ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty"`

// Optional number of retries before marking this job failed.
// Defaults to 6
// +optional
BackoffLimit *int32 `json:"backoffLimit,omitempty"`

Expand Down
15 changes: 4 additions & 11 deletions pkg/controller.v1beta2/tensorflow/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,14 +358,14 @@ func (tc *TFController) reconcileTFJobs(tfjob *tfv1beta2.TFJob) error {

activePods := k8sutil.FilterActivePods(pods)
active := int32(len(activePods))
_, failed := getSucceededAndFailedCount(pods)
failed := k8sutil.FilterPodCount(pods, v1.PodFailed)
totalReplicas := getTotalReplicas(tfjob)
prevReplicasFailedNum := getTotalFailedReplicas(tfjob)

tfJobExceedsLimit := false
var failureMessage string
var exceedsBackoffLimit bool = false
var pastBackoffLimit bool = false
tfJobExceedsLimit := false
exceedsBackoffLimit := false
pastBackoffLimit := false

if tfjob.Spec.BackoffLimit != nil {
jobHasNewFailure := failed > prevReplicasFailedNum
Expand Down Expand Up @@ -536,13 +536,6 @@ func (tc *TFController) pastActiveDeadline(tfjob *tfv1beta2.TFJob) bool {
return duration >= allowedDuration
}

// getSucceededAndFailedCount returns two counts for the given pod list: the
// number of pods whose phase is v1.PodSucceeded and the number whose phase is
// v1.PodFailed. Each count comes from k8sutil.FilterPods and is converted to
// int32 before being returned.
func getSucceededAndFailedCount(pods []*v1.Pod) (succeeded, failed int32) {
succeeded = int32(k8sutil.FilterPods(pods, v1.PodSucceeded))
failed = int32(k8sutil.FilterPods(pods, v1.PodFailed))
return
}

func (tc *TFController) GetJobFromInformerCache(namespace, name string) (metav1.Object, error) {
return tc.getTFJobFromName(namespace, name)
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/util/k8sutil/k8sutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ func IsPodActive(p *v1.Pod) bool {
p.DeletionTimestamp == nil
}

// FilterPods returns the number of pods whose status phase equals the given phase.
func FilterPods(pods []*v1.Pod, phase v1.PodPhase) int {
result := 0
// FilterPodCount returns the number of pods whose status phase equals the given phase.
func FilterPodCount(pods []*v1.Pod, phase v1.PodPhase) int32 {
var result int32
for i := range pods {
if phase == pods[i].Status.Phase {
result++
Expand Down

0 comments on commit 7e5ece8

Please sign in to comment.