/
kfcommon.go
82 lines (70 loc) · 2.47 KB
/
kfcommon.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
package controllers
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
operationv1 "github.com/polyaxon/mloperator/api/v1"
"github.com/polyaxon/mloperator/controllers/kfapi"
"github.com/polyaxon/mloperator/controllers/managers"
)
// reconcileKFJobStatus is the common logic for reconciling job status: it
// derives the Operation's status from its pods and from the status of job.
//
// The pods are checked first through managers.HasUnschedulablePods. When that
// reports operationv1.OperationWarning, the warning is recorded on instance
// and the job's own status is not inspected. Otherwise the "status" field of
// job is decoded into a kfapi.JobStatus and only the last entry of its
// Conditions is considered:
//
//   - JobRunning / JobCreated: instance.LogRunning is called.
//   - JobSucceeded: instance.LogSucceeded is called and CompletionTime is set.
//   - JobFailed: instance.LogFailed is called; CompletionTime is set only if
//     LogFailed reports an update.
//   - JobRestarting: instance.LogWarning is called.
//
// The boolean result is true on the paths that called one of the Log* methods
// above; on the pod-warning and failed paths it is true only when the method
// reported an update. Presumably the caller uses it to decide whether the
// instance status must be persisted; confirm against the caller.
//
// A non-nil error is returned only when the status map cannot be converted
// into a kfapi.JobStatus. A missing or malformed "status" field is logged and
// reported as (false, nil).
func (r *OperationReconciler) reconcileKFJobStatus(instance *operationv1.Operation, job unstructured.Unstructured) (bool, error) {
	now := metav1.Now()
	log := r.Log

	// Check the pods
	podStatus, reason, message := managers.HasUnschedulablePods(r.Client, instance)
	if podStatus == operationv1.OperationWarning {
		log.V(1).Info("Service has unschedulable pod(s)", "Reason", reason, "message", message)
		if updated := instance.LogWarning(reason, message); updated {
			log.V(1).Info("Service Logging Status Warning")
			return true, nil
		}
		return false, nil
	}

	status, ok, unerr := unstructured.NestedFieldCopy(job.Object, "status")
	if !ok {
		if unerr != nil {
			log.Error(unerr, "NestedFieldCopy unstructured to status error")
			return false, nil
		}
		log.Info("NestedFieldCopy unstructured to status error",
			"err", "Status is not found in job")
		return false, nil
	}

	// NestedFieldCopy reports found for any value stored under "status",
	// including nil or a non-object value, so the assertion must be checked
	// instead of being allowed to panic inside the reconciler.
	statusMap, isMap := status.(map[string]interface{})
	if !isMap {
		log.Info("NestedFieldCopy unstructured to status error",
			"err", "Status is not a map")
		return false, nil
	}

	jobStatus := kfapi.JobStatus{}
	err := runtime.DefaultUnstructuredConverter.FromUnstructured(statusMap, &jobStatus)
	if err != nil {
		log.Error(err, "Convert unstructured to status error")
		return false, err
	}

	if len(jobStatus.Conditions) == 0 {
		return false, nil
	}

	// Only the last condition in the list is considered.
	cond := jobStatus.Conditions[len(jobStatus.Conditions)-1]

	switch cond.Type {
	case kfapi.JobRunning, kfapi.JobCreated:
		instance.LogRunning()
		log.V(1).Info("Job Logging Status Running")
		return true, nil

	case kfapi.JobSucceeded:
		instance.LogSucceeded()
		instance.Status.CompletionTime = &now
		log.V(1).Info("Job Logging Status Succeeded")
		return true, nil

	case kfapi.JobFailed:
		newMessage := operationv1.GetFailureMessage(cond.Message, podStatus, reason, message)
		if updated := instance.LogFailed(cond.Reason, newMessage); updated {
			instance.Status.CompletionTime = &now
			log.V(1).Info("Job Logging Status Failed", "Message", newMessage, "podStatus", podStatus, "PodMessage", message)
			return true, nil
		}
		// LogFailed reported no update: fall out of the switch and report
		// no change.

	case kfapi.JobRestarting:
		instance.LogWarning(cond.Reason, cond.Message)
		log.V(1).Info("Job Logging Status Warning")
		return true, nil
	}

	return false, nil
}