diff --git a/pkg/engine/health/health.go b/pkg/engine/health/health.go
deleted file mode 100644
index 33420c655..000000000
--- a/pkg/engine/health/health.go
+++ /dev/null
@@ -1,145 +0,0 @@
-package health
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"log"
-	"reflect"
-
-	appsv1 "k8s.io/api/apps/v1"
-	batchv1 "k8s.io/api/batch/v1"
-	corev1 "k8s.io/api/core/v1"
-	apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
-	apiextv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
-	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
-	"k8s.io/apimachinery/pkg/runtime"
-	"k8s.io/client-go/discovery"
-	"k8s.io/kubectl/pkg/polymorphichelpers"
-	"k8s.io/kubectl/pkg/util/podutils"
-	"sigs.k8s.io/controller-runtime/pkg/client"
-
-	kudoapi "github.com/kudobuilder/kudo/pkg/apis/kudo/v1beta1"
-	"github.com/kudobuilder/kudo/pkg/engine"
-	"github.com/kudobuilder/kudo/pkg/engine/resource"
-	"github.com/kudobuilder/kudo/pkg/kudoctl/clog"
-)
-
-func isJobTerminallyFailed(job *batchv1.Job) (bool, string) {
-	for _, c := range job.Status.Conditions {
-		if c.Type == batchv1.JobFailed && c.Status == corev1.ConditionTrue {
-			log.Printf("HealthUtil: Job %q has failed: %s", job.Name, c.Message)
-			return true, c.Message
-		}
-	}
-	return false, ""
-}
-
-func IsDeleted(client client.Client, discovery discovery.CachedDiscoveryInterface, objs []runtime.Object) error {
-	for _, obj := range objs {
-		key, err := resource.ObjectKeyFromObject(obj, discovery)
-		if err != nil {
-			return err
-		}
-		newObj := obj.DeepCopyObject()
-		err = client.Get(context.TODO(), key, newObj)
-		if !apierrors.IsNotFound(err) {
-			return fmt.Errorf("%s/%s is not deleted", key.Namespace, key.Name)
-		}
-	}
-	return nil
-}
-
-// IsHealthy returns whether an object is healthy. Must be implemented for each type.
-func IsHealthy(obj runtime.Object) error {
-	if obj == nil {
-		return nil
-	}
-	unstructMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj)
-	if err != nil {
-		return err
-	}
-
-	objUnstructured := &unstructured.Unstructured{Object: unstructMap}
-	switch obj := obj.(type) {
-	case *apiextv1beta1.CustomResourceDefinition:
-		for _, c := range obj.Status.Conditions {
-			if c.Type == apiextv1beta1.Established && c.Status == apiextv1beta1.ConditionTrue {
-				log.Printf("CRD %s is now healthy", obj.Name)
-				return nil
-			}
-		}
-		msg := fmt.Sprintf("CRD %s is not healthy ( Conditions: %v )", obj.Name, obj.Status.Conditions)
-		return errors.New(msg)
-	case *apiextv1.CustomResourceDefinition:
-		for _, c := range obj.Status.Conditions {
-			if c.Type == apiextv1.Established && c.Status == apiextv1.ConditionTrue {
-				log.Printf("CRD %s is now healthy", obj.Name)
-				return nil
-			}
-		}
-		msg := fmt.Sprintf("CRD %s is not healthy ( Conditions: %v )", obj.Name, obj.Status.Conditions)
-		return errors.New(msg)
-	case *appsv1.StatefulSet:
-		statusViewer := &polymorphichelpers.StatefulSetStatusViewer{}
-		msg, done, err := statusViewer.Status(objUnstructured, 0)
-		if err != nil {
-			return err
-		}
-		if !done {
-			log.Printf("HealthUtil: Statefulset %v is NOT healthy. %s", obj.Name, msg)
-			return errors.New(msg)
-		}
-		log.Printf("Statefulset %v is marked healthy\n", obj.Name)
-		return nil
-	case *appsv1.Deployment:
-		statusViewer := &polymorphichelpers.DeploymentStatusViewer{}
-		msg, done, err := statusViewer.Status(objUnstructured, 0)
-		if err != nil {
-			return err
-		}
-		if !done {
-			log.Printf("HealthUtil: Deployment %v is NOT healthy. %s", obj.Name, msg)
-			return errors.New(msg)
-		}
-		clog.V(2).Printf("Deployment %v is marked healthy\n", obj.Name)
-		return nil
-	case *batchv1.Job:
-
-		if obj.Status.Succeeded == int32(1) {
-			// Done!
-			log.Printf("HealthUtil: Job %q is marked healthy", obj.Name)
-			return nil
-		}
-		if terminal, msg := isJobTerminallyFailed(obj); terminal {
-			return fmt.Errorf("%wHealthUtil: Job %q has failed terminally: %s", engine.ErrFatalExecution, obj.Name, msg)
-		}
-
-		return fmt.Errorf("job %q still running or failed", obj.Name)
-	case *kudoapi.Instance:
-		// if there is no scheduled plan, then we're done
-		if obj.Spec.PlanExecution.PlanName == "" {
-			return nil
-		}
-
-		return fmt.Errorf("instance %s/%s active plan is in state %v", obj.Namespace, obj.Name, obj.Spec.PlanExecution.Status)
-
-	case *corev1.Pod:
-		if obj.Status.Phase == corev1.PodRunning && podutils.IsPodReady(obj) {
-			return nil
-		}
-		return fmt.Errorf("pod %q is not running yet: %s", obj.Name, obj.Status.Phase)
-
-	case *corev1.Namespace:
-		if obj.Status.Phase == corev1.NamespaceActive {
-			return nil
-		}
-		return fmt.Errorf("namespace %s is not active: %s", obj.Name, obj.Status.Phase)
-
-	// unless we build logic for what a healthy object is, assume it's healthy when created.
-	default:
-		log.Printf("HealthUtil: Unknown type %s is marked healthy by default", reflect.TypeOf(obj))
-		return nil
-	}
-}
diff --git a/pkg/engine/task/task_apply.go b/pkg/engine/task/task_apply.go
index 741a66099..6f5dba9c6 100644
--- a/pkg/engine/task/task_apply.go
+++ b/pkg/engine/task/task_apply.go
@@ -20,8 +20,8 @@ import (
 
 	kudoapi "github.com/kudobuilder/kudo/pkg/apis/kudo/v1beta1"
 	"github.com/kudobuilder/kudo/pkg/engine"
-	"github.com/kudobuilder/kudo/pkg/engine/health"
 	"github.com/kudobuilder/kudo/pkg/engine/resource"
+	"github.com/kudobuilder/kudo/pkg/kubernetes/status"
 	"github.com/kudobuilder/kudo/pkg/util/kudo"
 )
@@ -238,15 +238,39 @@ func strategicThreeWayMergePatch(r runtime.Object, original, modified, current [
 
 func isHealthy(ro []runtime.Object) error {
 	for _, r := range ro {
-		err := health.IsHealthy(r)
+		err := isResourceHealthy(r)
 		if err != nil {
-			key, _ := client.ObjectKeyFromObject(r)
+			key, _ := client.ObjectKeyFromObject(r) // err not possible as all runtime.Objects have metadata
 			return fmt.Errorf("object %s/%s is NOT healthy: %w", key.Namespace, key.Name, err)
 		}
 	}
 	return nil
 }
 
+func isResourceHealthy(obj runtime.Object) error {
+	healthy, msg, err := status.IsHealthy(obj)
+	if err != nil {
+		return err
+	}
+	if healthy {
+		if msg != "" {
+			log.Printf("HealthUtil: %s", msg)
+		}
+		return nil
+	}
+	isTerminal, terminalMsg, err := status.IsTerminallyFailed(obj)
+	if err != nil {
+		return err
+	}
+	if isTerminal {
+		log.Printf("HealthUtil: %s", terminalMsg)
+		return fmt.Errorf("%wHealthUtil: %s", engine.ErrFatalExecution, terminalMsg)
+	}
+
+	log.Printf("HealthUtil: %s", msg)
+	return errors.New(msg)
+}
+
 // copy from k8s.io/kubectl@v0.16.6/pkg/util/apply.go, with adjustments
 // GetOriginalConfiguration retrieves the original configuration of the object
 // from the annotation.
diff --git a/pkg/engine/task/task_delete.go b/pkg/engine/task/task_delete.go
index ac993a52f..e1734cf81 100644
--- a/pkg/engine/task/task_delete.go
+++ b/pkg/engine/task/task_delete.go
@@ -5,7 +5,7 @@ import (
 	"fmt"
 	"log"
 
-	"github.com/kudobuilder/kudo/pkg/engine/health"
+	"github.com/kudobuilder/kudo/pkg/kubernetes/status"
 
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -54,11 +54,20 @@ func (dt DeleteTask) Run(ctx Context) (bool, error) {
 		return false, err
 	}
 
-	// 6. - Check health: always true for Delete task -
-	err = health.IsDeleted(ctx.Client, ctx.Discovery, objs)
-	if err != nil {
-		log.Printf("TaskExecution: %v", err)
-		return false, nil
+	// 6. - Check health - wait for object deletion
+	return allObjsDeleted(ctx, objs)
+}
+
+func allObjsDeleted(ctx Context, objs []runtime.Object) (bool, error) {
+	for _, obj := range objs {
+		objDeleted, _, err := status.IsDeleted(ctx.Client, ctx.Discovery, obj)
+		if err != nil {
+			log.Printf("TaskExecution: wait for object deletion: %v", err)
+			return false, nil
+		}
+		if !objDeleted {
+			return false, nil
+		}
 	}
 	return true, nil
 }
diff --git a/pkg/engine/task/task_kudo_operator.go b/pkg/engine/task/task_kudo_operator.go
index b747e499a..157736197 100644
--- a/pkg/engine/task/task_kudo_operator.go
+++ b/pkg/engine/task/task_kudo_operator.go
@@ -15,7 +15,6 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 
 	kudoapi "github.com/kudobuilder/kudo/pkg/apis/kudo/v1beta1"
-	"github.com/kudobuilder/kudo/pkg/engine/health"
 	"github.com/kudobuilder/kudo/pkg/engine/renderer"
 	parser "github.com/kudobuilder/kudo/pkg/kudoctl/cmd/params"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/packages"
@@ -63,7 +62,7 @@ func (kt KudoOperatorTask) Run(ctx Context) (bool, error) {
 	}
 
 	// 4. - Check the Instance health -
-	if err := health.IsHealthy(instance); err != nil {
+	if err := isResourceHealthy(instance); err != nil {
 		return false, nil
 	}
 
diff --git a/pkg/kubernetes/status/health.go b/pkg/kubernetes/status/health.go
new file mode 100644
index 000000000..fe467ebfd
--- /dev/null
+++ b/pkg/kubernetes/status/health.go
@@ -0,0 +1,171 @@
+package status
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+
+	appsv1 "k8s.io/api/apps/v1"
+	appsv1beta1 "k8s.io/api/apps/v1beta1"
+	appsv1beta2 "k8s.io/api/apps/v1beta2"
+	batchv1 "k8s.io/api/batch/v1"
+	corev1 "k8s.io/api/core/v1"
+	apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
+	apiextv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/discovery"
+	"k8s.io/kubectl/pkg/polymorphichelpers"
+	"k8s.io/kubectl/pkg/util/podutils"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	kudoapi "github.com/kudobuilder/kudo/pkg/apis/kudo/v1beta1"
+	"github.com/kudobuilder/kudo/pkg/engine/resource"
+)
+
+func isJobTerminallyFailed(job *batchv1.Job) (bool, string, error) {
+	for _, c := range job.Status.Conditions {
+		if c.Type == batchv1.JobFailed && c.Status == corev1.ConditionTrue {
+			return true, fmt.Sprintf("job %q has failed terminally: %s", job.Name, c.Message), nil
+		}
+	}
+	return false, "", nil
+}
+
+// IsDeleted returns true if the given object is deleted from the cluster, and a corresponding message.
+// The returned msg is optional and should reflect the deletion state in a human readable form.
+// If the returned error is non-nil, the other returned values can be undefined and should not be used.
+// This is a generic function and works on all resource types.
+func IsDeleted(client client.Client, discovery discovery.CachedDiscoveryInterface, obj runtime.Object) (deleted bool, msg string, err error) {
+	key, err := resource.ObjectKeyFromObject(obj, discovery)
+	if err != nil {
+		return false, "", fmt.Errorf("failed to get object key from object: %v", err)
+	}
+	newObj := obj.DeepCopyObject()
+	err = client.Get(context.TODO(), key, newObj)
+	if err == nil {
+		// Object was retrieved without error - not deleted
+		return false, fmt.Sprintf("%s/%s is not deleted", key.Namespace, key.Name), nil
+	}
+	if apierrors.IsNotFound(err) {
+		// Object was not found - deleted
+		return true, fmt.Sprintf("%s/%s is deleted", key.Namespace, key.Name), nil
+	}
+
+	// We got a different error - it's an error
+	return false, "", fmt.Errorf("%s/%s is not deleted: %v", key.Namespace, key.Name, err)
+}
+
+// IsTerminallyFailed returns true if a resource will never become healthy, along with a corresponding message.
+// The returned msg is optional and should reflect the terminal state in a human readable form and may include
+// a potential reason.
+// If the returned error is non-nil, the other returned values can be undefined and should not be used.
+// Must be implemented for each type; all unimplemented resources are considered non-terminal by default.
+func IsTerminallyFailed(obj runtime.Object) (terminal bool, msg string, err error) {
+	if obj == nil {
+		return true, "", nil
+	}
+
+	switch obj := obj.(type) {
+	case *batchv1.Job:
+		return isJobTerminallyFailed(obj)
+	}
+	return false, "", nil
+}
+
+// IsHealthy returns whether an object is healthy and a corresponding message.
+// The message is optional and may be empty in any case; it is human readable and should reflect
+// the returned healthy status and an optional reason.
+// When the returned error is non-nil, all other parameters can have undefined values and should not
+// be used.
+// Must be implemented for each type; all unimplemented resources are considered healthy by default.
+func IsHealthy(obj runtime.Object) (healthy bool, msg string, err error) {
+	if obj == nil {
+		return true, "", nil
+	}
+	switch obj := obj.(type) {
+	case *apiextv1beta1.CustomResourceDefinition:
+		for _, c := range obj.Status.Conditions {
+			if c.Type == apiextv1beta1.Established && c.Status == apiextv1beta1.ConditionTrue {
+				return true, fmt.Sprintf("CRD %s is now healthy", obj.Name), nil
+			}
+		}
+		return false, fmt.Sprintf("CRD %s is not healthy ( Conditions: %v )", obj.Name, obj.Status.Conditions), nil
+
+	case *apiextv1.CustomResourceDefinition:
+		for _, c := range obj.Status.Conditions {
+			if c.Type == apiextv1.Established && c.Status == apiextv1.ConditionTrue {
+				return true, fmt.Sprintf("CRD %s is now healthy", obj.Name), nil
+			}
+		}
+		return false, fmt.Sprintf("CRD %s is not healthy ( Conditions: %v )", obj.Name, obj.Status.Conditions), nil
+
+	case *appsv1.StatefulSet, *appsv1beta1.StatefulSet, *appsv1beta2.StatefulSet:
+		objUnstructured, err := toUnstructured(obj)
+		if err != nil {
+			return false, "", err
+		}
+		statusViewer := &polymorphichelpers.StatefulSetStatusViewer{}
+		msg, done, err := statusViewer.Status(objUnstructured, 0)
+		if err != nil {
+			return false, "", err
+		}
+		if done {
+			return true, fmt.Sprintf("statefulset %q is marked healthy", objUnstructured.GetName()), nil
+		}
+		return false, fmt.Sprintf("statefulset %q is not healthy: %s", objUnstructured.GetName(), msg), nil
+
+	case *appsv1.Deployment, *appsv1beta1.Deployment, *appsv1beta2.Deployment:
+		objUnstructured, err := toUnstructured(obj)
+		if err != nil {
+			return false, "", err
+		}
+		statusViewer := &polymorphichelpers.DeploymentStatusViewer{}
+		msg, done, err := statusViewer.Status(objUnstructured, 0)
+		if err != nil {
+			return false, "", err
+		}
+		if done {
+			return true, fmt.Sprintf("deployment %v is marked healthy", objUnstructured.GetName()), nil
+		}
+		return false, fmt.Sprintf("deployment %q is not healthy: %s", objUnstructured.GetName(), msg), nil
+
+	case *batchv1.Job:
+		if obj.Status.Succeeded == int32(1) {
+			return true, fmt.Sprintf("job %q is marked healthy", obj.Name), nil
+		}
+		return false, fmt.Sprintf("job %q still running or failed", obj.Name), nil
+
+	case *kudoapi.Instance:
+		// if there is no scheduled plan, then we're done
+		if obj.Spec.PlanExecution.PlanName == "" {
+			return true, fmt.Sprintf("instance %s/%s is marked healthy", obj.Namespace, obj.Name), nil
+		}
+		return false, fmt.Sprintf("instance %s/%s active plan is in state %v", obj.Namespace, obj.Name, obj.Spec.PlanExecution.Status), nil
+
+	case *corev1.Pod:
+		if obj.Status.Phase == corev1.PodRunning && podutils.IsPodReady(obj) {
+			return true, "", nil
+		}
+		return false, fmt.Sprintf("pod %s/%s is not running yet: %s", obj.Namespace, obj.Name, obj.Status.Phase), nil
+
+	case *corev1.Namespace:
+		if obj.Status.Phase == corev1.NamespaceActive {
+			return true, "", nil
+		}
+		return false, fmt.Sprintf("namespace %s is not active: %s", obj.Name, obj.Status.Phase), nil
+
+	// unless we build logic for what a healthy object is, assume it's healthy when created.
+	default:
+		return true, fmt.Sprintf("unknown type %s is marked healthy by default", reflect.TypeOf(obj)), nil
+	}
+}
+
+func toUnstructured(obj runtime.Object) (*unstructured.Unstructured, error) {
+	unstructMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj)
+	if err != nil {
+		return nil, err
+	}
+	return &unstructured.Unstructured{Object: unstructMap}, nil
+}
diff --git a/pkg/kudoctl/kudoinit/crd/crds.go b/pkg/kudoctl/kudoinit/crd/crds.go
index b87ec80c1..14cb37833 100644
--- a/pkg/kudoctl/kudoinit/crd/crds.go
+++ b/pkg/kudoctl/kudoinit/crd/crds.go
@@ -13,7 +13,7 @@ import (
 	"k8s.io/apimachinery/pkg/runtime"
 	"sigs.k8s.io/yaml"
 
-	"github.com/kudobuilder/kudo/pkg/engine/health"
+	"github.com/kudobuilder/kudo/pkg/kubernetes/status"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/clog"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/kube"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/kudoinit"
@@ -122,8 +122,11 @@ func (c Initializer) verifyInstallation(client v1beta1.CustomResourceDefinitions
 		result.AddErrors(fmt.Sprintf("Installed CRD %s has invalid version %s, expected %s", crd.Name, existingCrd.Spec.Version, crd.Spec.Version))
 		return nil
 	}
-	if err := health.IsHealthy(existingCrd); err != nil {
-		result.AddErrors(fmt.Sprintf("Installed CRD %s is not healthy: %v", crd.Name, err))
+	if healthy, msg, err := status.IsHealthy(existingCrd); !healthy || err != nil {
+		if err != nil {
+			return err
+		}
+		result.AddErrors(fmt.Sprintf("Installed CRD %s is not healthy: %v", crd.Name, msg))
 		return nil
 	}
 	clog.V(2).Printf("CRD %s is installed with version %s", crd.Name, existingCrd.Spec.Versions[0].Name)
diff --git a/pkg/kudoctl/kudoinit/manager/manager.go b/pkg/kudoctl/kudoinit/manager/manager.go
index 9be5adce9..30db23730 100644
--- a/pkg/kudoctl/kudoinit/manager/manager.go
+++ b/pkg/kudoctl/kudoinit/manager/manager.go
@@ -14,8 +14,8 @@ import (
 	"k8s.io/apimachinery/pkg/util/intstr"
 	client2 "sigs.k8s.io/controller-runtime/pkg/client"
 
-	"github.com/kudobuilder/kudo/pkg/engine/health"
 	"github.com/kudobuilder/kudo/pkg/kubernetes"
+	"github.com/kudobuilder/kudo/pkg/kubernetes/status"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/clog"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/kube"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/kudoinit"
@@ -118,8 +118,11 @@ func (m *Initializer) verifyManagerInstalled(client *kube.Client, result *verifi
 		result.AddWarnings(fmt.Sprintf("KUDO manager has an unexpected image. Expected %s but found %s", m.options.Image, mgr.Spec.Template.Spec.Containers[0].Image))
 		return nil
 	}
-	if err := health.IsHealthy(mgr); err != nil {
-		result.AddErrors("KUDO manager seems to be not healthy")
+	if healthy, msg, err := status.IsHealthy(mgr); !healthy || err != nil {
+		if err != nil {
+			return err
+		}
+		result.AddErrors(fmt.Sprintf("KUDO manager is not healthy: %s", msg))
 		return nil
 	}
 	clog.V(2).Printf("KUDO manager is healthy and running %s", mgr.Spec.Template.Spec.Containers[0].Image)
diff --git a/pkg/kudoctl/kudoinit/prereq/namespace.go b/pkg/kudoctl/kudoinit/prereq/namespace.go
index ab6cd9314..7dcc36620 100644
--- a/pkg/kudoctl/kudoinit/prereq/namespace.go
+++ b/pkg/kudoctl/kudoinit/prereq/namespace.go
@@ -9,7 +9,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 
-	"github.com/kudobuilder/kudo/pkg/engine/health"
+	"github.com/kudobuilder/kudo/pkg/kubernetes/status"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/clog"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/kube"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/kudoinit"
@@ -73,8 +73,11 @@ func (o KudoNamespace) VerifyInstallation(client *kube.Client, result *verifier.
 		}
 		return err
 	}
-	if err := health.IsHealthy(ns); err != nil {
-		result.AddErrors(fmt.Sprintf("namespace %s is not healthy: %v", o.opts.Namespace, err))
+	if healthy, msg, err := status.IsHealthy(ns); !healthy || err != nil {
+		if err != nil {
+			return err
+		}
+		result.AddErrors(fmt.Sprintf("namespace %s is not healthy: %v", o.opts.Namespace, msg))
 	}
 	return nil
 }
diff --git a/pkg/kudoctl/kudoinit/prereq/webhook.go b/pkg/kudoctl/kudoinit/prereq/webhook.go
index 290998506..a5a56ce55 100644
--- a/pkg/kudoctl/kudoinit/prereq/webhook.go
+++ b/pkg/kudoctl/kudoinit/prereq/webhook.go
@@ -18,8 +18,8 @@ import (
 	"k8s.io/client-go/kubernetes"
 	clientv1beta1 "k8s.io/client-go/kubernetes/typed/admissionregistration/v1beta1"
 
-	"github.com/kudobuilder/kudo/pkg/engine/health"
 	kubeutils "github.com/kudobuilder/kudo/pkg/kubernetes"
+	"github.com/kudobuilder/kudo/pkg/kubernetes/status"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/clog"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/kube"
 	"github.com/kudobuilder/kudo/pkg/kudoctl/kudoinit"
@@ -316,7 +316,7 @@ func (k *KudoWebHook) validateCertManagerInstallation(client *kube.Client, resul
 		result.AddWarnings("unable to validate cert-manager controller deployment. Spec had no containers")
 		return nil
 	}
-	if err := health.IsHealthy(&deployment); err != nil {
+	if healthy, _, err := status.IsHealthy(&deployment); !healthy || err != nil {
 		result.AddWarnings("cert-manager seems not to be running correctly. Make sure cert-manager is working")
 		return nil
 	}
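Reviewer sketch (not part of this change): a small test exercising the `batchv1.Job` branches of the new package. The file and test names are illustrative only; the expectations follow the behavior encoded above — a job with `Status.Succeeded == 1` is healthy, and a `JobFailed=True` condition is terminal.

```go
package status_test

import (
	"testing"

	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/kudobuilder/kudo/pkg/kubernetes/status"
)

func TestJobHealth(t *testing.T) {
	job := &batchv1.Job{ObjectMeta: metav1.ObjectMeta{Name: "pi"}}

	// A job with no successes yet is reported as not (yet) healthy...
	healthy, msg, err := status.IsHealthy(job)
	if err != nil || healthy {
		t.Fatalf("expected unhealthy, got healthy=%v msg=%q err=%v", healthy, msg, err)
	}
	// ...but it is not terminally failed either, so callers keep waiting.
	terminal, _, err := status.IsTerminallyFailed(job)
	if err != nil || terminal {
		t.Fatalf("expected non-terminal, got terminal=%v err=%v", terminal, err)
	}

	// One succeeded pod flips the job to healthy.
	job.Status.Succeeded = 1
	healthy, _, err = status.IsHealthy(job)
	if err != nil || !healthy {
		t.Fatalf("expected healthy, got healthy=%v err=%v", healthy, err)
	}

	// A JobFailed=True condition makes the failure terminal.
	job.Status.Succeeded = 0
	job.Status.Conditions = []batchv1.JobCondition{
		{Type: batchv1.JobFailed, Status: corev1.ConditionTrue, Message: "backoff limit exceeded"},
	}
	terminal, msg, err = status.IsTerminallyFailed(job)
	if err != nil || !terminal {
		t.Fatalf("expected terminal failure, got terminal=%v msg=%q err=%v", terminal, msg, err)
	}
}
```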