Skip to content

Commit

Permalink
Removed --webhook option (#1497)
Browse files Browse the repository at this point in the history
Summary:
Now that KUDO moves towards a better support of multiple plans (e.g. `kudoctl plan trigger` command), the existing instance admission webhook becomes necessary to guarantee the plan execution consistency. More on KUDO [admission controller](https://kudo.dev/docs/developing-operators/plans.html#admission-controllers) in the documentation.

This PR removes the `--webhook` option and thus makes the instance admission webhook required. This is a breaking change since the users will have to either have [cert-manager](https://cert-manager.io/) installed or use the `--unsafe-self-signed-webhook-ca` option when initializing KUDO. For existing installations, one would need to run [kudo init](https://kudo.dev/docs/cli.html#examples) to create missing secret/webhook configuration.

Signed-off-by: Aleksey Dukhovniy <alex.dukhovniy@googlemail.com>
  • Loading branch information
Aleksey Dukhovniy committed May 19, 2020
1 parent ec540a1 commit a7b98bf
Show file tree
Hide file tree
Showing 49 changed files with 458 additions and 497 deletions.
3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ BUILD_DATE_PATH := github.com/kudobuilder/kudo/pkg/version.buildDate
DATE_FMT := "%Y-%m-%dT%H:%M:%SZ"
BUILD_DATE := $(shell date -u -d "@$SOURCE_DATE_EPOCH" "+${DATE_FMT}" 2>/dev/null || date -u -r "${SOURCE_DATE_EPOCH}" "+${DATE_FMT}" 2>/dev/null || date -u "+${DATE_FMT}")
LDFLAGS := -X ${GIT_VERSION_PATH}=${GIT_VERSION} -X ${GIT_COMMIT_PATH}=${GIT_COMMIT} -X ${BUILD_DATE_PATH}=${BUILD_DATE}
ENABLE_WEBHOOKS ?= false
GOLANGCI_LINT_VER = "1.23.8"
SUPPORTED_PLATFORMS = amd64 arm64

Expand Down Expand Up @@ -82,7 +81,7 @@ manager-clean:
run:
# webhooks are now always enabled and can no longer be switched off
# for local development you have to take care of providing the TLS certs locally
ENABLE_WEBHOOKS=${ENABLE_WEBHOOKS} go run -ldflags "${LDFLAGS}" ./cmd/manager
go run -ldflags "${LDFLAGS}" ./cmd/manager

.PHONY: deploy
# Install KUDO into a cluster via kubectl kudo init
Expand Down
37 changes: 22 additions & 15 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"time"

Expand Down Expand Up @@ -56,6 +57,14 @@ func parseSyncPeriod() (*time.Duration, error) {
return nil, nil
}

// getEnv looks up key in the process environment and returns its value.
// When the variable is absent, the fallback def is returned instead. Note
// that a variable set to the empty string counts as present and yields "".
func getEnv(key, def string) string {
	if val, ok := os.LookupEnv(key); ok {
		return val
	}
	return def
}

func main() {
// Get version of KUDO
log.Printf("KUDO Version: %#v", version.Get())
Expand All @@ -74,7 +83,7 @@ func main() {
}

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
CertDir: "/tmp/cert",
CertDir: getEnv("KUDO_CERT_DIR", filepath.Join("/tmp", "cert")),
SyncPeriod: syncPeriod,
})
if err != nil {
Expand Down Expand Up @@ -132,23 +141,21 @@ func main() {
}
log.Print("Instance controller set up")

if strings.ToLower(os.Getenv("ENABLE_WEBHOOKS")) == "true" {
log.Printf("Setting up webhooks")

iac, err := kudohook.NewInstanceAdmission(mgr.GetConfig(), mgr.GetScheme())
if err != nil {
log.Printf("Unable to create an uncached client for the webhook: %v", err)
os.Exit(1)
}
log.Printf("Setting up admission webhook")

if err := registerWebhook("/admit", &v1beta1.Instance{}, &webhook.Admission{Handler: iac}, mgr); err != nil {
log.Printf("Unable to create instance admission webhook: %v", err)
os.Exit(1)
}
log.Printf("Instance admission webhook set up")
iac, err := kudohook.NewInstanceAdmission(mgr.GetConfig(), mgr.GetScheme())
if err != nil {
log.Printf("Unable to create an uncached client for the webhook: %v", err)
os.Exit(1)
}

// Add more webhooks below using the above registerWebhook method
if err := registerWebhook("/admit", &v1beta1.Instance{}, &webhook.Admission{Handler: iac}, mgr); err != nil {
log.Printf("Unable to create instance admission webhook: %v", err)
os.Exit(1)
}
log.Printf("Instance admission webhook set up")

// Add more webhooks below using the above registerWebhook method

// Start the KUDO manager
log.Print("Done! Everything is setup, starting KUDO manager now")
Expand Down
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ require (
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a
golang.org/x/sys v0.0.0-20200408040146-ea54a3c99b9b // indirect
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect
gopkg.in/yaml.v2 v2.2.8
gotest.tools v2.2.0+incompatible
k8s.io/api v0.17.3
Expand Down
6 changes: 4 additions & 2 deletions hack/run-e2e-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@ then
| go-junit-report -set-exit-code \
> reports/kudo_e2e_test_report.xml

# Operators tests
rm -rf operators
git clone https://github.com/kudobuilder/operators
mkdir operators/bin/
cp ./bin/kubectl-kudo operators/bin/
cp ./bin/manager operators/bin/
sed "s/%version%/$KUDO_VERSION/" operators/kudo-test.yaml.tmpl > operators/kudo-test.yaml
cd operators && ./bin/kubectl-kudo test --artifacts-dir /tmp/kudo-e2e-test 2>&1 \
| tee /dev/fd/2 \
| go-junit-report -set-exit-code \
Expand All @@ -39,10 +40,11 @@ else

./bin/kubectl-kudo test --config kudo-e2e-test.yaml

# Operators tests
rm -rf operators
git clone https://github.com/kudobuilder/operators
mkdir operators/bin/
cp ./bin/kubectl-kudo operators/bin/
cp ./bin/manager operators/bin/
sed "s/%version%/$KUDO_VERSION/" operators/kudo-test.yaml.tmpl > operators/kudo-test.yaml
cd operators && ./bin/kubectl-kudo test --artifacts-dir /tmp/kudo-e2e-test
fi
5 changes: 1 addition & 4 deletions kudo-e2e-test.yaml.tmpl
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
apiVersion: kudo.dev/v1beta1
kind: TestSuite
crdDir: ./config/crds/
manifestDirs:
- ./test/manifests/
testDirs:
- ./test/e2e
commands:
- command: ./bin/kubectl-kudo init --webhook InstanceValidation --unsafe-self-signed-webhook-ca --kudo-image kudobuilder/controller:%version% --kudo-image-pull-policy Never
- command: ./bin/kubectl-kudo init --unsafe-self-signed-webhook-ca --kudo-image kudobuilder/controller:%version% --kudo-image-pull-policy Never
startKIND: true
kindContainers:
- kudobuilder/controller:%version%
Expand Down
8 changes: 6 additions & 2 deletions kudo-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@ manifestDirs:
testDirs:
- ./test/integration
commands:
- command: ./bin/manager
- command: |-
sh -c '
KUDO_CERT_DIR="./test/cert" ./bin/manager
'
background: true
- command: sleep 5
startControlPlane: true
parallel: 4
parallel: 4
13 changes: 13 additions & 0 deletions pkg/apis/kudo/v1beta1/instance_types_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,19 @@ const (
snapshotAnnotation = "kudo.dev/last-applied-instance-state"
)

// GetScheduledPlan returns the plan status of the currently scheduled plan or nil if no plan is scheduled. In most
// cases this method will return the same plan status as GetPlanInProgress (defined below in
// pkg/apis/kudo/v1beta1/instance_types_helpers.go). However, there is a small window where both might return
// different results:
//  1. GetScheduledPlan reads the plan from i.Spec.PlanExecution.PlanName which is set and reset by the instance
//     admission webhook
//  2. GetPlanInProgress goes through the i.Status.PlanStatus map and returns the first found plan that is running
//
// (1) is set directly when the user updates the instance and reset **after** the plan is terminal
// (2) is updated **after** each time the instance controller executes the plan
func (i *Instance) GetScheduledPlan() *PlanStatus {
	return i.PlanStatus(i.Spec.PlanExecution.PlanName)
}

// GetPlanInProgress returns plan status of currently active plan or nil if no plan is running
func (i *Instance) GetPlanInProgress() *PlanStatus {
for _, p := range i.Status.PlanStatus {
Expand Down
144 changes: 11 additions & 133 deletions pkg/controller/instance/instance_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ import (
"errors"
"fmt"
"log"
"os"
"reflect"
"strings"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -168,7 +166,7 @@ func isForPipePod(e event.DeleteEvent) bool {
func (r *Reconciler) Reconcile(request ctrl.Request) (ctrl.Result, error) {
// ---------- 1. Query the current state ----------

log.Printf("InstanceController: Received Reconcile request for instance \"%+v\"", request.Name)
log.Printf("InstanceController: Received Reconcile request for instance %s", request.NamespacedName)
instance, err := r.getInstance(request)
if err != nil {
if apierrors.IsNotFound(err) { // not retrying if instance not found, probably someone manually removed it?
Expand All @@ -188,17 +186,7 @@ func (r *Reconciler) Reconcile(request ctrl.Request) (ctrl.Result, error) {
return reconcile.Result{}, err // OV not found has to be retried because it can really have been created after Instance
}

// ---------- 2. Try to add a finalizer (effectively happens only once after creation) ----------

if !instance.IsDeleting() {
if kudov1beta1.CleanupPlanExists(ov) {
if instance.TryAddFinalizer() {
log.Printf("InstanceController: Adding finalizer on instance %s/%s", instance.Namespace, instance.Name)
}
}
}

// ---------- 3. Check if we should start execution of new plan ----------
// ---------- 2. Check if we should start execution of new plan ----------

newExecutionPlan, err := newExecutionPlan(instance, ov)
if err != nil {
Expand All @@ -214,11 +202,11 @@ func (r *Reconciler) Reconcile(request ctrl.Request) (ctrl.Result, error) {
r.Recorder.Event(instance, "Normal", "PlanStarted", fmt.Sprintf("Execution of plan %s started", convert.StringValue(newExecutionPlan)))
}

// ---------- 4. If there's currently active plan, continue with the execution ----------
// ---------- 3. If there's currently active plan, continue with the execution ----------

activePlanStatus := getPlanInProgress(instance)
activePlanStatus := instance.GetScheduledPlan()
if activePlanStatus == nil { // we have no plan in progress
log.Printf("InstanceController: Nothing to do, no plan in progress for instance %s/%s", instance.Namespace, instance.Name)
log.Printf("InstanceController: Nothing to do, no plan scheduled for instance %s/%s", instance.Namespace, instance.Name)
return reconcile.Result{}, nil
}

Expand All @@ -237,10 +225,10 @@ func (r *Reconciler) Reconcile(request ctrl.Request) (ctrl.Result, error) {
err = r.handleError(err, instance, oldInstance)
return reconcile.Result{}, err
}
log.Printf("InstanceController: Going to proceed in execution of active plan '%s' on instance %s/%s", activePlan.Name, instance.Namespace, instance.Name)
log.Printf("InstanceController: Going to proceed with execution of the scheduled plan '%s' on instance %s/%s", activePlan.Name, instance.Namespace, instance.Name)
newStatus, err := workflow.Execute(activePlan, metadata, r.Client, r.Discovery, r.Config, r.Scheme)

// ---------- 5. Update instance and its status after the execution proceeded ----------
// ---------- 4. Update instance and its status after the execution proceeded ----------

if newStatus != nil {
instance.UpdateInstanceStatus(newStatus, &metav1.Time{Time: time.Now()})
Expand Down Expand Up @@ -536,41 +524,13 @@ func isUpgradePlan(planName string) bool {
return planName == v1beta1.DeployPlanName || planName == v1beta1.UpgradePlanName
}

// areWebhooksEnabled reports whether the ENABLE_WEBHOOKS environment
// variable is set to "true" (compared case-insensitively).
func areWebhooksEnabled() bool {
	return strings.EqualFold(os.Getenv("ENABLE_WEBHOOKS"), "true")
}

// getPlanInProgress returns the plan that is currently in progress, or nil when none is. As long as
// webhooks are not enforced (ENABLE_WEBHOOKS=true) there are TWO WAYS of deciding that:
//
// 1. WITHOUT the webhook, the legacy logic iterates over all existing PlanStatuses, searching for
// the first one with Status == InProgress. It might happen that multiple plans are in progress (e.g. cleanup and deploy)
// 2. WITH the webhook, the current plan lives in the Spec.PlanExecution.PlanName field
func getPlanInProgress(i *v1beta1.Instance) *kudov1beta1.PlanStatus {
	if !areWebhooksEnabled() {
		// legacy behavior: scan the status map for a running plan
		return i.GetPlanInProgress()
	}
	// webhook mode: the admission webhook has recorded the scheduled plan in the spec
	return i.PlanStatus(i.Spec.PlanExecution.PlanName)
}

// newExecutionPlan method returns a new execution plan (if exists) or nil otherwise. As long as we don't enforce webhooks
// (ENABLE_WEBHOOKS=true) we have TWO WAYS of deciding which plan has to executed next:
//
// 1. WITHOUT the webhook, we have the old logic which tries to infer the change (parameter update, new OperatorVersion etc.)
// by diffing current state with the one from the snapshot.
// 2. WITH the webhook, instance admission webhook has already decided on the plan and the result is in the
// Spec.PlanExecution.PlanName field.
// newExecutionPlan method returns a new execution plan (if exists) or nil otherwise. Instance admission webhook has
// already decided on the plan and the result is saved in the Spec.PlanExecution.PlanName field
func newExecutionPlan(i *v1beta1.Instance, ov *v1beta1.OperatorVersion) (*string, error) {
var plan *string
var err error
if areWebhooksEnabled() {
if plan, err = fetchNewExecutionPlan(i, ov); plan != nil && err == nil {
log.Printf("InstanceController: Fetched new execution plan '%s' from the spec for instance %s/%s", convert.StringValue(plan), i.Namespace, i.Name)
}
} else {
if plan, err = inferNewExecutionPlan(i, ov); plan != nil && err == nil {
log.Printf("InstanceController: Inferred new execution plan '%s' from instance %s/%s state", convert.StringValue(plan), i.Namespace, i.Name)
}
if plan, err = fetchNewExecutionPlan(i, ov); plan != nil && err == nil {
log.Printf("InstanceController: Fetched new execution plan '%s' from the spec for instance %s/%s", convert.StringValue(plan), i.Namespace, i.Name)
}

return plan, err
Expand Down Expand Up @@ -628,85 +588,3 @@ func fetchNewExecutionPlan(i *v1beta1.Instance, ov *v1beta1.OperatorVersion) (*s

return nil, nil
}

// inferNewExecutionPlan method tries to infer a new execution plan by comparing current instance state
// with the one saved in the snapshot. It returns:
// - "planName", when there is a new plan that needs to be executed
// - <nil>, no new plan found e.g. a plan is already in progress
func inferNewExecutionPlan(i *v1beta1.Instance, ov *v1beta1.OperatorVersion) (*string, error) {
	if i.IsDeleting() {
		log.Printf("InstanceController: Instance %s/%s is being deleted", i.Namespace, i.Name)
		// we have a cleanup plan
		cleanupPlanName := v1beta1.CleanupPlanName
		if kudov1beta1.PlanExists(cleanupPlanName, ov) {
			if planStatus := i.PlanStatus(cleanupPlanName); planStatus != nil {
				switch planStatus.Status {
				case kudov1beta1.ExecutionNeverRun:
					// deletion started and cleanup has never run yet: schedule it
					return &cleanupPlanName, nil
				case kudov1beta1.ExecutionComplete, kudov1beta1.ExecutionFatalError:
					return nil, nil // we already finished the cleanup plan or there is no point in retrying
				}
				// NOTE(review): any other status (e.g. in progress) falls through to the checks below
			}
		}
	}

	if i.GetPlanInProgress() != nil { // we're already running some plan
		return nil, nil
	}

	// new instance, need to run deploy plan
	if i.NoPlanEverExecuted() {
		return convert.StringPtr(v1beta1.DeployPlanName), nil
	}

	// did the instance change so that we need to run deploy/upgrade/update plan?
	instanceSnapshot, err := i.SnapshotSpec()
	if err != nil {
		return nil, err
	}
	if instanceSnapshot == nil {
		// we don't have snapshot -> we never run deploy, also we cannot run update/upgrade. This should never happen
		return nil, &v1beta1.InstanceError{Err: fmt.Errorf("unexpected state: no plan is running, no snapshot present - this should never happen :) for instance %s/%s", i.Namespace, i.Name), EventName: convert.StringPtr("UnexpectedState")}
	}
	if instanceSnapshot.OperatorVersion.Name != i.Spec.OperatorVersion.Name {
		// this instance was upgraded to newer version
		log.Printf("Instance: instance %s/%s was upgraded from %s to %s operatorVersion", i.Namespace, i.Name, instanceSnapshot.OperatorVersion.Name, i.Spec.OperatorVersion.Name)
		// pick the first plan that exists in the operator version, in priority order upgrade > update > deploy
		plan := kudov1beta1.SelectPlan([]string{v1beta1.UpgradePlanName, v1beta1.UpdatePlanName, v1beta1.DeployPlanName}, ov)
		if plan == nil {
			return nil, &v1beta1.InstanceError{Err: fmt.Errorf("supposed to execute plan because instance %s/%s was upgraded but none of the deploy, upgrade, update plans found in linked operatorVersion", i.Namespace, i.Name),
				EventName: convert.StringPtr("PlanNotFound")}
		}
		return plan, nil
	}
	// did instance parameters change, so that the corresponding plan has to be triggered?
	if !reflect.DeepEqual(instanceSnapshot.Parameters, i.Spec.Parameters) {
		// instance updated
		log.Printf("Instance: instance %s/%s has updated parameters from %v to %v", i.Namespace, i.Name, instanceSnapshot.Parameters, i.Spec.Parameters)
		// resolve only the params that actually changed to their definitions, then map those to a trigger plan
		paramDiff := kudov1beta1.ParameterDiff(instanceSnapshot.Parameters, i.Spec.Parameters)
		paramDefinitions := kudov1beta1.GetExistingParamDefinitions(paramDiff, ov)
		plan, err := planNameFromParameters(paramDefinitions, ov)
		if err != nil {
			return nil, &v1beta1.InstanceError{Err: fmt.Errorf("supposed to execute plan because instance %s/%s was updated but no valid plan found: %v", i.Namespace, i.Name, err), EventName: convert.StringPtr("PlanNotFound")}
		}
		return plan, nil
	}
	// nothing changed relative to the snapshot: no new plan
	return nil, nil
}

// planNameFromParameters determines what plan to run based on params that changed and the related trigger plans
func planNameFromParameters(params []v1beta1.Parameter, ov *v1beta1.OperatorVersion) (*string, error) {
	// TODO: if the params have different trigger plans, we always select first here which might not be ideal
	for _, param := range params {
		if param.Trigger == "" {
			continue
		}
		// the parameter names an explicit trigger plan; it must be defined in the operator version
		if kudov1beta1.SelectPlan([]string{param.Trigger}, ov) == nil {
			return nil, fmt.Errorf("param %s defined trigger plan %s, but plan not defined in operatorversion", param.Name, param.Trigger)
		}
		return convert.StringPtr(param.Trigger), nil
	}

	// no parameter carries an explicit trigger: fall back to the update plan, then the deploy plan
	if plan := kudov1beta1.SelectPlan([]string{v1beta1.UpdatePlanName, v1beta1.DeployPlanName}, ov); plan != nil {
		return plan, nil
	}
	return nil, fmt.Errorf("no default plan defined in operatorversion")
}
Loading

0 comments on commit a7b98bf

Please sign in to comment.