Skip to content

Commit

Permalink
Removed --webhook option (#1497)
Browse files Browse the repository at this point in the history
Summary:
Now that KUDO moves towards a better support of multiple plans (e.g. `kudoctl plan trigger` command), the existing instance admission webhook becomes necessary to guarantee the plan execution consistency. More on KUDO [admission controller](https://kudo.dev/docs/developing-operators/plans.html#admission-controllers) in the documentation.

This PR removes the `--webhook` option and thus makes the instance admission webhook required. This is a breaking change since the users will have to either have [cert-manager](https://cert-manager.io/) installed or use the `--unsafe-self-signed-webhook-ca` option when initializing KUDO. For existing installations, one would need to run [kudo init](https://kudo.dev/docs/cli.html#examples) to create missing secret/webhook configuration.

Signed-off-by: Aleksey Dukhovniy <alex.dukhovniy@googlemail.com>
  • Loading branch information
Aleksey Dukhovniy committed May 19, 2020
1 parent ec540a1 commit a7b98bf
Show file tree
Hide file tree
Showing 49 changed files with 458 additions and 497 deletions.
3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ BUILD_DATE_PATH := github.com/kudobuilder/kudo/pkg/version.buildDate
DATE_FMT := "%Y-%m-%dT%H:%M:%SZ"
BUILD_DATE := $(shell date -u -d "@$SOURCE_DATE_EPOCH" "+${DATE_FMT}" 2>/dev/null || date -u -r "${SOURCE_DATE_EPOCH}" "+${DATE_FMT}" 2>/dev/null || date -u "+${DATE_FMT}")
LDFLAGS := -X ${GIT_VERSION_PATH}=${GIT_VERSION} -X ${GIT_COMMIT_PATH}=${GIT_COMMIT} -X ${BUILD_DATE_PATH}=${BUILD_DATE}
ENABLE_WEBHOOKS ?= false
GOLANGCI_LINT_VER = "1.23.8"
SUPPORTED_PLATFORMS = amd64 arm64

Expand Down Expand Up @@ -82,7 +81,7 @@ manager-clean:
run:
# webhooks are now always enabled and can no longer be switched off
# for local development you have to take care of providing the TLS certs locally
ENABLE_WEBHOOKS=${ENABLE_WEBHOOKS} go run -ldflags "${LDFLAGS}" ./cmd/manager
go run -ldflags "${LDFLAGS}" ./cmd/manager

.PHONY: deploy
# Install KUDO into a cluster via kubectl kudo init
Expand Down
37 changes: 22 additions & 15 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"time"

Expand Down Expand Up @@ -56,6 +57,14 @@ func parseSyncPeriod() (*time.Duration, error) {
return nil, nil
}

// getEnv looks up key in the process environment and returns its value.
// When the variable is absent, the fallback def is returned instead. Note
// that a variable set to the empty string counts as present and yields "".
func getEnv(key, def string) string {
	if val, ok := os.LookupEnv(key); ok {
		return val
	}
	return def
}

func main() {
// Get version of KUDO
log.Printf("KUDO Version: %#v", version.Get())
Expand All @@ -74,7 +83,7 @@ func main() {
}

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
CertDir: "/tmp/cert",
CertDir: getEnv("KUDO_CERT_DIR", filepath.Join("/tmp", "cert")),
SyncPeriod: syncPeriod,
})
if err != nil {
Expand Down Expand Up @@ -132,23 +141,21 @@ func main() {
}
log.Print("Instance controller set up")

if strings.ToLower(os.Getenv("ENABLE_WEBHOOKS")) == "true" {
log.Printf("Setting up webhooks")

iac, err := kudohook.NewInstanceAdmission(mgr.GetConfig(), mgr.GetScheme())
if err != nil {
log.Printf("Unable to create an uncached client for the webhook: %v", err)
os.Exit(1)
}
log.Printf("Setting up admission webhook")

if err := registerWebhook("/admit", &v1beta1.Instance{}, &webhook.Admission{Handler: iac}, mgr); err != nil {
log.Printf("Unable to create instance admission webhook: %v", err)
os.Exit(1)
}
log.Printf("Instance admission webhook set up")
iac, err := kudohook.NewInstanceAdmission(mgr.GetConfig(), mgr.GetScheme())
if err != nil {
log.Printf("Unable to create an uncached client for the webhook: %v", err)
os.Exit(1)
}

// Add more webhooks below using the above registerWebhook method
if err := registerWebhook("/admit", &v1beta1.Instance{}, &webhook.Admission{Handler: iac}, mgr); err != nil {
log.Printf("Unable to create instance admission webhook: %v", err)
os.Exit(1)
}
log.Printf("Instance admission webhook set up")

// Add more webhooks below using the above registerWebhook method

// Start the KUDO manager
log.Print("Done! Everything is setup, starting KUDO manager now")
Expand Down
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ require (
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a
golang.org/x/sys v0.0.0-20200408040146-ea54a3c99b9b // indirect
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect
gopkg.in/yaml.v2 v2.2.8
gotest.tools v2.2.0+incompatible
k8s.io/api v0.17.3
Expand Down
6 changes: 4 additions & 2 deletions hack/run-e2e-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@ then
| go-junit-report -set-exit-code \
> reports/kudo_e2e_test_report.xml

# Operators tests
rm -rf operators
git clone https://github.com/kudobuilder/operators
mkdir operators/bin/
cp ./bin/kubectl-kudo operators/bin/
cp ./bin/manager operators/bin/
sed "s/%version%/$KUDO_VERSION/" operators/kudo-test.yaml.tmpl > operators/kudo-test.yaml
cd operators && ./bin/kubectl-kudo test --artifacts-dir /tmp/kudo-e2e-test 2>&1 \
| tee /dev/fd/2 \
| go-junit-report -set-exit-code \
Expand All @@ -39,10 +40,11 @@ else

./bin/kubectl-kudo test --config kudo-e2e-test.yaml

# Operators tests
rm -rf operators
git clone https://github.com/kudobuilder/operators
mkdir operators/bin/
cp ./bin/kubectl-kudo operators/bin/
cp ./bin/manager operators/bin/
sed "s/%version%/$KUDO_VERSION/" operators/kudo-test.yaml.tmpl > operators/kudo-test.yaml
cd operators && ./bin/kubectl-kudo test --artifacts-dir /tmp/kudo-e2e-test
fi
5 changes: 1 addition & 4 deletions kudo-e2e-test.yaml.tmpl
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
apiVersion: kudo.dev/v1beta1
kind: TestSuite
crdDir: ./config/crds/
manifestDirs:
- ./test/manifests/
testDirs:
- ./test/e2e
commands:
- command: ./bin/kubectl-kudo init --webhook InstanceValidation --unsafe-self-signed-webhook-ca --kudo-image kudobuilder/controller:%version% --kudo-image-pull-policy Never
- command: ./bin/kubectl-kudo init --unsafe-self-signed-webhook-ca --kudo-image kudobuilder/controller:%version% --kudo-image-pull-policy Never
startKIND: true
kindContainers:
- kudobuilder/controller:%version%
Expand Down
8 changes: 6 additions & 2 deletions kudo-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@ manifestDirs:
testDirs:
- ./test/integration
commands:
- command: ./bin/manager
- command: |-
sh -c '
KUDO_CERT_DIR="./test/cert" ./bin/manager
'
background: true
- command: sleep 5
startControlPlane: true
parallel: 4
parallel: 4
13 changes: 13 additions & 0 deletions pkg/apis/kudo/v1beta1/instance_types_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,19 @@ const (
snapshotAnnotation = "kudo.dev/last-applied-instance-state"
)

// GetScheduledPlan returns the plan status of the currently scheduled plan or nil if no plan is scheduled. In most
// cases this method will return the same plan status as GetPlanInProgress (defined below in
// pkg/apis/kudo/v1beta1/instance_types_helpers.go). However, there is a small window where both might return
// different results:
//  1. GetScheduledPlan reads the plan from i.Spec.PlanExecution.PlanName which is set and reset by the instance
//     admission webhook
//  2. GetPlanInProgress goes through the i.Status.PlanStatus map and returns the first found plan that is running
//
// (1) is set directly when the user updates the instance and reset **after** the plan is terminal
// (2) is updated **after** each time the instance controller executes the plan
func (i *Instance) GetScheduledPlan() *PlanStatus {
	return i.PlanStatus(i.Spec.PlanExecution.PlanName)
}

// GetPlanInProgress returns plan status of currently active plan or nil if no plan is running
func (i *Instance) GetPlanInProgress() *PlanStatus {
for _, p := range i.Status.PlanStatus {
Expand Down
144 changes: 11 additions & 133 deletions pkg/controller/instance/instance_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ import (
"errors"
"fmt"
"log"
"os"
"reflect"
"strings"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -168,7 +166,7 @@ func isForPipePod(e event.DeleteEvent) bool {
func (r *Reconciler) Reconcile(request ctrl.Request) (ctrl.Result, error) {
// ---------- 1. Query the current state ----------

log.Printf("InstanceController: Received Reconcile request for instance \"%+v\"", request.Name)
log.Printf("InstanceController: Received Reconcile request for instance %s", request.NamespacedName)
instance, err := r.getInstance(request)
if err != nil {
if apierrors.IsNotFound(err) { // not retrying if instance not found, probably someone manually removed it?
Expand All @@ -188,17 +186,7 @@ func (r *Reconciler) Reconcile(request ctrl.Request) (ctrl.Result, error) {
return reconcile.Result{}, err // OV not found has to be retried because it can really have been created after Instance
}

// ---------- 2. Try to add a finalizer (effectively happens only once after creation) ----------

if !instance.IsDeleting() {
if kudov1beta1.CleanupPlanExists(ov) {
if instance.TryAddFinalizer() {
log.Printf("InstanceController: Adding finalizer on instance %s/%s", instance.Namespace, instance.Name)
}
}
}

// ---------- 3. Check if we should start execution of new plan ----------
// ---------- 2. Check if we should start execution of new plan ----------

newExecutionPlan, err := newExecutionPlan(instance, ov)
if err != nil {
Expand All @@ -214,11 +202,11 @@ func (r *Reconciler) Reconcile(request ctrl.Request) (ctrl.Result, error) {
r.Recorder.Event(instance, "Normal", "PlanStarted", fmt.Sprintf("Execution of plan %s started", convert.StringValue(newExecutionPlan)))
}

// ---------- 4. If there's currently active plan, continue with the execution ----------
// ---------- 3. If there's currently active plan, continue with the execution ----------

activePlanStatus := getPlanInProgress(instance)
activePlanStatus := instance.GetScheduledPlan()
if activePlanStatus == nil { // we have no plan in progress
log.Printf("InstanceController: Nothing to do, no plan in progress for instance %s/%s", instance.Namespace, instance.Name)
log.Printf("InstanceController: Nothing to do, no plan scheduled for instance %s/%s", instance.Namespace, instance.Name)
return reconcile.Result{}, nil
}

Expand All @@ -237,10 +225,10 @@ func (r *Reconciler) Reconcile(request ctrl.Request) (ctrl.Result, error) {
err = r.handleError(err, instance, oldInstance)
return reconcile.Result{}, err
}
log.Printf("InstanceController: Going to proceed in execution of active plan '%s' on instance %s/%s", activePlan.Name, instance.Namespace, instance.Name)
log.Printf("InstanceController: Going to proceed with execution of the scheduled plan '%s' on instance %s/%s", activePlan.Name, instance.Namespace, instance.Name)
newStatus, err := workflow.Execute(activePlan, metadata, r.Client, r.Discovery, r.Config, r.Scheme)

// ---------- 5. Update instance and its status after the execution proceeded ----------
// ---------- 4. Update instance and its status after the execution proceeded ----------

if newStatus != nil {
instance.UpdateInstanceStatus(newStatus, &metav1.Time{Time: time.Now()})
Expand Down Expand Up @@ -536,41 +524,13 @@ func isUpgradePlan(planName string) bool {
return planName == v1beta1.DeployPlanName || planName == v1beta1.UpgradePlanName
}

// areWebhooksEnabled reports whether the ENABLE_WEBHOOKS environment
// variable is set to "true" (compared case-insensitively).
func areWebhooksEnabled() bool {
	return strings.EqualFold(os.Getenv("ENABLE_WEBHOOKS"), "true")
}

// getPlanInProgress returns the plan that is currently in progress, or nil when none is. As long as
// webhooks are not enforced (ENABLE_WEBHOOKS=true) there are TWO WAYS of deciding that:
//
// 1. WITHOUT the webhook, the legacy logic iterates over all existing PlanStatuses, searching for
// the first one with Status == InProgress. It might happen that multiple plans are in progress (e.g. cleanup and deploy)
// 2. WITH the webhook, the current plan lives in the Spec.PlanExecution.PlanName field
func getPlanInProgress(i *v1beta1.Instance) *kudov1beta1.PlanStatus {
	if !areWebhooksEnabled() {
		// legacy behavior: scan the status map for a running plan
		return i.GetPlanInProgress()
	}
	// webhook mode: the admission webhook has recorded the scheduled plan in the spec
	return i.PlanStatus(i.Spec.PlanExecution.PlanName)
}

// newExecutionPlan method returns a new execution plan (if exists) or nil otherwise. As long as we don't enforce webhooks
// (ENABLE_WEBHOOKS=true) we have TWO WAYS of deciding which plan has to executed next:
//
// 1. WITHOUT the webhook, we have the old logic which tries to infer the change (parameter update, new OperatorVersion etc.)
// by diffing current state with the one from the snapshot.
// 2. WITH the webhook, instance admission webhook has already decided on the plan and the result is in the
// Spec.PlanExecution.PlanName field.
// newExecutionPlan method returns a new execution plan (if exists) or nil otherwise. Instance admission webhook has
// already decided on the plan and the result is saved in the Spec.PlanExecution.PlanName field
func newExecutionPlan(i *v1beta1.Instance, ov *v1beta1.OperatorVersion) (*string, error) {
var plan *string
var err error
if areWebhooksEnabled() {
if plan, err = fetchNewExecutionPlan(i, ov); plan != nil && err == nil {
log.Printf("InstanceController: Fetched new execution plan '%s' from the spec for instance %s/%s", convert.StringValue(plan), i.Namespace, i.Name)
}
} else {
if plan, err = inferNewExecutionPlan(i, ov); plan != nil && err == nil {
log.Printf("InstanceController: Inferred new execution plan '%s' from instance %s/%s state", convert.StringValue(plan), i.Namespace, i.Name)
}
if plan, err = fetchNewExecutionPlan(i, ov); plan != nil && err == nil {
log.Printf("InstanceController: Fetched new execution plan '%s' from the spec for instance %s/%s", convert.StringValue(plan), i.Namespace, i.Name)
}

return plan, err
Expand Down Expand Up @@ -628,85 +588,3 @@ func fetchNewExecutionPlan(i *v1beta1.Instance, ov *v1beta1.OperatorVersion) (*s

return nil, nil
}

// inferNewExecutionPlan method tries to infer a new execution plan by comparing current instance state
// with the one saved in the snapshot. It returns:
// - "planName", when there is a new plan that needs to be executed
// - <nil>, no new plan found e.g. a plan is already in progress
func inferNewExecutionPlan(i *v1beta1.Instance, ov *v1beta1.OperatorVersion) (*string, error) {
	if i.IsDeleting() {
		log.Printf("InstanceController: Instance %s/%s is being deleted", i.Namespace, i.Name)
		// we have a cleanup plan
		cleanupPlanName := v1beta1.CleanupPlanName
		if kudov1beta1.PlanExists(cleanupPlanName, ov) {
			if planStatus := i.PlanStatus(cleanupPlanName); planStatus != nil {
				switch planStatus.Status {
				case kudov1beta1.ExecutionNeverRun:
					// deletion started and cleanup has never run yet: schedule it
					return &cleanupPlanName, nil
				case kudov1beta1.ExecutionComplete, kudov1beta1.ExecutionFatalError:
					return nil, nil // we already finished the cleanup plan or there is no point in retrying
				}
				// NOTE(review): any other status (e.g. in progress) falls through to the checks below
			}
		}
	}

	if i.GetPlanInProgress() != nil { // we're already running some plan
		return nil, nil
	}

	// new instance, need to run deploy plan
	if i.NoPlanEverExecuted() {
		return convert.StringPtr(v1beta1.DeployPlanName), nil
	}

	// did the instance change so that we need to run deploy/upgrade/update plan?
	instanceSnapshot, err := i.SnapshotSpec()
	if err != nil {
		return nil, err
	}
	if instanceSnapshot == nil {
		// we don't have snapshot -> we never run deploy, also we cannot run update/upgrade. This should never happen
		return nil, &v1beta1.InstanceError{Err: fmt.Errorf("unexpected state: no plan is running, no snapshot present - this should never happen :) for instance %s/%s", i.Namespace, i.Name), EventName: convert.StringPtr("UnexpectedState")}
	}
	if instanceSnapshot.OperatorVersion.Name != i.Spec.OperatorVersion.Name {
		// this instance was upgraded to newer version
		log.Printf("Instance: instance %s/%s was upgraded from %s to %s operatorVersion", i.Namespace, i.Name, instanceSnapshot.OperatorVersion.Name, i.Spec.OperatorVersion.Name)
		// pick the first plan that exists in the operator version, in priority order upgrade > update > deploy
		plan := kudov1beta1.SelectPlan([]string{v1beta1.UpgradePlanName, v1beta1.UpdatePlanName, v1beta1.DeployPlanName}, ov)
		if plan == nil {
			return nil, &v1beta1.InstanceError{Err: fmt.Errorf("supposed to execute plan because instance %s/%s was upgraded but none of the deploy, upgrade, update plans found in linked operatorVersion", i.Namespace, i.Name),
				EventName: convert.StringPtr("PlanNotFound")}
		}
		return plan, nil
	}
	// did instance parameters change, so that the corresponding plan has to be triggered?
	if !reflect.DeepEqual(instanceSnapshot.Parameters, i.Spec.Parameters) {
		// instance updated
		log.Printf("Instance: instance %s/%s has updated parameters from %v to %v", i.Namespace, i.Name, instanceSnapshot.Parameters, i.Spec.Parameters)
		// resolve only the params that actually changed to their definitions, then map those to a trigger plan
		paramDiff := kudov1beta1.ParameterDiff(instanceSnapshot.Parameters, i.Spec.Parameters)
		paramDefinitions := kudov1beta1.GetExistingParamDefinitions(paramDiff, ov)
		plan, err := planNameFromParameters(paramDefinitions, ov)
		if err != nil {
			return nil, &v1beta1.InstanceError{Err: fmt.Errorf("supposed to execute plan because instance %s/%s was updated but no valid plan found: %v", i.Namespace, i.Name, err), EventName: convert.StringPtr("PlanNotFound")}
		}
		return plan, nil
	}
	// nothing changed relative to the snapshot: no new plan
	return nil, nil
}

// planNameFromParameters determines what plan to run based on params that changed and the related trigger plans
func planNameFromParameters(params []v1beta1.Parameter, ov *v1beta1.OperatorVersion) (*string, error) {
	// TODO: if the params have different trigger plans, we always select first here which might not be ideal
	for _, param := range params {
		if param.Trigger == "" {
			continue
		}
		// the parameter names an explicit trigger plan; it must be defined in the operator version
		if kudov1beta1.SelectPlan([]string{param.Trigger}, ov) == nil {
			return nil, fmt.Errorf("param %s defined trigger plan %s, but plan not defined in operatorversion", param.Name, param.Trigger)
		}
		return convert.StringPtr(param.Trigger), nil
	}

	// no parameter carries an explicit trigger: fall back to the update plan, then the deploy plan
	if plan := kudov1beta1.SelectPlan([]string{v1beta1.UpdatePlanName, v1beta1.DeployPlanName}, ov); plan != nil {
		return plan, nil
	}
	return nil, fmt.Errorf("no default plan defined in operatorversion")
}
Loading

0 comments on commit a7b98bf

Please sign in to comment.