Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
bin

# Test binary, build with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# Kubernetes Generated files - skip generated files, except for vendored files

!vendor/**/zz_generated.*

# editor and IDE paraphernalia
.idea
*.swp
*.swo
*~
26 changes: 26 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Build the manager binary
FROM golang:1.13 AS builder

WORKDIR /workspace
# Copy the Go module manifests first and download dependencies in their own
# layer, so that source changes don't invalidate the downloaded-modules layer
# and we don't need to re-download as much.
COPY go.mod go.mod
COPY go.sum go.sum
RUN go mod download

# Copy the go source. The whole build context is copied, which already
# includes main.go, so a separate COPY of main.go would only add a redundant layer.
COPY ./ ./

# Build with cgo disabled for linux/amd64
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o manager main.go

# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
FROM gcr.io/distroless/static:nonroot
WORKDIR /
COPY --from=builder /workspace/manager .
USER nonroot:nonroot

ENTRYPOINT ["/manager"]
111 changes: 111 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Copyright 2020 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Running plain `make` with no target prints the help text.
.DEFAULT_GOAL:=help

# Registry, image name and tag of the controller image. They are assigned with
# ?= so a value already set (environment or command line) wins. The default
# registry is derived from the active gcloud project; since ?= defines a
# recursively expanded variable, gcloud is only invoked when the value is
# actually expanded (i.e. by targets that use CONTROLLER_IMAGE).
REGISTRY ?= gcr.io/$(shell gcloud config get-value project)
IMAGE_NAME ?= executionhook-controller
TAG ?= dev
# Image URL used by all targets that build, push or deploy the image
CONTROLLER_IMAGE ?= $(REGISTRY)/$(IMAGE_NAME):$(TAG)
# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)
CRD_OPTIONS ?= "crd:trivialVersions=true"

# Directories.
TOOLS_DIR := hack/tools
TOOLS_BIN_DIR := $(TOOLS_DIR)/bin

# Tool binaries (built on demand by the rules for these paths).
KUSTOMIZE := $(TOOLS_BIN_DIR)/kustomize
CONTROLLER_GEN := $(TOOLS_BIN_DIR)/controller-gen

# Build controller-gen inside $(TOOLS_DIR); rebuilt whenever $(TOOLS_DIR)/go.mod changes.
# `&&` (rather than `;`) makes sure go build never runs from the wrong
# directory when the cd fails.
$(CONTROLLER_GEN): $(TOOLS_DIR)/go.mod
	cd $(TOOLS_DIR) && go build -tags=tools -o ./bin/controller-gen sigs.k8s.io/controller-tools/cmd/controller-gen

# Build kustomize inside $(TOOLS_DIR); rebuilt whenever $(TOOLS_DIR)/go.mod changes.
$(KUSTOMIZE): $(TOOLS_DIR)/go.mod
	cd $(TOOLS_DIR) && go build -tags=tools -o ./bin/kustomize sigs.k8s.io/kustomize/kustomize/v3

# Self-documenting help: awk scans the parsed makefiles and prints every
# "target: ... ## description" line as a target/description pair, and every
# line starting with "##@" as a section heading. Targets documented with a
# single "#" are not listed.
# The rule pattern uses a plain ".*" because awk regexes are POSIX EREs, which
# have no lazy "*?" quantifier.
.PHONY: help
help: ## Display this help
	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_-]+:.*##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

# Delete the manager output directory (bin) and the built tool binaries
# (hack/tools/bin).
.PHONY: clean-bin
clean-bin: ## Remove all generated binaries
	rm -rf bin hack/tools/bin

# `all` is an alias for building the manager binary.
.PHONY: all
all: manager

# Regenerate code and manifests, format and vet the sources, then run the Go
# tests verbosely, writing the coverage profile to cover.out.
.PHONY: test
test: generate fmt vet manifests ## Run tests
	go test -v -coverprofile cover.out ./...

# Build bin/manager from main.go; the tests must pass first.
.PHONY: manager
manager: generate fmt vet test ## Build manager binary
	go build -o bin/$@ main.go

# Render config/crd with kustomize and apply the result with kubectl.
.PHONY: install
install: manifests $(KUSTOMIZE) ## Install CRDs into a cluster in the current context at ~/.kube/config
	$(KUSTOMIZE) build config/crd | kubectl apply -f -

# Render config/crd with kustomize and delete the resulting objects with kubectl.
.PHONY: uninstall
uninstall: manifests $(KUSTOMIZE) ## Uninstall latest version of CRDs from a cluster in the current context at ~/.kube/config
	$(KUSTOMIZE) build config/crd | kubectl delete -f -

# Point the "controller" image in config/manager at CONTROLLER_IMAGE, then
# render config/default and apply it. $(KUSTOMIZE) is a path relative to the
# repository root, hence the ../../ prefix after changing into config/manager.
# hacky, works for now. TODO: ashish-amarnath make this better
.PHONY: deploy
deploy: manifests $(KUSTOMIZE) ## Deploy controller in the configured Kubernetes cluster in ~/.kube/config
	cd config/manager && ../../$(KUSTOMIZE) edit set image controller=$(CONTROLLER_IMAGE)
	$(KUSTOMIZE) build config/default | kubectl apply -f -

# Run controller-gen over all packages to produce the CRD, RBAC and webhook
# manifests; CRDs are written to config/crd/bases.
.PHONY: manifests
manifests: $(CONTROLLER_GEN) ## Generate manifests e.g. CRD, RBAC etc.
	$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=manager-role webhook paths="./..." output:crd:artifacts:config=config/crd/bases

# fmt and vet are commands, not files: declare them phony so a stray file or
# directory named "fmt" or "vet" can never make them look up to date.
.PHONY: fmt
fmt: ## Run go fmt against code
	go fmt ./...

.PHONY: vet
vet: ## Run go vet against code
	go vet ./...

# Tidy both Go modules: the root module and the tools module in $(TOOLS_DIR).
# `&&` makes sure the second tidy never runs in the root module by accident
# when the cd fails.
.PHONY: modules
modules: ## Runs go mod to ensure modules are up-to-date.
	go mod tidy
	cd $(TOOLS_DIR) && go mod tidy

# Tidy the modules, then fail if that changed any go.mod/go.sum file relative
# to HEAD.
.PHONY: verify-modules
verify-modules: modules ## Verify go module files are up-to-date
	@if ! git diff --quiet HEAD -- go.sum go.mod hack/tools/go.mod hack/tools/go.sum; then \
		echo "go module files are out of date"; exit 1; \
	fi

# Run controller-gen's object generator over all packages, using
# hack/boilerplate.go.txt as the header of the generated files.
# generate is a command, not a file, so it is declared phony.
.PHONY: generate
generate: $(CONTROLLER_GEN) ## Generate code
	$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths="./..."

# Regenerate code and manifests, then fail if the working tree differs from
# HEAD in any way.
.PHONY: verify-gen
verify-gen: generate manifests ## Verify generated code and manifests are up-to-date
	@if ! git diff --quiet HEAD; then \
		echo "generated code and manifest files are out of date, run make generate manifests"; exit 1; \
	fi

# Build the controller image from the repository root after the tests pass.
# The description uses "##" so that `make help` lists the target.
.PHONY: docker-build
docker-build: test ## Build the controller image
	docker build . -t $(CONTROLLER_IMAGE)

# Push the freshly built controller image to its registry.
.PHONY: docker-push
docker-push: docker-build ## Push the controller image
	docker push $(CONTROLLER_IMAGE)

10 changes: 10 additions & 0 deletions PROJECT
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
domain: k8s.io
repo: sigs.k8s.io/execution-hook
resources:
- group: apps
kind: ExecutionHook
version: v1alpha1
- group: apps
kind: HookAction
version: v1alpha1
version: "2"
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
This repo contains `ExecutionHook` CRDs for dynamically executing user’s commands in pods/containers
and an `ExecutionHookController` to manage the hook's lifecycle.

## Usage

Execution Hook uses the [Kubebuilder framework](https://github.com/kubernetes-sigs/kubebuilder).
See the [Kubebuilder documentation](https://book.kubebuilder.io/introduction.html) for building, running and testing the project.

## Community, discussion, contribution, and support

Learn how to engage with the Kubernetes community on the [community page](http://kubernetes.io/community/).
Expand Down
189 changes: 189 additions & 0 deletions api/v1alpha1/executionhook_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
// ExecutionHookFinalizer is the finalizer name that allows the ExecutionHook
// controller to clean up the custom resource before it is removed from the
// apiserver.
ExecutionHookFinalizer = "executionhook.apps.x-k8s.io"
)

// +kubebuilder:object:root=true
// +kubebuilder:resource:path=executionhook,shortName=eh,scope=Namespaced,categories=executionhook
// +kubebuilder:subresource:status

// ExecutionHook is the Schema for the executionhook API
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Talk about guarantees of execution (at-least-once vs exactly-once) and retries, timeouts, idempotency, etc?

type ExecutionHook struct {
// TypeMeta is inlined into the serialized object.
metav1.TypeMeta `json:",inline"`
// Metadata associated with persisted resources
metav1.ObjectMeta `json:"metadata,omitempty"`

// Spec defines the desired state of ExecutionHook
Spec ExecutionHookSpec `json:"spec,omitempty"`
// Status defines the observed state of ExecutionHook.
// It is a pointer: nil means no status is set, and a nil status is omitted
// from the JSON output.
// +optional
Status *ExecutionHookStatus `json:"status,omitempty"`
}

// +kubebuilder:object:root=true

// ExecutionHookList contains a list of ExecutionHook
type ExecutionHookList struct {
// TypeMeta is inlined into the serialized object.
metav1.TypeMeta `json:",inline"`
// List metadata, serialized under "metadata".
metav1.ListMeta `json:"metadata,omitempty"`
// Items holds the ExecutionHook objects in this list.
Items []ExecutionHook `json:"items"`
}

// ExecutionHookSpec defines the desired state of ExecutionHook
// HookActionName is copied to ExecutionHookSpec by the controller such as
// the Application Snapshot Controller.
type ExecutionHookSpec struct {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be called "PodAction" or maybe just "Action" or "Notification"

// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably want to remove meta-comments?

// Important: Run "make" to regenerate code after modifying this file

// PodSelection defines how to select pods and containers to run
// the executionhook. If multiple pod/containers are selected, the action will be executed on them
// asynchronously. If execution ordering is required, caller has to implement the logic and create
// different hooks in order.
// This field is required.
PodSelection PodSelection `json:"podSelection"`
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I disagree with this structure. I said something about this in the KEP. Here's my thinking.

Ideally (IMO) HookAction eventually becomes a field inside each Pod.spec.containers[] - it defines what to do when that action is triggered. Likewise, this type (ExecutionHook) should be called PodAction or similar. It triggers an action on a set of Pods. It should not be spelling out which container because the action definition itself will be attached to a container.

If/when we carry this into Beta, you'll have to change all of this to a simple pod selector. Better to change it now. This can simply be selector and be a normal pod selector. I'm not convinced we need specific pod names?

Once this is integrated to Pod, triggering an action "foo" can mean "all action specs named foo on any containers in the pod"

Copy link

@xing-yang xing-yang Jan 28, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We did have this proposed as part of the LifeCycle struct inside a container as described in alternative option 1a and 1b in the KEP. That was actually the main proposal when the KEP was initially submitted. https://github.com/kubernetes/enhancements/blob/master/keps/sig-storage/20190120-execution-hook-design.md#alternative-option-1a

We also talked about the pros and cons of having kubelet or an external controller take care of these hooks:
https://github.com/kubernetes/enhancements/blob/master/keps/sig-storage/20190120-execution-hook-design.md#controller-handlings-for-option-1a-and-1b

If we decided to go with alternative 1a/1b or some variation of those, then I don't think we need this execution-hook repo as the handling of the hooks will be either in kubelet and/or in the application snapshot controller (when that's available).

After numerous discussions, however, the decision was to start with an alpha implementation with a hook CRD and an external controller to manage the hooks. This allows us to explore other features such as application snapshot which will use this hook for quiescing. We have stated in the KEP that we'll look at doing it in kubelet before moving to beta.
https://github.com/kubernetes/enhancements/blob/master/keps/sig-storage/20190120-execution-hook-design.md#risks-and-mitigations
"The security concern is that ExecutionHook controller has the authority to execute commands in any pods. For alpha and proof of concept, we propose to use external controller to handle executionhooks. But to move to beta and graduate as GA, we will evaluate it and move it to kubelet which already has the privilege to execute commands in pod/containers."


// Name of the HookAction. This is required.
ActionName string `json:"actionName"`
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any way to control how fast this happens across a set of pods? E.g. I have a deployment and I want this to wait 10 seconds between each pod?

}

// PodSelection contains two fields, PodContainerNamesList and PodContainerSelector,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This basically copies the type contents into a comment. Don't list the fields here. Just say that this struct represents a one-of, and define the defaults.

One-of types really want a discriminator field, e.g. "type=Selector"

// where exactly one of them must be defined so that the hook controller knows where to
// run the hook.
// TODO: Add validation to ensure exactly one of them is defined.
type PodSelection struct {
// PodContainerNamesList lists the pods/containers on which the ExecutionHook
// should be executed. If not specified, the ExecutionHook controller will find
// all pods and containers based on PodContainerSelector.
// If neither PodContainerNamesList nor PodContainerSelector is
// specified, the ExecutionHook cannot be executed and it will fail.
// +optional
PodContainerNamesList []PodContainerNames `json:"podContainerNamesList,omitempty"`

// PodContainerSelector is used by the hook controller to find pods and
// containers based on the pod label selector and container names.
// If PodContainerNamesList is specified, this field will not be used.
// +optional
PodContainerSelector *PodContainerSelector `json:"podContainerSelector,omitempty"`
}

// PodContainerNames lists the containers the ExecutionHook should be executed on in a Pod.
type PodContainerNames struct {
// This field is required
PodName string `json:"podName"`

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't make a lot of sense to me, if your selector identifies e.g. all pods in a replica set, this still makes you specify each one by name (since the name has a random suffix).

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only one of "PodContainerNames" and "PodContainerSelector" will be specified but not both. So if the selector identifies all pods in the PodContainerSelector, this PodContainerNames won't be used. We need to add validation to make sure only one is specified.


// +optional
// If empty, hook action will be executed on all containers in the pod.
ContainerNames []string `json:"containerNames,omitempty"`
}

// PodContainerSelector defines the selector and containers the ExecutionHook
// should be executed on.
type PodContainerSelector struct {
// PodSelector specifies a label query over a set of pods.
// If not specified, all pods in the namespace will be selected to look for
// the containers specified in ContainerList.
// +optional
PodSelector *metav1.LabelSelector `json:"podSelector,omitempty"`

// ContainerList, if specified, restricts hook execution to these containers
// of the pods selected by PodSelector.
// Otherwise, all containers of the selected pods will be chosen for hook execution.
// +optional
ContainerList []string `json:"containerList,omitempty"`
}

// ExecutionHookStatus defines the observed state of ExecutionHook
type ExecutionHookStatus struct {
// Important: Run "make generate" to regenerate code after modifying this file

// HookStatuses is a list of ContainerExecutionHookStatus, with each status
// representing information about how the hook is executed in a container,
// including pod name, container name, Timestamp, Succeed, etc.
// +optional
HookStatuses []ContainerExecutionHookStatus `json:"hookStatuses,omitempty"`
}

// ContainerExecutionHookStatus represents the current state of a hook for a specific
// container in a pod
type ContainerExecutionHookStatus struct {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you talk about lifecycle?

You suggest we could have one of these before the action was started - who would create that? Why wouldn't you create this struct after triggering, with that field set? Either way it is a race and you have to retrigger the action when in doubt (e.g. to get exec results)

Case     A                             B                                                                                                     
* Create with no timestamp    * Create with no timestamp
* Trigger the action          * Crash, restart
* Crash, restart              * Observe no timestamp
* Observe no timestamp        * Trigger the action
* Trigger the action again

vs

Case     A                             B
* Create with timestamp       * Create with timestamp
* Trigger the action          * Crash, restart
* Crash, restart              * Observe timestamp
* Observe timestamp           * Trigger the action
* Trigger the action again

vs

Case     A                             B
* Trigger the action          * Trigger the action
* Create with timestamp       * Crash, restart
* Crash, restart              * Trigger the action again
* Observe timestamp           * Create with timestamp                                                                                        
* Trigger the action again

// This field is required
PodName string `json:"podName"`

// This field is required
ContainerName string `json:"containerName"`

// If not set, it is nil, indicating Action has not started
// If set, it means Action has started at the specified time
// +optional
Timestamp *metav1.Time `json:"timestamp,omitempty"`
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this actually mean? What guarantees can I infer from it?

What happens if I retry the action? Under what circumstances would a retry happen?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to know how long an action took from trigger to completion? I think we would..


// Succeed is set to true when the action is executed in the container successfully.
// It will be set to false if the action cannot be executed successfully after
// ActionTimeoutSeconds passes.
// +optional
Succeed *bool `json:"succeed,omitempty"`
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

success


// The last error encountered when executing the action. The hook controller might
// update this field each time it retries the execution.
// +optional
Error *HookError `json:"error,omitempty"`
}

// HookError describes the error that occurred during hook execution.
type HookError struct {
// Type of the error
// This is required
ErrorType ErrorType `json:"errorType"`

// Error message
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the result from the action (e.g. exec stderr)? Or is it fixed from the controller?

// +optional
Message *string `json:"message,omitempty"`

// More detailed reason why the error happened
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If any of this comes from user pod (e.g. stderr) it should be size-bounded and specified.

// +optional
Reason *string `json:"reason,omitempty"`

// It indicates when the error occurred
// +optional
Timestamp *metav1.Time `json:"timestamp,omitempty"`
}

// ErrorType defines the type of error that occurred during hook execution.
type ErrorType string

// More error types could be added, e.g., Forbidden, Unauthorized, AlreadyInProgress, etc.
const (
// Timeout indicates that the execution hook timed out.
Timeout ErrorType = "Timeout"

// Error indicates that the execution hook failed with an error.
Error ErrorType = "Error"
)

// init registers the ExecutionHook and ExecutionHookList types with
// SchemeBuilder when the package is initialized.
func init() {
	SchemeBuilder.Register(
		&ExecutionHook{},
		&ExecutionHookList{},
	)
}
Loading