Skip to content

Commit

Permalink
Merge pull request #809 from gabemontero/cancel-buildrun
Browse files Browse the repository at this point in the history
add cancel buildrun support
  • Loading branch information
openshift-merge-robot committed Jul 15, 2021
2 parents 8b6046f + a84a08d commit 5d8fb41
Show file tree
Hide file tree
Showing 12 changed files with 434 additions and 27 deletions.
2 changes: 1 addition & 1 deletion deploy/200-role.yaml
Expand Up @@ -61,7 +61,7 @@ rules:
resources: ['taskruns']
# BuildRuns are set as the owners of Tekton TaskRuns.
# With the OwnerReferencesPermissionEnforcement admission controller enabled, controllers need the "delete" permission on objects that they set owner references on.
verbs: ['get', 'list', 'watch', 'create', 'delete']
verbs: ['get', 'list', 'watch', 'create', 'delete', 'patch']

- apiGroups: ['']
resources: ['pods']
Expand Down
3 changes: 3 additions & 0 deletions deploy/crds/shipwright.io_buildruns.yaml
Expand Up @@ -104,6 +104,9 @@ spec:
description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names'
type: string
type: object
state:
description: State is used for canceling a buildrun (and maybe more later on).
type: string
timeout:
description: Timeout defines the maximum run time of this BuildRun.
format: duration
Expand Down
22 changes: 22 additions & 0 deletions docs/buildrun.md
Expand Up @@ -12,6 +12,7 @@ SPDX-License-Identifier: Apache-2.0
- [Defining the BuildRef](#defining-the-buildref)
- [Defining paramValues](#defining-paramvalues)
- [Defining the ServiceAccount](#defining-the-serviceaccount)
- [Canceling a `BuildRun`](#canceling-a-buildrun)
- [BuildRun Status](#buildrun-status)
- [Understanding the state of a BuildRun](#understanding-the-state-of-a-BuildRun)
- [Understanding failed BuildRuns](#understanding-failed-buildruns)
Expand Down Expand Up @@ -132,6 +133,24 @@ You can also use set the `spec.serviceAccount.generate` path to `true`. This wil

_**Note**_: When the SA is not defined, the `BuildRun` will default to the `default` SA in the namespace.

## Canceling a `BuildRun`

To cancel a `BuildRun` that's currently executing, update its spec to mark it as canceled by setting `spec.state` to `BuildRunCanceled`.

When you cancel a `BuildRun`, the underlying `TaskRun` is marked as canceled per the [Tekton cancel `TaskRun` feature](https://github.com/tektoncd/pipeline/blob/main/docs/taskruns.md).

Example of canceling a `BuildRun`:

```yaml
apiVersion: shipwright.io/v1alpha1
kind: BuildRun
metadata:
name: buildpack-nodejs-buildrun-namespaced
spec:
# [...]
state: "BuildRunCanceled"
```

## BuildRun Status

The `BuildRun` resource is updated as soon as the current image building status changes:
Expand Down Expand Up @@ -167,7 +186,9 @@ The following table illustrates the different states a BuildRun can have under i
| Status | Reason | CompletionTime is set | Description |
| --- | --- | --- | --- |
| Unknown | Pending | No | The BuildRun is waiting on a Pod in status Pending. |
| Unknown | Running | No | The BuildRun has been validate and started to perform its work. |l
| Unknown | Running | No | The BuildRun has been validated and started to perform its work. |
| Unknown | BuildRunCanceled | No | The user requested the BuildRun to be canceled. This results in the BuildRun controller requesting the TaskRun be canceled. Cancellation has not been done yet. |
| True | Succeeded | Yes | The BuildRun Pod is done. |
| False | Failed | Yes | The BuildRun failed in one of the steps. |
| False | BuildRunTimeout | Yes | The BuildRun timed out. |
Expand All @@ -180,6 +201,7 @@ The following table illustrates the different states a BuildRun can have under i
| False | ServiceAccountNotFound | Yes | The referenced service account was not found in the cluster. |
| False | BuildRegistrationFailed | Yes | The related Build in the BuildRun is on a Failed state. |
| False | BuildNotFound | Yes | The related Build in the BuildRun was not found. |
| False | BuildRunCanceled | Yes | The BuildRun and underlying TaskRun were canceled successfully. |

_Note_: We heavily rely on the Tekton TaskRun [Conditions](https://github.com/tektoncd/pipeline/blob/main/docs/taskruns.md#monitoring-execution-status) for populating the BuildRun ones, with some exceptions.

Expand Down
35 changes: 35 additions & 0 deletions pkg/apis/build/v1alpha1/buildrun_types.go
Expand Up @@ -45,8 +45,21 @@ type BuildRunSpec struct {
// image would be pushed to. It will overwrite the output image in build spec
// +optional
Output *Image `json:"output,omitempty"`

// State is used for canceling a buildrun (and maybe more later on).
// +optional
State BuildRunRequestedState `json:"state,omitempty"`
}

// BuildRunRequestedState defines the buildrun state the user can provide to override whatever is the current state.
// It is the type of the optional BuildRunSpec.State field.
type BuildRunRequestedState string

const (
	// BuildRunStateCancel indicates that the user wants to cancel the BuildRun,
	// if not already canceled or terminated. BuildRun.IsCanceled compares
	// Spec.State against this value.
	// NOTE(review): this is an untyped string constant, so it can be used both as a
	// BuildRunRequestedState and as a plain string — confirm no caller relies on
	// that before giving it an explicit type.
	BuildRunStateCancel = "BuildRunCanceled"
)

// BuildRunStatus defines the observed state of BuildRun
type BuildRunStatus struct {
// Conditions holds the latest available observations of a resource's current state.
Expand Down Expand Up @@ -128,6 +141,28 @@ type BuildRunList struct {
Items []BuildRun `json:"items"`
}

// IsDone reports whether the BuildRun has reached a final outcome: its
// Succeeded condition exists and its status is anything other than Unknown.
func (br *BuildRun) IsDone() bool {
	cond := br.Status.GetCondition(Succeeded)
	if cond == nil {
		// No Succeeded condition recorded yet, so the BuildRun cannot be done.
		return false
	}
	return cond.GetStatus() != corev1.ConditionUnknown
}

// HasStarted reports whether the BuildRun status carries a start time that is
// both set (non-nil) and not the zero time.
func (br *BuildRun) HasStarted() bool {
	startedAt := br.Status.StartTime
	if startedAt == nil {
		return false
	}
	return !startedAt.IsZero()
}

// IsSuccessful returns true if the BuildRun's Succeeded condition is present
// and its status is True, i.e. the BuildRun finished successfully. It returns
// false when the condition is missing, Unknown or False.
func (br *BuildRun) IsSuccessful() bool {
	c := br.Status.GetCondition(Succeeded)
	return c != nil && c.GetStatus() == corev1.ConditionTrue
}

// IsCanceled returns true if the user requested cancellation, i.e. the
// BuildRun's Spec.State is set to BuildRunStateCancel ("BuildRunCanceled").
// It inspects only the requested state in the spec, not the BuildRun status.
func (br *BuildRun) IsCanceled() bool {
	return br.Spec.State == BuildRunStateCancel
}

// Conditions defines a list of Condition
type Conditions []Condition

Expand Down
99 changes: 79 additions & 20 deletions pkg/reconciler/buildrun/buildrun.go
Expand Up @@ -6,6 +6,7 @@ package buildrun

import (
"context"
"encoding/json"
"fmt"
"regexp"
"strconv"
Expand All @@ -15,6 +16,7 @@ import (

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -74,24 +76,37 @@ func (r *ReconcileBuildRun) Reconcile(request reconcile.Request) (reconcile.Resu

ctxlog.Debug(ctx, "starting reconciling request from a BuildRun or TaskRun event", namespace, request.Namespace, name, request.Name)

// with build run cancel, it is now possible for a build run update to stem from something other than a task run update,
// so we can no longer assume that a build run event will not come in after the build run has a task run ref in its status
buildRun = &buildv1alpha1.BuildRun{}
getBuildRunErr := r.GetBuildRunObject(ctx, request.Name, request.Namespace, buildRun)
lastTaskRun := &v1beta1.TaskRun{}
getTaskRunErr := r.client.Get(ctx, types.NamespacedName{Name: request.Name, Namespace: request.Namespace}, lastTaskRun)

if getBuildRunErr != nil && getTaskRunErr != nil {
if !apierrors.IsNotFound(getBuildRunErr) {
return reconcile.Result{}, getBuildRunErr
}
if !apierrors.IsNotFound(getTaskRunErr) {
return reconcile.Result{}, getTaskRunErr
}
// If the BuildRun and TaskRun are not found, it might mean that we are running a Reconcile after a TaskRun was deleted. If this is the case, we need
// to identify from the request the BuildRun name associate to it and update the BuildRun Status.
r.VerifyRequestName(ctx, request, buildRun)
return reconcile.Result{}, nil
}

// if this is a build run event after we've set the task run ref, get the task run using the task run name stored in the build run
if getBuildRunErr == nil && apierrors.IsNotFound(getTaskRunErr) && buildRun.Status.LatestTaskRunRef != nil {
getTaskRunErr = r.client.Get(ctx, types.NamespacedName{Name: *buildRun.Status.LatestTaskRunRef, Namespace: request.Namespace}, lastTaskRun)
}

// for existing TaskRuns update the BuildRun Status, if there is no TaskRun, then create one
if err := r.client.Get(ctx, types.NamespacedName{Name: request.Name, Namespace: request.Namespace}, lastTaskRun); err != nil {
if apierrors.IsNotFound(err) {
err = r.GetBuildRunObject(ctx, request.Name, request.Namespace, buildRun)
if err != nil && !apierrors.IsNotFound(err) {
return reconcile.Result{}, err
} else if apierrors.IsNotFound(err) {
// If the BuildRun and TaskRun are not found, it might mean that we are running a Reconcile after a TaskRun was deleted. If this is the case, we need
// to identify from the request the BuildRun name associate to it and update the BuildRun Status.
r.VerifyRequestName(ctx, request, buildRun)
return reconcile.Result{}, nil
}
if getTaskRunErr != nil {
if apierrors.IsNotFound(getTaskRunErr) {

build = &buildv1alpha1.Build{}
err = resources.GetBuildObject(ctx, r.client, buildRun, build)
err := resources.GetBuildObject(ctx, r.client, buildRun, build)
if err != nil {
if !resources.IsClientStatusUpdateError(err) && buildRun.Status.IsFailed(buildv1alpha1.Succeeded) {
return reconcile.Result{}, nil
Expand Down Expand Up @@ -123,6 +138,14 @@ func (r *ReconcileBuildRun) Reconcile(request reconcile.Request) (reconcile.Resu
buildRun.Labels = make(map[string]string)
}

// make sure the BuildRun has not already been cancelled
if buildRun.IsCanceled() {
if updateErr := resources.UpdateConditionWithFalseStatus(ctx, r.client, buildRun, "the BuildRun is marked canceled.", buildv1alpha1.BuildRunStateCancel); updateErr != nil {
return reconcile.Result{}, updateErr
}
return reconcile.Result{}, nil
}

// Set OwnerReference for Build and BuildRun only when build.shipwright.io/build-run-deletion is set "true"
if build.GetAnnotations()[buildv1alpha1.AnnotationBuildRunDeletion] == "true" && !resources.IsOwnedByBuild(build, buildRun.OwnerReferences) {
if err := r.setOwnerReferenceFunc(build, buildRun, r.scheme); err != nil {
Expand Down Expand Up @@ -220,16 +243,31 @@ func (r *ReconcileBuildRun) Reconcile(request reconcile.Request) (reconcile.Resu
generatedTaskRun.CreationTimestamp.Time.Sub(buildRun.CreationTimestamp.Time),
)
} else {
return reconcile.Result{}, err
return reconcile.Result{}, getTaskRunErr
}
} else {
ctxlog.Info(ctx, "taskRun already exists", namespace, request.Namespace, name, request.Name)

err = r.GetBuildRunObject(ctx, lastTaskRun.Labels[buildv1alpha1.LabelBuildRun], request.Namespace, buildRun)
if err != nil && !apierrors.IsNotFound(err) {
return reconcile.Result{}, err
} else if apierrors.IsNotFound(err) {
return reconcile.Result{}, nil
if getBuildRunErr != nil && !apierrors.IsNotFound(getBuildRunErr) {
return reconcile.Result{}, getBuildRunErr
} else if apierrors.IsNotFound(getBuildRunErr) {
// this is a TR event, try getting the br from the label on the tr
err := r.GetBuildRunObject(ctx, lastTaskRun.Labels[buildv1alpha1.LabelBuildRun], request.Namespace, buildRun)
if err != nil && !apierrors.IsNotFound(err) {
return reconcile.Result{}, err
}
if err != nil && apierrors.IsNotFound(err) {
return reconcile.Result{}, nil
}
}

if buildRun.IsCanceled() && !lastTaskRun.IsCancelled() {
ctxlog.Info(ctx, "buildRun marked for cancellation, patching task run", namespace, request.Namespace, name, request.Name)
// patch tekton taskrun a la tkn to start tekton's cancelling logic
trueParam := true
if err := r.patchTaskRun(ctx, lastTaskRun, "replace", "/spec/status", v1beta1.TaskRunSpecStatusCancelled, metav1.PatchOptions{Force: &trueParam}); err != nil {
return reconcile.Result{}, err
}
}

// Check if the BuildRun is already finished, this happens if the build controller is restarted.
Expand All @@ -256,7 +294,7 @@ func (r *ReconcileBuildRun) Reconcile(request reconcile.Request) (reconcile.Resu
serviceAccount.Namespace = buildRun.Namespace

ctxlog.Info(ctx, "deleting service account", namespace, request.Namespace, name, request.Name)
if err = r.client.Delete(ctx, serviceAccount); err != nil && !apierrors.IsNotFound(err) {
if err := r.client.Delete(ctx, serviceAccount); err != nil && !apierrors.IsNotFound(err) {
ctxlog.Error(ctx, err, "Error during deletion of generated service account.")
return reconcile.Result{}, err
}
Expand Down Expand Up @@ -321,7 +359,7 @@ func (r *ReconcileBuildRun) Reconcile(request reconcile.Request) (reconcile.Resu
}

ctxlog.Info(ctx, "updating buildRun status", namespace, request.Namespace, name, request.Name)
if err = r.client.Status().Update(ctx, buildRun); err != nil {
if err := r.client.Status().Update(ctx, buildRun); err != nil {
return reconcile.Result{}, err
}
}
Expand Down Expand Up @@ -432,3 +470,24 @@ func (r *ReconcileBuildRun) createTaskRun(ctx context.Context, serviceAccount *c

return generatedTaskRun, nil
}

// patchStringValue is a single patch operation whose value is a string.
// It serializes to a JSON object with the keys "op", "path" and "value";
// patchTaskRun marshals a slice of these as the body of a JSON patch.
type patchStringValue struct {
	// Op is the patch operation name, e.g. "replace".
	Op string `json:"op"`
	// Path is the location in the target document the operation applies to.
	Path string `json:"path"`
	// Value is the string value used by the operation.
	Value string `json:"value"`
}

// patchTaskRun sends a JSON patch containing a single string-valued operation
// (op, path, value) for the given TaskRun through the reconciler's client.
// opts is handed to the client as the raw patch options. Errors from
// marshalling the payload or from the client are returned unchanged.
func (r *ReconcileBuildRun) patchTaskRun(ctx context.Context, tr *v1beta1.TaskRun, op, path, value string, opts metav1.PatchOptions) error {
	// A JSON patch document is an array of operations; here it always holds exactly one.
	body, marshalErr := json.Marshal([]patchStringValue{{Op: op, Path: path, Value: value}})
	if marshalErr != nil {
		return marshalErr
	}

	return r.client.Patch(
		ctx,
		tr,
		client.RawPatch(types.JSONPatchType, body),
		&client.PatchOptions{Raw: &opts},
	)
}

0 comments on commit 5d8fb41

Please sign in to comment.