specifying onError for a step

This commit implements TEP-0049 - ignore a step error. When a `step` in a `task` results in a failure, the rest of the steps in the `task` are skipped and the `taskRun` is declared a failure. If you would like to ignore such step errors and continue executing the rest of the steps in the task, you can specify `onError` for such a `step`. `onError` can be set to either `continue` or `fail` as part of the step definition. If `onError` is set to `continue`, the entrypoint sets the original failed exit code of the script in the container terminated state. A `step` with `onError` set to `continue` does not fail the `taskRun` and continues executing the rest of the steps in a task. This is an alpha feature. The `enable-api-fields` feature flag must be set to `"alpha"` to specify `onError` for a `step`. This commit includes the following changes: * Changing entrypoint to include three new flags `onError`, `stepMetadataDir`, and `stepMetadataDirLink`. * Adding one new function as part of the runner CreateDirWithSymlink * Creating a volume `/tekton/steps/` * Supporting a path variable $(steps.step-<stepName>.exitCode.path) and $(steps.step-unnamed-<stepIndex>.exitCode.path) * API spec `onError` while defining a step * Writing exitCode at /tekton/steps/step-<step-name>/exitCode or /tekton/steps/step-unnamed-<step-index>/exitCode * Set the exitCode of a terminated state to a non-zero exit code * Doc, unit test, and examples for this feature
tektoncd · Aug 11, 2021 · 92746a2 · 92746a2
1 parent 9b9c925
commit 92746a2
Show file tree

Hide file tree

Showing 28 changed files with 1,482 additions and 52 deletions.
diff --git a/cmd/entrypoint/main.go b/cmd/entrypoint/main.go
@@ -43,6 +43,10 @@ var (
 	results             = flag.String("results", "", "If specified, list of file names that might contain task results")
 	timeout             = flag.Duration("timeout", time.Duration(0), "If specified, sets timeout for step")
 	breakpointOnFailure = flag.Bool("breakpoint_on_failure", false, "If specified, expect steps to not skip on failure")
+	onError             = flag.String("on_error", "", "Set to \"continue\" to ignore an error and continue when a container terminates with a non-zero exit code."+
+		" Set to \"fail\" to declare a failure with a step error and stop executing the rest of the steps.")
+	stepMetadataDir     = flag.String("step_metadata_dir", "", "If specified, create directory to store the step metadata e.g. /tekton/steps/<step-name>/")
+	stepMetadataDirLink = flag.String("step_metadata_dir_link", "", "creates a symbolic link to the specified step_metadata_dir e.g. /tekton/steps/<step-index>/")
 )
 
 const (
@@ -108,6 +112,9 @@ func main() {
 		Results:             strings.Split(*results, ","),
 		Timeout:             timeout,
 		BreakpointOnFailure: *breakpointOnFailure,
+		OnError:             *onError,
+		StepMetadataDir:     *stepMetadataDir,
+		StepMetadataDirLink: *stepMetadataDirLink,
 	}
 
 	// Copy any creds injected by the controller into the $HOME directory of the current
@@ -134,9 +141,15 @@ func main() {
 			// same signature.
 			if status, ok := t.Sys().(syscall.WaitStatus); ok {
 				checkForBreakpointOnFailure(e, breakpointExitPostFile)
-				os.Exit(status.ExitStatus())
+				// ignore a step error i.e. do not exit if a container terminates with a non-zero exit code when onError is set to "continue"
+				if e.OnError != entrypoint.ContinueOnError {
+					os.Exit(status.ExitStatus())
+				}
+			}
+			// log and exit only if a step error must cause run failure
+			if e.OnError != entrypoint.ContinueOnError {
+				log.Fatalf("Error executing command (ExitError): %v", err)
 			}
-			log.Fatalf("Error executing command (ExitError): %v", err)
 		default:
 			checkForBreakpointOnFailure(e, breakpointExitPostFile)
 			log.Fatalf("Error executing command: %v", err)

diff --git a/cmd/entrypoint/post_writer.go b/cmd/entrypoint/post_writer.go
@@ -12,11 +12,43 @@ type realPostWriter struct{}
 
 var _ entrypoint.PostWriter = (*realPostWriter)(nil)
 
-func (*realPostWriter) Write(file string) {
+// Write creates (or truncates) file and, when content is non-empty, writes
+// content to it. An empty file name is a no-op. The parent directory is
+// assumed to exist already. Any failure to create, write or close the file
+// terminates the process via log.Fatalf.
+func (*realPostWriter) Write(file string, content string) {
 	if file == "" {
 		return
 	}
-	if _, err := os.Create(file); err != nil {
+	f, err := os.Create(file)
+	if err != nil {
 		log.Fatalf("Creating %q: %v", file, err)
 	}
+
+	if content != "" {
+		if _, err := f.WriteString(content); err != nil {
+			log.Fatalf("Writing %q: %v", file, err)
+		}
+	}
+	// Close explicitly rather than with defer: log.Fatalf exits the process
+	// without running deferred calls, and checking the result here releases
+	// the descriptor and surfaces a close-time error instead of dropping it.
+	if err := f.Close(); err != nil {
+		log.Fatalf("Closing %q: %v", file, err)
+	}
+}
+
+// CreateDirWithSymlink creates the directory source (including any missing
+// parents) and, when link is non-empty, a symbolic link at link that points to
+// source. An empty source is a no-op. If something already exists at link it
+// is left untouched. Failing to create the directory or the link terminates
+// the process via log.Fatalf.
+func (*realPostWriter) CreateDirWithSymlink(source, link string) {
+	if source == "" {
+		return
+	}
+	if err := os.MkdirAll(source, 0770); err != nil {
+		log.Fatalf("Creating directory %q: %v", source, err)
+	}
+
+	if link == "" {
+		return
+	}
+	// Lstat, unlike Stat, does not follow the link, so an existing but dangling
+	// symlink is recognised as present instead of making os.Symlink fail with
+	// "file exists". MkdirAll succeeded above, so source is known to exist and
+	// needs no further check. Other Lstat errors are deliberately ignored,
+	// keeping link creation best-effort.
+	if _, err := os.Lstat(link); os.IsNotExist(err) {
+		if err := os.Symlink(source, link); err != nil {
+			log.Fatalf("Creating a symlink %q: %v", link, err)
+		}
+	}
 }
diff --git a/cmd/entrypoint/post_writer_test.go b/cmd/entrypoint/post_writer_test.go
@@ -0,0 +1,76 @@
+package main
+
+import (
+	"os"
+	"testing"
+)
+
+func TestRealPostWriter_WriteFileContent(t *testing.T) {
+	tests := []struct {
+		name, file, content string
+	}{{
+		name:    "write a file content",
+		file:    "sample.txt",
+		content: "this is a sample file",
+	}, {
+		name: "write a file without specifying any path",
+	}, {
+		name: "create an empty file",
+		file: "sample.txt",
+	}}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			rw := realPostWriter{}
+			rw.Write(tt.file, tt.content)
+			if tt.file != "" {
+				defer os.Remove(tt.file)
+				if _, err := os.Stat(tt.file); err != nil {
+					t.Fatalf("Failed to create a file %q", tt.file)
+				}
+				b, err := os.ReadFile(tt.file)
+				if err != nil {
+					t.Fatalf("Failed to read the file %q", tt.file)
+				}
+				if tt.content != string(b) {
+					t.Fatalf("Failed to write the desired content %q to the file %q", tt.content, tt.file)
+				}
+			}
+		})
+	}
+}
+
+func TestRealPostWriter_CreateStepPath(t *testing.T) {
+	tests := []struct {
+		name, source, link string
+	}{{
+		name:   "Create a path with a file",
+		source: "sample.txt",
+		link:   "0",
+	}, {
+		name: "Create a path without specifying any path",
+	}, {
+		name:   "Create a sym link without specifying any link path",
+		source: "sample.txt",
+	}, {
+		name: "Create a sym link without specifying any source",
+		link: "0.txt",
+	}}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			rw := realPostWriter{}
+			rw.CreateDirWithSymlink(tt.source, tt.link)
+			if tt.source != "" {
+				defer os.Remove(tt.source)
+				if _, err := os.Stat(tt.source); err != nil {
+					t.Fatalf("Failed to create a file %q", tt.source)
+				}
+			}
+			if tt.source != "" && tt.link != "" {
+				defer os.Remove(tt.link)
+				if _, err := os.Stat(tt.link); err != nil {
+					t.Fatalf("Failed to create a sym link %q", tt.link)
+				}
+			}
+		})
+	}
+}
diff --git a/docs/developers/README.md b/docs/developers/README.md
@@ -142,6 +142,8 @@ of how this directory is used:
   * These folders are [part of the Tekton API](../api_compatibility_policy.md):
     * `/tekton/results` is where [results](#results) are written to
       (path available to `Task` authors via [`$(results.name.path)`](../variables.md))
+    * `/tekton/steps` is where the `step` exit codes are written to
+      (path available to `Task` authors via [`$(steps.step-<stepName>.exitCode.path)`](../variables.md#variables-available-in-a-task))
   * These folders are implementation details of Tekton and **users should not
     rely on this specific behavior as it may change in the future**:
     * `/tekton/tools` contains tools like the [entrypoint binary](#entrypoint-rewriting-and-step-ordering)
@@ -466,3 +468,163 @@ flag to `alpha` in your test cluster to see your alpha integration tests
 run. When the flag in your cluster is `alpha` _all_ integration tests are executed,
 both `stable` and `alpha`. Setting the feature flag to `stable` will exclude `alpha`
 tests.
+
+## What and Why of `/tekton/steps`
+
+`/tekton/steps/` is an implicit volume mounted on a pod and created for storing the step specific information/metadata.
+One subdirectory is created under `/tekton/steps/` for each step in a task.
+
+Let's take an example of a task with three steps, each exiting with non-zero exit code:
+
+```yaml
+kind: TaskRun
+apiVersion: tekton.dev/v1beta1
+metadata:
+  generateName: test-taskrun-
+spec:
+  taskSpec:
+    steps:
+      - image: alpine
+        name: step0
+        onError: continue
+        script: |
+          echo "This is step 0"
+          ls -1R /tekton/steps/
+          exit 1
+      - image: alpine
+        onError: continue
+        script: |
+          echo "This is step 1"
+          ls -1R /tekton/steps/
+          exit 2
+      - image: alpine
+        name: step2
+        onError: continue
+        script: |
+          echo "This is step 2"
+          ls -1R /tekton/steps/
+          exit 3
+```
+
+The container `step-step0` for the first step `step0` shows three subdirectories (one for each step) under
+`/tekton/steps/` and all three of them are empty.
+
+```
+kubectl logs pod/test-taskrun-2rb9k-pod-bphct -c step-step0
++ echo 'This is step 0'
++ ls -1R /tekton/steps/
+This is step 0
+/tekton/steps/:
+0
+1
+2
+step-step0
+step-step2
+step-unnamed-1
+
+/tekton/steps/step-step0:
+/tekton/steps/step-step2:
+/tekton/steps/step-unnamed-1:
++ exit 1
+```
+
+The container `step-unnamed-1` for the second step which has no name shows three subdirectories (one for each step)
+under `/tekton/steps/` along with the `exitCode` file under the first step directory which has finished executing:
+
+```
+kubectl logs pod/test-taskrun-2rb9k-pod-bphct -c step-unnamed-1
+This is step 1
++ echo 'This is step 1'
++ ls -1R /tekton/steps/
+/tekton/steps/:
+0
+1
+2
+step-step0
+step-step2
+step-unnamed-1
+
+/tekton/steps/step-step0:
+exitCode
+
+/tekton/steps/step-step2:
+
+/tekton/steps/step-unnamed-1:
++ exit 2
+```
+
+The container `step-step2` for the third step `step2` shows three subdirectories (one for each step) under
+`/tekton/steps/` along with the `exitCode` file under the first and second step directories since both are done executing:
+
+```
+kubectl logs pod/test-taskrun-2rb9k-pod-bphct -c step-step2
+This is step 2
++ echo 'This is step 2'
++ ls -1R /tekton/steps/
+/tekton/steps/:
+0
+1
+2
+step-step0
+step-step2
+step-unnamed-1
+
+/tekton/steps/step-step0:
+exitCode
+
+/tekton/steps/step-step2:
+
+/tekton/steps/step-unnamed-1:
+exitCode
++ exit 3
+```
+
+The entrypoint is modified to include two additional flags representing the step-specific directory and a symbolic
+link:
+
+```
+step_metadata_dir - the dir specified in this flag is created to hold a step specific metadata
+step_metadata_dir_link - the dir specified in this flag is created as a symbolic link to step_metadata_dir
+```
+
+`step_metadata_dir` is set to `/tekton/steps/step-step0` and `step_metadata_dir_link` is set to `/tekton/steps/0` for
+the entrypoint of the first step in the above example task.
+
+Notice the additional entries `0`, `1`, and `2` under `/tekton/steps/`. These are symbolic links that
+point to their respective step directories, `step-step0`, `step-unnamed-1`, and `step-step2`. These symbolic links
+are created to provide simplified access to the step metadata directories i.e., instead of referring to a directory with
+the step name, access it via the step index. The step index becomes complex and hard to keep track of in a task with
+a long list of steps, for example, a task with 20 steps. Creating the step metadata directory using a step name
+and creating a symbolic link using the step index gives the user flexibility, and an option to choose whatever works
+best for them.
+
+
+## How to access the exit code of a step from any subsequent step in a task
+
+The entrypoint now allows a step to exit with an error and still continue running the rest of the steps in a task, i.e., it is
+possible for a step to exit with a non-zero exit code. It is therefore possible to design a task with a step that takes an action
+depending on the exit code of any prior step. The user can access the exit code of a step by reading the file pointed to
+by the path variable `$(steps.step-<step-name>.exitCode.path)` or `$(steps.step-unnamed-<step-index>.exitCode.path)`.
+For example:
+
+* `$(steps.step-my-awesome-step.exitCode.path)` where the step name is `my-awesome-step`.
+* `$(steps.step-unnamed-0.exitCode.path)` where the first step in a task has no name.
+
+The exit code of a step is stored in a file named `exitCode` under a directory `/tekton/steps/step-<step-name>/` or
+`/tekton/steps/step-unnamed-<step-index>/` which is reserved for any other step specific information in the future.
+
+If you would like to use the tekton internal path, you can access the exit code by reading the file
+(which is not recommended though since the path might change in the future):
+
+```shell
+cat /tekton/steps/step-<step-name>/exitCode
+```
+
+And, access the step exit code without a step name:
+
+```shell
+cat /tekton/steps/step-unnamed-<step-index>/exitCode
+```
+
+Or, you can access the step metadata directory via symlink, for example, use `cat /tekton/steps/0/exitCode` for the
+first step in a task.