From f94f1d1a814242bdc3cce48472318a489239f90a Mon Sep 17 00:00:00 2001 From: Ruslan Aksenov Date: Sat, 27 Jun 2020 20:48:58 -0700 Subject: [PATCH] Release v0.25.0 --- docs/go-quick-start.md | 15 +- docs/go-sdk-video-tutorial.md | 2 +- docs/installing-server.md | 8 +- docs/java-quick-start.md | 143 +++- docs/learn-cli.md | 51 +- docs/learn-workflow-filtering.md | 8 +- .../version-0.25.0/go-activities.md | 123 +++ .../go-activity-async-completion.md | 59 ++ .../version-0.25.0/go-child-workflows.md | 59 ++ .../version-0.25.0/go-continue-as-new.md | 22 + .../version-0.25.0/go-create-workflows.md | 148 ++++ .../version-0.25.0/go-distributed-cron.md | 90 ++ .../version-0.25.0/go-error-handling.md | 60 ++ .../version-0.25.0/go-execute-activity.md | 96 +++ versioned_docs/version-0.25.0/go-queries.md | 92 +++ .../version-0.25.0/go-quick-start.md | 207 +++++ versioned_docs/version-0.25.0/go-retries.md | 99 +++ .../version-0.25.0/go-sdk-video-tutorial.md | 142 ++++ versioned_docs/version-0.25.0/go-sessions.md | 139 ++++ .../version-0.25.0/go-side-effect.md | 32 + versioned_docs/version-0.25.0/go-signals.md | 53 ++ versioned_docs/version-0.25.0/go-tracing.md | 90 ++ .../version-0.25.0/go-versioning.md | 157 ++++ versioned_docs/version-0.25.0/go-workers.md | 63 ++ .../version-0.25.0/go-workflow-testing.md | 181 ++++ .../version-0.25.0/installing-server.md | 63 ++ .../version-0.25.0/java-activity-interface.md | 35 + .../version-0.25.0/java-distributed-cron.md | 82 ++ .../java-implementing-activities.md | 127 +++ .../java-implementing-workflows.md | 255 ++++++ .../version-0.25.0/java-quick-start.md | 774 ++++++++++++++++++ .../java-starting-workflow-executions.md | 49 ++ .../version-0.25.0/java-versioning.md | 92 +++ .../version-0.25.0/java-workflow-interface.md | 119 +++ .../version-0.25.0/learn-activities.md | 83 ++ .../version-0.25.0/learn-archival.md | 73 ++ versioned_docs/version-0.25.0/learn-cli.md | 316 +++++++ .../learn-cross-dc-replication.md | 103 +++ 
versioned_docs/version-0.25.0/learn-events.md | 34 + .../version-0.25.0/learn-glossary.md | 136 +++ .../version-0.25.0/learn-queries.md | 18 + .../learn-server-configuration.md | 359 ++++++++ .../version-0.25.0/learn-task-lists.md | 30 + .../version-0.25.0/learn-topology.md | 54 ++ .../learn-workflow-filtering.md | 291 +++++++ .../version-0.25.0/learn-workflows.md | 129 +++ versioned_docs/version-0.25.0/license.md | 28 + versioned_docs/version-0.25.0/mdx.md | 17 + versioned_docs/version-0.25.0/overview.md | 44 + .../version-0.25.0/use-cases-batch-job.md | 11 + .../version-0.25.0/use-cases-big-ml.md | 9 + .../version-0.25.0/use-cases-cicd.md | 15 + .../version-0.25.0/use-cases-dsl.md | 15 + .../version-0.25.0/use-cases-event-driven.md | 16 + .../version-0.25.0/use-cases-interactive.md | 8 + .../use-cases-operational-management.md | 10 + .../version-0.25.0/use-cases-orchestration.md | 18 + .../use-cases-partitioned-scan.md | 14 + .../version-0.25.0/use-cases-periodic.md | 18 + .../version-0.25.0/use-cases-polling.md | 15 + .../version-0.25.0/use-cases-provisioning.md | 16 + .../version-0.25.0-sidebars.json | 250 ++++++ versions.json | 1 + 63 files changed, 5819 insertions(+), 47 deletions(-) create mode 100644 versioned_docs/version-0.25.0/go-activities.md create mode 100644 versioned_docs/version-0.25.0/go-activity-async-completion.md create mode 100644 versioned_docs/version-0.25.0/go-child-workflows.md create mode 100644 versioned_docs/version-0.25.0/go-continue-as-new.md create mode 100644 versioned_docs/version-0.25.0/go-create-workflows.md create mode 100644 versioned_docs/version-0.25.0/go-distributed-cron.md create mode 100644 versioned_docs/version-0.25.0/go-error-handling.md create mode 100644 versioned_docs/version-0.25.0/go-execute-activity.md create mode 100644 versioned_docs/version-0.25.0/go-queries.md create mode 100644 versioned_docs/version-0.25.0/go-quick-start.md create mode 100644 versioned_docs/version-0.25.0/go-retries.md create mode 100644 
versioned_docs/version-0.25.0/go-sdk-video-tutorial.md create mode 100644 versioned_docs/version-0.25.0/go-sessions.md create mode 100644 versioned_docs/version-0.25.0/go-side-effect.md create mode 100644 versioned_docs/version-0.25.0/go-signals.md create mode 100644 versioned_docs/version-0.25.0/go-tracing.md create mode 100644 versioned_docs/version-0.25.0/go-versioning.md create mode 100644 versioned_docs/version-0.25.0/go-workers.md create mode 100644 versioned_docs/version-0.25.0/go-workflow-testing.md create mode 100644 versioned_docs/version-0.25.0/installing-server.md create mode 100644 versioned_docs/version-0.25.0/java-activity-interface.md create mode 100644 versioned_docs/version-0.25.0/java-distributed-cron.md create mode 100644 versioned_docs/version-0.25.0/java-implementing-activities.md create mode 100644 versioned_docs/version-0.25.0/java-implementing-workflows.md create mode 100644 versioned_docs/version-0.25.0/java-quick-start.md create mode 100644 versioned_docs/version-0.25.0/java-starting-workflow-executions.md create mode 100644 versioned_docs/version-0.25.0/java-versioning.md create mode 100644 versioned_docs/version-0.25.0/java-workflow-interface.md create mode 100644 versioned_docs/version-0.25.0/learn-activities.md create mode 100644 versioned_docs/version-0.25.0/learn-archival.md create mode 100644 versioned_docs/version-0.25.0/learn-cli.md create mode 100644 versioned_docs/version-0.25.0/learn-cross-dc-replication.md create mode 100644 versioned_docs/version-0.25.0/learn-events.md create mode 100644 versioned_docs/version-0.25.0/learn-glossary.md create mode 100644 versioned_docs/version-0.25.0/learn-queries.md create mode 100644 versioned_docs/version-0.25.0/learn-server-configuration.md create mode 100644 versioned_docs/version-0.25.0/learn-task-lists.md create mode 100644 versioned_docs/version-0.25.0/learn-topology.md create mode 100644 versioned_docs/version-0.25.0/learn-workflow-filtering.md create mode 100644 
versioned_docs/version-0.25.0/learn-workflows.md create mode 100644 versioned_docs/version-0.25.0/license.md create mode 100644 versioned_docs/version-0.25.0/mdx.md create mode 100644 versioned_docs/version-0.25.0/overview.md create mode 100644 versioned_docs/version-0.25.0/use-cases-batch-job.md create mode 100644 versioned_docs/version-0.25.0/use-cases-big-ml.md create mode 100644 versioned_docs/version-0.25.0/use-cases-cicd.md create mode 100644 versioned_docs/version-0.25.0/use-cases-dsl.md create mode 100644 versioned_docs/version-0.25.0/use-cases-event-driven.md create mode 100644 versioned_docs/version-0.25.0/use-cases-interactive.md create mode 100644 versioned_docs/version-0.25.0/use-cases-operational-management.md create mode 100644 versioned_docs/version-0.25.0/use-cases-orchestration.md create mode 100644 versioned_docs/version-0.25.0/use-cases-partitioned-scan.md create mode 100644 versioned_docs/version-0.25.0/use-cases-periodic.md create mode 100644 versioned_docs/version-0.25.0/use-cases-polling.md create mode 100644 versioned_docs/version-0.25.0/use-cases-provisioning.md create mode 100644 versioned_sidebars/version-0.25.0-sidebars.json diff --git a/docs/go-quick-start.md b/docs/go-quick-start.md index 7368054961..028704ccc6 100644 --- a/docs/go-quick-start.md +++ b/docs/go-quick-start.md @@ -6,7 +6,8 @@ title: Quick Start This topic helps you install the Temporal server and implement a workflow. ## Install Temporal Server Locally -To run samples locally you need to run Temporal server locally using [instructions](/docs/installing-server). + +To run samples locally you need to run Temporal server locally using [instructions](/docs/installing-server). 
## Start with an empty directory @@ -23,16 +24,18 @@ cd tutorial-go-sdk ## Initialize Go Modules and SDK Package Dependency Initialize Go modules + ``` > go mod init github.com/temporalio/tutorial-go-sdk go: creating new go.mod: module github.com/temporalio/tutorial-go-sdk ``` Add dependency to Temporal Go SDK + ```bash -> go get go.temporal.io/temporal@v0.23.1 -go: downloading go.temporal.io/temporal v0.23.1 -go: go.temporal.io/temporal upgrade => v0.23.1 +> go get go.temporal.io/temporal@v0.25.0 +go: downloading go.temporal.io/temporal v0.25.0 +go: go.temporal.io/temporal upgrade => v0.25.0 ``` ## Implement Activities @@ -182,7 +185,7 @@ Run your worker app which hosts workflow and activity implementations ## Start workflow execution ```bash -> docker run --network=host --rm temporalio/tctl:0.23.1 wf start --tl tutorial_tl -w Greet_Temporal_1 --wt Greetings --et 3600 --dt 10 +> docker run --network=host --rm temporalio/tctl:0.25.0 wf start --tl tutorial_tl -w Greet_Temporal_1 --wt Greetings --et 3600 --dt 10 Started Workflow Id: Greet_Temporal_1, run Id: b4f8957a-565c-40ad-8495-15a41338f8f4 ``` @@ -201,4 +204,4 @@ Greeting sent to user: Temporal ## Try Go SDK Samples Check [Go SDK Samples](https://github.com/temporalio/temporal-go-samples) -and try simple Temporal usage scenario. +and try simple Temporal usage scenario. 
diff --git a/docs/go-sdk-video-tutorial.md b/docs/go-sdk-video-tutorial.md index 6e1aebb87f..2e674d7d97 100644 --- a/docs/go-sdk-video-tutorial.md +++ b/docs/go-sdk-video-tutorial.md @@ -138,5 +138,5 @@ func main() { Commands: ```bash -docker run --network=host --rm temporalio/tctl:0.23.1 wf start --tl tutorial_tl -w Greet_Temporal_1 --wt Greetings --et 3600 --dt 10 +docker run --network=host --rm temporalio/tctl:0.25.0 wf start --tl tutorial_tl -w Greet_Temporal_1 --wt Greetings --et 3600 --dt 10 ``` diff --git a/docs/installing-server.md b/docs/installing-server.md index b7b93e3c89..f679a77cbc 100644 --- a/docs/installing-server.md +++ b/docs/installing-server.md @@ -15,7 +15,7 @@ Follow the docker-compose installation instructions found here: [https://docs.do Download the Temporal docker-compose file to preferred location (i.e. `quick_start` directory): ```bash -curl -L https://github.com/temporalio/temporal/releases/download/v0.23.1/docker.tar.gz | tar -xz --strip-components 1 docker/docker-compose.yml +curl -L https://github.com/temporalio/temporal/releases/download/v0.25.0/docker.tar.gz | tar -xz --strip-components 1 docker/docker-compose.yml ls # docker-compose.yml @@ -31,7 +31,7 @@ The output should look similar to: ``` Creating network "quick_start_default" with the default driver -Pulling temporal (temporalio/temporal-auto-setup:0.23.1)... +Pulling temporal (temporalio/temporal-auto-setup:0.25.0)... ... ... temporal_1 | Description: Default namespace for Temporal Server @@ -59,5 +59,5 @@ At this point Temporal Server is running! You can also see the web interface on ## Write Workflows and Activities using Client SDK Try out [Java SDK](/docs/java-quick-start). - -Try out [Go SDK](/docs/go-quick-start). + +Try out [Go SDK](/docs/go-quick-start). 
diff --git a/docs/java-quick-start.md b/docs/java-quick-start.md index 3ad730ebee..c1e1084654 100644 --- a/docs/java-quick-start.md +++ b/docs/java-quick-start.md @@ -6,7 +6,8 @@ title: Quick Start This topic helps you install the Temporal server and implement a workflow. ## Install Temporal Server Locally -To run samples locally you need to run Temporal server locally using [instructions](/docs/installing-server). + +To run samples locally you need to run Temporal server locally using [instructions](/docs/installing-server). ## Implement Hello World Java Workflow @@ -15,15 +16,20 @@ To run samples locally you need to run Temporal server locally using [instructio Go to the [Maven Repository Temporal Java Client Page](https://oss.sonatype.org/#nexus-search;quick~temporal-sdk) and find the latest version of the library. Include it as a dependency into your Java project. For example if you are using Gradle the dependency looks like: + ``` compile group: 'io.temporal', name: 'temporal-sdk', version: '' ``` + Also add the following dependencies that temporal-sdk relies on: + ``` compile group: 'commons-configuration', name: 'commons-configuration', version: '1.9' compile group: 'ch.qos.logback', name: 'logback-classic', version: '1.2.3' ``` + Make sure that the following code compiles: + ```java import io.temporal.workflow.Workflow; import io.temporal.workflow.WorkflowMethod; @@ -32,7 +38,7 @@ import org.slf4j.Logger; public class GettingStarted { private static Logger logger = Workflow.getLogger(GettingStarted.class); - + @WorkflowInterface interface HelloWorld { @WorkflowMethod @@ -41,10 +47,12 @@ public class GettingStarted { } ``` + If you are having problems setting up the build files use the [Temporal Java Samples](https://github.com/temporalio/temporal-java-samples) GitHub repository as a reference. 
Also add the following logback config file somewhere in your classpath: + ```xml @@ -64,6 +72,7 @@ Also add the following logback config file somewhere in your classpath: ### Implement Hello World Workflow Let's add `HelloWorldImpl` with the `sayHello` method that just logs the "Hello ..." and returns. + ```java import io.temporal.worker.Worker; import io.temporal.workflow.Workflow; @@ -89,8 +98,10 @@ public class GettingStarted { } } ``` + To link the workflow implementation to the Temporal framework, it should be registered with a worker that connects to a Temporal Service. By default the worker connects to the locally running Temporal service. + ```java public static void main(String[] args) { // gRPC stubs wrapper that talks to the local docker instance of temporal service. @@ -104,45 +115,59 @@ a Temporal Service. By default the worker connects to the locally running Tempor factory.start(); } ``` + ### Execute Hello World Workflow using the CLI Now run the worker program. Following is an example log: + ```text 18:39:45.522 [main] INFO i.t.i.WorkflowServiceStubsImpl - Created GRPC client for channel: ManagedChannelOrphanWrapper{delegate=ManagedChannelImpl{logId=1, target=127.0.0.1:7233}} 18:39:45.674 [main] INFO io.temporal.internal.worker.Poller - start(): Poller{options=PollerOptions{maximumPollRateIntervalMilliseconds=1000, maximumPollRatePerSecond=0.0, pollBackoffCoefficient=2.0, pollBackoffInitialInterval=PT0.1S, pollBackoffMaximumInterval=PT1M, pollThreadCount=1, pollThreadNamePrefix='Workflow Poller taskList="HelloWorldTaskList", namespace="default"'}, identity=unknown-mac} 18:39:45.676 [main] INFO io.temporal.internal.worker.Poller - start(): Poller{options=PollerOptions{maximumPollRateIntervalMilliseconds=1000, maximumPollRatePerSecond=0.0, pollBackoffCoefficient=2.0, pollBackoffInitialInterval=PT0.1S, pollBackoffMaximumInterval=PT1M, pollThreadCount=1, pollThreadNamePrefix='null'}, identity=95963a78-641d-434b-841e-a2efe7f8a19f} ``` + No Hello printed. 
This is expected because a worker is just a workflow code host. The workflow has to be started to execute. Let's use Temporal CLI to start the workflow: + ```bash -> docker run --network=host --rm temporalio/tctl:0.23.1 workflow start --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"World\" +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"World\" Started Workflow Id: ef8c6cd6-de62-4481-8398-623865467696, run Id: 26eafcde-6cab-4836-9ad4-888a74e172e1 ``` + The last line of output of the program should now be: + ``` 18:40:28.354 [workflow-1029765531] INFO main - Hello World! ``` + Let's start another workflow execution: + ```bash -> docker run --network=host --rm temporalio/tctl:0.23.1 workflow start --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"Temporal\" +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"Temporal\" Started Workflow Id: 7bdfba1d-b3f4-4665-88c2-cec73301dd52, run Id: d6c99e2d-7d76-458f-956b-a2f72af292bf ``` + The last two lines of output of the program should now be: + ```text 18:40:28.354 [workflow-1029765531] INFO main - Hello World! 18:40:51.678 [workflow-1538256693] INFO main - Hello Temporal! 
``` + ### List Workflows and Workflow History Let's list our workflows in the CLI: + ```bash -> docker run --network=host --rm temporalio/tctl:0.23.1 workflow list +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow list WORKFLOW TYPE | WORKFLOW ID | RUN ID | START TIME | EXECUTION TIME | END TIME HelloWorld_sayHello | 7bdfba1d-b3f4-4665-88c2-cec73301dd52 | d6c99e2d-7d76-458f-956b-a2f72af292bf | 01:40:51 | 01:40:51 | 01:40:51 HelloWorld_sayHello | ef8c6cd6-de62-4481-8398-623865467696 | 26eafcde-6cab-4836-9ad4-888a74e172e1 | 01:40:28 | 01:40:28 | 01:40:28 ``` + Now let's look at the workflow execution history: + ```bash -> docker run --network=host --rm temporalio/tctl:0.23.1 workflow showid 1965109f-607f-4b14-a5f2-24399a7b8fa7 +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow showid 1965109f-607f-4b14-a5f2-24399a7b8fa7 1 EventTypeWorkflowExecutionStarted {WorkflowType:{Name:HelloWorld_sayHello}, ParentInitiatedEventId:0, TaskList:{Name:HelloWorldTaskList, Kind:TaskListKindNormal}, Input:["Temporal"], @@ -168,33 +193,42 @@ Now let's look at the workflow execution history: 5 EventTypeWorkflowExecutionCompleted {Result:[], DecisionTaskCompletedEventId:4} ``` + Even for such a trivial workflow, the history gives a lot of useful information. For complex workflows this is a really useful tool for production and development troubleshooting. History can be automatically archived to a long-term blob store (for example Amazon S3) upon workflow completion for compliance, analytical, and troubleshooting purposes. ### Workflow Id Uniqueness Before proceeding to a more complex workflow implementation, let's take a look at the workflow Id semantic. When starting a workflow without providing an Id, the client generates one in the form of a UUID. In most real-life scenarios this is not a desired behavior. The business Id should be used instead. 
Here, we'll specify the Id when starting a workflow: + ```bash -> docker run --network=host --rm temporalio/tctl:0.23.1 workflow start --workflow_id "HelloTemporal1" --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"Temporal\" +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --workflow_id "HelloTemporal1" --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"Temporal\" Started Workflow Id: HelloTemporal1, run Id: 78ca0a3f-8cd2-46a2-8d23-076c3f0f187c ``` + Now the list operation is more meaningful as the WORKFLOW ID is our business Id: + ```bash -> docker run --network=host --rm temporalio/tctl:0.23.1 workflow list +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow list WORKFLOW TYPE | WORKFLOW ID | RUN ID | START TIME | EXECUTION TIME | END TIME HelloWorld_sayHello | HelloTemporal1 | 78ca0a3f-8cd2-46a2-8d23-076c3f0f187c | 01:47:24 | 01:47:24 | 01:47:25 ``` + After the previous one completes, let's try to start another workflow with the same Id: + ```bash -> docker run --network=host --rm temporalio/tctl:0.23.1 workflow start --workflow_id "HelloTemporal1" --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"Temporal\" +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --workflow_id "HelloTemporal1" --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"Temporal\" Started Workflow Id: HelloTemporal1, run Id: 9b5e36a3-9868-4de5-bbdf-eda9cedcd865 ``` + After the second start the workflow list is: + ```bash WORKFLOW TYPE | WORKFLOW ID | RUN ID | START TIME | EXECUTION TIME | END TIME HelloWorld_sayHello | HelloTemporal1 | 37a740e5-838c-4020-aed6-1111b0689c38 | 21:11:47 | 21:11:47 | 21:11:47 HelloWorld_sayHello | HelloTemporal1 | 75170c60-6d72-48c6-b509-7c9d9f25a8a8 | 21:04:46 | 21:04:46 | 21:04:46 ``` + It might be clear why every workflow has two Ids: 
Workflow Id and Run Id. Because the Workflow Id can be reused, the Run Id uniquely identifies a particular run of a workflow. Run Id is system generated and cannot be controlled by client code. Note - Under no circumstances does Temporal allow more than one instance of an open workflow with the same Id. Multiple workflow Ids are required in the case that paralell invocations wish to be supported (such as an Actor patern) @@ -202,8 +236,9 @@ Note - Under no circumstances does Temporal allow more than one instance of an o ### CLI Help See the CLI help command for all of the options supported: + ```bash -> docker run --network=host --rm temporalio/tctl:0.23.1 workflow help start +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow help start NAME: tctl workflow start - start a new workflow execution @@ -231,9 +266,11 @@ OPTIONS: --memo value Optional info that can be shown in list workflow, in JSON format. If there are multiple JSON, concatenate them and separate by a space. The order must be the same as memo_key. --memo_file value Optional info that can be listed in list workflow, from JSON format file. If there are multiple JSON, concatenate them and separate by a space or newline. The order must be same as memo_key. ``` + ## Signals So far our workflow is not very interesting. Let's change it to listen on an external event and update state accordingly. + ```java @WorkflowInterface public interface HelloWorld { @@ -265,47 +302,62 @@ So far our workflow is not very interesting. Let's change it to listen on an ext } } ``` + The workflow interface now has a new method annotated with @SignalMethod. It is a callback method that is invoked -every time a new signal of "HelloWorld_updateGreeting" is delivered to a workflow. The workflow interface can have only -one @WorkflowMethod which is a _main_ function of the workflow and as many signal methods as needed. +every time a new signal of "HelloWorld_updateGreeting" is delivered to a workflow. 
The workflow interface can have only +one @WorkflowMethod which is a _main_ function of the workflow and as many signal methods as needed. The updated workflow implementation demonstrates a few important Temporal concepts. The first is that workflow is stateful and can have fields of any complex type. Another is that the `Workflow.await` function that blocks until the function it receives as a parameter evaluates to true. The condition is going to be evaluated only on workflow state changes, so it is not a busy wait in traditional sense. + ```bash -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow start --workflow_id "HelloSignal" --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"World\" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --workflow_id "HelloSignal" --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"World\" Started Workflow Id: HelloSignal, run Id: 6fa204cb-f478-469a-9432-78060b83b6cd ``` + Program output: + ```text 16:53:56.120 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 1: Hello World! ``` + Let's send a signal using CLI: + ```bash -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow signal --workflow_id "HelloSignal" --name "updateGreeting" --input \"Hi\" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow signal --workflow_id "HelloSignal" --name "updateGreeting" --input \"Hi\" Signal workflow succeeded. ``` + Program output: + ```text 16:53:56.120 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 1: Hello World! 16:54:57.901 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 2: Hi World! ``` + Try sending the same signal with the same input again. Note that the output doesn't change. This happens because the await condition doesn't unblock when it sees the same value. 
But a new greeting unblocks it: + ```bash -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow signal --workflow_id "HelloSignal" --name "updateGreeting" --input \"Welcome\" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow signal --workflow_id "HelloSignal" --name "updateGreeting" --input \"Welcome\" Signal workflow succeeded. ``` + Program output: + ```text 16:53:56.120 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 1: Hello World! 16:54:57.901 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 2: Hi World! 16:56:24.400 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 3: Welcome World! ``` + Now shut down the worker and send the same signal again: + ```bash -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow signal --workflow_id "HelloSignal" --name "updateGreeting" --input \"Welcome\" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow signal --workflow_id "HelloSignal" --name "updateGreeting" --input \"Welcome\" Signal workflow succeeded. ``` + Note that sending signals as well as starting workflows does not need a worker running. The requests are queued inside the Temporal service. Now bring the worker back. Note that it doesn't log anything besides the standard startup messages. @@ -314,8 +366,9 @@ Note that the restart of the worker didn't affect the workflow execution. It is This is the most important feature of Temporal. The workflow code doesn't need to deal with worker failures at all. Its state is fully recovered to its current state that includes all the local variables and threads. 
Let's look at the line where the workflow is blocked: + ```bash -> docker run --network=host --rm temporalio/tctl:0.23.1 workflow stack --workflow_id "Hello2" +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow stack --workflow_id "Hello2" Query result: "workflow-root: (BLOCKED on await) io.temporal.internal.sync.SyncDecisionContext.await(SyncDecisionContext.java:546) @@ -325,12 +378,14 @@ io.temporal.samples.hello.GettingStarted$HelloWorldImpl.sayHello(GettingStarted. sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)" ``` + Yes, indeed the workflow is blocked on await. This feature works for any open workflow, greatly simplifying troubleshooting in production. Let's complete the workflow by sending a signal with a "Bye" greeting: ```text 16:58:22.962 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 4: Bye World! ``` + Note that the value of the count variable was not lost during the restart. Also note that while a single worker instance is used for this @@ -343,6 +398,7 @@ So far we have learned that the workflow code is fault tolerant and can update i Temporal provides a query feature that supports synchronously returning any information from a workflow to an external caller. Update the workflow code to: + ```java @WorkflowInterface public interface HelloWorld { @@ -382,41 +438,50 @@ Update the workflow code to: } } ``` + The new `getCount` method annotated with `@QueryMethod` was added to the workflow interface definition. It is allowed to have multiple query methods per workflow interface. The main restriction on the implementation of the query method is that it is not allowed to modify workflow state in any form. It also is not allowed to block its thread in any way. It usually just returns a value derived from the fields of the workflow object. 
Let's run the updated worker and send a couple signals to it: + ```bash -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow start --workflow_id "HelloQuery" --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"World\" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --workflow_id "HelloQuery" --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"World\" Started Workflow Id: HelloQuery, run Id: 1925f668-45b5-4405-8cba-74f7c68c3135 -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow signal --workflow_id "HelloQuery" --name "updateGreeting" --input \"Hi\" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow signal --workflow_id "HelloQuery" --name "updateGreeting" --input \"Hi\" Signal workflow succeeded. -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow signal --workflow_id "HelloQuery" --name "updateGreeting" --input \"Welcome\" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow signal --workflow_id "HelloQuery" --name "updateGreeting" --input \"Welcome\" Signal workflow succeeded. ``` + The worker output: + ```text 17:35:50.485 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 1: Hello World! 17:36:10.483 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 2: Hi World! 17:36:16.204 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 3: Welcome World! ``` + Now let's query the workflow using the CLI: + ```bash -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow query --workflow_id "HelloQuery" --query_type "getCount" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow query --workflow_id "HelloQuery" --query_type "getCount" Query result as JSON: 3 ``` + One limitation of the query is that it requires a worker process running because it is executing callback code. 
An interesting feature of the query is that it works for completed workflows as well. Let's complete the workflow by sending "Bye" and query it. + ```bash -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow signal --workflow_id "HelloQuery" --name "updateGreeting" --input \"Bye\" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow signal --workflow_id "HelloQuery" --name "updateGreeting" --input \"Bye\" Signal workflow succeeded. -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow query --workflow_id "HelloQuery" --query_type "getCount" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow query --workflow_id "HelloQuery" --query_type "getCount" Query result as JSON: 4 ``` + The Query method can accept parameters. This might be useful if only part of the workflow state should be returned. ## Activities @@ -429,18 +494,21 @@ directly call any external APIs; it can do this only through activities. The wor Let's change our program to print the greeting from an activity on every change. First let's define an activities interface and implement it: + ```java @ActivityInterface public interface HelloWorldActivities { void say(String message); } ``` + `@ActivityInterface` annotation is required for an activity interface. Each method that belongs to an activity interface defines a separate activity type. Activity implementation is just a normal [POJO](https://en.wikipedia.org/wiki/Plain_old_Java_object). The `out` stream is passed as a parameter to the constructor to demonstrate that the activity object can have any dependencies. Examples of real application dependencies are database connections and service clients. + ```java public class HelloWordActivitiesImpl implements HelloWorldActivities { private final PrintStream out; @@ -455,9 +523,11 @@ activity object can have any dependencies. 
Examples of real application dependen } } ``` + Let's create a separate main method for the activity worker. It is common to have a single worker that hosts both activities and workflows, but here we keep them separate to demonstrate how Temporal deals with worker failures. To make the activity implementation known to Temporal, register it with the worker: + ```java public class GettingStartedActivityWorker { @@ -471,9 +541,11 @@ public class GettingStartedActivityWorker { } } ``` + A single instance of an activity object is registered per activity interface type. This means that the activity implementation should be thread-safe since the activity method can be simultaneously called from multiple threads. Let's modify the workflow code to invoke the activity instead of logging: + ```java public static class HelloWorldImpl implements HelloWorld { @@ -502,18 +574,22 @@ Let's modify the workflow code to invoke the activity instead of logging: } } ``` + Activities are invoked through a stub that implements their interface. So an invocation is just a method call on an activity stub. Now run the workflow worker. Do not run the activity worker yet. Then start a new workflow execution: + ```bash -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow start --workflow_id "HelloActivityWorker" --tasklist HelloWorldTaskList --workflow_type HelloWorld_sayHello --execution_timeout 3600 --input \"World\" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --workflow_id "HelloActivityWorker" --tasklist HelloWorldTaskList --workflow_type HelloWorld_sayHello --execution_timeout 3600 --input \"World\" Started Workflow Id: HelloActivityWorker, run Id: ff015637-b5af-43e8-b3f6-8b6c7b919b62 ``` + The workflow is started, but nothing visible happens. This is expected as the activity worker is not running. What are the options to understand the currently running workflow state? 
The first option is look at the stack trace: + ```text -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow stack --workflow_id "HelloActivityWorker" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow stack --workflow_id "HelloActivityWorker" Query result as JSON: "workflow-root: (BLOCKED on Feature.get)io.temporal.internal.sync.CompletablePromiseImpl.get(CompletablePromiseImpl.java:71) io.temporal.internal.sync.ActivityStubImpl.execute(ActivityStubImpl.java:58) @@ -526,13 +602,15 @@ sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) " ``` + It shows that the workflow code is blocked on the "say" method of a Proxy object that implements the activity stub. You can restart the workflow worker if you want to make sure that restarting it does not change that. It works for activities of any duration. It is okay for the workflow code to block on an activity invocation for a month for example. Another way to see what exactly happened in the workflow execution is to look at the workflow execution history: + ```text -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow show --workflow_id "HelloActivityWorker" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow show --workflow_id "HelloActivityWorker" 1 WorkflowExecutionStarted {WorkflowType:{Name:HelloWorld_sayHello}, TaskList:{Name:HelloWorldTaskList}, Input:["World"], @@ -563,11 +641,13 @@ temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow show - HeartbeatTimeoutSeconds:100, DecisionTaskCompletedEventId:4} ``` + The last event in the workflow history is `ActivityTaskScheduled`. It is recorded when workflow invoked the activity, but it wasn't picked up by an activity worker yet. 
Another useful API is `DescribeWorkflowExecution` which, among other information, contains the list of outstanding activities: + ```text -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow describe --workflow_id "HelloActivityWorker" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow describe --workflow_id "HelloActivityWorker" { "ExecutionConfiguration": { "taskList": { @@ -605,13 +685,17 @@ temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow describ ] } ``` + Let's start the activity worker. It starts and immediately prints: + ```text 1: Hello World! ``` + Let's look at the workflow execution history: + ```text -temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow show --workflow_id "HelloActivityWorker" +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow show --workflow_id "HelloActivityWorker" 1 WorkflowExecutionStarted {WorkflowType:{Name:HelloWorld_sayHello}, TaskList:{Name:HelloWorldTaskList}, Input:["World"], @@ -658,12 +742,14 @@ temporal: docker run --network=host --rm temporalio/tctl:0.23.1 workflow show - StartedEventId:9, Identity:37694@maxim-C02XD0AAJGH6} ``` + _ActivityTaskStarted_ event is recorded when the activity task is picked up by an activity worker. The Identity field contains the Id of the worker (you can set it to any value on worker startup). _ActivityTaskCompleted_ event is recorded when activity completes. It contains the result of the activity execution. Let's look at various failure scenarios. Modify activity task timeout: + ```java @ActivityInterface public interface HelloWorldActivities { @@ -684,4 +770,5 @@ Let's look at various failure scenarios. Modify activity task timeout: } } ``` + (To be continued ...) diff --git a/docs/learn-cli.md b/docs/learn-cli.md index 3e3693a1d0..74bdeb6ee4 100644 --- a/docs/learn-cli.md +++ b/docs/learn-cli.md @@ -9,23 +9,25 @@ workflow, show workflow history, and signal workflow. 
## Using the CLI -The Temporal CLI can be used directly from the Docker Hub image *temporalio/tctl* or by building the CLI tool +The Temporal CLI can be used directly from the Docker Hub image _temporalio/tctl_ or by building the CLI tool locally. Example of using the docker image to describe a namespace: + ``` -docker run --rm temporalio/tctl:0.23.1 --namespace samples-namespace namespace describe +docker run --rm temporalio/tctl:0.25.0 --namespace samples-namespace namespace describe ``` On Docker versions 18.03 and later, you may get a "connection refused" error. You can work around this by setting the host to "host.docker.internal" (see [here](https://docs.docker.com/docker-for-mac/networking/#use-cases-and-workarounds) for more info). ``` -docker run --network=host --rm temporalio/tctl:0.23.1 --namespace samples-namespace namespace describe +docker run --network=host --rm temporalio/tctl:0.25.0 --namespace samples-namespace namespace describe ``` To build the CLI tool locally, clone the [Temporal server repo](https://github.com/temporalio/temporal) and run `make bins`. This produces an executable called `tctl`. 
With a local build, the same command to describe a namespace would look like this: + ``` ./tctl --namespace samples-namespace namespace describe ``` @@ -40,6 +42,7 @@ Setting environment variables for repeated parameters can shorten the CLI comman - **TEMPORAL_CLI_NAMESPACE** - default workflow namespace, so you don't need to specify `--namespace` ## Quick Start + Run `./tctl` for help on top level commands and global options Run `./tctl namespace` for help on namespace operations Run `./tctl workflow` for help on workflow operations @@ -49,30 +52,39 @@ Run `./tctl tasklist` for help on tasklist operations **Note:** make sure you have a Temporal server running before using CLI ### Namespace operation examples + - Register a new namespace named "samples-namespace": + ``` ./tctl --namespace samples-namespace namespace register --global_namespace false # OR using short alias ./tctl --ns samples-namespace n re --gd false ``` + - View "samples-namespace" details: + ``` ./tctl --namespace samples-namespace namespace describe ``` ### Workflow operation examples + The following examples assume the TEMPORAL_CLI_NAMESPACE environment variable is set. #### Run workflow + Start a workflow and see its progress. This command doesn't finish until workflow completes. + ``` ./tctl workflow run --tl helloWorldGroup --wt main.Workflow --et 60 -i '"temporal"' # view help messages for workflow run ./tctl workflow run -h ``` + Brief explanation: To run a workflow, the user must specify the following: + 1. Tasklist name (--tl) 2. Workflow type (--wt) 3. Execution start to close timeout in seconds (--et) @@ -85,11 +97,13 @@ and takes a string as input with the `-i '"temporal"'` parameter. 
Single quotes (Run `make && ./bin/helloworld -m worker` in temporal-go-samples to start the worker) #### Show running workers of a tasklist + ``` ./tctl tasklist desc --tl helloWorldGroup ``` #### Start workflow + ``` ./tctl workflow start --tl helloWorldGroup --wt main.Workflow --et 60 -i '"temporal"' @@ -99,15 +113,17 @@ and takes a string as input with the `-i '"temporal"'` parameter. Single quotes # for a workflow with multiple inputs, separate each json with space/newline like ./tctl workflow start --tl helloWorldGroup --wt main.WorkflowWith3Args --et 60 -i '"your_input_string" 123 {"Name":"my-string", "Age":12345}' ``` + The workflow `start` command is similar to the `run` command, but immediately returns the workflow_id and run_id after starting the workflow. Use the `show` command to view the workflow's history/progress. ##### Reuse the same workflow id when starting/running a workflow Use option `--workflowidreusepolicy` or `--wrp` to configure the workflow id reuse policy. -**Option 0 AllowDuplicateFailedOnly:** Allow starting a workflow execution using the same workflow Id when a workflow with the same workflow Id is not already running and the last execution close state is one of *[terminated, cancelled, timedout, failed]*. +**Option 0 AllowDuplicateFailedOnly:** Allow starting a workflow execution using the same workflow Id when a workflow with the same workflow Id is not already running and the last execution close state is one of _[terminated, cancelled, timedout, failed]_. **Option 1 AllowDuplicate:** Allow starting a workflow execution using the same workflow Id when a workflow with the same workflow Id is not already running. **Option 2 RejectDuplicate:** Do not allow starting a workflow execution using the same workflow Id as a previous workflow. 
+ ``` # use AllowDuplicateFailedOnly option to start a workflow ./tctl workflow start --tl helloWorldGroup --wt main.Workflow --et 60 -i '"temporal"' --wid "" --wrp 0 @@ -117,14 +133,17 @@ Use option `--workflowidreusepolicy` or `--wrp` to configure the workflow id reu ``` ##### Start a workflow with a memo + Memos are immutable key/value pairs that can be attached to a workflow run when starting the workflow. These are visible when listing workflows. More information on memos can be found [here](/docs/learn-workflow-filtering#memo-vs-search-attributes). + ``` tctl wf start -tl helloWorldGroup -wt main.Workflow -et 60 -i '"temporal"' -memo_key ‘“Service” “Env” “Instance”’ -memo ‘“serverName1” “test” 5’ ``` #### Show workflow history + ``` ./tctl workflow show -w 3ea6b242-b23c-4279-bb13-f215661b4717 -r 866ae14c-88cf-4f1e-980f-571e031d71b0 # a shortcut of this is (without -w -r flag) @@ -137,6 +156,7 @@ tctl wf start -tl helloWorldGroup -wt main.Workflow -et 60 -i '"temporal"' -memo ``` #### Show workflow execution information + ``` ./tctl workflow describe -w 3ea6b242-b23c-4279-bb13-f215661b4717 -r 866ae14c-88cf-4f1e-980f-571e031d71b0 # a shortcut of this is (without -w -r flag) @@ -149,6 +169,7 @@ tctl wf start -tl helloWorldGroup -wt main.Workflow -et 60 -i '"temporal"' -memo ``` #### List closed or open workflow executions + ``` ./tctl workflow list @@ -165,6 +186,7 @@ Use **--query** to list workflows with SQL like query: This will return all open workflows with workflowType as "main.SampleParentWorkflow". 
#### Query workflow execution + ``` # use custom query type ./tctl workflow query -w -r --qt @@ -176,6 +198,7 @@ This will return all open workflows with workflowType as "main.SampleParentWorkf ``` #### Signal, cancel, terminate workflow + ``` # signal ./tctl workflow signal -w -r -n -i '"signal-value"' @@ -186,14 +209,17 @@ This will return all open workflows with workflowType as "main.SampleParentWorkf # terminate ./tctl workflow terminate -w -r --reason ``` + Terminating a running workflow execution will record a WorkflowExecutionTerminated event as the closing event in the history. No more decision tasks will be scheduled for a terminated workflow execution. Canceling a running workflow execution will record a WorkflowExecutionCancelRequested event in the history, and a new decision task will be scheduled. The workflow has a chance to do some clean up work after cancellation. #### Signal, cancel, terminate workflows as a batch job + Batch job is based on List Workflow Query(**--query**). It supports signal, cancel and terminate as batch job type. For terminating workflows as batch job, it will terminte the children recursively. Start a batch job(using signal as batch type): + ``` tctl --ns samples-namespace wf batch start --query "WorkflowType='main.SampleParentWorkflow' AND CloseTime=missing" --reason "test" --bt signal --sig testname This batch job will be operating on 5 workflows. @@ -204,29 +230,38 @@ Please confirm[Yes/No]:yes } ``` + You need to remember the JobId or use List command to get all your batch jobs: + ``` tctl --ns samples-namespace wf batch list ``` Describe the progress of a batch job: + ``` tctl --ns samples-namespace wf batch desc -jid ``` + Terminate a batch job: + ``` tctl --ns samples-namespace wf batch terminate -jid ``` + Note that the operation performed by a batch will not be rolled back by terminating the batch. However, you can use reset to rollback your workflows. 
#### Restart, reset workflow + The Reset command allows resetting a workflow to a particular point and continue running from there. There are a lot of use cases: + - Rerun a failed workflow from the beginning with the same start parameters. - Rerun a failed workflow from the failing point without losing the achieved progress(history). - After deploying new code, reset an open workflow to let the workflow run to different flows. You can reset to some predefined event types: + ``` ./tctl workflow reset -w -r --reset_type --reason "some_reason" ``` @@ -236,24 +271,31 @@ You can reset to some predefined event types: - LastContinuedAsNew: reset to the end of the history for the previous run. If you are familiar with the Temporal history event, You can also reset to any decision finish event by using: + ``` ./tctl workflow reset -w -r --event_id --reason "some_reason" ``` + Some things to note: + - When reset, a new run will be kicked off with the same workflowId. But if there is a running execution for the workflow(workflowId), the current run will be terminated. - decision_finish_event_id is the Id of events of the type: DecisionTaskComplete/DecisionTaskFailed/DecisionTaskTimeout. - To restart a workflow from the beginning, reset to the first decision task finish event. To reset multiple workflows, you can use batch reset command: + ``` ./tctl workflow reset-batch --input_file --reset_type --reason "some_reason" ``` + #### Recovery from bad deployment -- auto-reset workflow + If a bad deployment lets a workflow run into a wrong state, you might want to reset the workflow to the point that the bad deployment started to run. But usually it is not easy to find out all the workflows impacted, and every reset point for each workflow. In this case, auto-reset will automatically reset all the workflows given a bad deployment identifier. Let's get familiar with some concepts. 
Each deployment will have an identifier, we call it "**Binary Checksum**" as it is usually generated by the md5sum of a binary file. For a workflow, each binary checksum will be associated with an **auto-reset point**, which contains a **runId**, an **eventID**, and the **created_time** that binary/deployment made the first decision for the workflow. To find out which **binary checksum** of the bad deployment to reset, you should be aware of at least one workflow running into a bad state. Use the describe command with **--reset_points_only** option to show all the reset points: + ``` ./tctl wf desc -w --reset_points_only +----------------------------------+--------------------------------+--------------------------------------+---------+ @@ -266,6 +308,7 @@ To find out which **binary checksum** of the bad deployment to reset, you should ``` Then use this command to tell Temporal to auto-reset all workflows impacted by the bad deployment. The command will store the bad binary checksum into namespace info and trigger a process to reset all your workflows. + ``` ./tctl --ns namespace update --add_bad_binary aae748fdc557a3f873adbe1dd066713f --reason "rollback bad deployment" ``` diff --git a/docs/learn-workflow-filtering.md b/docs/learn-workflow-filtering.md index aff841733b..619134b55c 100644 --- a/docs/learn-workflow-filtering.md +++ b/docs/learn-workflow-filtering.md @@ -31,7 +31,7 @@ type StartWorkflowOptions struct { } ``` -In the Java client, the *WorkflowOptions.Builder* has similar methods for [memo](https://static.javadoc.io/com.uber.cadence/cadence-client/2.6.0/com/uber/cadence/client/WorkflowOptions.Builder.html#setMemo-java.util.Map-) and [search attributes](https://static.javadoc.io/com.uber.cadence/cadence-client/2.6.0/com/uber/cadence/client/WorkflowOptions.Builder.html#setSearchAttributes-java.util.Map-). 
+In the Java client, the _WorkflowOptions.Builder_ has similar methods for [memo](https://static.javadoc.io/com.uber.cadence/cadence-client/2.6.0/com/uber/cadence/client/WorkflowOptions.Builder.html#setMemo-java.util.Map-) and [search attributes](https://static.javadoc.io/com.uber.cadence/cadence-client/2.6.0/com/uber/cadence/client/WorkflowOptions.Builder.html#setSearchAttributes-java.util.Map-). Some important distinctions between memo and search attributes: @@ -95,7 +95,7 @@ Note that **Keyword** and **String** are concepts taken from Elasticsearch. Each For example, key RunId with value "2dd29ab7-2dd8-4668-83e0-89cae261cfb1" - as a **Keyword** will only be matched by RunId = "2dd29ab7-2dd8-4668-83e0-89cae261cfb1" (or in the future with [regular expressions](https://github.com/uber/cadence/issues/1137)) -- as a **String** will be matched by RunId = "2dd8", which may cause unwanted matches +- as a **String** will be matched by RunId = "2dd8", which may cause unwanted matches **Note:** String type can not be used in Order By query. @@ -166,7 +166,7 @@ map[string]interface{}{ } ``` -There is no support for removing a field. To achieve a similar effect, set the field to a sentinel value. For example, to remove “CustomKeywordField”, update it to “impossibleVal”. Then searching `CustomKeywordField != ‘impossibleVal’` will match workflows with CustomKeywordField not equal to "impossibleVal", which **includes** workflows without the CustomKeywordField set. +There is no support for removing a field. To achieve a similar effect, set the field to a sentinel value. For example, to remove “CustomKeywordField”, update it to “impossibleVal”. Then searching `CustomKeywordField != ‘impossibleVal’` will match workflows with CustomKeywordField not equal to "impossibleVal", which **includes** workflows without the CustomKeywordField set. Use `workflow.GetInfo` to get current search attributes. 
@@ -282,7 +282,7 @@ Queries are supported in [Temporal Web](https://github.com/temporalio/temporal-w ## Local Testing 1. Increase Docker memory to higher than 6GB. Navigate to Docker -> Preferences -> Advanced -> Memory -2. Get the Temporal Docker compose file. Run `curl -L https://github.com/temporalio/temporal/releases/download/v0.23.1/docker.tar.gz | tar -xz --strip-components 1 docker/docker-compose-es.yml` +2. Get the Temporal Docker compose file. Run `curl -L https://github.com/temporalio/temporal/releases/download/v0.25.0/docker.tar.gz | tar -xz --strip-components 1 docker/docker-compose-es.yml` 3. Start Temporal Docker (which contains Apache Kafka, Apache Zookeeper, and Elasticsearch) using `docker-compose -f docker-compose-es.yml up` 4. From the Docker output log, make sure Elasticsearch and Temporal started correctly. If you encounter an insufficient disk space error, try `docker system prune -a --volumes` 5. Register a local namespace and start using it. `tctl --ns samples-namespace n re` diff --git a/versioned_docs/version-0.25.0/go-activities.md b/versioned_docs/version-0.25.0/go-activities.md new file mode 100644 index 0000000000..2178855f02 --- /dev/null +++ b/versioned_docs/version-0.25.0/go-activities.md @@ -0,0 +1,123 @@ +--- +id: go-activities +title: Activities +--- + +An activity is the implementation of a particular task in the business logic. + +Activities are implemented as functions. Data can be passed directly to an activity via function +parameters. The parameters can be either basic types or structs, with the only requirement being that +the parameters must be serializable. Though it is not required, we recommend that the first parameter +of an activity function is of type `context.Context`, in order to allow the activity to interact with +other framework methods. The function must return an `error` value, and can optionally return a result +value. 
The result value can be either a basic type or a struct with the only requirement being that +it is serializable. + +The values passed to activities through invocation parameters or returned through the result value +are recorded in the execution history. The entire execution history is transferred from the Temporal +service to workflow workers with every event that the workflow logic needs to process. A large execution +history can thus adversely impact the performance of your workflow. Therefore, be mindful of the amount +of data you transfer via activity invocation parameters or return values. Otherwise, no additional +limitations exist on activity implementations. + +## Overview + +The following example demonstrates a simple activity that accepts a string parameter, appends a word +to it, and then returns a result. + +```go +package sample + +import ( + "context" + + "go.uber.org/zap" + + "go.temporal.io/temporal/activity" +) + +// SimpleActivity is a sample Temporal activity function that takes one parameter and +// returns a string containing the parameter value. +func SimpleActivity(ctx context.Context, value string) (string, error) { + activity.GetLogger(ctx).Info("SimpleActivity called.", zap.String("Value", value)) + return "Processed: " + value, nil +} +``` +Let's take a look at each component of this activity. + +### Declaration + +In the Temporal programming model, an activity is implemented with a function. The function declaration specifies the parameters the activity accepts as well as any values it might return. An activity function can take zero or many activity specific parameters and can return one or two values. It must always at least return an error value. The activity function can accept as parameters and return as results any serializable type. + +`func SimpleActivity(ctx context.Context, value string) (string, error)` + +The first parameter to the function is context.Context. This is an optional parameter and can be omitted.
This parameter is the standard Go context. +The second string parameter is a custom activity specific parameter that can be used to pass data into the activity on start. An activity can have one or more such parameters. All parameters to an activity function must be serializable, which essentially means that params can’t be channels, functions, variadic, or unsafe pointers. +The activity declares two return values: string and error. The string return value is used to return the result of the activity. The error return value is used to indicate that an error was encountered during execution. + +### Implementation + +You can write activity implementation code in the same way that you would any other Go service code. +Additionally, you can use the usual loggers and metrics controllers, and the standard Go concurrency +constructs. + +#### Heart Beating + +For long-running activities, Temporal provides an API for the activity code to report both liveness and +progress back to the Temporal managed service. + +```go +progress := 0 +for hasWork { + // Send heartbeat message to the server. + activity.RecordHeartbeat(ctx, progress) + // Do some work. + ... + progress++ +} +``` +When an activity times out due to a missed heartbeat, the last value of the details (`progress` in the +above sample) is returned from the `workflow.ExecuteActivity` function as the details field of `TimeoutError` +with `TimeoutType` set to `Heartbeat`. + +You can also heartbeat an activity from an external source: + +```go +// The client is a heavyweight object that should be created once per process. +serviceClient, err := client.NewClient(client.Options{ + HostPort: HostPort, + Namespace: Namespace, + MetricsScope: scope, +}) + +// Record heartbeat. 
+err := serviceClient.RecordActivityHeartbeat(ctx, taskToken, details) +``` +The parameters of the `RecordActivityHeartbeat` function are: + +* `taskToken`: The value of the binary `TaskToken` field of the `ActivityInfo` struct retrieved inside +the activity. +* `details`: The serializable payload containing progress information. + +#### Cancellation + +When an activity is cancelled, or its workflow execution has completed or failed, the context passed +into its function is cancelled, which sets its channel’s closed state to `Done`. An activity can use that +to perform any necessary cleanup and abort its execution. Cancellation is only delivered to activities +that call `RecordActivityHeartbeat`. + +### Registration + +To make the activity visible to the worker process hosting it, the activity must be registered via a +call to `activity.Register`. + +```go +activity.Register(SimpleActivity) +``` +This call creates an in-memory mapping inside the worker process between the fully qualified function +name and the implementation. If a worker receives a request to start an activity execution for an +activity type it does not know, it will fail that request. + +## Failing an Activity + +To mark an activity as failed, the activity function must return an error via the `error` return value. diff --git a/versioned_docs/version-0.25.0/go-activity-async-completion.md b/versioned_docs/version-0.25.0/go-activity-async-completion.md new file mode 100644 index 0000000000..6058f537a7 --- /dev/null +++ b/versioned_docs/version-0.25.0/go-activity-async-completion.md @@ -0,0 +1,59 @@ +--- +id: go-activity-async-completion +title: Asynchronous Activity Completion +--- + +There are certain scenarios when completing an activity upon completion of its function is not possible +or desirable. For example, you might have an application that requires user input in order to complete +the activity. 
You could implement the activity with a polling mechanism, but a simpler and less +resource-intensive implementation is to asynchronously complete a Temporal activity. + +There are two parts to implementing an asynchronously completed activity: + +1. The activity provides the information necessary for completion from an external system and notifies +the Temporal service that it is waiting for that outside callback. +2. The external service calls the Temporal service to complete the activity. + +The following example demonstrates the first part: + +```go +// Retrieve the activity information needed to asynchronously complete the activity. +activityInfo := activity.GetInfo(ctx) +taskToken := activityInfo.TaskToken + +// Send the taskToken to the external service that will complete the activity. +... + +// Return from the activity a function indicating that Temporal should wait for an async completion +// message. +return "", activity.ErrResultPending +``` + +The following code demonstrates how to complete the activity successfully: + +```go +// Instantiate a Temporal service client. +// The same client can be used to complete or fail any number of activities. +// The client is a heavyweight object that should be created once per process. +serviceClient, err := client.NewClient(client.Options{}) + +// Complete the activity. +serviceClient.CompleteActivity(taskToken, result, nil) +``` + +To fail the activity, you would do the following: + +```go +// Fail the activity. +serviceClient.CompleteActivity(taskToken, nil, err) +``` + +Following are the parameters of the `CompleteActivity` function: + +* `taskToken`: The value of the binary `TaskToken` field of the `ActivityInfo` struct retrieved inside +the activity. +* `result`: The return value to record for the activity. The type of this value must match the type +of the return value declared by the activity function. +* `err`: The error code to return if the activity terminates with an error.
+ +If `err` is not nil, the value of the `result` field is ignored. diff --git a/versioned_docs/version-0.25.0/go-child-workflows.md b/versioned_docs/version-0.25.0/go-child-workflows.md new file mode 100644 index 0000000000..49c568ccdc --- /dev/null +++ b/versioned_docs/version-0.25.0/go-child-workflows.md @@ -0,0 +1,59 @@ +--- +id: go-child-workflows +title: Child Workflows +--- + +`workflow.ExecuteChildWorkflow` enables the scheduling of other workflows from within a workflow's +implementation. The parent workflow has the ability to monitor and impact the lifecycle of the child +workflow, similar to the way it does for an activity that it invoked. + +```go +cwo := workflow.ChildWorkflowOptions{ + // Do not specify WorkflowId if you want Temporal to generate a unique Id for the child execution. + WorkflowId: "BID-SIMPLE-CHILD-WORKFLOW", + ExecutionStartToCloseTimeout: time.Minute * 30, +} +ctx = workflow.WithChildWorkflowOptions(ctx, cwo) + +var result string +future := workflow.ExecuteChildWorkflow(ctx, SimpleChildWorkflow, value) +if err := future.Get(ctx, &result); err != nil { + workflow.GetLogger(ctx).Error("SimpleChildWorkflow failed.", zap.Error(err)) + return err +} +``` +Let's take a look at each component of this call. + +Before calling `workflow.ExecuteChildWorkflow()`, you must configure `ChildWorkflowOptions` for the +invocation. These options customize various execution timeouts, and are passed in by creating a child +context from the initial context and overwriting the desired values. The child context is then passed +into the `workflow.ExecuteChildWorkflow()` call. If multiple child workflows are sharing the same option +values, then the same context instance can be used when calling `workflow.ExecuteChildWorkflow()`. + +The first parameter in the call is the required `workflow.Context` object. This type is a copy of +`context.Context` with the `Done()` method returning `workflow.Channel` instead of the native Go `chan`.
+ +The second parameter is the function that we registered as a workflow function. This parameter can +also be a string representing the fully qualified name of the workflow function. The benefit of +passing in the actual function object is that the framework can validate workflow parameters. + +The remaining parameters are passed to the workflow as part of the call. In our example, we have a +single parameter: `value`. This list of parameters must match the list of parameters declared by +the workflow function. + +The method call returns immediately and returns a `workflow.Future`. This allows you to execute more +code without having to wait for the scheduled workflow to complete. + +When you are ready to process the results of the workflow, call the `Get()` method on the returned future +object. The parameters to this method are the `ctx` object we passed to the +`workflow.ExecuteChildWorkflow()` call and an output parameter that will receive the output of the +workflow. The type of the output parameter must match the type of the return value declared by the +workflow function. The `Get()` method will block until the workflow completes and results are +available. + +The `workflow.ExecuteChildWorkflow()` function is similar to `workflow.ExecuteActivity()`. All of the +patterns described for using `workflow.ExecuteActivity()` apply to the `workflow.ExecuteChildWorkflow()` +function as well. + +When a parent workflow is cancelled by the user, the child workflow can be cancelled or abandoned +based on a configurable child policy.
diff --git a/versioned_docs/version-0.25.0/go-continue-as-new.md b/versioned_docs/version-0.25.0/go-continue-as-new.md new file mode 100644 index 0000000000..56fcbe8e2c --- /dev/null +++ b/versioned_docs/version-0.25.0/go-continue-as-new.md @@ -0,0 +1,22 @@ +--- +id: go-continue-as-new +title: ContinueAsNew +--- + +Workflows that need to rerun periodically could naively be implemented as a big **for** loop with +a sleep where the entire logic of the workflow is inside the body of the **for** loop. The problem +with this approach is that the history for that workflow will keep growing to a point where it +reaches the maximum size enforced by the service. + +**ContinueAsNew** is the low level construct that enables implementing such workflows without the +risk of failures down the road. The operation atomically completes the current execution and starts +a new execution of the workflow with the same **workflow Id**. The new execution will not carry +over any history from the old execution. To trigger this behavior, the workflow function should +terminate by returning the special **ContinueAsNewError** error: + +```go +func SimpleWorkflow(ctx workflow.Context, value string) error { + ... + return temporal.NewContinueAsNewError(ctx, SimpleWorkflow, value) +} +``` diff --git a/versioned_docs/version-0.25.0/go-create-workflows.md b/versioned_docs/version-0.25.0/go-create-workflows.md new file mode 100644 index 0000000000..878f70a199 --- /dev/null +++ b/versioned_docs/version-0.25.0/go-create-workflows.md @@ -0,0 +1,148 @@ +--- +id: go-create-workflows +title: Creating Workflows +--- + +The workflow is the implementation of the coordination logic. The Temporal programming framework +(aka client library) allows you to write the workflow coordination logic as simple procedural code +that uses standard Go data modeling.
The client library takes care of the communication between +the worker service and the Temporal service, and ensures state persistence between events even in +case of worker failures. Furthermore, any particular execution is not tied to a particular worker +machine. Different steps of the coordination logic can end up executing on different worker +instances, with the framework ensuring that the necessary state is recreated on the worker executing +the step. + +However, in order to facilitate this operational model, both the Temporal programming framework and +the managed service impose some requirements and restrictions on the implementation of the +coordination logic. The details of these requirements and restrictions are described in the +**Implementation** section below. + +## Overview + +The sample code below shows a simple implementation of a workflow that executes one activity. The +workflow also passes the sole parameter it receives as part of its initialization as a parameter +to the activity. + +``` go +package sample + +import ( + "time" + + "go.temporal.io/temporal/workflow" + "go.uber.org/zap" +) + +// SimpleWorkflow is a sample Temporal workflow function that takes one +// string parameter 'value' and returns an error. +func SimpleWorkflow(ctx workflow.Context, value string) error { + ao := workflow.ActivityOptions{ + TaskList: "sampleTaskList", + ScheduleToCloseTimeout: time.Second * 60, + ScheduleToStartTimeout: time.Second * 60, + StartToCloseTimeout: time.Second * 60, + HeartbeatTimeout: time.Second * 10, + WaitForCancellation: false, + } + ctx = workflow.WithActivityOptions(ctx, ao) + + var result string + err := workflow.ExecuteActivity(ctx, SimpleActivity, value).Get(ctx, &result) + if err != nil { + return err + } + + workflow.GetLogger(ctx).Info("Done", zap.String("result", result)) + return nil +} +``` + +## Declaration + +In the Temporal programming model, a workflow is implemented with a function.
The function declaration +specifies the parameters the workflow accepts as well as any values it might return. + +``` go + func SimpleWorkflow(ctx workflow.Context, value string) error +``` + +Let’s deconstruct the declaration above: + +* The first parameter to the function is **ctx workflow.Context**. This is a required parameter for + all workflow functions and is used by the Temporal client library to pass execution context. + Virtually all the client library functions that are callable from the workflow functions require + this **ctx** parameter. This **context** parameter is the same concept as the standard + **context.Context** provided by Go. The only difference between **workflow.Context** and + **context.Context** is that the **Done()** function in **workflow.Context** returns + **workflow.Channel** instead of the standard Go **chan**. +* The second parameter, **string**, is a custom workflow parameter that can be used to pass data + into the workflow on start. A workflow can have one or more such parameters. All parameters to a + workflow function must be serializable, which essentially means that params can’t be channels, + functions, variadic, or unsafe pointers. +* Since it only declares error as the return value, this means that the workflow does not return a + value. The **error** return value is used to indicate an error was encountered during execution + and the workflow should be terminated. + +## Implementation + +In order to support the synchronous and sequential programming model for the workflow +implementation, there are certain restrictions and requirements on how the workflow implementation +must behave in order to guarantee correctness.
The requirements are that: + +* Execution must be deterministic +* Execution must be idempotent + +A straightforward way to think about these requirements is as follows: + +* Workflow code can only read and manipulate local state or state received as return values from + Temporal client library functions. +* Workflow code should not effect changes in external systems other than through invocation + of activities. +* Workflow code should interact with **time** only through the functions provided by the Temporal + client library (i.e. **workflow.Now()**, **workflow.Sleep()**). +* Workflow code should not create and interact with goroutines directly, it should instead use the + functions provided by the Temporal client library (i.e., **workflow.Go()** instead of **go**, + **workflow.Channel** instead of **chan**, **workflow.Selector** instead of **select**). +* Workflow code should do all logging via the logger provided by the Temporal client library + (i.e., **workflow.GetLogger()**). +* Workflow code should not iterate over maps using range because the order of map iteration is randomized. + +Now that we have laid the ground rules, we can take a look at some of the special functions and types +used for writing Temporal workflows and how to implement some common patterns. + +### Special Temporal SDK functions and types + +The Temporal client library provides a number of functions and types as alternatives to some native +Go functions and types. Usage of these replacement functions/types is necessary in order to ensure +that the workflow code execution is deterministic and repeatable within an execution context. + +Coroutine related constructs: + +* **workflow.Go** : This is a replacement for the **go** statement. +* **workflow.Channel** : This is a replacement for the native **chan** type. Temporal provides + support for both buffered and unbuffered channels. +* **workflow.Selector** : This is a replacement for the **select** statement.
+ +Time related functions: + +* **workflow.Now()** : This is a replacement for **time.Now()**. +* **workflow.Sleep()** : This is a replacement for **time.Sleep()**. + +### Failing a workflow + +To mark a workflow as failed, all that needs to happen is for the workflow function to return an +error via the **err** return value. + +## Registration + +For some client code to be able to invoke a workflow type, the worker process needs to be aware of +all the implementations it has access to. A workflow is registered with the following call: + +``` go +workflow.Register(SimpleWorkflow) +``` + +This call essentially creates an in-memory mapping inside the worker process between the fully +qualified function name and the implementation. It is safe to call this registration method from +an **init()** function. If the worker receives tasks for a workflow type it does not know, it will +fail that task. However, the failure of the task will not cause the entire workflow to fail. diff --git a/versioned_docs/version-0.25.0/go-distributed-cron.md b/versioned_docs/version-0.25.0/go-distributed-cron.md new file mode 100644 index 0000000000..c6f82b4cd2 --- /dev/null +++ b/versioned_docs/version-0.25.0/go-distributed-cron.md @@ -0,0 +1,90 @@ +--- +id: go-distributed-cron +title: Distributed CRON +--- + +It is relatively straightforward to turn any Temporal workflow into a Cron workflow. All you need +is to supply a cron schedule when starting the workflow using the CronSchedule +parameter of +[StartWorkflowOptions](https://pkg.go.dev/go.temporal.io/temporal/internal#StartWorkflowOptions). + +You can also start a workflow using the Temporal CLI with an optional cron schedule using the `--cron` argument. + +For workflows with CronSchedule: + +* Cron schedule is based on UTC time. For example cron schedule "15 8 \* \* \*" + will run daily at 8:15am UTC. 
+* If a workflow failed and a RetryPolicy is supplied to the StartWorkflowOptions + as well, the workflow will retry based on the RetryPolicy. While the workflow is + retrying, the server will not schedule the next cron run. +* Temporal server only schedules the next cron run after the current run is + completed. If the next schedule is due while a workflow is running (or retrying), + then it will skip that schedule. +* Cron workflows will not stop until they are terminated or cancelled. + +Temporal supports the standard cron spec: + +```go +// CronSchedule - Optional cron schedule for workflow. If a cron schedule is specified, the workflow will run +// as a cron based on the schedule. The scheduling will be based on UTC time. The schedule for next run only happen +// after the current run is completed/failed/timeout. If a RetryPolicy is also supplied, and the workflow failed +// or timed out, the workflow will be retried based on the retry policy. While the workflow is retrying, it won't +// schedule its next run. If next schedule is due while the workflow is running (or retrying), then it will skip that +// schedule. Cron workflow will not stop until it is terminated or cancelled (by returning temporal.CanceledError). +// The cron spec is as following: +// ┌───────────── minute (0 - 59) +// │ ┌───────────── hour (0 - 23) +// │ │ ┌───────────── day of the month (1 - 31) +// │ │ │ ┌───────────── month (1 - 12) +// │ │ │ │ ┌───────────── day of the week (0 - 6) (Sunday to Saturday) +// │ │ │ │ │ +// │ │ │ │ │ +// * * * * * +CronSchedule string +``` + +The [crontab guru site](https://crontab.guru/) is useful for testing your cron expressions. + +## Convert existing cron workflow + +Before CronSchedule was available, the previous approach to implementing cron +workflows was to use a delay timer as the last step and then return +`ContinueAsNew`. One problem with that implementation is that if the workflow +fails or times out, the cron would stop. 
+ +To convert those workflows to make use of Temporal CronSchedule, all you need is to +remove the delay timer and return without using +`ContinueAsNew`. Then start the workflow with the desired CronSchedule. + + +## Retrieve last successful result + +Sometimes it is useful to obtain the progress of previous successful runs. +This is supported by two new APIs in the client library: +`HasLastCompletionResult` and `GetLastCompletionResult`. Below is an example of how +to use this in Go: + +```go +func CronWorkflow(ctx workflow.Context) (CronResult, error) { + startTimestamp := time.Time{} // By default start from 0 time. + if workflow.HasLastCompletionResult(ctx) { + var progress CronResult + if err := workflow.GetLastCompletionResult(ctx, &progress); err == nil { + startTimestamp = progress.LastSyncTimestamp + } + } + endTimestamp := workflow.Now(ctx) + + // Process work between startTimestamp (exclusive), endTimestamp (inclusive). + // Business logic implementation goes here. + + result := CronResult{LastSyncTimestamp: endTimestamp} + return result, nil +} +``` + +Note that this works even if one of the cron schedule runs failed. The +next schedule will still get the last successful result if it ever successfully +completed at least once. For example, for a daily cron workflow, if the first day +run succeeds and the second day fails, then the third day run will still get +the result from first day's run using these APIs. diff --git a/versioned_docs/version-0.25.0/go-error-handling.md b/versioned_docs/version-0.25.0/go-error-handling.md new file mode 100644 index 0000000000..e78ee8ff26 --- /dev/null +++ b/versioned_docs/version-0.25.0/go-error-handling.md @@ -0,0 +1,60 @@ +--- +id: go-error-handling +title: Error Handling +--- + +An activity, or child workflow, might fail, and you could handle errors differently based on different +error cases. 
If the activity returns an error as `errors.New()` or `fmt.Errorf()`, those errors will +be converted to `*temporal.ApplicationError` and wrapped inside `*temporal.ActivityTaskError` or `*temporal.ChildWorkflowExecutionError`. If the activity returns an error as +`temporal.NewRetryableApplicationError("error message", details)`, that error will be returned as `*temporal.ApplicationError`. +There are other types of errors such as `*temporal.TimeoutError`, `*temporal.CanceledError` and +`*temporal.PanicError`. Following is an example of what your error code might look like: + +```go +err := workflow.ExecuteActivity(ctx, MyActivity, ...).Get(ctx, nil) +if err != nil { + var applicationErr *ApplicationError + if errors.As(err, &applicationErr) { + // retrieve error message + fmt.Println(applicationErr.Error()) + + // handle activity errors (created via NewApplicationError() API) + var detailMsg string // assuming activity return error by NewApplicationError("message", true, "string details") + applicationErr.Details(&detailMsg) // extract strong typed details + + // handle activity errors (errors created other than using NewApplicationError() API) + switch applicationErr.OriginalType() { + case "CustomErrTypeA": + // handle CustomErrTypeA + case "CustomErrTypeB": + // handle CustomErrTypeB + default: + // newer version of activity could return new errors that workflow was not aware of. + } + } + + var canceledErr *CanceledError + if errors.As(err, &canceledErr) { + // handle cancellation + } + + var timeoutErr *TimeoutError + if errors.As(err, &timeoutErr) { + // handle timeout, could check timeout type by timeoutErr.TimeoutType() + switch timeoutErr.TimeoutType() { + case commonpb.ScheduleToStart: + // Handle ScheduleToStart timeout. + case commonpb.StartToClose: + // Handle StartToClose timeout. + case commonpb.Heartbeat: + // Handle heartbeat timeout.
+ default: + } + } + + var panicErr *PanicError + if errors.As(err, &panicErr) { + // handle panic, message and stack trace are available by panicErr.Error() and panicErr.StackTrace() + } +} +``` diff --git a/versioned_docs/version-0.25.0/go-execute-activity.md b/versioned_docs/version-0.25.0/go-execute-activity.md new file mode 100644 index 0000000000..f9f64b2115 --- /dev/null +++ b/versioned_docs/version-0.25.0/go-execute-activity.md @@ -0,0 +1,96 @@ +--- +id: go-execute-activity +title: Executing Activities +--- + +The primary responsibility of a workflow implementation is to schedule activities for execution. The +most straightforward way to do this is via the library method `workflow.ExecuteActivity`. The following +sample code demonstrates making this call: + +```go +ao := workflow.ActivityOptions{ + TaskList: "sampleTaskList", + ScheduleToCloseTimeout: time.Second * 60, + ScheduleToStartTimeout: time.Second * 60, + StartToCloseTimeout: time.Second * 60, + HeartbeatTimeout: time.Second * 10, + WaitForCancellation: false, +} +ctx = workflow.WithActivityOptions(ctx, ao) + +var result string +err := workflow.ExecuteActivity(ctx, SimpleActivity, value).Get(ctx, &result) +if err != nil { + return err +} +``` +Let's take a look at each component of this call. + +## Activity options + +Before calling `workflow.ExecuteActivity()`, you must configure `ActivityOptions` for the +invocation. These options customize various execution timeouts, and are passed in by creating a child +context from the initial context and overwriting the desired values. The child context is then passed +into the `workflow.ExecuteActivity()` call. If multiple activities are sharing the same option +values, then the same context instance can be used when calling `workflow.ExecuteActivity()`. + +## Activity timeouts + +There can be various kinds of timeouts associated with an activity. 
Temporal guarantees that activities +are executed *at most once*, so an activity either succeeds or fails with one of the following timeouts: + +Timeout | Description +--- | --- +`StartToCloseTimeout` | Maximum time that a worker can take to process a task after it has received the task. +`ScheduleToStartTimeout` | Time a task can wait to be picked up by an activity worker after a workflow schedules it. If there are no workers available to process this task for the specified duration, the task will time out. +`ScheduleToCloseTimeout` | Time a task can take to complete after it is scheduled by a workflow. This is usually greater than the sum of `StartToClose` and `ScheduleToStart` timeouts. +`HeartbeatTimeout` | If a task doesn't heartbeat to the Temporal service for this duration, it will be considered to have failed. This is useful for long-running tasks. + +## ExecuteActivity call + +The first parameter in the call is the required `workflow.Context` object. This type is a copy of +`context.Context` with the `Done()` method returning `workflow.Channel` instead of the native Go `chan`. + +The second parameter is the function that we registered as an activity function. This parameter can +also be a string representing the fully qualified name of the activity function. The benefit of passing +in the actual function object is that the framework can validate activity parameters. + +The remaining parameters are passed to the activity as part of the call. In our example, we have a +single parameter: `value`. This list of parameters must match the list of parameters declared by +the activity function. The Temporal client library will validate this. + +The method call returns immediately and returns a `workflow.Future`. This allows you to execute more +code without having to wait for the scheduled activity to complete. + +When you are ready to process the results of the activity, call the `Get()` method on the future +object returned. 
The parameters to this method are the `ctx` object we passed to the +`workflow.ExecuteActivity()` call and an output parameter that will receive the output of the +activity. The type of the output parameter must match the type of the return value declared by the +activity function. The `Get()` method will block until the activity completes and results are +available. + +You can retrieve the result value returned by `workflow.ExecuteActivity()` from the future and use +it like any normal result from a synchronous function call. The following sample code demonstrates how +you can use the result if it is a string value: + +```go +var result string +if err := future.Get(ctx, &result); err != nil { + return err +} + +switch result { +case "apple": + // Do something. +case "banana": + // Do something. +default: + return err +} +``` +In this example, we called the `Get()` method on the returned future immediately after `workflow.ExecuteActivity()`. +However, this is not necessary. If you want to execute multiple activities in parallel, you can +repeatedly call `workflow.ExecuteActivity()`, store the returned futures, and then wait for all +activities to complete by calling the `Get()` methods of the future at a later time. + +To implement more complex wait conditions on returned future objects, use the `workflow.Selector` class. diff --git a/versioned_docs/version-0.25.0/go-queries.md b/versioned_docs/version-0.25.0/go-queries.md new file mode 100644 index 0000000000..59c3588a8d --- /dev/null +++ b/versioned_docs/version-0.25.0/go-queries.md @@ -0,0 +1,92 @@ +--- +id: go-queries +title: Queries +--- + +If a workflow execution has been stuck at a state for longer than an expected period of time, you +might want to query the current call stack. You can use the Temporal CLI to perform this query. 
For +example: + +`tctl --namespace samples-namespace workflow query -w my_workflow_id -r my_run_id -qt __stack_trace` + +This command uses `__stack_trace`, which is a built-in query type supported by the Temporal client +library. You can add custom query types to handle queries such as querying the current state of a +workflow, or querying how many activities the workflow has completed. To do this, you need to set +up a query handler using `workflow.SetQueryHandler`. + +The handler must be a function that returns two values: +1. A serializable result +2. An error + +The handler function can receive any number of input parameters, but all input parameters must be +serializable. The following sample code sets up a query handler that handles the query type of +`current_state`: +```go +func MyWorkflow(ctx workflow.Context, input string) error { + currentState := "started" // This could be any serializable struct. + err := workflow.SetQueryHandler(ctx, "current_state", func() (string, error) { + return currentState, nil + }) + if err != nil { + currentState = "failed to register query handler" + return err + } + // Your normal workflow code begins here, and you update the currentState as the code makes progress. + currentState = "waiting timer" + err = NewTimer(ctx, time.Hour).Get(ctx, nil) + if err != nil { + currentState = "timer failed" + return err + } + + currentState = "waiting activity" + ctx = WithActivityOptions(ctx, myActivityOptions) + err = ExecuteActivity(ctx, MyActivity, "my_input").Get(ctx, nil) + if err != nil { + currentState = "activity failed" + return err + } + currentState = "done" + return nil +} +``` +You can now query `current_state` by using the CLI: + +`tctl --namespace samples-namespace workflow query -w my_workflow_id -r my_run_id -qt current_state` + +You can also issue a query from code using the `QueryWorkflow()` API on a Temporal client object. + +## Consistent Query + +Query has two consistency levels, eventual and strong. 
Consider if you were to signal a workflow and then +immediately query the workflow: + +`tctl --namespace samples-namespace workflow signal -w my_workflow_id -r my_run_id -n signal_name -if ./input.json` + +`tctl --namespace samples-namespace workflow query -w my_workflow_id -r my_run_id -qt current_state` + +In this example if signal were to change workflow state, query may or may not see that state update reflected +in the query result. This is what it means for query to be eventually consistent. + +Query has another consistency level called strong consistency. A strongly consistent query is guaranteed +to be based on workflow state which includes all events that came before the query was issued. An event +is considered to have come before a query if the call creating the external event returned success before +the query was issued. External events which are created while the query is outstanding may or may not +be reflected in the workflow state the query result is based on. + +In order to run consistent query through the cli do the following: + +`tctl --namespace samples-namespace workflow query -w my_workflow_id -r my_run_id -qt current_state --qcl strong` + +In order to run a query using the go client do the following: + +```go +resp, err := client.QueryWorkflowWithOptions(ctx, &client.QueryWorkflowWithOptionsRequest{ + WorkflowId: workflowId, + RunId: runId, + QueryType: queryType, + QueryConsistencyLevel: shared.QueryConsistencyLevelStrong.Ptr(), +}) +``` + +When using strongly consistent query you should expect higher latency than eventually consistent query. diff --git a/versioned_docs/version-0.25.0/go-quick-start.md b/versioned_docs/version-0.25.0/go-quick-start.md new file mode 100644 index 0000000000..028704ccc6 --- /dev/null +++ b/versioned_docs/version-0.25.0/go-quick-start.md @@ -0,0 +1,207 @@ +--- +id: go-quick-start +title: Quick Start +--- + +This topic helps you install the Temporal server and implement a workflow. 
+ +## Install Temporal Server Locally + +To run samples locally you need to run Temporal server locally using [instructions](/docs/installing-server). + +## Start with an empty directory + +Create directory for the project + +``` +mkdir tutorial-go-sdk +``` + +``` +cd tutorial-go-sdk +``` + +## Initialize Go Modules and SDK Package Dependency + +Initialize Go modules + +``` +> go mod init github.com/temporalio/tutorial-go-sdk +go: creating new go.mod: module github.com/temporalio/tutorial-go-sdk +``` + +Add dependency to Temporal Go SDK + +```bash +> go get go.temporal.io/temporal@v0.25.0 +go: downloading go.temporal.io/temporal v0.25.0 +go: go.temporal.io/temporal upgrade => v0.25.0 +``` + +## Implement Activities + +### Get User Activity + +Create file get_user.go + +```go +package main + +import ( + "context" + + "go.temporal.io/temporal/activity" +) + +// GetUser is the implementation for Temporal activity +func GetUser(ctx context.Context) (string, error) { + logger := activity.GetLogger(ctx) + logger.Info("GetUser activity called") + return "Temporal", nil +} +``` + +### Send Greeting Activity + +Create file send_greeting.go + +```go +package main + +import ( + "context" + "fmt" + + "go.temporal.io/temporal/activity" +) + +// SendGreeting is the implementation for Temporal activity +func SendGreeting(ctx context.Context, user string) error { + logger := activity.GetLogger(ctx) + logger.Info("SendGreeting activity called") + + fmt.Printf("Greeting sent to user: %v\n", user) + return nil +} +``` + +## Implement Greetings Workflow + +Create file greetings.go + +```go +package main + +import ( + "time" + + "go.temporal.io/temporal/workflow" + "go.uber.org/zap" +) + +// Greetings is the implementation for Temporal workflow +func Greetings(ctx workflow.Context) error { + logger := workflow.GetLogger(ctx) + logger.Info("Workflow Greetings started") + + ao := workflow.ActivityOptions{ + ScheduleToStartTimeout: time.Hour, + StartToCloseTimeout: time.Hour, + } + ctx = 
workflow.WithActivityOptions(ctx, ao) + + var user string + err := workflow.ExecuteActivity(ctx, GetUser).Get(ctx, &user) + if err != nil { + return err + } + + err = workflow.ExecuteActivity(ctx, SendGreeting, user).Get(ctx, nil) + if err != nil { + return err + } + + logger.Info("Greetings workflow complete", zap.String("user", user)) + return nil +} +``` + +## Host Workflows and Activities inside Worker + +Create file main.go + +```go +package main + +import ( + "go.uber.org/zap" + + "go.temporal.io/temporal/client" + "go.temporal.io/temporal/worker" +) + +func main() { + logger, err := zap.NewDevelopment() + if err != nil { + panic(err) + } + + logger.Info("Zap logger created") + + // The client is a heavyweight object that should be created once + serviceClient, err := client.NewClient(client.Options{ + Logger: logger, + }) + + if err != nil { + logger.Fatal("Unable to start worker", zap.Error(err)) + } + + worker := worker.New(serviceClient, "tutorial_tl", worker.Options{}) + + worker.RegisterWorkflow(Greetings) + worker.RegisterActivity(GetUser) + worker.RegisterActivity(SendGreeting) + + err = worker.Start() + if err != nil { + logger.Fatal("Unable to start worker", zap.Error(err)) + } + + select {} +} +``` + +## Start Worker + +Run your worker app which hosts workflow and activity implementations + +```bash +> go run *.go +2020-04-07T22:44:53.073-0700 INFO tutorial-go-sdk/main.go:19 Zap logger created +2020-04-07T22:44:53.111-0700 INFO internal/internal_worker.go:1021 Started Worker {"Namespace": "default", "TaskList": "tutorial_tl", "WorkerID": "59260@local@"} +``` + +## Start workflow execution + +```bash +> docker run --network=host --rm temporalio/tctl:0.25.0 wf start --tl tutorial_tl -w Greet_Temporal_1 --wt Greetings --et 3600 --dt 10 +Started Workflow Id: Greet_Temporal_1, run Id: b4f8957a-565c-40ad-8495-15a41338f8f4 +``` + +## Workflow Completes Execution + +``` +2020-04-07T22:46:32.424-0700 INFO workflows/greetings.go:14 Workflow Greetings started 
{"Namespace": "default", "TaskList": "tutorial_tl", "WorkerID": "59260@local@", "WorkflowType": "Greetings", "WorkflowID": "Greet_Temporal_1", "RunID": "b4f8957a-565c-40ad-8495-15a41338f8f4"} +2020-04-07T22:46:32.424-0700 DEBUG internal/internal_event_handlers.go:466 ExecuteActivity {"Namespace": "default", "TaskList": "tutorial_tl", "WorkerID": "59260@local@", "WorkflowType": "Greetings", "WorkflowID": "Greet_Temporal_1", "RunID": "b4f8957a-565c-40ad-8495-15a41338f8f4", "ActivityID": "0", "ActivityType": "GetUser"} +2020-04-07T22:46:32.452-0700 INFO activities/get_user.go:12 GetUser activity called {"Namespace": "default", "TaskList": "tutorial_tl", "WorkerID": "59260@local@", "ActivityID": "0", "ActivityType": "GetUser", "WorkflowType": "Greetings", "WorkflowID": "Greet_Temporal_1", "RunID": "b4f8957a-565c-40ad-8495-15a41338f8f4"} +2020-04-07T22:46:32.485-0700 DEBUG internal/internal_event_handlers.go:466 ExecuteActivity {"Namespace": "default", "TaskList": "tutorial_tl", "WorkerID": "59260@local@", "WorkflowType": "Greetings", "WorkflowID": "Greet_Temporal_1", "RunID": "b4f8957a-565c-40ad-8495-15a41338f8f4", "ActivityID": "1", "ActivityType": "SendGreeting"} +2020-04-07T22:46:32.505-0700 INFO activities/send_greeting.go:13 SendGreeting activity called {"Namespace": "default", "TaskList": "tutorial_tl", "WorkerID": "59260@local@", "ActivityID": "1", "ActivityType": "SendGreeting", "WorkflowType": "Greetings", "WorkflowID": "Greet_Temporal_1", "RunID": "b4f8957a-565c-40ad-8495-15a41338f8f4"} +Greeting sent to user: Temporal +2020-04-07T22:46:32.523-0700 INFO workflows/greetings.go:33 Greetings workflow complete {"Namespace": "default", "TaskList": "tutorial_tl", "WorkerID": "59260@local@", "WorkflowType": "Greetings", "WorkflowID": "Greet_Temporal_1", "RunID": "b4f8957a-565c-40ad-8495-15a41338f8f4", "user": "Temporal"} +``` + +## Try Go SDK Samples + +Check [Go SDK Samples](https://github.com/temporalio/temporal-go-samples) +and try simple Temporal usage scenario. 
diff --git a/versioned_docs/version-0.25.0/go-retries.md b/versioned_docs/version-0.25.0/go-retries.md new file mode 100644 index 0000000000..211cb92e45 --- /dev/null +++ b/versioned_docs/version-0.25.0/go-retries.md @@ -0,0 +1,99 @@ +--- +id: go-retries +title: Activity and Workflow Retries +--- + +Activities and workflows can fail due to various intermediate conditions. In those cases, we want +to retry the failed activity or child workflow or even the parent workflow. This can be achieved +by supplying an optional retry policy. A retry policy looks like the following: + +``` go +// RetryPolicy defines the retry policy. +RetryPolicy struct { + // Backoff interval for the first retry. If coefficient is 1.0 then it is used for all retries. + // Required, no default value. + InitialInterval time.Duration + + // Coefficient used to calculate the next retry backoff interval. + // The next retry interval is previous interval multiplied by this coefficient. + // Must be 1 or larger. Default is 2.0. + BackoffCoefficient float64 + + // Maximum backoff interval between retries. Exponential backoff leads to interval increase. + // This value is the cap of the interval. Default is 100x of initial interval. + MaximumInterval time.Duration + + // Maximum number of attempts. When exceeded the retries stop even if not expired yet. + // If not set or set to 0, it means unlimited + MaximumAttempts int32 + + // Non-Retriable errors. This is optional. Temporal server will stop retry if error type matches this list. + // Note: + // - cancellation is not a failure, so it won't be retried, + // - only StartToClose or Heartbeat timeouts are retryable. + NonRetryableErrorTypes []string +} +``` + +To enable retry, supply a custom retry policy to `ActivityOptions` or `ChildWorkflowOptions` +when you execute them. 
+ +``` go +expiration := time.Minute * 10 +retryPolicy := &workflow.RetryPolicy{ + InitialInterval: time.Second, + BackoffCoefficient: 2, + MaximumInterval: expiration, + MaximumAttempts: 5, +} +ao := workflow.ActivityOptions{ + ScheduleToStartTimeout: expiration, + StartToCloseTimeout: expiration, + HeartbeatTimeout: time.Second * 30, + RetryPolicy: retryPolicy, // Enable retry. +} +ctx = workflow.WithActivityOptions(ctx, ao) +activityFuture := workflow.ExecuteActivity(ctx, SampleActivity, params) +``` + +If an activity heartbeats its progress before it fails, the retry attempt will contain that progress, +so the activity implementation can resume from the point of failure like this: + +``` go +func SampleActivity(ctx context.Context, inputArg InputParams) error { + startIdx := inputArg.StartIndex + if activity.HasHeartbeatDetails(ctx) { + // Recover from finished progress. + var finishedIndex int + if err := activity.GetHeartbeatDetails(ctx, &finishedIndex); err == nil { + startIdx = finishedIndex + 1 // Start from next one. + } + } + + // Normal activity logic...
+ for i:=startIdx; i + +Source code: + +```go +package activities + +import ( + "context" + + "go.temporal.io/temporal/activity" +) + +// GetUser is the implementation for Temporal activity +func GetUser(ctx context.Context) (string, error) { + logger := activity.GetLogger(ctx) + logger.Info("GetUser activity called") + return "Temporal", nil +} +``` + +```go +package activities + +import ( + "context" + "fmt" + + "go.temporal.io/temporal/activity" +) + +// SendGreeting is the implementation for Temporal activity +func SendGreeting(ctx context.Context, user string) error { + logger := activity.GetLogger(ctx) + logger.Info("SendGreeting activity called") + + fmt.Printf("Greeting sent to user: %v\n", user) + return nil +} +``` + +```go +package workflows + +import ( + "time" + + "github.com/samarabbas/tutorial-go-sdk/activities" + "go.temporal.io/temporal/workflow" + "go.uber.org/zap" +) + +// Greetings is the implementation for Temporal workflow +func Greetings(ctx workflow.Context) error { + logger := workflow.GetLogger(ctx) + logger.Info("Workflow Greetings started") + + ao := workflow.ActivityOptions{ + ScheduleToStartTimeout: time.Hour, + StartToCloseTimeout: time.Hour, + } + ctx = workflow.WithActivityOptions(ctx, ao) + + var user string + err := workflow.ExecuteActivity(ctx, activities.GetUser).Get(ctx, &user) + if err != nil { + return err + } + + err = workflow.ExecuteActivity(ctx, activities.SendGreeting, user).Get(ctx, nil) + if err != nil { + return err + } + + logger.Info("Greetings workflow complete", zap.String("user", user)) + return nil +} +``` + +```go +package main + +import ( + "github.com/uber-go/tally" + "go.uber.org/zap" + + "github.com/samarabbas/tutorial-go-sdk/activities" + "github.com/samarabbas/tutorial-go-sdk/workflows" + + "go.temporal.io/temporal/client" + "go.temporal.io/temporal/worker" +) + +func main() { + logger, err := zap.NewDevelopment() + if err != nil { + panic(err) + } + + logger.Info("Zap logger created") + scope := 
tally.NoopScope + + // The client is a heavyweight object that should be created once + serviceClient, err := client.NewClient(client.Options{ + HostPort: client.DefaultHostPort, + Namespace: client.DefaultNamespace, + MetricsScope: scope, + }) + if err != nil { + logger.Fatal("Unable to start worker", zap.Error(err)) + } + + worker := worker.New(serviceClient, "tutorial_tl", worker.Options{ + Logger: logger, + }) + + worker.RegisterWorkflow(workflows.Greetings) + worker.RegisterActivity(activities.GetUser) + worker.RegisterActivity(activities.SendGreeting) + + err = worker.Start() + if err != nil { + logger.Fatal("Unable to start worker", zap.Error(err)) + } + + select {} +} +``` + +Commands: + +```bash +docker run --network=host --rm temporalio/tctl:0.25.0 wf start --tl tutorial_tl -w Greet_Temporal_1 --wt Greetings --et 3600 --dt 10 +``` diff --git a/versioned_docs/version-0.25.0/go-sessions.md b/versioned_docs/version-0.25.0/go-sessions.md new file mode 100644 index 0000000000..977cf69892 --- /dev/null +++ b/versioned_docs/version-0.25.0/go-sessions.md @@ -0,0 +1,139 @@ +--- +id: go-sessions +title: Sessions +--- + +The session framework provides a straightforward interface for scheduling multiple activities on a single worker without requiring you to manually specify the task list name. It also includes features like **concurrent session limitation** and **worker failure detection**. + +## Use Cases + +- **File Processing**: You may want to implement a workflow that can download a file, process it, and then upload the modified version. If these three steps are implemented as three different activities, all of them should be executed by the same worker. + +- **Machine Learning Model Training**: Training a machine learning model typically involves three stages: download the data set, optimize the model, and upload the trained parameter. 
Since the models may consume a large amount of resources (GPU memory for example), the number of models processed on a host needs to be limited. + +## Basic Usage + +Before using the session framework to write your workflow code, you need to configure your worker to process sessions. To do that, set the `EnableSessionWorker` field of `worker.Options` to `true` when starting your worker. + +The most important APIs provided by the session framework are `workflow.CreateSession()` and `workflow.CompleteSession()`. The basic idea is that all the activities executed within a session will be processed by the same worker and these two APIs allow you to create new sessions and close them after all activities finish executing. + +Here's a more detailed description of these two APIs: +```go +type SessionOptions struct { + // ExecutionTimeout: required, no default. + // Specifies the maximum amount of time the session can run. + ExecutionTimeout time.Duration + + // CreationTimeout: required, no default. + // Specifies how long session creation can take before returning an error. + CreationTimeout time.Duration +} + +func CreateSession(ctx Context, sessionOptions *SessionOptions) (Context, error) +``` + +`CreateSession()` takes in `workflow.Context`, `sessionOptions` and returns a new context which contains metadata information of the created session (referred to as the **session context** below). When it's called, it will check the task list name specified in the `ActivityOptions` (or in the `StartWorkflowOptions` if the task list name is not specified in `ActivityOptions`), and create the session on one of the workers which is polling that task list. + +The returned session context should be used to execute all activities belonging to the session. The context will be cancelled if the worker executing this session dies or `CompleteSession()` is called. 
When using the returned session context to execute activities, a `workflow.ErrSessionFailed` error may be returned if the session framework detects that the worker executing this session has died. The failure of your activities won't affect the state of the session, so you still need to handle the errors returned from your activities and call `CompleteSession()` if necessary. + +`CreateSession()` will return an error if the context passed in already contains an open session. If all the workers are currently busy and unable to handle new sessions, the framework will keep retrying until the `CreationTimeout` you specified in `SessionOptions` has passed before returning an error (check the **Concurrent Session Limitation** section for more details). + +```go +func CompleteSession(ctx Context) +``` + +`CompleteSession()` releases the resources reserved on the worker, so it's important to call it as soon as you no longer need the session. It will cancel the session context and therefore all the activities using that session context. Note that it's safe to call `CompleteSession()` on a failed session, meaning that you can call it from a `defer` function after the session is successfully created. 
+ +### Sample Code + +```go +func FileProcessingWorkflow(ctx workflow.Context, fileID string) (err error) { + ao := workflow.ActivityOptions{ + ScheduleToStartTimeout: time.Second * 5, + StartToCloseTimeout: time.Minute, + } + ctx = workflow.WithActivityOptions(ctx, ao) + + so := &workflow.SessionOptions{ + CreationTimeout: time.Minute, + ExecutionTimeout: time.Minute, + } + sessionCtx, err := workflow.CreateSession(ctx, so) + if err != nil { + return err + } + defer workflow.CompleteSession(sessionCtx) + + var fInfo *fileInfo + err = workflow.ExecuteActivity(sessionCtx, downloadFileActivityName, fileID).Get(sessionCtx, &fInfo) + if err != nil { + return err + } + + var fInfoProcessed *fileInfo + err = workflow.ExecuteActivity(sessionCtx, processFileActivityName, *fInfo).Get(sessionCtx, &fInfoProcessed) + if err != nil { + return err + } + + return workflow.ExecuteActivity(sessionCtx, uploadFileActivityName, *fInfoProcessed).Get(sessionCtx, nil) +} +``` + +## Session Metadata + +```go +type SessionInfo struct { + // A unique Id for the session + SessionID string + + // The hostname of the worker that is executing the session + HostName string + + // ... other unexported fields +} + +func GetSessionInfo(ctx Context) *SessionInfo +``` + +The session context also stores some session metadata, which can be retrieved by the `GetSessionInfo()` API. If the context passed in doesn't contain any session metadata, this API will return a `nil` pointer. + +## Concurrent Session Limitation + +To limit the number of concurrent sessions running on a worker, set the `MaxConcurrentSessionExecutionSize` field of `worker.Options` to the desired value. By default this field is set to a very large value, so there's no need to manually set it if no limitation is needed. + +If a worker hits this limitation, it won't accept any new `CreateSession()` requests until one of the existing sessions is completed. 
`CreateSession()` will return an error if the session can't be created within `CreationTimeout`. + +## Recreate Session + +For long-running sessions, you may want to use the `ContinueAsNew` feature to split the workflow into multiple runs when all activities need to be executed by the same worker. The `RecreateSession()` API is designed for such a use case. + +```go +func RecreateSession(ctx Context, recreateToken []byte, sessionOptions *SessionOptions) (Context, error) +``` + +Its usage is the same as `CreateSession()` except that it also takes in a `recreateToken`, which is needed to create a new session on the same worker as the previous one. You can get the token by calling the `GetRecreateToken()` method of the `SessionInfo` object. + +```go +token := workflow.GetSessionInfo(sessionCtx).GetRecreateToken() +``` + +## Q & A + +### Is there a complete example? +Yes, the [file processing example](https://github.com/temporalio/temporal-go-samples/blob/master/cmd/samples/fileprocessing/workflow.go) in the temporal-go-samples repo has been updated to use the session framework. + +### What happens to my activity if the worker dies? +If your activity has already been scheduled, it will be cancelled. If not, you will get a `workflow.ErrSessionFailed` error when you call `workflow.ExecuteActivity()`. + +### Is the concurrent session limitation per process or per host? +It's per worker process, so make sure there's only one worker process running on the host if you plan to use that feature. + + +## Future Work + +* +Right now a session is considered failed if the worker process dies. However, for some use cases, you may only care whether the worker host is alive or not. For these use cases, the session should be automatically re-established if the worker process is restarted. + +* +The current implementation assumes that all sessions are consuming the same type of resource and there's only one global limitation.
Our plan is to allow you to specify what type of resource your session will consume and enforce different limitations on different types of resources. diff --git a/versioned_docs/version-0.25.0/go-side-effect.md b/versioned_docs/version-0.25.0/go-side-effect.md new file mode 100644 index 0000000000..ad48c4b797 --- /dev/null +++ b/versioned_docs/version-0.25.0/go-side-effect.md @@ -0,0 +1,32 @@ +--- +id: go-side-effect +title: SideEffect +--- + +`workflow.SideEffect` is useful for short, nondeterministic code snippets, such as getting a random +value or generating a UUID. It executes the provided function once and records its result into the +workflow history. `workflow.SideEffect` does not re-execute upon replay, but instead returns the +recorded result. It can be seen as an "inline" activity. Something to note about `workflow.SideEffect` +is that, unlike the Temporal guarantee of at-most-once execution for activities, there is no such +guarantee with `workflow.SideEffect`. Under certain failure conditions, `workflow.SideEffect` can +end up executing a function more than once. + +The only way to fail `SideEffect` is to panic, which causes a decision task failure. After the +timeout, Temporal reschedules and then re-executes the decision task, giving `SideEffect` another chance +to succeed. Do not return any data from `SideEffect` other than through its recorded return value. + +The following sample demonstrates how to use `SideEffect`: + +```go +encodedRandom := SideEffect(func(ctx workflow.Context) interface{} { + return rand.Intn(100) +}) + +var random int +encodedRandom.Get(&random) +if random < 50 { + .... +} else { + .... 
+} +``` diff --git a/versioned_docs/version-0.25.0/go-signals.md b/versioned_docs/version-0.25.0/go-signals.md new file mode 100644 index 0000000000..7e1691a04a --- /dev/null +++ b/versioned_docs/version-0.25.0/go-signals.md @@ -0,0 +1,53 @@ +--- +id: go-signals +title: Signals +--- + +**Signals** provide a mechanism to send data directly to a running workflow. Previously, you had +two options for passing data to the workflow implementation: + +* Via start parameters +* As return values from activities + +With start parameters, we could only pass in values before workflow execution began. + +Return values from activities allowed us to pass information to a running workflow, but this +approach comes with its own complications. One major drawback is reliance on polling. This means +that the data needs to be stored in a third-party location until it's ready to be picked up by +the activity. Further, the lifecycle of this activity requires management, and the activity +requires manual restart if it fails before acquiring the data. + +**Signals**, on the other hand, provide a fully asynchronous and durable mechanism for providing data to +a running workflow. When a signal is received for a running workflow, Temporal persists the event +and the payload in the workflow history. The workflow can then process the signal at any time +afterwards without the risk of losing the information. The workflow also has the option to stop +execution by blocking on a **signal channel**. 
+ +```go +var signalVal string +signalChan := workflow.GetSignalChannel(ctx, signalName) + +s := workflow.NewSelector(ctx) +s.AddReceive(signalChan, func(c workflow.Channel, more bool) { + c.Receive(ctx, &signalVal) + workflow.GetLogger(ctx).Info("Received signal!", zap.String("signal", signalName), zap.String("value", signalVal)) +}) +s.Select(ctx) + +if len(signalVal) > 0 && signalVal != "SOME_VALUE" { + return errors.New("signalVal") +} +``` + +In the example above, the workflow code uses **workflow.GetSignalChannel** to open a +**workflow.Channel** for the named signal. We then use a **workflow.Selector** to wait on this +channel and process the payload received with the signal. + +## SignalWithStart + +You may not know if a workflow is running and can accept a signal. The +[client.SignalWithStartWorkflow](https://pkg.go.dev/go.temporal.io/temporal/client#Client) API +allows you to send a signal to the current workflow instance if one exists or to create a new +run and then send the signal. `SignalWithStartWorkflow` therefore doesn't take a run Id as a +parameter. \ No newline at end of file diff --git a/versioned_docs/version-0.25.0/go-tracing.md b/versioned_docs/version-0.25.0/go-tracing.md new file mode 100644 index 0000000000..4713eb3913 --- /dev/null +++ b/versioned_docs/version-0.25.0/go-tracing.md @@ -0,0 +1,90 @@ +--- +id: go-tracing +title: Tracing and Context Propagation +--- + +## Tracing + +The Go client provides distributed tracing support through [OpenTracing](https://opentracing.io/). Tracing can be +configured by providing an [opentracing.Tracer](https://pkg.go.dev/github.com/opentracing/opentracing-go#Tracer) +implementation in [ClientOptions](https://pkg.go.dev/go.temporal.io/temporal/internal#ClientOptions) +and [WorkerOptions](https://pkg.go.dev/go.temporal.io/temporal/internal#WorkerOptions) during client and worker instantiation, +respectively.
Tracing allows +you to view the call graph of a workflow along with its activities, child workflows etc. For more details on how to +configure and leverage tracing, see the [OpenTracing documentation](https://opentracing.io/docs/getting-started/). +The OpenTracing support has been validated using [Jaeger](https://www.jaegertracing.io/), but other implementations +mentioned [here](https://opentracing.io/docs/supported-tracers/) should also work. Tracing support utilizes generic context +propagation support provided by the client. + +## Context Propagation + +We provide a standard way to propagate custom context across a workflow. +[ClientOptions](https://pkg.go.dev/go.temporal.io/temporal/internal#ClientOptions) and [WorkerOptions](https://pkg.go.dev/go.temporal.io/temporal/internal#WorkerOptions) +allow configuring a context propagator. The context propagator extracts and passes on information present in the `context.Context` +and `workflow.Context` objects across the workflow. Once a context propagator is configured, you should be able to access the required values +in the context objects as you would normally do in Go. +For a sample, the Go client implements a [tracing context propagator](https://github.com/temporalio/temporal-go-sdk/blob/master/internal/tracer.go). + +### Server-Side Headers Support + +On the server side, Temporal provides a mechanism to propagate what it calls headers across different workflow +transitions. + +```proto +message Header { + map<string, bytes> fields = 1; +} +``` + +The client leverages this to pass around selected context information. [HeaderReader](https://pkg.go.dev/go.temporal.io/temporal/internal#HeaderReader) +and [HeaderWriter](https://pkg.go.dev/go.temporal.io/temporal/internal#HeaderWriter) are interfaces +that allow reading and writing to the Temporal server headers. The client already provides [implementations](https://github.com/temporalio/temporal-go-sdk/blob/master/internal/headers.go) +for these.
`HeaderWriter` sets a field in the header. Headers is a map, so setting a value for the same key +multiple times will overwrite the previous values. `HeaderReader` iterates through the headers map and runs the +provided handler function on each key/value pair, allowing you to deal with the fields you are interested in. + +```go +type HeaderWriter interface { + Set(string, []byte) +} + +type HeaderReader interface { + ForEachKey(handler func(string, []byte) error) error +} +``` + +### Context Propagators + +Context propagators require implementing the following four methods to propagate selected context across a workflow: + +- `Inject` is meant to pick out the context keys of interest from a Go [context.Context](https://golang.org/pkg/context/#Context) object and write that into the +headers using the [HeaderWriter](https://pkg.go.dev/go.temporal.io/temporal/internal#HeaderWriter) interface +- `InjectFromWorkflow` is the same as above, but operates on a [workflow.Context](https://pkg.go.dev/go.temporal.io/temporal/internal#Context) object +- `Extract` reads the headers and places the information of interest back into the [context.Context](https://golang.org/pkg/context/#Context) object +- `ExtractToWorkflow` is the same as above, but operates on a [workflow.Context](https://pkg.go.dev/go.temporal.io/temporal/internal#Context) object + +The [tracing context propagator](https://github.com/temporalio/temporal-go-sdk/blob/master/internal/tracer.go) +shows a sample implementation of context propagation. + +```go +type ContextPropagator interface { + Inject(context.Context, HeaderWriter) error + + Extract(context.Context, HeaderReader) (context.Context, error) + + InjectFromWorkflow(Context, HeaderWriter) error + + ExtractToWorkflow(Context, HeaderReader) (Context, error) +} +``` + +## Q & A + +### Is there a complete example?
+ +The [context propagation sample](https://github.com/temporalio/temporal-go-samples/blob/master/ctxpropagation/workflow.go) +configures a custom context propagator and shows context propagation of custom keys across a workflow and an activity. + +### Can I configure multiple context propagators? + +Yes, we recommend that you configure multiple context propagators, with each propagator meant to propagate a particular type of context. diff --git a/versioned_docs/version-0.25.0/go-versioning.md b/versioned_docs/version-0.25.0/go-versioning.md new file mode 100644 index 0000000000..4af9220d0d --- /dev/null +++ b/versioned_docs/version-0.25.0/go-versioning.md @@ -0,0 +1,157 @@ +--- +id: go-versioning +title: Versioning +--- + +The definition code of a Temporal workflow must be deterministic because Temporal uses event sourcing +to reconstruct the workflow state by replaying the saved history event data on the workflow +definition code. This means that any incompatible update to the workflow definition code could cause +a non-deterministic issue if not handled correctly. + +## workflow.GetVersion() + +Consider the following workflow definition: + +```go +func MyWorkflow(ctx workflow.Context, data string) (string, error) { + ao := workflow.ActivityOptions{ + ScheduleToStartTimeout: time.Minute, + StartToCloseTimeout: time.Minute, + } + ctx = workflow.WithActivityOptions(ctx, ao) + var result1 string + err := workflow.ExecuteActivity(ctx, ActivityA, data).Get(ctx, &result1) + if err != nil { + return "", err + } + var result2 string + err = workflow.ExecuteActivity(ctx, ActivityB, result1).Get(ctx, &result2) + return result2, err +} +``` +Now let's say we have replaced ActivityA with ActivityC, and deployed the updated code.
If there +is an existing workflow execution that was started by the original version of the workflow code, where +ActivityA had already completed and the result was recorded to history, the new version of the workflow +code will pick up that workflow execution and try to resume from there. However, the workflow will fail +because the new code expects a result for ActivityC from the history data, but instead it gets the +result for ActivityA. This causes the workflow to fail with a non-deterministic error. + +Thus we use `workflow.GetVersion()`. + +```go +var err error +v := workflow.GetVersion(ctx, "Step1", workflow.DefaultVersion, 1) +if v == workflow.DefaultVersion { + err = workflow.ExecuteActivity(ctx, ActivityA, data).Get(ctx, &result1) +} else { + err = workflow.ExecuteActivity(ctx, ActivityC, data).Get(ctx, &result1) +} +if err != nil { + return "", err +} + +var result2 string +err = workflow.ExecuteActivity(ctx, ActivityB, result1).Get(ctx, &result2) +return result2, err +``` +When `workflow.GetVersion()` is run for the new workflow execution, it records a marker in the workflow +history so that all future calls to `GetVersion` for this change Id--`Step1` in the example--on this +workflow execution will always return the given version number, which is `1` in the example. + +If you make an additional change, such as replacing ActivityC with ActivityD, you need to +add some additional code: + +```go +v := workflow.GetVersion(ctx, "Step1", workflow.DefaultVersion, 2) +if v == workflow.DefaultVersion { + err = workflow.ExecuteActivity(ctx, ActivityA, data).Get(ctx, &result1) +} else if v == 1 { + err = workflow.ExecuteActivity(ctx, ActivityC, data).Get(ctx, &result1) +} else { + err = workflow.ExecuteActivity(ctx, ActivityD, data).Get(ctx, &result1) +} +``` +Note that we have changed `maxSupported` from 1 to 2. A workflow that had already passed this +`GetVersion()` call before it was introduced will return `DefaultVersion`.
A workflow that was run +with `maxSupported` set to 1 will return 1. New workflows will return 2. + +After you are sure that all of the workflow executions prior to version 1 have completed, you can +remove the code for that version. It should now look like the following: + +```go +v := workflow.GetVersion(ctx, "Step1", 1, 2) +if v == 1 { + err = workflow.ExecuteActivity(ctx, ActivityC, data).Get(ctx, &result1) +} else { + err = workflow.ExecuteActivity(ctx, ActivityD, data).Get(ctx, &result1) +} +``` +You'll note that `minSupported` has changed from `DefaultVersion` to `1`. If an older version of the +workflow execution history is replayed on this code, it will fail because the minimum expected version +is 1. After you are sure that all of the workflow executions for version 1 have completed, then you +can remove 1 so that your code would look like the following: + +```go +_ = workflow.GetVersion(ctx, "Step1", 2, 2) +err = workflow.ExecuteActivity(ctx, ActivityD, data).Get(ctx, &result1) +``` +Note that we have preserved the call to `GetVersion()`. There are two reasons to preserve this call: + +1. This ensures that if there is a workflow execution still running for an older version, it will +fail here and not proceed. +2. If you need to make additional changes for `Step1`, such as changing ActivityD to ActivityE, you +only need to update `maxSupported` from 2 to 3 and branch from there. + +You only need to preserve the first call to `GetVersion()` for each `changeID`. All subsequent calls to +`GetVersion()` with the same change Id are safe to remove. If necessary, you can remove the first +`GetVersion()` call, but you need to ensure the following: + +* All executions with an older version are completed. +* You can no longer use `Step1` for the changeId.
If you need to make changes to that same part in +the future, such as change from ActivityD to ActivityE, you would need to use a different changeId +like `Step1-fix2`, and start minVersion from DefaultVersion again. The code would look like the +following: + +```go +v := workflow.GetVersion(ctx, "Step1-fix2", workflow.DefaultVersion, 1) +if v == workflow.DefaultVersion { + err = workflow.ExecuteActivity(ctx, ActivityD, data).Get(ctx, &result1) +} else { + err = workflow.ExecuteActivity(ctx, ActivityE, data).Get(ctx, &result1) +} +``` +Upgrading a workflow is straightforward if you don't need to preserve your currently running +workflow executions. You can simply terminate all of the currently running workflow executions and +suspend new ones from being created while you deploy the new version of your workflow code, which does +not use `GetVersion()`, and then resume workflow creation. However, that is often not the case, and +you need to take care of the currently running workflow executions, so using `GetVersion()` to update +your code is the method to use. + +However, if you want your currently running workflows to proceed based on the current workflow logic, +but you want to ensure new workflows are running on new logic, you can define your workflow as a +new `WorkflowType`, and change your start path (calls to `StartWorkflow()`) to start the new workflow +type. + +## Sanity checking + +The Temporal client SDK performs a sanity check to help prevent obvious incompatible changes. +The sanity check verifies whether a decision made in replay matches the event recorded in history, +in the same order. 
The decision is generated by calling any of the following methods: + +* workflow.ExecuteActivity() +* workflow.ExecuteChildWorkflow() +* workflow.NewTimer() +* workflow.Sleep() +* workflow.SideEffect() +* workflow.RequestCancelWorkflow() +* workflow.SignalExternalWorkflow() + +Adding, removing, or reordering any of the above methods triggers the sanity check and results in +a non-deterministic error. + +The sanity check does not perform a thorough check. For example, it does not check on the activity's +input arguments or the timer duration. If the check is enforced on every property, then it becomes +too restricted and harder to maintain the workflow code. For example, if you move your activity code +from one package to another package, that changes the `ActivityType`, which technically becomes a different +activity. But, we don't want to fail on that change, so we only check the function name part of the +`ActivityType`. diff --git a/versioned_docs/version-0.25.0/go-workers.md b/versioned_docs/version-0.25.0/go-workers.md new file mode 100644 index 0000000000..3963fa23a2 --- /dev/null +++ b/versioned_docs/version-0.25.0/go-workers.md @@ -0,0 +1,63 @@ +--- +id: go-workers +title: Worker Service +--- + +A worker or *worker service* is a service that hosts the workflow and activity implementations. The worker polls the *Temporal service* for tasks, performs those tasks, and communicates task execution results back to the *Temporal service*. Worker services are developed, deployed, and operated by Temporal customers. + +You can run a Temporal worker in a new or an existing service. Use the framework APIs to start the Temporal worker and link in all activity and workflow implementations that you require the service to execute. 
+ +```go +package main + +import ( + "os" + "os/signal" + + "github.com/uber-go/tally" + "go.uber.org/zap" + + "go.temporal.io/temporal/client" + "go.temporal.io/temporal/worker" + "go.temporal.io/temporal/workflow" +) + +var ( + Tasklist = "samples_tl" +) + +func main() { + logger, err := zap.NewDevelopment() + if err != nil { + panic(err) + } + + // The client and worker are heavyweight objects that should be created once per process. + serviceClient, err := client.NewClient(client.Options{ + HostPort: client.DefaultHostPort, + Logger: logger, + }) + if err != nil { + logger.Fatal("Unable to create client", zap.Error(err)) + } + defer serviceClient.Close() + + worker := worker.New(serviceClient, Tasklist, worker.Options{}) + + worker.RegisterWorkflow(MyWorkflow) + worker.RegisterActivity(MyActivity) + + err = worker.Start() + if err != nil { + logger.Fatal("Unable to start worker", zap.Error(err)) + } +} + +func MyWorkflow(context workflow.Context) error { + return nil +} + +func MyActivity() error { + return nil +} +``` diff --git a/versioned_docs/version-0.25.0/go-workflow-testing.md b/versioned_docs/version-0.25.0/go-workflow-testing.md new file mode 100644 index 0000000000..c1b3cad54c --- /dev/null +++ b/versioned_docs/version-0.25.0/go-workflow-testing.md @@ -0,0 +1,181 @@ +--- +id: go-workflow-testing +title: Testing +--- + +The Temporal Go client library provides a test framework to facilitate testing workflow implementations. +The framework is suited for implementing unit tests as well as functional tests of the workflow logic. 
+ +The following code implements unit tests for the `SimpleWorkflow` sample: + +```go +package sample + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + + "go.temporal.io/temporal/activity" + "go.temporal.io/temporal/testsuite" +) + +type UnitTestSuite struct { + suite.Suite + testsuite.WorkflowTestSuite + + env *testsuite.TestWorkflowEnvironment +} + +func (s *UnitTestSuite) SetupTest() { + s.env = s.NewTestWorkflowEnvironment() +} + +func (s *UnitTestSuite) AfterTest(suiteName, testName string) { + s.env.AssertExpectations(s.T()) +} + +func (s *UnitTestSuite) Test_SimpleWorkflow_Success() { + s.env.ExecuteWorkflow(SimpleWorkflow, "test_success") + + s.True(s.env.IsWorkflowCompleted()) + s.NoError(s.env.GetWorkflowError()) +} + +func (s *UnitTestSuite) Test_SimpleWorkflow_ActivityParamCorrect() { + s.env.OnActivity(SimpleActivity, mock.Anything, mock.Anything).Return( + func(ctx context.Context, value string) (string, error) { + s.Equal("test_success", value) + return value, nil + }) + s.env.ExecuteWorkflow(SimpleWorkflow, "test_success") + + s.True(s.env.IsWorkflowCompleted()) + s.NoError(s.env.GetWorkflowError()) +} + +func (s *UnitTestSuite) Test_SimpleWorkflow_ActivityFails() { + s.env.OnActivity(SimpleActivity, mock.Anything, mock.Anything).Return( + "", errors.New("SimpleActivityFailure")) + s.env.ExecuteWorkflow(SimpleWorkflow, "test_failure") + + s.True(s.env.IsWorkflowCompleted()) + + s.NotNil(s.env.GetWorkflowError()) + _, ok := s.env.GetWorkflowError().(*cadence.GenericError) + s.True(ok) + s.Equal("SimpleActivityFailure", s.env.GetWorkflowError().Error()) +} + +func TestUnitTestSuite(t *testing.T) { + suite.Run(t, new(UnitTestSuite)) +} +``` + +## Setup + +To run unit tests, we first define a "test suite" struct that absorbs both the +basic suite functionality from [testify](https://pkg.go.dev/github.com/stretchr/testify/suite) +via `suite.Suite` and the suite functionality from the 
Temporal test framework via +`testsuite.WorkflowTestSuite`. Because every test in this test suite will test our workflow, we +add a property to our struct to hold an instance of the test environment. This allows us to initialize +the test environment in a setup method. For testing workflows, we use a `testsuite.TestWorkflowEnvironment`. + +Next, we implement a `SetupTest` method to set up a new test environment before each test. Doing so +ensures that each test runs in its own isolated sandbox. We also implement an `AfterTest` function +where we assert that all mocks we set up were indeed called by invoking `s.env.AssertExpectations(s.T())`. + +Finally, we create a regular test function recognized by "go test" and pass the struct to `suite.Run`. + +## A Simple Test + +The simplest test case we can write is to have the test environment execute the workflow and then +evaluate the results. + +```go +func (s *UnitTestSuite) Test_SimpleWorkflow_Success() { + s.env.ExecuteWorkflow(SimpleWorkflow, "test_success") + + s.True(s.env.IsWorkflowCompleted()) + s.NoError(s.env.GetWorkflowError()) +} +``` +Calling `s.env.ExecuteWorkflow(...)` executes the workflow logic and any invoked activities inside the +test process. The first parameter of `s.env.ExecuteWorkflow(...)` is the workflow function, +and any subsequent parameters contain values for custom input parameters declared by the workflow +function. + +> Note that unless the activity invocations are mocked or activity implementation +> replaced (see [Activity mocking and overriding](#activity-mocking-and-overriding)), the test environment +> will execute the actual activity code including any calls to outside services. + +After executing the workflow in the above example, we assert that the workflow ran to completion +via the call to `s.env.IsWorkflowCompleted()`. We also assert that no errors were returned by asserting +on the return value of `s.env.GetWorkflowError()`.
If our workflow returned a value, we could have +retrieved that value via a call to `s.env.GetWorkflowResult(&value)` and had additional asserts on that +value. + +## Activity mocking and overriding + +When running unit tests on workflows, we want to test the workflow logic in isolation. Additionally, +we want to inject activity errors during our test runs. The test framework provides two mechanisms +that support these scenarios: activity mocking and activity overriding. Both of these mechanisms allow +you to change the behavior of activities invoked by your workflow without the need to modify the actual +workflow code. + +Let's take a look at a test that simulates a test that fails via the "activity mocking" mechanism. + +```go +func (s *UnitTestSuite) Test_SimpleWorkflow_ActivityFails() { + s.env.OnActivity(SimpleActivity, mock.Anything, mock.Anything).Return( + "", errors.New("SimpleActivityFailure")) + s.env.ExecuteWorkflow(SimpleWorkflow, "test_failure") + + s.True(s.env.IsWorkflowCompleted()) + + s.NotNil(s.env.GetWorkflowError()) + _, ok := s.env.GetWorkflowError().(*cadence.GenericError) + s.True(ok) + s.Equal("SimpleActivityFailure", s.env.GetWorkflowError().Error()) +} +``` +This test simulates the execution of the activity `SimpleActivity` that is invoked by our workflow +`SimpleWorkflow` returning an error. We accomplish this by setting up a mock on the test environment +for the `SimpleActivity` that returns an error. + +```go +s.env.OnActivity(SimpleActivity, mock.Anything, mock.Anything).Return( + "", errors.New("SimpleActivityFailure")) +``` +With the mock set up we can now execute the workflow via the s.env.ExecuteWorkflow(...) method and +assert that the workflow completed successfully and returned the expected error. + +Simply mocking the execution to return a desired value or error is a pretty powerful mechanism to +isolate workflow logic. 
However, sometimes we want to replace the activity with an alternate implementation +to support a more complex test scenario. Let's assume we want to validate that the activity gets called +with the correct parameters. + +```go +func (s *UnitTestSuite) Test_SimpleWorkflow_ActivityParamCorrect() { + s.env.OnActivity(SimpleActivity, mock.Anything, mock.Anything).Return( + func(ctx context.Context, value string) (string, error) { + s.Equal("test_success", value) + return value, nil + }) + s.env.ExecuteWorkflow(SimpleWorkflow, "test_success") + + s.True(s.env.IsWorkflowCompleted()) + s.NoError(s.env.GetWorkflowError()) +} +``` + +In this example, we provide a function implementation as the parameter to `Return`. This allows us to +provide an alternate implementation for the activity `SimpleActivity`. The framework will execute this +function whenever the activity is invoked and pass on the return value from the function as the result +of the activity invocation. Additionally, the framework will validate that the signature of the “mock” +function matches the signature of the original activity function. + +Since this can be an entire function, there is no limitation as to what we can do here. In this +example, we assert that the “value” param has the same content as the value param we passed to the workflow. 
diff --git a/versioned_docs/version-0.25.0/installing-server.md b/versioned_docs/version-0.25.0/installing-server.md new file mode 100644 index 0000000000..f679a77cbc --- /dev/null +++ b/versioned_docs/version-0.25.0/installing-server.md @@ -0,0 +1,63 @@ +--- +id: installing-server +title: Installing Temporal +sidebar_label: Installing Temporal +--- + +### Prerequisites + +Follow the Docker installation instructions found here: [https://docs.docker.com/engine/install](https://docs.docker.com/engine/install) + +Follow the docker-compose installation instructions found here: [https://docs.docker.com/compose/install](https://docs.docker.com/compose/install) + +### Run Temporal Server Using docker-compose + +Download the Temporal docker-compose file to preferred location (i.e. `quick_start` directory): + +```bash +curl -L https://github.com/temporalio/temporal/releases/download/v0.25.0/docker.tar.gz | tar -xz --strip-components 1 docker/docker-compose.yml + +ls +# docker-compose.yml +``` + +Start Temporal Service: + +```bash +docker-compose up +``` + +The output should look similar to: + +``` +Creating network "quick_start_default" with the default driver +Pulling temporal (temporalio/temporal-auto-setup:0.25.0)... +... +... 
+temporal_1 | Description: Default namespace for Temporal Server +temporal_1 | OwnerEmail: +temporal_1 | NamespaceData: map[string]string(nil) +temporal_1 | Status: NamespaceStatusRegistered +temporal_1 | RetentionInDays: 1 +temporal_1 | EmitMetrics: false +temporal_1 | ActiveClusterName: active +temporal_1 | Clusters: active +temporal_1 | HistoryArchivalStatus: Enabled +temporal_1 | HistoryArchivalURI: file:///tmp/temporal_archival/development +temporal_1 | VisibilityArchivalStatus: Disabled +temporal_1 | Bad binaries to reset: +temporal_1 | +-----------------+----------+------------+--------+ +temporal_1 | | BINARY CHECKSUM | OPERATOR | START TIME | REASON | +temporal_1 | +-----------------+----------+------------+--------+ +temporal_1 | +-----------------+----------+------------+--------+ +temporal_1 | + echo 'Default namespace registration complete.' +temporal_1 | Default namespace registration complete. +``` + +At this point Temporal Server is running! You can also see the web interface on [localhost:8088](http://localhost:8088/) + +## Write Workflows and Activities using Client SDK + +Try out [Java SDK](/docs/java-quick-start). + +Try out [Go SDK](/docs/go-quick-start). diff --git a/versioned_docs/version-0.25.0/java-activity-interface.md b/versioned_docs/version-0.25.0/java-activity-interface.md new file mode 100644 index 0000000000..b340b5581f --- /dev/null +++ b/versioned_docs/version-0.25.0/java-activity-interface.md @@ -0,0 +1,35 @@ +--- +id: java-activity-interface +title: Activity Interface +--- + +An activity is a manifestation of a particular task in the business logic. + +Activities are defined as methods of a plain Java interface annotated with `@ActivityInterface`. +Each method defines a single activity type. A single workflow can use more than one activity interface and call more +that one activity method from the same interface. 
+The only requirement is that activity method arguments and return values are serializable to a byte array using the provided +[DataConverter](https://static.javadoc.io/com.uber.cadence/cadence-client/2.4.1/index.html?com/uber/cadence/converter/DataConverter.html) +interface. The default implementation uses a JSON serializer, but an alternative implementation can be easily configured. + +Following is an example of an interface that defines four activities: + +```java +@ActivityInterface +public interface FileProcessingActivities { + + void upload(String bucketName, String localName, String targetName); + + String download(String bucketName, String remoteName); + + @ActivityMethod(name="transcode_file") + String processFile(String localName); + + void deleteLocalFile(String fileName); +} + +``` +We recommend to use a single value type argument for activity methods. In this way, adding new arguments as fields +to the value type is a backwards-compatible change. + +An optional `@ActivityMethod` annotation can be used to override a default activity name. diff --git a/versioned_docs/version-0.25.0/java-distributed-cron.md b/versioned_docs/version-0.25.0/java-distributed-cron.md new file mode 100644 index 0000000000..4cbc0ddece --- /dev/null +++ b/versioned_docs/version-0.25.0/java-distributed-cron.md @@ -0,0 +1,82 @@ +--- +id: java-distributed-cron +title: Distributed CRON +--- + +It is relatively straightforward to turn any Temporal workflow into a Cron workflow. All you need +is to supply a cron schedule when starting the workflow using the CronSchedule +parameter of +[StartWorkflowOptions](https://static.javadoc.io/com.uber.cadence/cadence-client/2.5.1/com/uber/cadence/client/WorkflowOptions.html). + +You can also start a workflow using the Temporal CLI with an optional cron schedule using the `--cron` argument. + +For workflows with CronSchedule: + +* CronSchedule is based on UTC time. For example cron schedule "15 8 \* \* \*" + will run daily at 8:15am UTC. 
+* If a workflow failed and a RetryPolicy is supplied to the StartWorkflowOptions + as well, the workflow will retry based on the RetryPolicy. While the workflow is + retrying, the server will not schedule the next cron run. +* Temporal server only schedules the next cron run after the current run is + completed. If the next schedule is due while a workflow is running (or retrying), + then it will skip that schedule. +* Cron workflows will not stop until they are terminated or cancelled. + +Temporal supports the standard cron spec: + +```go +// CronSchedule - Optional cron schedule for workflow. If a cron schedule is specified, the workflow will run +// as a cron based on the schedule. The scheduling will be based on UTC time. The schedule for the next run only happens +// after the current run is completed/failed/timeout. If a RetryPolicy is also supplied, and the workflow failed +// or timed out, the workflow will be retried based on the retry policy. While the workflow is retrying, it won't +// schedule its next run. If the next schedule is due while the workflow is running (or retrying), then it will skip that +// schedule. Cron workflow will not stop until it is terminated or cancelled (by returning temporal.CanceledError). +// The cron spec is as follows: +// ┌───────────── minute (0 - 59) +// │ ┌───────────── hour (0 - 23) +// │ │ ┌───────────── day of the month (1 - 31) +// │ │ │ ┌───────────── month (1 - 12) +// │ │ │ │ ┌───────────── day of the week (0 - 6) (Sunday to Saturday) +// │ │ │ │ │ +// │ │ │ │ │ +// * * * * * +CronSchedule string +``` + +The [crontab guru site](https://crontab.guru/) is useful for testing your cron expressions. + +## Convert an existing cron workflow + +Before CronSchedule was available, the previous approach to implementing cron +workflows was to use a delay timer as the last step and then return +`ContinueAsNew`. One problem with that implementation is that if the workflow +fails or times out, the cron would stop. 
+ +To convert those workflows to make use of Temporal CronSchedule, all you need is to remove the delay timer and return without using +`ContinueAsNew`. Then start the workflow with the desired CronSchedule. + + +## Retrieve last successful result + +Sometimes it is useful to obtain the progress of previous successful runs. +This is supported by two new APIs in the client library: +`HasLastCompletionResult` and `GetLastCompletionResult`. Below is an example of how +to use this in Java: + +```java +public String cronWorkflow() { + String lastProcessedFileName = Workflow.getLastCompletionResult(String.class); + + // Process work starting from the lastProcessedFileName. + // Business logic implementation goes here. + // Updates lastProcessedFileName to the new value. + + return lastProcessedFileName; +} +``` + +Note that this works even if one of the cron schedule runs failed. The +next schedule will still get the last successful result if it ever successfully +completed at least once. For example, for a daily cron workflow, if the first day +run succeeds and the second day fails, then the third day run will still get +the result from first day's run using these APIs. diff --git a/versioned_docs/version-0.25.0/java-implementing-activities.md b/versioned_docs/version-0.25.0/java-implementing-activities.md new file mode 100644 index 0000000000..40a69c9cb5 --- /dev/null +++ b/versioned_docs/version-0.25.0/java-implementing-activities.md @@ -0,0 +1,127 @@ +--- +id: java-implementing-activities +title: Implementing activities +--- + +Activity implementation is an implementation of an activity interface. A single instance of the activities implementation +is shared across multiple simultaneous activity invocations. Therefore, the activity implementation code must be *thread safe*. + +The values passed to activities through invocation parameters or returned through a result value are recorded in the execution history. 
+The entire execution history is transferred from the Temporal service to workflow workers when a workflow state needs to recover. +A large execution history can thus adversely impact the performance of your workflow. Therefore, be mindful of the amount of data you transfer via activity invocation parameters or return values. Otherwise, no additional limitations exist on activity implementations. + +```java +public class FileProcessingActivitiesImpl implements FileProcessingActivities { + + private final AmazonS3 s3Client; + + private final String localDirectory; + + void upload(String bucketName, String localName, String targetName) { + File f = new File(localName); + s3Client.putObject(bucket, remoteName, f); + } + + String download(String bucketName, String remoteName, String localName) { + // Implementation omitted for brevity. + return downloadFileFromS3(bucketName, remoteName, localDirectory + localName); + } + + String processFile(String localName) { + // Implementation omitted for brevity. + return compressFile(localName); + } + + void deleteLocalFile(String fileName) { + File f = new File(localDirectory + fileName); + f.delete(); + } +} +``` + +## Accessing Activity Info + +The [Activity](https://static.javadoc.io/com.uber.cadence/cadence-client/2.4.1/index.html?com/uber/cadence/activity/Activity.html) +class provides static getters to access information about the workflow that invoked it. Note that this information is stored in a thread local variable. Therefore, calls to Activity accessors succeed only in the thread that invoked the activity function. 
+ +```java +public class FileProcessingActivitiesImpl implements FileProcessingActivities { + + @Override + public String download(String bucketName, String remoteName, String localName) { + log.info("namespace=" + Activity.getNamespace()); + WorkflowExecution execution = Activity.getWorkflowExecution(); + log.info("workflowId=" + execution.getWorkflowId()); + log.info("runId=" + execution.getRunId()); + ActivityTask activityTask = Activity.getTask(); + log.info("activityId=" + activityTask.getActivityId()); + log.info("activityTimeout=" + activityTask.getStartToCloseTimeoutSeconds()); + return downloadFileFromS3(bucketName, remoteName, localDirectory + localName); + } + ... + } +``` + +## Asynchronous Activity Completion + +Sometimes an activity lifecycle goes beyond a synchronous method invocation. For example, a request can be put in a queue +and later a reply comes and is picked up by a different worker process. The whole request-reply interaction can be modeled +as a single Temporal activity. + +To indicate that an activity should not be completed upon its method return, call `Activity.doNotCompleteOnReturn()` from the +original activity thread. Then later, when replies come, complete the activity using [ActivityCompletionClient](https://static.javadoc.io/com.uber.cadence/cadence-client/2.4.1/index.html?com/uber/cadence/client/ActivityCompletionClient.html). +To correlate activity invocation with completion, use either `TaskToken` or workflow and activity Ids. + +```java +public class FileProcessingActivitiesImpl implements FileProcessingActivities { + + public String download(String bucketName, String remoteName, String localName) { + byte[] taskToken = Activity.getTaskToken(); // Used to correlate reply. + asyncDownloadFileFromS3(taskToken, bucketName, remoteName, localDirectory + localName); + Activity.doNotCompleteOnReturn(); + return "ignored"; // Return value is ignored when doNotCompleteOnReturn was called. + } + ... 
+} +``` +When the download is complete, the download service potentially calls back from a different process: +```java + public void completeActivity(byte[] taskToken, R result) { + completionClient.complete(taskToken, result); + } + + public void failActivity(byte[] taskToken, Exception failure) { + completionClient.completeExceptionally(taskToken, failure); + } +``` + +## Activity Heart Beating + +Some activities are long running. To react to a crash quickly, use a heartbeat mechanism. +The `Activity.heartbeat` function lets the Temporal service know that the activity is still alive. You can piggyback +`details` on an activity heartbeat. If an activity times out, the last value of `details` is included +in the `ActivityTimeoutException` delivered to a workflow. Then the workflow can pass the details to +the next activity invocation. This acts as a periodic checkpoint mechanism for the progress of an activity. +```java +public class FileProcessingActivitiesImpl implements FileProcessingActivities { + + @Override + public String download(String bucketName, String remoteName, String localName) { + InputStream inputStream = openInputStream(file); + try { + byte[] bytes = new byte[MAX_BUFFER_SIZE]; + while ((read = inputStream.read(bytes)) != -1) { + totalRead += read; + f.write(bytes, 0, read); + /* + * Let the service know about the download progress. + */ + Activity.heartbeat(totalRead); + } + } finally { + inputStream.close(); + } + } + ... +} +``` diff --git a/versioned_docs/version-0.25.0/java-implementing-workflows.md b/versioned_docs/version-0.25.0/java-implementing-workflows.md new file mode 100644 index 0000000000..cc6f2e44f1 --- /dev/null +++ b/versioned_docs/version-0.25.0/java-implementing-workflows.md @@ -0,0 +1,255 @@ +--- +id: java-implementing-workflows +title: Implementing Workflows +--- + +A workflow implementation implements a workflow interface. 
Each time a new workflow execution is started, +a new instance of the workflow implementation object is created. Then, one of the methods +(depending on which workflow type has been started) annotated with `@WorkflowMethod` is invoked. As soon as this method +returns, the workflow execution is closed. While workflow execution is open, it can receive calls to signal and query methods. +No additional calls to workflow methods are allowed. The workflow object is stateful, so query and signal methods +can communicate with the other parts of the workflow through workflow object fields. + +## Workflow Implementation Constraints + +Temporal uses the [Microsoft Azure Event Sourcing pattern](https://docs.microsoft.com/en-us/azure/architecture/patterns/event-sourcing) to recover +the state of a workflow object including its threads and local variable values. +In essence, every time a workflow state has to be restored, its code is re-executed from the beginning. When replaying, side +effects (such as activity invocations) are ignored because they are already recorded in the workflow event history. +When writing workflow logic, the replay is not visible, so the code should be written since it executes only once. +This design puts the following constraints on the workflow implementation: + +- Do not use any mutable global variables because multiple instances of workflows are executed in parallel. +- Do not call any non-deterministic functions like non seeded random or UUID.randomUUID() directly from the workflow code. + +Always do the following in the workflow implementation code: +- Don’t perform any IO or service calls as they are not usually deterministic. Use activities for this. +- Only use `Workflow.currentTimeMillis()` to get the current time inside a workflow. +- Do not use native Java `Thread` or any other multi-threaded classes like `ThreadPoolExecutor`. Use `Async.function` or `Async.procedure` +to execute code asynchronously. 
+- Don't use any synchronization, locks, and other standard Java blocking concurrency-related classes besides those provided +by the Workflow class. There is no need in explicit synchronization because multi-threaded code inside a workflow is +executed one thread at a time and under a global lock. + - Call `WorkflowThread.sleep` instead of `Thread.sleep`. + - Use `Promise` and `CompletablePromise` instead of `Future` and `CompletableFuture`. + - Use `WorkflowQueue` instead of `BlockingQueue`. +- Use `Workflow.getVersion` when making any changes to the workflow code. Without this, any deployment of updated workflow code +might break already open workflows. +- Don’t access configuration APIs directly from a workflow because changes in the configuration might affect a workflow execution path. +Pass it as an argument to a workflow function or use an activity to load it. + +Workflow method arguments and return values are serializable to a byte array using the provided +[DataConverter](https://static.javadoc.io/com.uber.cadence/cadence-client/2.4.1/index.html?com/uber/cadence/converter/DataConverter.html) +interface. The default implementation uses JSON serializer, but you can use any alternative serialization mechanism. + +The values passed to workflows through invocation parameters or returned through a result value are recorded in the execution history. +The entire execution history is transferred from the Temporal service to workflow workers with every event that the workflow logic needs to process. +A large execution history can thus adversely impact the performance of your workflow. +Therefore, be mindful of the amount of data that you transfer via activity invocation parameters or return values. +Otherwise, no additional limitations exist on activity implementations. + +## Calling Activities + +`Workflow.newActivityStub` returns a client-side stub that implements an activity interface. +It takes activity type and activity options as arguments. 
Activity options are needed only if some of the required +timeouts are not specified through the `@ActivityMethod` annotation. + +Calling a method on this interface invokes an activity that implements this method. +An activity invocation synchronously blocks until the activity completes, fails, or times out. Even if activity +execution takes a few months, the workflow code still sees it as a single synchronous invocation. +It doesn't matter what happens to the processes that host the workflow. The business logic code +just sees a single method call. +```java +public class FileProcessingWorkflowImpl implements FileProcessingWorkflow { + + private final FileProcessingActivities activities; + + public FileProcessingWorkflowImpl() { + this.activities = Workflow.newActivityStub( + FileProcessingActivities.class, + ActivityOptions.newBuilder() + .setStartToCloseTimeout(Duration.ofHours(1)) + .build()); + } + + @Override + public void processFile(Arguments args) { + String localName = null; + String processedName = null; + try { + localName = activities.download(args.getSourceBucketName(), args.getSourceFilename()); + processedName = activities.processFile(localName); + activities.upload(args.getTargetBucketName(), args.getTargetFilename(), processedName); + } finally { + if (localName != null) { // File was downloaded. + activities.deleteLocalFile(localName); + } + if (processedName != null) { // File was processed. + activities.deleteLocalFile(processedName); + } + } + } + ... +} +``` +If different activities need different options, like timeouts or a task list, multiple client-side stubs can be created +with different options. 
+ +```java +public FileProcessingWorkflowImpl() { + ActivityOptions options1 = ActivityOptions.newBuilder() + .setTaskList("taskList1") + .setStartToCloseTimeout(Duration.ofMinutes(10)) + .build(); + this.store1 = Workflow.newActivityStub(FileProcessingActivities.class, options1); + + ActivityOptions options2 = ActivityOptions.newBuilder() + .setTaskList("taskList2") + .setStartToCloseTimeout(Duration.ofMinutes(5)) + .build(); + this.store2 = Workflow.newActivityStub(FileProcessingActivities.class, options2); +} +``` + +## Calling Activities Asynchronously + +Sometimes workflows need to perform certain operations in parallel. +The `Async` class static methods allow you to invoke any activity asynchronously. The calls return a `Promise` result immediately. +`Promise` is similar to both Java `Future` and `CompletionStage`. The `Promise` `get` blocks until a result is available. +It also exposes the `thenApply` and `handle` methods. See the `Promise` JavaDoc for technical details about differences with `Future`. + +To convert a synchronous call: +```java +String localName = activities.download(sourceBucket, sourceFile); +``` +To asynchronous style, the method reference is passed to `Async.function` or `Async.procedure` +followed by activity arguments: +```java +Promise localNamePromise = Async.function(activities::download, sourceBucket, sourceFile); +``` +Then to wait synchronously for the result: +```java +String localName = localNamePromise.get(); +``` +Here is the above example rewritten to call download and upload in parallel on multiple files: +```java +public void processFile(Arguments args) { + List> localNamePromises = new ArrayList<>(); + List processedNames = null; + try { + // Download all files in parallel. 
+ for (String sourceFilename : args.getSourceFilenames()) { + Promise localName = Async.function(activities::download, + args.getSourceBucketName(), sourceFilename); + localNamePromises.add(localName); + } + // allOf converts a list of promises to a single promise that contains a list + // of each promise value. + Promise> localNamesPromise = Promise.allOf(localNamePromises); + + // All code until the next line wasn't blocking. + // The promise get is a blocking call. + List localNames = localNamesPromise.get(); + processedNames = activities.processFiles(localNames); + + // Upload all results in parallel. + List> uploadedList = new ArrayList<>(); + for (String processedName : processedNames) { + Promise uploaded = Async.procedure(activities::upload, + args.getTargetBucketName(), args.getTargetFilename(), processedName); + uploadedList.add(uploaded); + } + // Wait for all uploads to complete. + Promise allUploaded = Promise.allOf(uploadedList); + allUploaded.get(); // blocks until all promises are ready. + } finally { + for (Promise localNamePromise : localNamePromises) { + // Skip files that haven't completed downloading. + if (localNamePromise.isCompleted()) { + activities.deleteLocalFile(localNamePromise.get()); + } + } + if (processedNames != null) { + for (String processedName : processedNames) { + activities.deleteLocalFile(processedName); + } + } + } +} +``` + +## Child Workflows +Besides activities, a workflow can also orchestrate other workflows. + +`Workflow.newChildWorkflowStub` returns a client-side stub that implements a child workflow interface. + It takes a child workflow type and optional child workflow options as arguments. Workflow options may be needed to override + the timeouts and task list if they differ from the ones defined in the `@WorkflowMethod` annotation or parent workflow. + + The first call to the child workflow stub must always be to a method annotated with `@WorkflowMethod`. 
Similar to activities, a call + can be made synchronous or asynchronous by using `Async#function` or `Async#procedure`. The synchronous call blocks until a child workflow completes. The asynchronous call + returns a `Promise` that can be used to wait for the completion. After an async call returns the stub, it can be used to send signals to the child + by calling methods annotated with `@SignalMethod`. Querying a child workflow by calling methods annotated with `@QueryMethod` + from within workflow code is not supported. However, queries can be done from activities + using the provided `WorkflowClient` stub. + ```java +@WorkflowInterface +public interface GreetingChild { + @WorkflowMethod + String composeGreeting(String greeting, String name); +} + +public static class GreetingWorkflowImpl implements GreetingWorkflow { + + @Override + public String getGreeting(String name) { + GreetingChild child = Workflow.newChildWorkflowStub(GreetingChild.class); + + // This is a blocking call that returns only after child has completed. + return child.composeGreeting("Hello", name ); + } +} +``` +Running two children in parallel: +```java +public static class GreetingWorkflowImpl implements GreetingWorkflow { + + @Override + public String getGreeting(String name) { + + // Workflows are stateful, so a new stub must be created for each new child. + GreetingChild child1 = Workflow.newChildWorkflowStub(GreetingChild.class); + Promise greeting1 = Async.function(child1::composeGreeting, "Hello", name); + + // Both children will run concurrently. + GreetingChild child2 = Workflow.newChildWorkflowStub(GreetingChild.class); + Promise greeting2 = Async.function(child2::composeGreeting, "Bye", name); + + // Do something else here. + ... 
+ return "First: " + greeting1.get() + ", second: " + greeting2.get(); + } +} +``` +To send a signal to a child, call a method annotated with `@SignalMethod`: +```java +@WorkflowInterface +public interface GreetingChild { + @WorkflowMethod + String composeGreeting(String greeting, String name); + + @SignalMethod + void updateName(String name); +} + +public static class GreetingWorkflowImpl implements GreetingWorkflow { + + @Override + public String getGreeting(String name) { + GreetingChild child = Workflow.newChildWorkflowStub(GreetingChild.class); + Promise greeting = Async.function(child::composeGreeting, "Hello", name); + child.updateName("Temporal"); + return greeting.get(); + } +} +``` +Calling methods annotated with `@QueryMethod` is not allowed from within workflow code. Use an activity to call them. diff --git a/versioned_docs/version-0.25.0/java-quick-start.md b/versioned_docs/version-0.25.0/java-quick-start.md new file mode 100644 index 0000000000..c1e1084654 --- /dev/null +++ b/versioned_docs/version-0.25.0/java-quick-start.md @@ -0,0 +1,774 @@ +--- +id: java-quick-start +title: Quick Start +--- + +This topic helps you install the Temporal server and implement a workflow. + +## Install Temporal Server Locally + +To run samples locally you need to run Temporal server locally using [instructions](/docs/installing-server). + +## Implement Hello World Java Workflow + +### Include Temporal Java SDK Dependency + +Go to the [Maven Repository Temporal Java Client Page](https://oss.sonatype.org/#nexus-search;quick~temporal-sdk) +and find the latest version of the library. Include it as a dependency into your Java project. 
For example if you +are using Gradle the dependency looks like: + +``` + compile group: 'io.temporal', name: 'temporal-sdk', version: '' +``` + +Also add the following dependencies that temporal-sdk relies on: + +``` + compile group: 'commons-configuration', name: 'commons-configuration', version: '1.9' + compile group: 'ch.qos.logback', name: 'logback-classic', version: '1.2.3' +``` + +Make sure that the following code compiles: + +```java +import io.temporal.workflow.Workflow; +import io.temporal.workflow.WorkflowMethod; +import org.slf4j.Logger; + +public class GettingStarted { + + private static Logger logger = Workflow.getLogger(GettingStarted.class); + + @WorkflowInterface + interface HelloWorld { + @WorkflowMethod + void sayHello(String name); + } + +} +``` + +If you are having problems setting up the build files use the +[Temporal Java Samples](https://github.com/temporalio/temporal-java-samples) GitHub repository as a reference. + +Also add the following logback config file somewhere in your classpath: + +```xml + + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + +``` + +### Implement Hello World Workflow + +Let's add `HelloWorldImpl` with the `sayHello` method that just logs the "Hello ..." and returns. + +```java +import io.temporal.worker.Worker; +import io.temporal.workflow.Workflow; +import io.temporal.workflow.WorkflowMethod; +import org.slf4j.Logger; + +public class GettingStarted { + + private static Logger logger = Workflow.getLogger(GettingStarted.class); + + @WorkflowInterface + public interface HelloWorld { + @WorkflowMethod + void sayHello(String name); + } + + public static class HelloWorldImpl implements HelloWorld { + + @Override + public void sayHello(String name) { + logger.info("Hello " + name + "!"); + } + } +} +``` + +To link the workflow implementation to the Temporal framework, it should be registered with a worker that connects to +a Temporal Service. 
By default the worker connects to the locally running Temporal service. + +```java + public static void main(String[] args) { + // gRPC stubs wrapper that talks to the local docker instance of temporal service. + WorkflowServiceStubs service = WorkflowServiceStubs.newInstance(); + // client that can be used to start and signal workflows + WorkflowClient client = WorkflowClient.newInstance(service); + // worker factory that can be used to create workers for specific task lists + WorkerFactory factory = WorkerFactory.newInstance(client); + Worker worker = factory.newWorker("HelloWorldTaskList"); + worker.registerWorkflowImplementationTypes(HelloWorldImpl.class); + factory.start(); + } +``` + +### Execute Hello World Workflow using the CLI + +Now run the worker program. Following is an example log: + +```text +18:39:45.522 [main] INFO i.t.i.WorkflowServiceStubsImpl - Created GRPC client for channel: ManagedChannelOrphanWrapper{delegate=ManagedChannelImpl{logId=1, target=127.0.0.1:7233}} +18:39:45.674 [main] INFO io.temporal.internal.worker.Poller - start(): Poller{options=PollerOptions{maximumPollRateIntervalMilliseconds=1000, maximumPollRatePerSecond=0.0, pollBackoffCoefficient=2.0, pollBackoffInitialInterval=PT0.1S, pollBackoffMaximumInterval=PT1M, pollThreadCount=1, pollThreadNamePrefix='Workflow Poller taskList="HelloWorldTaskList", namespace="default"'}, identity=unknown-mac} +18:39:45.676 [main] INFO io.temporal.internal.worker.Poller - start(): Poller{options=PollerOptions{maximumPollRateIntervalMilliseconds=1000, maximumPollRatePerSecond=0.0, pollBackoffCoefficient=2.0, pollBackoffInitialInterval=PT0.1S, pollBackoffMaximumInterval=PT1M, pollThreadCount=1, pollThreadNamePrefix='null'}, identity=95963a78-641d-434b-841e-a2efe7f8a19f} +``` + +No Hello printed. This is expected because a worker is just a workflow code host. The workflow has to be started to execute. 
Let's use Temporal CLI to start the workflow: + +```bash +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"World\" +Started Workflow Id: ef8c6cd6-de62-4481-8398-623865467696, run Id: 26eafcde-6cab-4836-9ad4-888a74e172e1 +``` + +The last line of output of the program should now be: + +``` +18:40:28.354 [workflow-1029765531] INFO main - Hello World! +``` + +Let's start another workflow execution: + +```bash +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"Temporal\" +Started Workflow Id: 7bdfba1d-b3f4-4665-88c2-cec73301dd52, run Id: d6c99e2d-7d76-458f-956b-a2f72af292bf +``` + +The last two lines of output of the program should now be: + +```text +18:40:28.354 [workflow-1029765531] INFO main - Hello World! +18:40:51.678 [workflow-1538256693] INFO main - Hello Temporal! 
+``` + +### List Workflows and Workflow History + +Let's list our workflows in the CLI: + +```bash +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow list + WORKFLOW TYPE | WORKFLOW ID | RUN ID | START TIME | EXECUTION TIME | END TIME + HelloWorld_sayHello | 7bdfba1d-b3f4-4665-88c2-cec73301dd52 | d6c99e2d-7d76-458f-956b-a2f72af292bf | 01:40:51 | 01:40:51 | 01:40:51 + HelloWorld_sayHello | ef8c6cd6-de62-4481-8398-623865467696 | 26eafcde-6cab-4836-9ad4-888a74e172e1 | 01:40:28 | 01:40:28 | 01:40:28 +``` + +Now let's look at the workflow execution history: + +```bash +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow showid 1965109f-607f-4b14-a5f2-24399a7b8fa7 + 1 EventTypeWorkflowExecutionStarted {WorkflowType:{Name:HelloWorld_sayHello}, + ParentInitiatedEventId:0, TaskList:{Name:HelloWorldTaskList, + Kind:TaskListKindNormal}, Input:["Temporal"], + ExecutionStartToCloseTimeoutSeconds:3600, + TaskStartToCloseTimeoutSeconds:10, + Initiator:ContinueAsNewInitiatorDecider, + ContinuedFailureDetails:[], LastCompletionResult:[], + OriginalExecutionRunId:d6c99e2d-7d76-458f-956b-a2f72af292bf, + Identity:tctl@docker-desktop, + FirstExecutionRunId:d6c99e2d-7d76-458f-956b-a2f72af292bf, + Attempt:0, ExpirationTimestamp:0, + FirstDecisionTaskBackoffSeconds:0} + 2 EventTypeDecisionTaskScheduled {TaskList:{Name:HelloWorldTaskList, + Kind:TaskListKindNormal}, + StartToCloseTimeoutSeconds:10, + Attempt:0} + 3 EventTypeDecisionTaskStarted {ScheduledEventId:2, Identity:unknown-mac, + RequestId:1ef618db-a3ec-45c3-b545-aea5ae5d36fb} + 4 EventTypeDecisionTaskCompleted {ExecutionContext:[], + ScheduledEventId:2, + StartedEventId:3, + Identity:unknown-mac} + 5 EventTypeWorkflowExecutionCompleted {Result:[], + DecisionTaskCompletedEventId:4} +``` + +Even for such a trivial workflow, the history gives a lot of useful information. For complex workflows this is a really useful tool for production and development troubleshooting. 
History can be automatically archived to a long-term blob store (for example Amazon S3) upon workflow completion for compliance, analytical, and troubleshooting purposes. + +### Workflow Id Uniqueness + +Before proceeding to a more complex workflow implementation, let's take a look at the workflow Id semantic. +When starting a workflow without providing an Id, the client generates one in the form of a UUID. In most real-life scenarios this is not a desired behavior. The business Id should be used instead. Here, we'll specify the Id when starting a workflow: + +```bash +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --workflow_id "HelloTemporal1" --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"Temporal\" +Started Workflow Id: HelloTemporal1, run Id: 78ca0a3f-8cd2-46a2-8d23-076c3f0f187c +``` + +Now the list operation is more meaningful as the WORKFLOW ID is our business Id: + +```bash +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow list + WORKFLOW TYPE | WORKFLOW ID | RUN ID | START TIME | EXECUTION TIME | END TIME + HelloWorld_sayHello | HelloTemporal1 | 78ca0a3f-8cd2-46a2-8d23-076c3f0f187c | 01:47:24 | 01:47:24 | 01:47:25 +``` + +After the previous one completes, let's try to start another workflow with the same Id: + +```bash +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --workflow_id "HelloTemporal1" --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"Temporal\" +Started Workflow Id: HelloTemporal1, run Id: 9b5e36a3-9868-4de5-bbdf-eda9cedcd865 +``` + +After the second start the workflow list is: + +```bash + WORKFLOW TYPE | WORKFLOW ID | RUN ID | START TIME | EXECUTION TIME | END TIME + HelloWorld_sayHello | HelloTemporal1 | 37a740e5-838c-4020-aed6-1111b0689c38 | 21:11:47 | 21:11:47 | 21:11:47 + HelloWorld_sayHello | HelloTemporal1 | 75170c60-6d72-48c6-b509-7c9d9f25a8a8 | 21:04:46 | 21:04:46 | 21:04:46 +``` + +It 
might be clear why every workflow has two Ids: Workflow Id and Run Id. Because the Workflow Id can be reused, the Run Id uniquely identifies a particular run of a workflow. Run Id is system generated and cannot be controlled by client code. + +Note - Under no circumstances does Temporal allow more than one instance of an open workflow with the same Id. Multiple workflow Ids are required when parallel invocations need to be supported (such as in an Actor pattern). + +### CLI Help + +See the CLI help command for all of the options supported: + +```bash +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow help start +NAME: + tctl workflow start - start a new workflow execution + +USAGE: + tctl workflow start [command options] [arguments...] + +OPTIONS: + --tasklist value, --tl value TaskList + --workflow_id value, --wid value, -w value WorkflowId + --workflow_type value, --wt value WorkflowTypeName + --execution_timeout value, --et value Execution start to close timeout in seconds (default: 0) + --decision_timeout value, --dt value Decision task start to close timeout in seconds (default: 10) + --cron value Optional cron schedule for the workflow. Cron spec is as following: + ┌───────────── minute (0 - 59) + │ ┌───────────── hour (0 - 23) + │ │ ┌───────────── day of the month (1 - 31) + │ │ │ ┌───────────── month (1 - 12) + │ │ │ │ ┌───────────── day of the week (0 - 6) (Sunday to Saturday) + │ │ │ │ │ + * * * * * + --workflowidreusepolicy value, --wrp value Optional input to configure if the same workflow Id is allowed to be used for a new workflow execution. Available options: 0: AllowDuplicate, 1: AllowDuplicateFailedOnly, 2: RejectDuplicate (default: 0) + --input value, -i value Optional input for the workflow, in JSON format. If there are multiple parameters, concatenate them and separate by a space. + --input_file value, --if value Optional input for the workflow from a JSON file.
If there are multiple JSON, concatenate them and separate by a space or newline. Input from the file will be overwritten by input from the command line. + --memo_key value Optional key of memo. If there are multiple keys, concatenate them and separate by space. + --memo value Optional info that can be shown in list workflow, in JSON format. If there are multiple JSON, concatenate them and separate by a space. The order must be the same as memo_key. + --memo_file value Optional info that can be listed in list workflow, from JSON format file. If there are multiple JSON, concatenate them and separate by a space or newline. The order must be same as memo_key. +``` + +## Signals + +So far our workflow is not very interesting. Let's change it to listen on an external event and update state accordingly. + +```java + @WorkflowInterface + public interface HelloWorld { + @WorkflowMethod + void sayHello(String name); + + @SignalMethod + void updateGreeting(String greeting); + } + + public static class HelloWorldImpl implements HelloWorld { + + private String greeting = "Hello"; + + @Override + public void sayHello(String name) { + int count = 0; + while (!"Bye".equals(greeting)) { + logger.info(++count + ": " + greeting + " " + name + "!"); + String oldGreeting = greeting; + Workflow.await(() -> !Objects.equals(greeting, oldGreeting)); + } + logger.info(++count + ": " + greeting + " " + name + "!"); + } + + @Override + public void updateGreeting(String greeting) { + this.greeting = greeting; + } + } +``` + +The workflow interface now has a new method annotated with @SignalMethod. It is a callback method that is invoked +every time a new signal of "HelloWorld\_updateGreeting" is delivered to a workflow. The workflow interface can have only +one @WorkflowMethod, which is the _main_ function of the workflow, and as many signal methods as needed. + +The updated workflow implementation demonstrates a few important Temporal concepts.
The first is that a workflow is stateful and can +have fields of any complex type. Another is that the `Workflow.await` function blocks until the function it receives as a parameter evaluates to true. The condition is evaluated only on workflow state changes, so it is not a busy wait in the traditional sense. + +```bash +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --workflow_id "HelloSignal" --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"World\" +Started Workflow Id: HelloSignal, run Id: 6fa204cb-f478-469a-9432-78060b83b6cd +``` + +Program output: + +```text +16:53:56.120 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 1: Hello World! +``` + +Let's send a signal using CLI: + +```bash +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow signal --workflow_id "HelloSignal" --name "updateGreeting" --input \"Hi\" +Signal workflow succeeded. +``` + +Program output: + +```text +16:53:56.120 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 1: Hello World! +16:54:57.901 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 2: Hi World! +``` + +Try sending the same signal with the same input again. Note that the output doesn't change. This happens because the await condition +doesn't unblock when it sees the same value. But a new greeting unblocks it: + +```bash +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow signal --workflow_id "HelloSignal" --name "updateGreeting" --input \"Welcome\" +Signal workflow succeeded. +``` + +Program output: + +```text +16:53:56.120 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 1: Hello World! +16:54:57.901 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 2: Hi World! +16:56:24.400 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 3: Welcome World!
+``` + +Now shut down the worker and send the same signal again: + +```bash +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow signal --workflow_id "HelloSignal" --name "updateGreeting" --input \"Welcome\" +Signal workflow succeeded. +``` + +Note that sending signals as well as starting workflows does not need a worker running. The requests are queued inside the Temporal service. + +Now bring the worker back. Note that it doesn't log anything besides the standard startup messages. +This occurs because it ignores the queued signal that contains the same input as the current value of greeting. +Note that the restart of the worker didn't affect the workflow execution. It is still blocked on the same line of code as before the failure. +This is the most important feature of Temporal. The workflow code doesn't need to deal with worker failures at all. Its state is fully recovered to its current state that includes all the local variables and threads. + +Let's look at the line where the workflow is blocked: + +```bash +> docker run --network=host --rm temporalio/tctl:0.25.0 workflow stack --workflow_id "Hello2" +Query result: +"workflow-root: (BLOCKED on await) +io.temporal.internal.sync.SyncDecisionContext.await(SyncDecisionContext.java:546) +io.temporal.internal.sync.WorkflowInternal.await(WorkflowInternal.java:243) +io.temporal.workflow.Workflow.await(Workflow.java:611) +io.temporal.samples.hello.GettingStarted$HelloWorldImpl.sayHello(GettingStarted.java:32) +sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) +sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)" +``` + +Yes, indeed the workflow is blocked on await. This feature works for any open workflow, greatly simplifying troubleshooting in production. +Let's complete the workflow by sending a signal with a "Bye" greeting: + +```text +16:58:22.962 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 4: Bye World! 
+``` + +Note that the value of the count variable was not lost during the restart. + +Also note that while a single worker instance is used for this +walkthrough, any real production deployment has multiple worker instances running. So any worker failure or restart does not delay any +workflow execution because it is just migrated to any other available worker. + +## Query + +So far we have learned that the workflow code is fault tolerant and can update its state in reaction to external events in the form of signals. +Temporal provides a query feature that supports synchronously returning any information from a workflow to an external caller. + +Update the workflow code to: + +```java + @WorkflowInterface + public interface HelloWorld { + @WorkflowMethod + void sayHello(String name); + + @SignalMethod + void updateGreeting(String greeting); + + @QueryMethod + int getCount(); + } + + public static class HelloWorldImpl implements HelloWorld { + + private String greeting = "Hello"; + private int count = 0; + + @Override + public void sayHello(String name) { + while (!"Bye".equals(greeting)) { + logger.info(++count + ": " + greeting + " " + name + "!"); + String oldGreeting = greeting; + Workflow.await(() -> !Objects.equals(greeting, oldGreeting)); + } + logger.info(++count + ": " + greeting + " " + name + "!"); + } + + @Override + public void updateGreeting(String greeting) { + this.greeting = greeting; + } + + @Override + public int getCount() { + return count; + } + } +``` + +The new `getCount` method annotated with `@QueryMethod` was added to the workflow interface definition. It is allowed +to have multiple query methods per workflow interface. + +The main restriction on the implementation of the query method is that it is not allowed to modify workflow state in any form. +It also is not allowed to block its thread in any way. It usually just returns a value derived from the fields of the workflow object. 
+Let's run the updated worker and send a couple signals to it: + +```bash +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --workflow_id "HelloQuery" --tasklist HelloWorldTaskList --workflow_type HelloWorld --execution_timeout 3600 --input \"World\" +Started Workflow Id: HelloQuery, run Id: 1925f668-45b5-4405-8cba-74f7c68c3135 +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow signal --workflow_id "HelloQuery" --name "updateGreeting" --input \"Hi\" +Signal workflow succeeded. +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow signal --workflow_id "HelloQuery" --name "updateGreeting" --input \"Welcome\" +Signal workflow succeeded. +``` + +The worker output: + +```text +17:35:50.485 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 1: Hello World! +17:36:10.483 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 2: Hi World! +17:36:16.204 [workflow-root] INFO c.u.c.samples.hello.GettingStarted - 3: Welcome World! +``` + +Now let's query the workflow using the CLI: + +```bash +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow query --workflow_id "HelloQuery" --query_type "getCount" +Query result as JSON: +3 +``` + +One limitation of the query is that it requires a worker process running because it is executing callback code. +An interesting feature of the query is that it works for completed workflows as well. Let's complete the workflow by sending "Bye" and query it. + +```bash +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow signal --workflow_id "HelloQuery" --name "updateGreeting" --input \"Bye\" +Signal workflow succeeded. +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow query --workflow_id "HelloQuery" --query_type "getCount" +Query result as JSON: +4 +``` + +The Query method can accept parameters. This might be useful if only part of the workflow state should be returned. 
+ +## Activities + +Having fault tolerant code that maintains state, updates it in reaction to external events, and supports querying is already very useful. +But in most practical applications, the workflow is expected to act upon the external world. Temporal supports such externally-facing code in the form of activities. + +An activity is essentially a function that can execute any code like DB updates or service calls. The workflow is not allowed to +directly call any external APIs; it can do this only through activities. The workflow is essentially an orchestrator of activities. +Let's change our program to print the greeting from an activity on every change. + +First let's define an activities interface and implement it: + +```java + @ActivityInterface + public interface HelloWorldActivities { + void say(String message); + } +``` + +`@ActivityInterface` annotation is required for an activity interface. Each method that belongs to an activity interface +defines a separate activity type. + +Activity implementation is just a normal [POJO](https://en.wikipedia.org/wiki/Plain_old_Java_object). +The `out` stream is passed as a parameter to the constructor to demonstrate that the +activity object can have any dependencies. Examples of real application dependencies are database connections and service clients. + +```java + public class HelloWordActivitiesImpl implements HelloWorldActivities { + private final PrintStream out; + + public HelloWordActivitiesImpl(PrintStream out) { + this.out = out; + } + + @Override + public void say(String message) { + out.println(message); + } + } +``` + +Let's create a separate main method for the activity worker. It is common to have a single worker that hosts both activities and workflows, +but here we keep them separate to demonstrate how Temporal deals with worker failures. 
+To make the activity implementation known to Temporal, register it with the worker: + +```java +public class GettingStartedActivityWorker { + + public static void main(String[] args) { + WorkflowServiceStubs service = WorkflowServiceStubs.newInstance(); + WorkflowClient client = WorkflowClient.newInstance(service); + WorkerFactory factory = WorkerFactory.newInstance(client); + Worker worker = factory.newWorker("HelloWorldTaskList"); + worker.registerActivitiesImplementations(new HelloWordActivitiesImpl(System.out)); + factory.start(); + } +} +``` + +A single instance of an activity object is registered per activity interface type. This means that the activity implementation should be thread-safe since the activity method can be simultaneously called from multiple threads. + +Let's modify the workflow code to invoke the activity instead of logging: + +```java + public static class HelloWorldImpl implements HelloWorld { + + private final HelloWorldActivities activities = Workflow.newActivityStub(HelloWorldActivities.class); + private String greeting = "Hello"; + private int count = 0; + + @Override + public void sayHello(String name) { + while (!"Bye".equals(greeting)) { + activities.say(++count + ": " + greeting + " " + name + "!"); + String oldGreeting = greeting; + Workflow.await(() -> !Objects.equals(greeting, oldGreeting)); + } + activities.say(++count + ": " + greeting + " " + name + "!"); + } + + @Override + public void updateGreeting(String greeting) { + this.greeting = greeting; + } + + @Override + public int getCount() { + return count; + } + } +``` + +Activities are invoked through a stub that implements their interface. So an invocation is just a method call on an activity stub. + +Now run the workflow worker. Do not run the activity worker yet. 
Then start a new workflow execution: + +```bash +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow start --workflow_id "HelloActivityWorker" --tasklist HelloWorldTaskList --workflow_type HelloWorld_sayHello --execution_timeout 3600 --input \"World\" +Started Workflow Id: HelloActivityWorker, run Id: ff015637-b5af-43e8-b3f6-8b6c7b919b62 +``` + +The workflow is started, but nothing visible happens. This is expected as the activity worker is not running. What are the options to understand the currently running workflow state? + +The first option is to look at the stack trace: + +```text +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow stack --workflow_id "HelloActivityWorker" +Query result as JSON: +"workflow-root: (BLOCKED on Feature.get)io.temporal.internal.sync.CompletablePromiseImpl.get(CompletablePromiseImpl.java:71) +io.temporal.internal.sync.ActivityStubImpl.execute(ActivityStubImpl.java:58) +io.temporal.internal.sync.ActivityInvocationHandler.lambda$invoke$0(ActivityInvocationHandler.java:87) +io.temporal.internal.sync.ActivityInvocationHandler$$Lambda$25/1816732716.apply(Unknown Source) +io.temporal.internal.sync.ActivityInvocationHandler.invoke(ActivityInvocationHandler.java:94) +com.sun.proxy.$Proxy6.say(Unknown Source) +io.temporal.samples.hello.GettingStarted$HelloWorldImpl.sayHello(GettingStarted.java:55) +sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) +sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) +" +``` + +It shows that the workflow code is blocked on the "say" method of a Proxy object that implements the activity stub. +You can restart the workflow worker if you want to make sure that restarting it does not change that. It works for activities +of any duration. It is okay for the workflow code to block on an activity invocation for a month for example.
+ +Another way to see what exactly happened in the workflow execution is to look at the workflow execution history: + +```text +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow show --workflow_id "HelloActivityWorker" + 1 WorkflowExecutionStarted {WorkflowType:{Name:HelloWorld_sayHello}, + TaskList:{Name:HelloWorldTaskList}, + Input:["World"], + ExecutionStartToCloseTimeoutSeconds:3600, + TaskStartToCloseTimeoutSeconds:10, + ContinuedFailureDetails:[], + LastCompletionResult:[], + Identity:temporal-cli@linuxkit-025000000001, + Attempt:0, + FirstDecisionTaskBackoffSeconds:0} + 2 DecisionTaskScheduled {TaskList:{Name:HelloWorldTaskList}, + StartToCloseTimeoutSeconds:10, + Attempt:0} + 3 DecisionTaskStarted {ScheduledEventId:2, + Identity:36234@maxim-C02XD0AAJGH6, + RequestId:ef645576-7cee-4d2e-9892-597a08b7b01f} + 4 DecisionTaskCompleted {ExecutionContext:[], + ScheduledEventId:2, + StartedEventId:3, + Identity:36234@maxim-C02XD0AAJGH6} + 5 ActivityTaskScheduled {ActivityId:0, + ActivityType:{Name:HelloWorldActivities_say}, + TaskList:{Name:HelloWorldTaskList}, + Input:["1: Hello World!"], + ScheduleToCloseTimeoutSeconds:100, + ScheduleToStartTimeoutSeconds:100, + StartToCloseTimeoutSeconds:100, + HeartbeatTimeoutSeconds:100, + DecisionTaskCompletedEventId:4} +``` + +The last event in the workflow history is `ActivityTaskScheduled`. It is recorded when workflow invoked the activity, but it wasn't picked up by an activity worker yet. 
+ +Another useful API is `DescribeWorkflowExecution` which, among other information, contains the list of outstanding activities: + +```text +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow describe --workflow_id "HelloActivityWorker" +{ + "ExecutionConfiguration": { + "taskList": { + "name": "HelloWorldTaskList" + }, + "executionStartToCloseTimeoutSeconds": 3600, + "taskStartToCloseTimeoutSeconds": 10, + "childPolicy": "TERMINATE" + }, + "WorkflowExecutionInfo": { + "Execution": { + "workflowId": "HelloActivityWorker", + "runId": "ff015637-b5af-43e8-b3f6-8b6c7b919b62" + }, + "Type": { + "name": "HelloWorld_sayHello" + }, + "StartTime": "2019-06-08T23:56:41Z", + "CloseTime": "1970-01-01T00:00:00Z", + "Status": null, + "HistoryLength": 5, + "ParentNamespaceId": null, + "ParentExecution": null, + "AutoResetPoints": {} + }, + "PendingActivities": [ + { + "ActivityId": "0", + "ActivityType": { + "name": "HelloWorldActivities_say" + }, + "State": "SCHEDULED", + "ScheduledTimestamp": "2019-06-08T23:57:00Z" + } + ] +} +``` + +Let's start the activity worker. It starts and immediately prints: + +```text +1: Hello World! 
+``` + +Let's look at the workflow execution history: + +```text +temporal: docker run --network=host --rm temporalio/tctl:0.25.0 workflow show --workflow_id "HelloActivityWorker" + 1 WorkflowExecutionStarted {WorkflowType:{Name:HelloWorld_sayHello}, + TaskList:{Name:HelloWorldTaskList}, + Input:["World"], + ExecutionStartToCloseTimeoutSeconds:3600, + TaskStartToCloseTimeoutSeconds:10, + ContinuedFailureDetails:[], + LastCompletionResult:[], + Identity:temporal-cli@linuxkit-025000000001, + Attempt:0, + FirstDecisionTaskBackoffSeconds:0} + 2 DecisionTaskScheduled {TaskList:{Name:HelloWorldTaskList}, + StartToCloseTimeoutSeconds:10, + Attempt:0} + 3 DecisionTaskStarted {ScheduledEventId:2, + Identity:37694@maxim-C02XD0AAJGH6, + RequestId:1d7cba6d-98c8-41fd-91b1-c27dffb21c7f} + 4 DecisionTaskCompleted {ExecutionContext:[], + ScheduledEventId:2, + StartedEventId:3, + Identity:37694@maxim-C02XD0AAJGH6} + 5 ActivityTaskScheduled {ActivityId:0, + ActivityType:{Name:HelloWorldActivities_say}, + TaskList:{Name:HelloWorldTaskList}, + Input:["1: Hello World!"], + ScheduleToCloseTimeoutSeconds:300, + ScheduleToStartTimeoutSeconds:300, + StartToCloseTimeoutSeconds:300, + HeartbeatTimeoutSeconds:300, + DecisionTaskCompletedEventId:4} + 6 ActivityTaskStarted {ScheduledEventId:5, + Identity:37784@maxim-C02XD0AAJGH6, + RequestId:a646d5d2-566f-4f43-92d7-6689139ce944, + Attempt:0} + 7 ActivityTaskCompleted {Result:[], ScheduledEventId:5, + StartedEventId:6, + Identity:37784@maxim-C02XD0AAJGH6} + 8 DecisionTaskScheduled {TaskList:{Name:maxim-C02XD0AAJGH6:fd3a85ed-752d-4662-a49d-2665b7667c8a}, + StartToCloseTimeoutSeconds:10, Attempt:0} + 9 DecisionTaskStarted {ScheduledEventId:8, + Identity:fd3a85ed-752d-4662-a49d-2665b7667c8a, + RequestId:601ef30a-0d1b-4400-b034-65b8328ad34c} + 10 DecisionTaskCompleted {ExecutionContext:[], + ScheduledEventId:8, + StartedEventId:9, + Identity:37694@maxim-C02XD0AAJGH6} +``` + +_ActivityTaskStarted_ event is recorded when the activity task is picked up 
by an activity worker. The Identity field +contains the Id of the worker (you can set it to any value on worker startup). + +_ActivityTaskCompleted_ event is recorded when activity completes. It contains the result of the activity execution. + +Let's look at various failure scenarios. Modify activity task timeout: + +```java + @ActivityInterface + public interface HelloWorldActivities { + @ActivityMethod(scheduleToCloseTimeoutSeconds = 100) + void say(String message); + } + + public class HelloWordActivitiesImpl implements HelloWorldActivities { + private final PrintStream out; + + public HelloWordActivitiesImpl(PrintStream out) { + this.out = out; + } + + @Override + public void say(String message) { + out.println(message); + } + } +``` + +(To be continued ...) diff --git a/versioned_docs/version-0.25.0/java-starting-workflow-executions.md b/versioned_docs/version-0.25.0/java-starting-workflow-executions.md new file mode 100644 index 0000000000..1da660627a --- /dev/null +++ b/versioned_docs/version-0.25.0/java-starting-workflow-executions.md @@ -0,0 +1,49 @@ +--- +id: java-starting-workflow-executions +title: Starting workflow executions +--- + +A workflow interface that executes a workflow requires initializing a `WorkflowClient` instance, creating +a client side stub to the workflow, and then calling a method annotated with @WorkflowMethod. + +```java + // service and client are heavyweight objects that should be created once per process lifetime. + WorkflowServiceStubs service = WorkflowServiceStubs.newInstance(); + WorkflowClient client = WorkflowClient.newInstance(service); + // Create a new workflow stub per each workflow start + FileProcessingWorkflow workflow = workflowClient.newWorkflowStub(FileProcessingWorkflow.class); +``` + +There are two ways to start workflow execution: asynchronously and synchronously. Asynchronous start initiates a workflow execution and immediately returns to the caller. 
This is the most common way to start workflows in production code. Synchronous invocation starts a workflow +and then waits for its completion. If the process that started the workflow crashes or stops waiting, the workflow continues executing. +Because workflows are potentially long running, and crashes of clients happen, this is not very commonly found in production use. + +Asynchronous start: +```java +// Returns as soon as the workflow starts. +WorkflowExecution workflowExecution = WorkflowClient.start(workflow::processFile, workflowArgs); + +System.out.println("Started process file workflow with workflowId=\"" + workflowExecution.getWorkflowId() + + "\" and runId=\"" + workflowExecution.getRunId() + "\""); +``` + +Synchronous start: +```java +// Start a workflow and then wait for a result. +// Note that if the waiting process is killed, the workflow will continue execution. +String result = workflow.processFile(workflowArgs); +``` + +If you need to wait for a workflow completion after an asynchronous start, the most straightforward way +is to call the blocking version again. If `WorkflowOptions.WorkflowIdReusePolicy` is not `AllowDuplicate`, then instead +of throwing `DuplicateWorkflowException`, it reconnects to an existing workflow and waits for its completion. +The following example shows how to do this from a different process than the one that started the workflow. All this process +needs is a `WorkflowId`. + +```java +WorkflowExecution execution = new WorkflowExecution().setWorkflowId(workflowId); +FileProcessingWorkflow workflow = workflowClient.newWorkflowStub(execution); +// Returns result potentially waiting for workflow to complete. 
+String result = workflow.processFile(workflowArgs); +``` + diff --git a/versioned_docs/version-0.25.0/java-versioning.md b/versioned_docs/version-0.25.0/java-versioning.md new file mode 100644 index 0000000000..71899d8ec2 --- /dev/null +++ b/versioned_docs/version-0.25.0/java-versioning.md @@ -0,0 +1,92 @@ +--- +id: java-versioning +title: Versioning +--- + +As outlined in the _Workflow Implementation Constraints_ section, workflow code has to be deterministic by taking the same +code path when replaying history events. Any workflow code change that affects the order in which decisions are generated breaks +this assumption. The solution that allows updating code of already running workflows is to keep both the old and new code. +When replaying, use the code version that the events were generated with and when executing a new code path, always take the +new code. + +Use the `Workflow.getVersion` function to return a version of the code that should be executed and then use the returned +value to pick a correct branch. Let's look at an example. + +```java +public void processFile(Arguments args) { + String localName = null; + String processedName = null; + try { + localName = activities.download(args.getSourceBucketName(), args.getSourceFilename()); + processedName = activities.processFile(localName); + activities.upload(args.getTargetBucketName(), args.getTargetFilename(), processedName); + } finally { + if (localName != null) { // File was downloaded. + activities.deleteLocalFile(localName); + } + if (processedName != null) { // File was processed. + activities.deleteLocalFile(processedName); + } + } +} +``` + +Now we decide to calculate the processed file checksum and pass it to upload. 
+The correct way to implement this change is: + +```java +public void processFile(Arguments args) { + String localName = null; + String processedName = null; + try { + localName = activities.download(args.getSourceBucketName(), args.getSourceFilename()); + processedName = activities.processFile(localName); + int version = Workflow.getVersion("checksumAdded", Workflow.DEFAULT_VERSION, 1); + if (version == Workflow.DEFAULT_VERSION) { + activities.upload(args.getTargetBucketName(), args.getTargetFilename(), processedName); + } else { + long checksum = activities.calculateChecksum(processedName); + activities.uploadWithChecksum( + args.getTargetBucketName(), args.getTargetFilename(), processedName, checksum); + } + } finally { + if (localName != null) { // File was downloaded. + activities.deleteLocalFile(localName); + } + if (processedName != null) { // File was processed. + activities.deleteLocalFile(processedName); + } + } +} +``` + +Later, when all workflows that use the old version are completed, the old branch can be removed. + +```java +public void processFile(Arguments args) { + String localName = null; + String processedName = null; + try { + localName = activities.download(args.getSourceBucketName(), args.getSourceFilename()); + processedName = activities.processFile(localName); + // getVersion call is left here to ensure that any attempt to replay history + // for a different version fails. It can be removed later when there is no possibility + // of this happening. + Workflow.getVersion("checksumAdded", 1, 1); + long checksum = activities.calculateChecksum(processedName); + activities.uploadWithChecksum( + args.getTargetBucketName(), args.getTargetFilename(), processedName, checksum); + } finally { + if (localName != null) { // File was downloaded. + activities.deleteLocalFile(localName); + } + if (processedName != null) { // File was processed. 
+ activities.deleteLocalFile(processedName); + } + } +} +``` + +The Id that is passed to the `getVersion` call identifies the change. Each change is expected to have its own Id. But if +a change spans multiple places in the workflow code and the new code should be either executed in all of them or +in none of them, then they have to share the Id. diff --git a/versioned_docs/version-0.25.0/java-workflow-interface.md b/versioned_docs/version-0.25.0/java-workflow-interface.md new file mode 100644 index 0000000000..dd4d65eaf8 --- /dev/null +++ b/versioned_docs/version-0.25.0/java-workflow-interface.md @@ -0,0 +1,119 @@ +--- +id: java-workflow-interface +title: Workflow Interface +--- + +Workflow encapsulates the orchestration of activities and child workflows. +It can also answer synchronous queries and receive external events (also known as signals). + +A workflow must define an interface class. A workflow interface class must be annotated with `@WorkflowInterface`. +All of its methods must have one of the following annotations: + +- **@WorkflowMethod** indicates an entry point to a workflow. It contains parameters such as timeouts and a task list. + Required parameters (such as `executionStartToCloseTimeoutSeconds`) that are not specified through the annotation must be provided at runtime. +- **@SignalMethod** indicates a method that reacts to external signals. It must have a `void` return type. +- **@QueryMethod** indicates a method that reacts to synchronous query requests. It must have a non-`void` return type. + +You can have more than one method with the same annotation (except @WorkflowMethod).
For example: +```java +@WorkflowInterface +public interface FileProcessingWorkflow { + + @WorkflowMethod + String processFile(Arguments args); + + @QueryMethod(name="history") + List getHistory(); + + @QueryMethod(name="status") + String getStatus(); + + @SignalMethod + void retryNow(); + + @SignalMethod + void abandon(); +} +``` +Note that the `name` parameter of workflow method annotations can be used to specify the name of workflow, signal and query types. +If the name is not specified, the short name of the workflow interface joined by an underscore with the method name is used. +In the above code the @WorkflowMethod.name is not specified, thus the workflow type defaults to `"FileProcessingWorkflow_processFile"`. + +We recommend that you use a single value type argument for all types of workflow methods. +This way, adding new arguments as fields to the value type is a backwards-compatible change. + +# Workflow Interface Inheritance + +Workflow interfaces can form inheritance hierarchies. It may be useful for creating components reusable across multiple +workflow types. For example, imagine a UI or CLI button that allows calling the `retryNow` signal on any workflow.
To implement +this feature you can redesign the above interface to: +```java +@WorkflowInterface +public interface Retryable { + @SignalMethod + void retryNow(); +} + +@WorkflowInterface +public interface FileProcessingWorkflow extends Retryable { + + @WorkflowMethod + String processFile(Arguments args); + + @QueryMethod(name="history") + List getHistory(); + + @QueryMethod(name="status") + String getStatus(); + + @SignalMethod + void abandon(); +} +``` +Then some other workflow can implement it as well: +```java +@WorkflowInterface +public interface MediaProcessingWorkflow extends Retryable { + @WorkflowMethod + String processBlob(Arguments args); +} +``` +Then it would be possible to send a signal to both of them using the Retryable interface only: +```java +Retryable r = client.newWorkflowStub(Retryable.class, workflowId); +r.retryNow(); +``` +The same technique can be used to query workflows through a base interface. + +Note that an attempt to start a workflow through a base interface annotated with `@WorkflowInterface` is not going to work. +Let's look at the following **invalid** example: +```java +// INVALID CODE! +@WorkflowInterface +public interface BaseWorkflow { + @WorkflowMethod + void retryNow(); +} + +@WorkflowInterface +public interface Workflow1 extends BaseWorkflow {} + +@WorkflowInterface +public interface Workflow2 extends BaseWorkflow {} +``` +An attempt to register implementations of Workflow1 and Workflow2 is going to fail as they are going to use the same +workflow type. The type is defined by the type of the class which is annotated with @WorkflowInterface. In this case `BaseWorkflow`. +The solution is to remove the @WorkflowInterface annotation from BaseWorkflow.
The following is valid code: +```java +public interface BaseWorkflow { + @WorkflowMethod + void retryNow(); +} + +@WorkflowInterface +public interface Workflow1 extends BaseWorkflow {} + +@WorkflowInterface +public interface Workflow2 extends BaseWorkflow {} +``` +Implementations of Workflow1 and Workflow2 can be registered with the same worker as they will have types defined by their interfaces. diff --git a/versioned_docs/version-0.25.0/learn-activities.md b/versioned_docs/version-0.25.0/learn-activities.md new file mode 100644 index 0000000000..d5da7eb94c --- /dev/null +++ b/versioned_docs/version-0.25.0/learn-activities.md @@ -0,0 +1,83 @@ +--- +id: learn-activities +title: Activities +--- + +Fault-oblivious stateful workflow code is the core abstraction of Temporal. But, due to deterministic execution requirements, workflows are not allowed to call any external API directly. +Instead they orchestrate execution of activities. In its simplest form, a Temporal activity is a function or an object method in one of the supported languages. +Temporal does not recover activity state in case of failures. Therefore an activity function is allowed to contain any code without restrictions. + +Activities are invoked asynchronously through task lists. A task list is essentially a queue used to store an activity task until it is picked up by an available worker. The worker processes an activity by invoking its implementation function. When the function returns, the worker reports the result back to the Temporal service which in turn notifies the workflow about completion. It is possible to implement an activity fully asynchronously by completing it from a different process. + +## Timeouts + +Temporal does not impose any system limit on activity duration. It is up to the application to choose the timeouts for its execution.
These are the configurable activity timeouts: + +- `ScheduleToStart` is the maximum time from a workflow requesting activity execution to a worker starting its execution. The usual reason for this timeout to fire is all workers being down or not being able to keep up with the request rate. We recommend setting this timeout to the maximum time a workflow is willing to wait for an activity execution in the presence of all possible worker outages. +- `StartToClose` is the maximum time an activity can execute after it was picked by a worker. +- `ScheduleToClose` is the maximum time from the workflow requesting an activity execution to its completion. +- `Heartbeat` is the maximum time between heartbeat requests. See [Long Running Activities](#long-running-activities). + +Either `ScheduleToClose` or both `ScheduleToStart` and `StartToClose` timeouts are required. + +## Retries + +As Temporal doesn't recover an activity's state and they can communicate to any external system, failures are expected. Therefore, Temporal supports automatic activity retries. Any activity when invoked can have an associated retry policy. Here are the retry policy parameters: + +- `InitialInterval` is a delay before the first retry. +- `BackoffCoefficient`. Retry policies are exponential. The coefficient specifies how fast the retry interval is growing. The coefficient of 1 means that the retry interval is always equal to the `InitialInterval`. +- `MaximumInterval` specifies the maximum interval between retries. Useful for coefficients more than 1. +- `MaximumAttempts` specifies how many times to attempt to execute an activity in the presence of failures. If this limit is exceeded, the error is returned back to the workflow that invoked the activity. +- `NonRetryableErrorReasons` allows you to specify errors that shouldn't be retried. For example retrying invalid arguments error doesn't make sense in some scenarios. 
+ +There are scenarios when not a single activity but rather a whole part of a workflow should be retried on failure. For example, consider a media encoding workflow that downloads a file to a host, processes it, and then uploads the result back to storage. In this workflow, if the host that hosts the worker dies, all three activities should be retried on a different host. Such retries should be handled by the workflow code as they are very use case specific. + +## Long Running Activities + +For long running activities, we recommend that you specify a relatively short heartbeat timeout and constantly heartbeat. This way worker failures for even very long running activities can be handled in a timely manner. An activity that specifies the heartbeat timeout is expected to call the heartbeat method _periodically_ from its implementation. + +A heartbeat request can include an application-specific payload. This is useful to save activity execution progress. If an activity times out due to a missed heartbeat, the next attempt to execute it can access that progress and continue its execution from that point. + +Long running activities can be used as a special case of leader election. Temporal timeouts use second resolution. So it is not a solution for realtime applications. But if it is okay to react to the process failure within a few seconds, then a Temporal heartbeat activity is a good fit. + +One common use case for such leader election is monitoring. An activity executes an internal loop that periodically polls some API and checks for some condition. It also heartbeats on every iteration. If the condition is satisfied, the activity completes, which lets its workflow handle it. If the activity worker dies, the activity times out after the heartbeat interval is exceeded and is retried on a different worker. The same pattern works for polling for new files in Amazon S3 buckets or responses in REST or other synchronous APIs.
+ +## Cancellation + +A workflow can request an activity cancellation. Currently the only way for an activity to learn that it was cancelled is through heart beating. The heartbeat request fails with a special error indicating that the activity was cancelled. Then it is up to the activity implementation to perform all the necessary cleanup and report that it is done with it. It is up to the workflow implementation to decide if it wants to wait for the activity cancellation confirmation or just proceed without waiting. + +Another common case for activity heartbeat failure is that the workflow that invoked it is in a completed state. In this case an activity is expected to perform cleanup as well. + +## Activity Task Routing through Task Lists + +Activities are dispatched to workers through task lists. Task lists are queues that workers listen on. Task lists are highly dynamic and lightweight. They don't need to be explicitly registered. And it is okay to have one task list per worker process. It is normal to have more than one activity type to be invoked through a single task list. And it is normal in some cases (like host routing) to invoke the same activity type on multiple task lists. + +Here are some use cases for employing multiple activity task lists in a single workflow: + +- _Flow control_. A worker that consumes from a task list asks for an activity task only when it has available capacity. So workers are never overloaded by request spikes. If activity executions are requested faster than workers can process them, they are backlogged in the task list. +- _Throttling_. Each activity worker can specify the maximum rate it is allowed to processes activities on a task list. It does not exceed this limit even if it has spare capacity. There is also support for global task list rate limiting. This limit works across all workers for the given task list. It is frequently used to limit load on a downstream service that an activity calls into. 
+- _Deploying a set of activities independently_. Think about a service that hosts activities and can be deployed independently from other activities and workflows. To send activity tasks to this service, a separate task list is needed. +- _Workers with different capabilities_. For example, workers on GPU boxes vs non GPU boxes. Having two separate task lists in this case allows workflows to pick which one to send an activity execution request to. +- _Routing activity to a specific host_. For example, in the media encoding case the transform and upload activity have to run on the same host as the download one. +- _Routing activity to a specific process_. For example, some activities load a large data set and cache it in the process. The activities that rely on this data set should be routed to the same process. +- _Multiple priorities_. One task list per priority and having a worker pool per priority. +- _Versioning_. A new backwards incompatible implementation of an activity might use a different task list. + +## Asynchronous Activity Completion + +By default an activity is a function or a method depending on a client side library language. As soon as the function returns, an activity completes. But in some cases an activity implementation is asynchronous. For example it is forwarded to an external system through a message queue. And the reply comes through a different queue. + +To support such use cases, Temporal allows activity implementations that do not complete upon activity function completions. A separate API should be used in this case to complete the activity. This API can be called from any process, even one written in a different programming language than the original activity worker used. + +## Local Activities + +Some of the activities are very short lived and do not need the queuing semantics, flow control, rate limiting and routing capabilities. For these Temporal supports the so-called _local activity_ feature.
Local activities are executed in the same worker process as the workflow that invoked them. Consider using local activities for functions that: + +* take no longer than a few seconds +* do not require global rate limiting +* do not require routing to specific workers or pools of workers +* can be implemented in the same binary as the workflow that invokes them + +The main benefit of local activities is that they are much more efficient in utilizing Temporal service resources and have much lower latency overhead compared to the usual activity invocation. + + diff --git a/versioned_docs/version-0.25.0/learn-archival.md b/versioned_docs/version-0.25.0/learn-archival.md new file mode 100644 index 0000000000..45e25b79b1 --- /dev/null +++ b/versioned_docs/version-0.25.0/learn-archival.md @@ -0,0 +1,73 @@ +--- +id: learn-archival +title: Archival +--- + +Archival is a feature that automatically moves workflow histories from persistence to another location after the retention period. The purpose of archival is to be able to keep histories as long as needed while not overwhelming the persistence store. There are two reasons you may want to keep the histories after the retention period has passed: +1. **Compliance:** For legal reasons histories may need to be stored for a long period of time. +2. **Debugging:** Old histories can still be accessed for debugging. + +Archival is still in beta and there are three limits to its feature set: +1. **Only Histories:** Only histories are archived; visibility records are simply deleted after the retention period. +2. **RunId Required:** In order to access an archived history, both workflowId and runId are required. +3. **Best Effort:** There are cases in which a history can be deleted from persistence without being archived first. These cases are rare but are possible with the current state of archival. + +Work is being prioritized on archival to eliminate these limitations.
+ +## Concepts + +- **Archiver:** Archiver is the component responsible for archiving and retrieving workflow histories. Its interface is quite generic and supports different kinds of archival locations: local file system, S3, Kafka, etc. Check [this README](https://github.com/temporalio/temporal/blob/master/common/archiver/README.md) for how to add a new archiver implementation. +- **URI:** An URI is used to specify the archival location. Based on the scheme part of an URI, the corresponding archiver will be selected by the system to perform archival. + +## Configuring Archival + +Archival is controlled by both namespace level config and cluster level config. + +### Cluster Archival Config + +A Temporal cluster can be in one of three archival states: + * **Disabled:** No archivals will occur and the archivers will be not initialized on startup. + * **Paused:** This state is not yet implemented. Currently setting cluster to paused is the same as setting it to disabled. + * **Enabled:** Archivals will occur. + +Enabling the cluster for archival simply means histories are being archived. There is another config which controls whether histories can be accessed from archival. Both these configs have defaults defined in static yaml, and have dynamic config overwrites. Note, however, dynamic config will take effect only when archival is enabled in static yaml. + +### Namespace Archival Config + +A namespace includes two pieces of archival related config: + * **Status:** Either enabled or disabled. If a namespace is in the disabled state no archivals will occur for that namespace. + A namespace can safely switch between statuses. + * **URI:** The scheme and location where histories will be archived to. When a namespace enables archival for the first time URI is set and can never be mutated. If URI is not specified when first enabling a namespace for archival, a default URI from static config will be used. 
+ +## Running Locally + +In order to run locally do the following: +1. `./temporal-server start` +2. `./tctl --ns samples-namespace namespace register --gd false --history_archival_status enabled --retention 0` +3. Run the [helloworld temporal sample](https://github.com/temporalio/temporal-go-samples/tree/master/helloworld) by following the README +4. Copy the workflowId and runId of the completed workflow from log output +5. `./tctl --ns samples-namespace wf show --wid <workflowId> --rid <runId>` + +In step 2, we registered a new namespace and enabled the history archival feature for that namespace. Since we didn't provide an archival URI when registering the new namespace, the default URI specified in `config/development.yaml` is used. The default URI is `file:///tmp/temporal_archival/development`, so you can find the archived workflow history under the `/tmp/temporal_archival/development` directory. + +## FAQ + +### How does archival interact with global namespaces? +When archival occurs it will first run on the active side and some time later it will run on the standby side as well. +Before uploading history a check is done to see if it has already been uploaded; if so, it is not re-uploaded. + +### Can I specify multiple archival URIs? +No, each namespace can only have one URI for history archival and one URI for visibility archival. Different namespaces, however, can have different URIs (with different schemes). + +### How does archival work with PII? +No Temporal workflow should ever operate on clear text PII. Temporal can be thought +of as a database, and just as one would not store PII in a database, PII should not be +stored in Temporal. This is even more important when archival is enabled because +these histories can be kept forever. + +## Planned Future Work +* Support archival of visibility. +* Support accessing histories without providing runId. +* Provide a hard guarantee that no history is deleted from persistence before being archived if archival is enabled.
+* Implement paused state. In this state no archivals will occur but histories also will not be deleted from persistence. +Once enabled again from paused state, all skipped archivals will occur. diff --git a/versioned_docs/version-0.25.0/learn-cli.md b/versioned_docs/version-0.25.0/learn-cli.md new file mode 100644 index 0000000000..74bdeb6ee4 --- /dev/null +++ b/versioned_docs/version-0.25.0/learn-cli.md @@ -0,0 +1,316 @@ +--- +id: learn-cli +title: Command Line Interface +--- + +The Temporal CLI is a command-line tool you can use to perform various tasks on a Temporal server. It can perform +namespace operations such as register, update, and describe as well as workflow operations like start +workflow, show workflow history, and signal workflow. + +## Using the CLI + +The Temporal CLI can be used directly from the Docker Hub image _temporalio/tctl_ or by building the CLI tool +locally. + +Example of using the docker image to describe a namespace: + +``` +docker run --rm temporalio/tctl:0.25.0 --namespace samples-namespace namespace describe +``` + +On Docker versions 18.03 and later, you may get a "connection refused" error. You can work around this by setting the host to "host.docker.internal" (see [here](https://docs.docker.com/docker-for-mac/networking/#use-cases-and-workarounds) for more info). + +``` +docker run --network=host --rm temporalio/tctl:0.25.0 --namespace samples-namespace namespace describe +``` + +To build the CLI tool locally, clone the [Temporal server repo](https://github.com/temporalio/temporal) and run +`make bins`. This produces an executable called `tctl`. With a local build, the same command to +describe a namespace would look like this: + +``` +./tctl --namespace samples-namespace namespace describe +``` + +The example commands below will use `./tctl` for brevity. + +## Environment variables + +Setting environment variables for repeated parameters can shorten the CLI commands. 
+ +- **TEMPORAL_CLI_ADDRESS** - host:port for Temporal frontend service, the default is for the local server +- **TEMPORAL_CLI_NAMESPACE** - default workflow namespace, so you don't need to specify `--namespace` + +## Quick Start + +Run `./tctl` for help on top level commands and global options +Run `./tctl namespace` for help on namespace operations +Run `./tctl workflow` for help on workflow operations +Run `./tctl tasklist` for help on tasklist operations +(`./tctl help`, `./tctl help [namespace|workflow]` will also print help messages) + +**Note:** make sure you have a Temporal server running before using CLI + +### Namespace operation examples + +- Register a new namespace named "samples-namespace": + +``` +./tctl --namespace samples-namespace namespace register --global_namespace false +# OR using short alias +./tctl --ns samples-namespace n re --gd false +``` + +- View "samples-namespace" details: + +``` +./tctl --namespace samples-namespace namespace describe +``` + +### Workflow operation examples + +The following examples assume the TEMPORAL_CLI_NAMESPACE environment variable is set. + +#### Run workflow + +Start a workflow and see its progress. This command doesn't finish until workflow completes. + +``` +./tctl workflow run --tl helloWorldGroup --wt main.Workflow --et 60 -i '"temporal"' + +# view help messages for workflow run +./tctl workflow run -h +``` + +Brief explanation: +To run a workflow, the user must specify the following: + +1. Tasklist name (--tl) +2. Workflow type (--wt) +3. Execution start to close timeout in seconds (--et) +4. Input in JSON format (--i) (optional) + +example above uses [this sample workflow](https://github.com/temporalio/temporal-go-samples/blob/master/cmd/samples/recipes/helloworld/helloworld_workflow.go) +and takes a string as input with the `-i '"temporal"'` parameter. Single quotes (`''`) are used to wrap input as JSON. + +**Note:** You need to start the worker so that the workflow can make progress. 
+(Run `make && ./bin/helloworld -m worker` in temporal-go-samples to start the worker) + +#### Show running workers of a tasklist + +``` +./tctl tasklist desc --tl helloWorldGroup +``` + +#### Start workflow + +``` +./tctl workflow start --tl helloWorldGroup --wt main.Workflow --et 60 -i '"temporal"' + +# view help messages for workflow start +./tctl workflow start -h + +# for a workflow with multiple inputs, separate each json with space/newline like +./tctl workflow start --tl helloWorldGroup --wt main.WorkflowWith3Args --et 60 -i '"your_input_string" 123 {"Name":"my-string", "Age":12345}' +``` + +The workflow `start` command is similar to the `run` command, but immediately returns the workflow_id and +run_id after starting the workflow. Use the `show` command to view the workflow's history/progress. + +##### Reuse the same workflow id when starting/running a workflow + +Use option `--workflowidreusepolicy` or `--wrp` to configure the workflow id reuse policy. +**Option 0 AllowDuplicateFailedOnly:** Allow starting a workflow execution using the same workflow Id when a workflow with the same workflow Id is not already running and the last execution close state is one of _[terminated, cancelled, timedout, failed]_. +**Option 1 AllowDuplicate:** Allow starting a workflow execution using the same workflow Id when a workflow with the same workflow Id is not already running. +**Option 2 RejectDuplicate:** Do not allow starting a workflow execution using the same workflow Id as a previous workflow. + +``` +# use AllowDuplicateFailedOnly option to start a workflow +./tctl workflow start --tl helloWorldGroup --wt main.Workflow --et 60 -i '"temporal"' --wid "" --wrp 0 + +# use AllowDuplicate option to run a workflow +./tctl workflow run --tl helloWorldGroup --wt main.Workflow --et 60 -i '"temporal"' --wid "" --wrp 1 +``` + +##### Start a workflow with a memo + +Memos are immutable key/value pairs that can be attached to a workflow run when starting the workflow. 
These are +visible when listing workflows. More information on memos can be found +[here](/docs/learn-workflow-filtering#memo-vs-search-attributes). + +``` +./tctl wf start --tl helloWorldGroup --wt main.Workflow --et 60 -i '"temporal"' --memo_key '"Service" "Env" "Instance"' --memo '"serverName1" "test" 5' +``` + +#### Show workflow history + +``` +./tctl workflow show -w 3ea6b242-b23c-4279-bb13-f215661b4717 -r 866ae14c-88cf-4f1e-980f-571e031d71b0 +# a shortcut of this is (without -w -r flag) +./tctl workflow showid 3ea6b242-b23c-4279-bb13-f215661b4717 866ae14c-88cf-4f1e-980f-571e031d71b0 + +# if run_id is not provided, it will show the latest run history of that workflow_id +./tctl workflow show -w 3ea6b242-b23c-4279-bb13-f215661b4717 +# a shortcut of this is +./tctl workflow showid 3ea6b242-b23c-4279-bb13-f215661b4717 +``` + +#### Show workflow execution information + +``` +./tctl workflow describe -w 3ea6b242-b23c-4279-bb13-f215661b4717 -r 866ae14c-88cf-4f1e-980f-571e031d71b0 +# a shortcut of this is (without -w -r flag) +./tctl workflow describeid 3ea6b242-b23c-4279-bb13-f215661b4717 866ae14c-88cf-4f1e-980f-571e031d71b0 + +# if run_id is not provided, it will show the latest workflow execution of that workflow_id +./tctl workflow describe -w 3ea6b242-b23c-4279-bb13-f215661b4717 +# a shortcut of this is +./tctl workflow describeid 3ea6b242-b23c-4279-bb13-f215661b4717 +``` + +#### List closed or open workflow executions + +``` +./tctl workflow list + +# default will only show one page, to view more items, use --more flag +./tctl workflow list -m +``` + +Use **--query** to list workflows with an SQL-like query: + +``` +./tctl workflow list --query "WorkflowType='main.SampleParentWorkflow' AND CloseTime = missing " +``` + +This will return all open workflows with workflowType as "main.SampleParentWorkflow".
+ +#### Query workflow execution + +``` +# use custom query type +./tctl workflow query -w <wid> -r <rid> --qt <query-type> + +# use built-in query type "__stack_trace" which is supported by Temporal SDK +./tctl workflow query -w <wid> -r <rid> --qt __stack_trace +# a shortcut to query using __stack_trace is (without --qt flag) +./tctl workflow stack -w <wid> -r <rid> +``` + +#### Signal, cancel, terminate workflow + +``` +# signal +./tctl workflow signal -w <wid> -r <rid> -n <signal-name> -i '"signal-value"' + +# cancel +./tctl workflow cancel -w <wid> -r <rid> + +# terminate +./tctl workflow terminate -w <wid> -r <rid> --reason <reason> +``` + +Terminating a running workflow execution will record a WorkflowExecutionTerminated event as the closing event in the history. No more decision tasks will be scheduled for a terminated workflow execution. +Canceling a running workflow execution will record a WorkflowExecutionCancelRequested event in the history, and a new decision task will be scheduled. The workflow has a chance to do some clean up work after cancellation. + +#### Signal, cancel, terminate workflows as a batch job + +A batch job is based on the List Workflow Query (**--query**). It supports signal, cancel and terminate as batch job types. +When terminating workflows as a batch job, it will terminate the children recursively. + +Start a batch job (using signal as batch type): + +``` +tctl --ns samples-namespace wf batch start --query "WorkflowType='main.SampleParentWorkflow' AND CloseTime=missing" --reason "test" --bt signal --sig testname +This batch job will be operating on 5 workflows.
+Please confirm[Yes/No]:yes +{ + "jobId": "", + "msg": "batch job is started" +} + +``` + +You need to remember the JobId or use the List command to get all your batch jobs: + +``` +tctl --ns samples-namespace wf batch list +``` + +Describe the progress of a batch job: + +``` +tctl --ns samples-namespace wf batch desc -jid +``` + +Terminate a batch job: + +``` +tctl --ns samples-namespace wf batch terminate -jid +``` + +Note that the operation performed by a batch will not be rolled back by terminating the batch. However, you can use reset to roll back your workflows. + +#### Restart, reset workflow + +The Reset command allows resetting a workflow to a particular point and continuing to run from there. +There are a lot of use cases: + +- Rerun a failed workflow from the beginning with the same start parameters. +- Rerun a failed workflow from the failing point without losing the achieved progress(history). +- After deploying new code, reset an open workflow to let the workflow run to different flows. + +You can reset to some predefined event types: + +``` +./tctl workflow reset -w -r --reset_type --reason "some_reason" +``` + +- FirstDecisionCompleted: reset to the beginning of the history. +- LastDecisionCompleted: reset to the end of the history. +- LastContinuedAsNew: reset to the end of the history for the previous run. + +If you are familiar with Temporal history events, you can also reset to any decision finish event by using: + +``` +./tctl workflow reset -w -r --event_id --reason "some_reason" +``` + +Some things to note: + +- When reset, a new run will be kicked off with the same workflowId. But if there is a running execution for the workflow(workflowId), the current run will be terminated. +- decision_finish_event_id is the Id of events of the type: DecisionTaskComplete/DecisionTaskFailed/DecisionTaskTimeout. +- To restart a workflow from the beginning, reset to the first decision task finish event.
+ +To reset multiple workflows, you can use batch reset command: + +``` +./tctl workflow reset-batch --input_file --reset_type --reason "some_reason" +``` + +#### Recovery from bad deployment -- auto-reset workflow + +If a bad deployment lets a workflow run into a wrong state, you might want to reset the workflow to the point that the bad deployment started to run. But usually it is not easy to find out all the workflows impacted, and every reset point for each workflow. In this case, auto-reset will automatically reset all the workflows given a bad deployment identifier. + +Let's get familiar with some concepts. Each deployment will have an identifier, we call it "**Binary Checksum**" as it is usually generated by the md5sum of a binary file. For a workflow, each binary checksum will be associated with an **auto-reset point**, which contains a **runId**, an **eventID**, and the **created_time** that binary/deployment made the first decision for the workflow. + +To find out which **binary checksum** of the bad deployment to reset, you should be aware of at least one workflow running into a bad state. Use the describe command with **--reset_points_only** option to show all the reset points: + +``` +./tctl wf desc -w --reset_points_only ++----------------------------------+--------------------------------+--------------------------------------+---------+ +| BINARY CHECKSUM | CREATE TIME | RUNID | EVENTID | ++----------------------------------+--------------------------------+--------------------------------------+---------+ +| c84c5afa552613a83294793f4e664a7f | 2019-05-24 10:01:00.398455019 | 2dd29ab7-2dd8-4668-83e0-89cae261cfb1 | 4 | +| aae748fdc557a3f873adbe1dd066713f | 2019-05-24 11:01:00.067691445 | d42d21b8-2adb-4313-b069-3837d44d6ce6 | 4 | +... +... +``` + +Then use this command to tell Temporal to auto-reset all workflows impacted by the bad deployment. 
The command will store the bad binary checksum into namespace info and trigger a process to reset all your workflows. + +``` +./tctl --ns namespace update --add_bad_binary aae748fdc557a3f873adbe1dd066713f --reason "rollback bad deployment" +``` + +As you add the bad binary checksum to your namespace, Temporal will not dispatch any decision tasks to the bad binary. So make sure that you have rolled back to a good deployment(or roll out new bits with bug fixes). Otherwise your workflow can't make any progress after auto-reset. diff --git a/versioned_docs/version-0.25.0/learn-cross-dc-replication.md b/versioned_docs/version-0.25.0/learn-cross-dc-replication.md new file mode 100644 index 0000000000..32d6546aed --- /dev/null +++ b/versioned_docs/version-0.25.0/learn-cross-dc-replication.md @@ -0,0 +1,103 @@ +--- +id: learn-cross-dc-replication +title: Cross-DC Replication +--- + +The Temporal Global Namespace feature provides clients with the capability to continue their workflow execution from another +cluster in the event of a datacenter failover. Although you can configure a Global Namespace to be replicated to any number of +clusters, it is only considered active in a single cluster. + +## Global Namespaces Architecture +Temporal has introduced a new top level entity, Global Namespaces, which provides support for replication of workflow +execution across clusters. Client applications need to run workers polling on Activity/Decision tasks on all clusters. +Temporal will only dispatch tasks on the current active cluster; workers on the standby cluster will sit idle +until the Global Namespace is failed over. + +Because Temporal is a service that provides highly consistent semantics, we only allow external events like +**StartWorkflowExecution**, **SignalWorkflowExecution**, etc. on an active cluster. 
Global Namespaces relies on light-weight +transactions (paxos) on the local cluster (Local_Quorum) to update the workflow execution state and create replication +tasks which are applied asynchronously to replicate state across clusters. If an application makes these API calls on a +cluster where Global Namespace is in standby mode, Temporal will reject those calls with **NamespaceNotActiveError**, which +contains the name of the current active cluster. It is the responsibility of the application to forward the external +event to the cluster that is currently active. + +## New config for Global Namespaces + +### IsGlobal +This config is used to distinguish namespaces local to the cluster from the global namespace. It controls the creation of +replication tasks on updates allowing the state to be replicated across clusters. This is a read-only setting that can +only be set when the namespace is provisioned. + +### Clusters +A list of clusters where the namespace can fail over to, including the current active cluster. +This is also a read-only setting that can only be set when the namespace is provisioned. A re-replication feature on the +roadmap will allow updating this config to add/remove clusters in the future. + +### Active Cluster Name +Name of the current active cluster for the Global Namespace. This config is updated each time the Global Namespace is failed over to +another cluster. + +### Failover Version +Unique failover version which also represents the current active cluster for Global Namespace. Temporal allows failover to +be triggered from any cluster, so failover version is designed in a way to not allow conflicts if failover is mistakenly +triggered simultaneously on two clusters. + +## Conflict Resolution +Unlike local namespaces which provide at-most-once semantics for activity execution, Global Namespaces can only support at-least-once +semantics. 
Temporal XDC relies on asynchronous replication of events across clusters, so in the event of a failover +it is possible that activity gets dispatched again on the new active cluster due to a replication task lag. This also +means that whenever workflow execution is updated after a failover by the new cluster, any previous replication tasks +for that execution cannot be applied. This results in loss of some progress made by the workflow execution in the +previous active cluster. During such conflict resolution, Temporal re-injects any external events like Signals to the +new history before discarding replication tasks. Even though some progress could rollback during failovers, Temporal +provides the guarantee that workflows won’t get stuck and will continue to make forward progress. + +## Visibility API +All Visibility APIs are allowed on both active and standby clusters. This enables +[Temporal Web](https://github.com/temporalio/temporal-web) to work seamlessly for Global Namespaces as all visibility records for +workflow executions can be queried from any cluster the namespace is replicated to. Applications making API calls directly +to the Temporal Visibility API will continue to work even if a Global Namespace is in standby mode. However, they might see +a lag due to replication delay when querying the workflow execution state from a standby cluster. + +## CLI +The Temporal CLI can also be used to query the namespace config or perform failovers. Here are some useful commands. 
+ +### Query Global Namespace +The following command can be used to describe Global Namespace metadata: + +```bash +$ tctl --ns temporal-canary-xdc n desc +Name: temporal-canary-xdc +Description: temporal canary cross dc testing namespace +OwnerEmail: temporal-dev@temporal.io +NamespaceData: +Status: REGISTERED +RetentionInDays: 7 +EmitMetrics: true +ActiveClusterName: dc1 +Clusters: dc1, dc2 +``` + +### Failover Global Namespace +The following command can be used to failover Global Namespace *my-namespace-global* to the *dc2* cluster: + +```bash +$ tctl --ns my-namespace-global n up --ac dc2 +``` + +## FAQ + +### What happens to outstanding activities after failover? +Temporal does not forward activity completions across clusters. Any outstanding activity will eventually timeout based +on the configuration. Your application should have retry logic in place so that the activity gets retried and dispatched +again to a worker after the failover to the new DC. Handling this is pretty much the same as activity timeout caused by +a worker restart even without Global Namespaces. + +### What happens when a start or signal API call is made to a standby cluster? +Temporal will reject the call and return **NamespaceNotActiveError**. It is the responsibility of the application to forward +the failed call to active cluster based on information provided in the error. + +### What is the recommended pattern to send external events to an active cluster? +The recommendation at this point is to publish events to a Kafka topic if they can be generated in any DC. +Then, have a consumer that consumes from the aggregated Kafka topic in the same DC and sends them to Temporal. Both the +Kafka consumer and Global Namespace need to be failed over together. 
diff --git a/versioned_docs/version-0.25.0/learn-events.md b/versioned_docs/version-0.25.0/learn-events.md new file mode 100644 index 0000000000..cae549293d --- /dev/null +++ b/versioned_docs/version-0.25.0/learn-events.md @@ -0,0 +1,34 @@ +--- +id: learn-events +title: Events +--- + +## Event Handling + +Fault-oblivious stateful workflows can be _signalled_ about an external event. A signal is always point to point destined to a specific workflow instance. Signals are always processed in the order in which they are received. + +There are multiple scenarios for which signals are useful. + +## Event Aggregation and Correlation + +Temporal is not a replacement for generic stream processing engines like Apache Flink or Apache Spark. But in certain scenarios it is a better fit. For example, it fits well when all events that should be aggregated and correlated are always applied to some business entity with a clear Id, and actions should be executed when a certain condition is met. + +The main limitation is that a single Temporal workflow has a pretty limited throughput, while the number of workflows is practically unlimited. So if you need to aggregate events per customer, and your application has 100 million customers and each customer doesn't generate more than 20 events per second, then Temporal would work fine. But if you want to aggregate all events for US customers then the rate of these events would be beyond the single workflow capacity. + +For example, an IoT device generates events and a certain sequence of events indicates that the device should be reprovisioned. A workflow instance per device would be created and each instance would manage the state machine of the device and execute reprovision activity when necessary. + +Another use case is a customer loyalty program. Every time a customer makes a purchase, an event is generated into Apache Kafka for downstream systems to process.
A loyalty service Kafka consumer receives the event and signals a customer workflow about the purchase using the Temporal `signalWorkflowExecution` API. The workflow accumulates the count of the purchases. If a specified threshold is achieved, the workflow executes an activity that notifies some external service that the customer has reached the next level of loyalty program. The workflow also executes activities to periodically message the customer about their current status. + +## Human Tasks + +A lot of business processes involve human participants. The standard Temporal pattern for implementing an external interaction is to execute an activity that creates a human task in an external system. It can be an email with a form, or a record in some external database, or a mobile app notification. When a user changes the status of the task, a signal is sent to the corresponding workflow. For example, when the form is submitted, or a mobile app notification is acknowledged. Some tasks have multiple possible actions like claim, return, complete, reject. So multiple signals can be sent in relation to it. + +## Process Execution Alteration + +Some business processes should change their behavior if some external event has happened. For example, while executing an order shipment workflow, any change in item quantity could be delivered in a form of a signal. + +Another example is a service deployment workflow. While rolling out new software version to a Kubernetes cluster some problem was identified. A signal can be used to ask the workflow to pause while the problem is investigated. Then either a continue or a rollback signal can be used to execute the appropriate action. + +## Synchronization + +Temporal workflows are strongly consistent so they can be used as a synchronization point for executing actions. 
For example, there is a requirement that all messages for a single user are processed sequentially but the underlying messaging infrastructure can deliver them in parallel. The Temporal solution would be to have a workflow per user and signal it when an event is received. Then the workflow would buffer all signals in an internal data structure and then call an activity for every signal received. See the following [Stack Overflow answer](https://stackoverflow.com/a/56615120/1664318) for an example. diff --git a/versioned_docs/version-0.25.0/learn-glossary.md b/versioned_docs/version-0.25.0/learn-glossary.md new file mode 100644 index 0000000000..84ea6de80d --- /dev/null +++ b/versioned_docs/version-0.25.0/learn-glossary.md @@ -0,0 +1,136 @@ +--- +id: learn-glossary +title: Glossary +--- + +This glossary contains terms that are used with the Temporal product. + +### Activity +A business-level function that implements your application logic such as calling +a service or transcoding a media file. An activity usually implements a single +well-defined action; it can be short or long running. An activity can be implemented +as a synchronous method or fully asynchronously involving multiple processes. +An activity can be retried indefinitely according to the provided exponential retry policy. +If for any reason an activity is not completed within the specified timeout, an error is reported to the workflow and the workflow decides how to handle it. There is no limit on potential activity +duration. + +### Activity Task +A task that contains activity invocation information and is delivered to an [activity worker](#activity-worker) through an [activity task list](#activity-task-list).
Upon receiving an activity task, an activity worker executes the corresponding [activity](#activity). + +### Activity Task List +Task list that is used to deliver [activity tasks](#activity-task) to [activity workers](#activity-worker) + +### Activity Worker +An object that is executed in the client application and receives [activity tasks](#activity-task) from an [activity task list](#activity-task-list) it is subscribed to. Once a task is received, it invokes the corresponding activity. + +### Archival +Archival is a feature that automatically moves [histories](#event-history) from persistence to a blobstore after +the workflow retention period. The purpose of archival is to be able to keep histories as long as needed +while not overwhelming the persistence store. There are two reasons you may want +to keep the histories after the retention period has passed: +1. **Compliance:** For legal reasons, histories may need to be stored for a long period of time. +2. **Debugging:** Old histories can still be accessed for debugging. + +### CLI +Temporal command-line interface. + +### Client Stub +A client-side proxy used to make remote invocations to an entity that it +represents. For example, to start a workflow, a stub object that represents +this workflow is created through a special API. Then this stub is used to start, +query, or signal the corresponding workflow. + +The Go client doesn't use this. + +### Decision +Any action taken by the workflow durable function is called a decision. For example: +scheduling an activity, canceling a child workflow, or starting a timer. A [decision task](#decision-task) contains an optional list of decisions. Every decision is recorded in the [event history](#event-history) as an [event](#event). + +### Decision Task +Every time a new external event that might affect a workflow state is recorded, a decision task that contains it is added to a [decision-task-list](#decision-task-list) and then picked up by a workflow worker.
After the new event is handled, the decision task is completed with a list of [decisions](#decision). +Note that handling of a decision task is usually very fast and is not related to duration +of operations that the workflow invokes. + +### Decision Task List +Task list that is used to deliver [decision tasks](#decision-task) to [workflow workers](#workflow-worker) + +### Event +An indivisible operation performed by your application. For example, +activity_task_started, task_failed, or timer_canceled. Events are recorded in the event history. + +### Event History +An append log of events for your application. History is durably persisted +by the Temporal service, enabling seamless recovery of your application state +from crashes or failures. It also serves as an audit log for debugging. + +### Local Activity + +A [local activity](/docs/learn-activities#local-activities) is an activity that is invoked directly in the same process by a workflow code. It consumes much less resources than a normal activity, but imposes a lot of limitations like low duration and lack of rate limiting. + +### Namespace +Temporal is backed by a multi tenant service. The unit of isolation is called a **namespace**. Each namespace acts as a namespace for task list names as well as workflow Ids. For example, when a workflow is started, it is started in a +specific namespace. Temporal guarantees a unique workflow Id within a namespace, and +supports running workflow executions to use the same workflow Id if they are in +different namespaces. Various configuration options like retention period or archival destination are configured per namespace as well through a special CRUD API or through the Temporal CLI. In the multi-cluster deployment, namespace is a unit of fail-over. Each namespace can only be active on a single Temporal cluster at a time. However, different namespaces can be active in different clusters and can fail-over independently. 
+ +### Query +A synchronous (from the caller's point of view) operation that is used to +report a workflow state. Note that a query is inherently read only and cannot +affect a workflow state. + +### Run Id +A UUID that a Temporal service assigns to each workflow run. If allowed by +a configured policy, you might be able to re-execute a workflow, after it has +closed or failed, with the same *Workflow Id*. Each such re-execution is called +a run. *Run Id* is used to uniquely identify a run even if it shares a *Workflow Id* +with others. + +### Signal +An external asynchronous request to a workflow. It can be used to deliver +notifications or updates to a running workflow at any point in its existence. + +### Task +The context needed to execute a specific activity or workflow state transition. +There are two types of tasks: an [Activity task](#activity-task) and a [Decision task](#decision-task) +(aka workflow task). Note that a single activity execution corresponds to a single activity task, +while a workflow execution employs multiple decision tasks. + +### Task List +Common name for [activity task lists](#activity-task-list) and [decision task lists](#decision-task-list) + +### Task Token +A unique correlation Id for a Temporal activity. Activity completion calls take either task token +or Namespace, WorkflowId, ActivityId arguments. + +### Worker +Also known as a *worker service*. A service that hosts the workflow and +activity implementations. The worker polls the Temporal service for tasks, performs +those tasks, and communicates task execution results back to the Temporal service. +Worker services are developed, deployed, and operated by Temporal customers. + +### Workflow +A fault-oblivious stateful function that orchestrates activities. A *Workflow* has full control over +which activities are executed, and in which order. A *Workflow* must not affect +the external world directly, only through activities. 
What makes workflow code +a *Workflow* is that its state is preserved by Temporal. Therefore any failure +of a worker process that hosts the workflow code does not affect the workflow +execution. The *Workflow* continues as if these failures did not happen. At the +same time, activities can fail any moment for any reason. Because workflow code +is fully fault-oblivious, it is guaranteed to get notifications about activity +failures or timeouts and act accordingly. There is no limit on potential workflow +duration. + +### Workflow Execution +An instance of a *Workflow*. The instance can be in the process of executing +or it could have already completed execution. + +### Workflow Id +A unique identifier for a *Workflow Execution*. Temporal guarantees the +uniqueness of an Id within a namespace. An attempt to start a *Workflow* with a +duplicate Id results in an **already started** error. + +### Workflow Task +Synonym of the [Decision Task](#decision-task). + +### Workflow Worker +An object that is executed in the client application and receives [decision tasks](#decision-task) from a [decision task list](#decision-task-list) it is subscribed to. Once a task is received, it is handled by the corresponding workflow. \ No newline at end of file diff --git a/versioned_docs/version-0.25.0/learn-queries.md b/versioned_docs/version-0.25.0/learn-queries.md new file mode 100644 index 0000000000..293852439a --- /dev/null +++ b/versioned_docs/version-0.25.0/learn-queries.md @@ -0,0 +1,18 @@ +--- +id: learn-queries +title: Queries +--- + +## Synchronous Query + +Workflow code is stateful with the Temporal framework preserving it over various software and hardware failures. The state is constantly mutated during workflow execution. To expose this internal state to the external world Temporal provides a synchronous query feature. From the workflow implementer's point of view the query is exposed as a synchronous callback that is invoked by external entities.
Multiple such callbacks can be provided per workflow type exposing different information to different external systems. + +To execute a query an external client calls a synchronous Temporal API providing _namespace, workflowId, query name_ and optional _query arguments_. + +Query callbacks must be read-only, not mutating the workflow state in any way. The other limitation is that the query callback cannot contain any blocking code. Both of the above limitations rule out the ability to invoke activities from the query handlers. + +The Temporal team is currently working on implementing an _update_ feature that would be similar to query in the way it is invoked, but would support workflow state mutation and local activity invocations. + +## Stack Trace Query + +The Temporal client libraries expose some predefined queries out of the box. Currently the only supported built-in query is `__stack_trace`. This query returns stacks of all workflow owned threads. This is a great way to troubleshoot any workflow in production. diff --git a/versioned_docs/version-0.25.0/learn-server-configuration.md b/versioned_docs/version-0.25.0/learn-server-configuration.md new file mode 100644 index 0000000000..81b55cf542 --- /dev/null +++ b/versioned_docs/version-0.25.0/learn-server-configuration.md @@ -0,0 +1,359 @@ +--- +id: learn-server-configuration +title: Configuring Temporal +sidebar_label: Configuration +--- + +Temporal Server configuration is found in `development.yaml` and may contain the following possible sections: + +- [**global**](#global) +- [**persistence**](#persistence) +- [**log**](#log) +- [**clusterMetadata**](#clustermetadata) +- [**services**](#services) +- [**kafka**](#kafka) +- [**publicClient**](#publicclient) +- archival +- dcRedirectionPolicy +- dynamicConfigClient +- namespaceDefaults + +**Note:** Changing any property in the `development.yaml` file requires a process restart for changes to take effect.
+ +**Note:** If you'd like to dig deeper and see how we actually parse this file, see our source code [here](https://github.com/temporalio/temporal/blob/master/common/service/config/config.go). + +## global + +The `global` section contains process-wide configuration. See below for a minimal configuration (optional parameters are commented out.) + +```yaml +global: + membership: + name: temporal + #maxJoinDuration: 30s + #broadcastAddress: "127.0.0.1" + #pprof: + #port: 7936 + #tls: + #... + +``` + +### membership - *required* + +The `membership` section controls the following membership layer parameters: + +- `name` - *required* - used to identify other cluster members in the gossip ring. This must be the same for all nodes. +- `maxJoinDuration` - The amount of time the service will attempt to join the gossip layer before failing. +- `broadcastAddress` - Used as the address that is communicated to remote nodes to connect on. + - This is generally used when BindOnIP would be the same across several nodes (ie: 0.0.0.0) and for nat traversal scenarios. `net.ParseIP` controls the supported syntax. Note: Only IPV4 is supported. + +### pprof + +- `port` - If specified, this will initialize pprof upon process start on the listed port. + +### tls + +The `tls` section controls the SSL/TLS settings for network communication and contains two subsections, `internode` and `frontend`. The `internode` section governs internal service communication among roles where the `frontend` governs SDK client communication to the frontend service role. + +Each of these subsections contain a `server` section and a `client` section. The `server` contains the following parameters: + +- `certFile` - The path to the file containing the PEM-encoded public key of the certificate to use. +- `keyFile` - The path to the file containing the PEM-encoded private key of the certificate to use. 
+- `requireClientAuth` - *boolean* - Requires clients to authenticate with a certificate when connecting, otherwise known as mutual TLS. +- `clientCaFiles` - A list of paths to files containing the PEM-encoded public key of the Certificate Authorities you wish to trust for client authentication. This value is ignored if `requireClientAuth` is not enabled. + +Below is an example enabling Server TLS (https) between SDKs and the Frontend APIs: + +```yaml +global: + tls: + frontend: + server: + certFile: /path/to/public/cert + keyFile: /path/to/private/cert + client: + serverName: dnsSanInFrontendCertificate +``` + +Note, the `client` section generally needs to be provided to specify an expected DNS SubjectName contained in the presented server certificate via the `serverName` field; this is needed as Temporal uses IP to IP communication. You can avoid specifying this if your server certificates contain the appropriate IP Subject Alternative Names. + +Additionally, the `rootCaFiles` field needs to be provided when the client's host does not trust the Root CA used by the server. 
The example below extends the above example to manually specify the Root CA used by the frontend services: + +```yaml +global: + tls: + frontend: + server: + certFile: /path/to/public/cert + keyFile: /path/to/private/cert + client: + serverName: dnsSanInFrontendCertificate + rootCaFiles: + - /path/to/frontend/server/ca +``` + +Below is an additional example of a fully secured cluster using mutual TLS for both frontend and internode communication with manually specified CAs: + +```yaml +global: + tls: + internode: + server: + certFile: /path/to/internode/publicCert + keyFile: /path/to/internode/privCert + requireClientAuth: true + clientCaFiles: + - /path/to/internode/serverCa + client: + serverName: dnsSanInInternodeCertificate + rootCaFiles: + - /path/to/internode/serverCa + frontend: + server: + certFile: /path/to/frontend/publicCert + keyFile: /path/to/frontend/privCert + requireClientAuth: true + clientCaFiles: + - /path/to/internode/serverCa + - /path/to/sdkClientPool1/ca + - /path/to/sdkClientPool2/ca + client: + serverName: dnsSanInFrontendCertificate + rootCaFiles: + - /path/to/frontend/serverCa + +``` +**Note:** In the case that client authentication is enabled, the `internode.server` certificate is used as the client certificate among services. This adds the following requirements: + +- The `internode.server` certificate must be specified on all roles, even for a frontend-only configuration. +- Internode server certificates must be minted with either **no** Extended Key Usages or **both** ServerAuth and ClientAuth EKUs. +- If your Certificate Authorities are untrusted, such as in the previous example, the internode server CA will need to be specified in the following places: + + - `internode.server.clientCaFiles` + - `internode.client.rootCaFiles` + - `frontend.server.clientCaFiles` + +## persistence +The `persistence` section holds configuration for the data store / persistence layer.
Below is an example minimal specification for a password-secured Cassandra cluster. +```yaml +persistence: + defaultStore: default + visibilityStore: visibility + numHistoryShards: 512 + datastores: + default: + cassandra: + hosts: "127.0.0.1" + keyspace: "temporal" + user: "username" + password: "password" + visibility: + cassandra: + hosts: "127.0.0.1" + keyspace: "temporal_visibility" +``` + +The following top level configuration items are required: + +- `numHistoryShards` - *required* - the number of history shards to create when initializing the cluster. + - **Warning**: This value is immutable and will be ignored after the first run. Please ensure you set this value appropriately high enough to scale with the worst case peak load for this cluster. +- `defaultStore` - *required* - the name of the data store definition that should be used by the Temporal server. +- `visibilityStore` - *required* - the name of the data store definition that should be used by the Temporal visibility server. +- `datastores` - *required* - contains named data store definitions to be referenced. + - Each definition is defined with a heading declaring a name (ie: `default:` and `visibility:` above), which contains a data store definition. + - data store definitions must be either `cassandra` or `sql`. + +A `cassandra` data store definition can contain the following values: + +- `hosts` - *required* - a csv of Cassandra endpoints. +- `port` - default: 9042 - Cassandra port used for connection by `gocql` client. +- `user` - Cassandra user used for authentication by `gocql` client. +- `password` - Cassandra password used for authentication by `gocql` client. +- `keyspace` - *required* - the Cassandra keyspace. +- `datacenter` - the data center filter arg for Cassandra. +- `maxConns` - the max number of connections to this data store for a single TLS configuration. +- `tls` - See TLS below. 
+ +A `sql` data store definition can contain the following values: + +- `user` - user used for authentication. +- `password` - password used for authentication. +- `pluginName` - *required* - SQL database type. + - *Valid values*: `mysql` or `postgres`. +- `databaseName` - *required* - the name of SQL database to connect to. +- `connectAddr` - *required* - the remote addr of the database. +- `connectProtocol` - *required* - the protocol that goes with the `connectAddr` + - *Valid values*: `tcp` or `unix` +- `connectAttributes` - a map of key-value attributes to be sent as part of connect `data_source_name` url. +- `maxConns` - the max number of connections to this data store. +- `maxIdleConns` - the max number of idle connections to this data store +- `maxConnLifetime` - is the maximum time a connection can be alive. +- `numShards` - number of storage shards to use for tables in a sharded sql database (*Default:* 1). +- `tls` - See below. + +`tls` sections may contain: +- `enabled` - *boolean*. +- `serverName` - name of the server hosting the data store. +- `certFile` - path to the cert file. +- `keyFile` - path to the key file. +- `caFile` - path to the ca file. +- `enableHostVerification` - *boolean* - `true` to verify the hostname and server cert (like a wildcard for Cassandra cluster). This option is basically the inverse of `InSecureSkipVerify`. See `InSecureSkipVerify` in http://golang.org/pkg/crypto/tls/ for more info. + +Note: `certFile` and `keyFile` are optional depending on server config, but both fields must be omitted to avoid using a client certificate. + +## log +The `log` section is optional and contains the following possible values: + +- `stdout` - *boolean* - `true` if the output needs to go to standard out. +- `level` - sets the logging level. + - *Valid values* - debug, info, warn, error or fatal. +- `outputFile` - path to output log file. 
+ +## clusterMetadata + +`clusterMetadata` contains all cluster definitions, including those which participate in cross DC. + +An example `clusterMetadata` section: +```yaml +clusterMetadata: + enableGlobalNamespace: false + failoverVersionIncrement: 10 + masterClusterName: "active" + currentClusterName: "active" + clusterInformation: + active: + enabled: true + initialFailoverVersion: 0 + rpcAddress: "127.0.0.1:7233" + #replicationConsumer: + #type: kafka +``` +- `currentClusterName` - *required* - the name of the current cluster. **Warning**: This value is immutable and will be ignored after the first run. +- `enableGlobalNamespace` - *Default:* `false`. +- `replicationConsumer` - determines which method to use to consume replication tasks. Its `type` may be either `kafka` or `rpc`. +- `failoverVersionIncrement` - the increment of each cluster version when failover happens. +- `masterClusterName` - the master cluster name. Only the master cluster can register/update namespaces; all clusters can do namespace failover. +- `clusterInformation` - contains a map of cluster names to `ClusterInformation` definitions. `ClusterInformation` sections consist of: + - `enabled` - *boolean* + - `initialFailoverVersion` + - `rpcAddress` - indicates the remote service address (host:port). The host can be a DNS name. Use the `dns:///` prefix to enable round-robin between the IP addresses of a DNS name. + +## services +The `services` section contains configuration keyed by service role type.
There are four supported service roles: + +- `frontend` +- `matching` +- `worker` +- `history` + +Below is a minimal example of a `frontend` service definition under `services`: +```yaml +services: + frontend: + rpc: + grpcPort: 8233 + membershipPort: 8933 + bindOnLocalHost: true + metrics: + statsd: + hostPort: "127.0.0.1:8125" + prefix: "temporal_standby" + +``` + +There are two sections defined under each service heading: + +### rpc - *required* +`rpc` contains settings related to the way a service interacts with other services. The following values are supported: + +- `grpcPort` - the port on which gRPC will listen. +- `membershipPort` - used by the membership listener. +- `bindOnLocalHost` - uses `localhost` as the listener address. +- `bindOnIP` - used to bind the service to a specific IP (e.g. `0.0.0.0`). See `net.ParseIP` for the supported syntax. Only IPv4 is supported. Mutually exclusive with the `bindOnLocalHost` option. +- `disableLogging` - disables all logging for rpc. +- `logLevel` - the desired log level. + +**Note**: Port values are currently expected to be consistent among role types across all hosts. + +### metrics +`metrics` contains configuration for the metrics subsystem keyed by provider name. There are three supported providers: + +- `statsd` +- `prometheus` +- `m3` + +The `statsd` section supports the following settings: + +- `hostPort` - the statsd server host:port. +- `prefix` - specific prefix in reporting to `statsd`. +- `flushInterval` - maximum interval for sending packets. (*Default* 1 second). +- `flushBytes` - specifies the maximum UDP packet size you wish to send. (*Default* 1432 bytes). + +Additionally, metrics supports the following non-provider specific settings: + +- `tags` - the set of key-value pairs to be reported. +- `prefix` - sets the prefix to all outgoing metrics.
+ +## kafka +The `kafka` section describes the configuration needed to connect to all Kafka clusters and supports the following values: + +- `tls` - uses the TLS structure as under the `persistence` section. +- `clusters` - map of named `ClusterConfig` definitions, which describe the configuration for each Kafka cluster. A `ClusterConfig` definition contains a list of brokers using the configuration value `brokers`. +- `topics` - map of topics to Kafka clusters. +- `temporal-cluster-topics`- map of named `TopicList` definitions. +- `applications` - map of named `TopicList` definitions. + +A `TopicList` definition describes the topic names for each cluster and contains the following required values: +- `topic` +- `retryTopic` +- `dlqTopic` + +Below is a sample `kafka` section: + +```yaml +kafka: + tls: + enabled: false + certFile: "" + keyFile: "" + caFile: "" + clusters: + test: + brokers: + - 127.0.0.1:9092 + topics: + active: + cluster: test + active-dlq: + cluster: test + standby: + cluster: test + standby-dlq: + cluster: test + other: + cluster: test + other-dlq: + cluster: test + temporal-cluster-topics: + active: + topic: active + dlq-topic: active-dlq + standby: + topic: standby + dlq-topic: standby-dlq + other: + topic: other + dlq-topic: other-dlq +``` + +## publicClient +`publicClient` is a required section that contains a single value `hostPort` that is used to specify IPv4 address or DNS name and port to reach a frontend client. + +Example: +```yaml +publicClient: + hostPort: "localhost:8933" +``` + +Use `dns:///` prefix to enable round-robin between IP address for DNS name. 
diff --git a/versioned_docs/version-0.25.0/learn-task-lists.md b/versioned_docs/version-0.25.0/learn-task-lists.md new file mode 100644 index 0000000000..b87690e7f2 --- /dev/null +++ b/versioned_docs/version-0.25.0/learn-task-lists.md @@ -0,0 +1,30 @@ +--- +id: learn-task-lists +title: Task Lists +--- + +When a workflow invokes an activity, it sends the ```ScheduleActivityTask``` [decision](/docs/learn-glossary#decision) to the +Temporal service. As a result, the service updates the workflow state and dispatches +an [activity task](/docs/learn-glossary#activity-task) to a worker that implements the activity. +Instead of calling the worker directly, an intermediate queue is used. So the service adds an _activity task_ to this +queue and a worker receives the task using a long poll request. +Temporal calls this queue used to dispatch activity tasks an *activity task list*. + +Similarly, when a workflow needs to handle an external event, a decision task is created. +*A Decision task list* is used to deliver it to the workflow worker (also called _decider_). + +While Temporal task lists are queues, they have some differences from commonly used queuing technologies. +The main one is that they do not require explicit registration and are created on demand. The number of task lists +is not limited. A common use case is to have a task list per worker process and use it to deliver activity tasks +to the process. Another use case is to have a task list per pool of workers. + +There are multiple advantages of using a task list to deliver tasks instead of invoking an activity +worker through a synchronous RPC: + +* Worker doesn't need to have any open ports, which is more secure. +* Worker doesn't need to advertise itself through DNS or any other network discovery mechanism. +* When all workers are down, messages are persisted in a task list waiting for the workers to recover. +* A worker polls for a message only when it has spare capacity, so it never gets overloaded. 
+* Automatic load balancing across a large number of workers. +* Task lists support server side throttling. This allows you to limit the task dispatch rate to the pool of workers and still support adding tasks at a higher rate when spikes happen. +* Task lists can be used to route a request to specific pools of workers or even a specific process. diff --git a/versioned_docs/version-0.25.0/learn-topology.md b/versioned_docs/version-0.25.0/learn-topology.md new file mode 100644 index 0000000000..ae30a09cc9 --- /dev/null +++ b/versioned_docs/version-0.25.0/learn-topology.md @@ -0,0 +1,54 @@ +--- +id: learn-topology +title: Deployment Topology +--- + +## Overview + +Temporal is a highly scalable fault-oblivious stateful code platform. The fault-oblivious code is a next level of abstraction over commonly used techniques to achieve fault tolerance and durability. + +A common Temporal-based application consists of a Temporal service, workflow and activity workers, and external clients. +Note that both types of workers as well as external clients are roles and can be collocated in a single application process if necessary. + +## Temporal Service + +![Temporal Overview](/img/docs/learn-topology-overview.png) + +At the core of Temporal is a highly scalable multitenant service. The service exposes all its functionality through a strongly typed [Proto API](https://github.com/temporalio/temporal-proto/blob/master/workflowservice/service.proto). + +Internally it depends on a persistent store. Currently, Apache Cassandra and MySQL stores are supported out of the box. For listing workflows using complex predicates, an Elasticsearch cluster can be used. + +Temporal service is responsible for keeping workflow state and associated durable timers. It maintains internal queues (called task lists) which are used to dispatch tasks to external workers. + +Temporal service is multitenant.
Therefore it is expected that multiple pools of workers implementing different use cases connect to the same service instance. For example, at Uber a single service is used by more than a hundred applications. At the same time some external customers deploy an instance of Temporal service per application. For local development, a local Temporal service instance configured through docker-compose is used. + + +![Temporal Overview](/img/docs/temporal-overview.svg) + +## Workflow Worker + +Temporal reuses terminology from the _workflow automation_ domain. So fault-oblivious stateful code is called a workflow. + +The Temporal service does not execute workflow code directly. The workflow code is hosted by an external (from the service point of view) _workflow worker_ process. These processes receive _decision tasks_ that contain events that the workflow is expected to handle from the Temporal service, deliver them to the workflow code, and communicate workflow _decisions_ back to the service. + +As workflow code is external to the service, it can be implemented in any language that can talk to the service's gRPC API. Currently Java and Go clients are production ready, while Python and C# clients are under development. Let us know if you are interested in contributing a client in your preferred language. + +The Temporal service API doesn't impose any specific workflow definition language. So a specific worker can be implemented to execute practically any existing workflow specification. The model the Temporal team chose to support out of the box is based on the idea of a durable function. Durable functions are as close as possible to application business logic with minimal plumbing required. + +## Activity Worker + +Workflow fault-oblivious code is immune to infrastructure failures. But it has to communicate with the imperfect external world where failures are common. All communication to the external world is done through activities.
Activities are pieces of code that can perform any application-specific action like calling a service, updating a database record, or downloading a file from Amazon S3. Temporal activities are very feature-rich compared to queuing systems. Example features are task routing to specific processes, infinite retries, heartbeats, and unlimited execution time. + +Activities are hosted by _activity worker_ processes that receive _activity tasks_ from the Temporal service, invoke correspondent activity implementations and report back task completion statuses. + +## External Clients + +Workflow and activity workers host workflow and activity code. But to create a workflow instance (an execution in Temporal terminology) the `StartWorkflowExecution` Temporal service API call should be used. Usually, workflows are started by outside entities like UIs, microservices or CLIs. + +These entities can also: + +- notify workflows about asynchronous external events in the form of signals +- synchronously query workflow state +- synchronously wait for a workflow completion +- cancel, terminate, restart, and reset workflows +- search for specific workflows using list API diff --git a/versioned_docs/version-0.25.0/learn-workflow-filtering.md b/versioned_docs/version-0.25.0/learn-workflow-filtering.md new file mode 100644 index 0000000000..619134b55c --- /dev/null +++ b/versioned_docs/version-0.25.0/learn-workflow-filtering.md @@ -0,0 +1,291 @@ +--- +id: learn-workflow-filtering +title: Filtering Workflows +--- + +Temporal supports creating workflows with customized key-value pairs, updating the information within the workflow code, and then listing/searching workflows with a SQL-like query. For example, you can create workflows with keys `city` and `age`, then search all workflows with `city = seattle and age > 22`. + +Also note that normal workflow properties like start time and workflow type can be queried as well. 
For example, the following query could be specified when [listing workflows from the CLI](/docs/learn-cli#list-closed-or-open-workflow-executions) or using the list APIs ([Go](https://pkg.go.dev/go.temporal.io/temporal/client#Client), [Java](https://static.javadoc.io/com.uber.cadence/cadence-client/2.6.0/com/uber/cadence/WorkflowService.Iface.html#ListWorkflowExecutions-com.uber.cadence.ListWorkflowExecutionsRequest-)): + +```sql +WorkflowType = "main.Workflow" and Status != 0 and (StartTime > "2019-06-07T16:46:34-08:00" or CloseTime > "2019-06-07T16:46:34-08:00") order by StartTime desc +``` + +## Memo vs Search Attributes + +Temporal offers two methods for creating workflows with key-value pairs: memo and search attributes. Memo can only be provided on workflow start. Also, memo data are not indexed, and are therefore not searchable. Memo data are visible when listing workflows using the list APIs. Search attribute data are indexed so you can search workflows by querying on these attributes. However, search attributes require the use of Elasticsearch. + +Memo and search attributes are available in the Go client in [StartWorkflowOptions](https://pkg.go.dev/go.temporal.io/temporal/internal#StartWorkflowOptions). + +```go +type StartWorkflowOptions struct { + // ... + + // Memo - Optional non-indexed info that will be shown in list workflow. + Memo map[string]interface{} + + // SearchAttributes - Optional indexed info that can be used in query of List/Scan/Count workflow APIs (only + // supported when Temporal server is using Elasticsearch). The key and value type must be registered on Temporal server side. + // Use GetSearchAttributes API to get valid key and corresponding value type.
+ SearchAttributes map[string]interface{} +} +``` + +In the Java client, the _WorkflowOptions.Builder_ has similar methods for [memo](https://static.javadoc.io/com.uber.cadence/cadence-client/2.6.0/com/uber/cadence/client/WorkflowOptions.Builder.html#setMemo-java.util.Map-) and [search attributes](https://static.javadoc.io/com.uber.cadence/cadence-client/2.6.0/com/uber/cadence/client/WorkflowOptions.Builder.html#setSearchAttributes-java.util.Map-). + +Some important distinctions between memo and search attributes: + +- Memo can support all data types because it is not indexed. Search attributes only support basic data types (including String, Int, Float, Bool, Datetime) because it is indexed by Elasticsearch. +- Memo does not restrict on key names. Search attributes require that keys are allowlisted before using them because Elasticsearch has a limit on indexed keys. +- Memo doesn't require Temporal clusters to depend on Elasticsearch while search attributes only works with Elasticsearch. + +## Search Attributes (Go Client Usage) + +When using the Temporal Go client, provide key-value pairs as SearchAttributes in [StartWorkflowOptions](https://pkg.go.dev/go.temporal.io/temporal/internal#StartWorkflowOptions). + +SearchAttributes is `map[string]interface{}` where the keys need to be allowlisted so that Temporal knows the attribute key name and value type. The value provided in the map must be the same type as registered. 
+ +### Allow Listing Search Attributes + +Start by querying the list of search attributes using the CLI: + +```bash +$ tctl --namespace samples-namespace cl get-search-attr ++---------------------+------------+ +| KEY | VALUE TYPE | ++---------------------+------------+ +| Status | INT | +| CloseTime | INT | +| CustomBoolField | BOOL | +| CustomDatetimeField | DATETIME | +| CustomNamespace | KEYWORD | +| CustomDoubleField | DOUBLE | +| CustomIntField | INT | +| CustomKeywordField | KEYWORD | +| CustomStringField | STRING | +| NamespaceId | KEYWORD | +| ExecutionTime | INT | +| HistoryLength | INT | +| RunId | KEYWORD | +| StartTime | INT | +| WorkflowId | KEYWORD | +| WorkflowType | KEYWORD | ++---------------------+------------+ +``` + +Use the admin CLI to add a new search attribute: + +```bash +tctl --namespace samples-namespace adm cl asa --search_attr_key NewKey --search_attr_type 1 +``` + +The numbers for the attribute types map as follows: + +- 0 = String +- 1 = Keyword +- 2 = Int +- 3 = Double +- 4 = Bool +- 5 = DateTime + +#### Keyword vs String + +Note that **Keyword** and **String** are concepts taken from Elasticsearch. Each word in a **String** is considered a searchable keyword. For a UUID, that can be problematic as Elasticsearch will index each portion of the UUID separately. To have the whole string considered as a searchable keyword, use the **Keyword** type. + +For example, key RunId with value "2dd29ab7-2dd8-4668-83e0-89cae261cfb1" + +- as a **Keyword** will only be matched by RunId = "2dd29ab7-2dd8-4668-83e0-89cae261cfb1" (or in the future with [regular expressions](https://github.com/uber/cadence/issues/1137)) +- as a **String** will be matched by RunId = "2dd8", which may cause unwanted matches + +**Note:** The String type cannot be used in an Order By query.
+ +There are some pre-allowlisted search attributes that are handy for testing: + +- CustomKeywordField +- CustomIntField +- CustomDoubleField +- CustomBoolField +- CustomDatetimeField +- CustomStringField + +Their types are indicated in their names. + +### Value Types + +Here are the Search Attribute value types and their corresponding Go types: + +- Keyword = string +- Int = int64 +- Double = float64 +- Bool = bool +- Datetime = time.Time +- String = string + +### Limit + +We recommend limiting the number of Elasticsearch indexes by enforcing limits on the following: + +- Number of keys: 100 per workflow +- Size of value: 2kb per value +- Total size of key and values: 40kb per workflow + +Temporal reserves keys like NamespaceId, WorkflowId, and RunId. These can only be used in list queries. The values are not updatable. + +### Upsert Search Attributes in Workflow + +[UpsertSearchAttributes](https://pkg.go.dev/go.temporal.io/temporal/workflow#UpsertSearchAttributes) is used to add or update search attributes from within the workflow code. + +Go samples for search attributes can be found at [github.com/temporalio/temporal-go-samples](https://github.com/temporalio/temporal-go-samples/tree/master/searchattributes). + +UpsertSearchAttributes will merge attributes into the existing map in the workflow. Consider this example workflow code: + +```go +func MyWorkflow(ctx workflow.Context, input string) error { + + attr1 := map[string]interface{}{ + "CustomIntField": 1, + "CustomBoolField": true, + } + workflow.UpsertSearchAttributes(ctx, attr1) + + attr2 := map[string]interface{}{ + "CustomIntField": 2, + "CustomKeywordField": "seattle", + } + workflow.UpsertSearchAttributes(ctx, attr2) + + return nil +} +``` + +After the second call to UpsertSearchAttributes, the map will contain: + +```go +map[string]interface{}{ + "CustomIntField": 2, + "CustomBoolField": true, + "CustomKeywordField": "seattle", +} +``` + +There is no support for removing a field.
To achieve a similar effect, set the field to a sentinel value. For example, to remove `CustomKeywordField`, update it to "impossibleVal". Then searching `CustomKeywordField != "impossibleVal"` will match workflows with CustomKeywordField not equal to "impossibleVal", which **includes** workflows without the CustomKeywordField set. + +Use `workflow.GetInfo` to get current search attributes. + +### ContinueAsNew and Cron + +When performing a [ContinueAsNew](/docs/go-continue-as-new) or using [Cron](/docs/go-distributed-cron), search attributes (and memo) will be carried over to the new run by default. + +## Query Capabilities + +Query workflows by using a SQL-like where clause when [listing workflows from the CLI](/docs/learn-cli#list-closed-or-open-workflow-executions) or using the list APIs ([Go](https://pkg.go.dev/go.temporal.io/temporal/client#Client), [Java](https://static.javadoc.io/com.uber.cadence/cadence-client/2.6.0/com/uber/cadence/WorkflowService.Iface.html#ListWorkflowExecutions-com.uber.cadence.ListWorkflowExecutionsRequest-)). + +Note that you will only see workflows from one namespace when querying. + +### Supported Operators + +- AND, OR, () +- =, !=, >, >=, <, <= +- IN +- BETWEEN ... AND +- ORDER BY + +### Default Attributes + +These can be found by using the CLI get-search-attr command or the GetSearchAttributes API.
The names and types are as follows: + +| KEY | VALUE TYPE | +| ------------------- | ---------- | +| Status | INT | +| CloseTime | INT | +| CustomBoolField | BOOL | +| CustomDatetimeField | DATETIME | +| CustomNamespace | KEYWORD | +| CustomDoubleField | DOUBLE | +| CustomIntField | INT | +| CustomKeywordField | KEYWORD | +| CustomStringField | STRING | +| NamespaceId | KEYWORD | +| ExecutionTime | INT | +| HistoryLength | INT | +| RunId | KEYWORD | +| StartTime | INT | +| WorkflowId | KEYWORD | +| WorkflowType | KEYWORD | + +There are some special considerations for these attributes: + +- Status, CloseTime, NamespaceId, ExecutionTime, HistoryLength, RunId, StartTime, WorkflowId, WorkflowType are reserved by Temporal and are read-only +- Status is a mapping of int to state: + - 0 = unknown + - 1 = running + - 2 = completed + - 3 = failed + - 4 = canceled + - 5 = terminated + - 6 = continuedasnew + - 7 = timedout +- StartTime, CloseTime and ExecutionTime are stored as INT, but support queries using both EpochTime in nanoseconds, and string in RFC3339 format (ex. "2006-01-02T15:04:05+07:00") +- CloseTime and HistoryLength are only present in closed workflows +- ExecutionTime lets Retry/Cron users query workflows that will run in the future + +To list only open workflows, add `CloseTime = missing` to the end of the query. + +If you use the retry or cron feature and want to query workflows that will start execution in a certain time range, you can add predicates on ExecutionTime. For example: `ExecutionTime > "2019-01-01T10:00:00-07:00"`. Note that if predicates on ExecutionTime are included, only cron workflows or workflows that need to retry will be returned.
+ +### General Notes About Queries + +- Pagesize default is 1000, and cannot be larger than 10k +- Range query on Temporal timestamp (StartTime, CloseTime, ExecutionTime) cannot be larger than 9223372036854775807 (maxInt64 - 1001) +- Query by time range will have 1ms resolution +- Query column names are case sensitive +- ListWorkflow may take longer when retrieving a large number of workflows (10M+) +- To retrieve a large number of workflows without caring about order, use the ScanWorkflow API +- To efficiently count the number of workflows, use the CountWorkflow API + +## Tools Support + +### CLI + +Support for search attributes is available as of version 0.6.0 of the Temporal server. You can also use the CLI from the latest [CLI Docker image](https://hub.docker.com/r/temporalio/tctl) (supported on 0.6.4 or later). + +#### Start Workflow with Search Attributes + +```bash +tctl --ns samples-namespace workflow start --tl helloWorldGroup --wt main.Workflow --et 60 --dt 10 -i '"vancexu"' -search_attr_key 'CustomIntField | CustomKeywordField | CustomStringField | CustomBoolField | CustomDatetimeField' -search_attr_value '5 | keyword1 | vancexu test | true | 2019-06-07T16:16:36-08:00' +``` + +#### Search Workflows with List API + +```bash +tctl --ns samples-namespace wf list -q '(CustomKeywordField = "keyword1" and CustomIntField >= 5) or CustomKeywordField = "keyword2"' -psa +``` + +```bash +tctl --ns samples-namespace wf list -q 'CustomKeywordField in ("keyword2", "keyword1") and CustomIntField >= 5 and CloseTime between "2018-06-07T16:16:36-08:00" and "2019-06-07T16:46:34-08:00" order by CustomDatetimeField desc' -psa +``` + +To list only open workflows, add `CloseTime = missing` to the end of the query. 
+ +Note that queries can support more than one type of filter: + +```bash +tctl --ns samples-namespace wf list -q 'WorkflowType = "main.Workflow" and (WorkflowId = "1645a588-4772-4dab-b276-5f9db108b3a8" or RunId = "be66519b-5f09-40cd-b2e8-20e4106244dc")' +``` + +```bash +tctl --ns samples-namespace wf list -q 'WorkflowType = "main.Workflow" and StartTime > "2019-06-07T16:46:34-08:00" and CloseTime = missing' +``` + +### Web UI Support + +Queries are supported in [Temporal Web](https://github.com/temporalio/temporal-web) as of release 0.20.0. Use the "Basic/Advanced" button to switch to "Advanced" mode and type the query in the search box. + +## Local Testing + +1. Increase Docker memory to higher than 6GB. Navigate to Docker -> Preferences -> Advanced -> Memory +2. Get the Temporal Docker compose file. Run `curl -L https://github.com/temporalio/temporal/releases/download/v0.25.0/docker.tar.gz | tar -xz --strip-components 1 docker/docker-compose-es.yml` +3. Start Temporal Docker (which contains Apache Kafka, Apache Zookeeper, and Elasticsearch) using `docker-compose -f docker-compose-es.yml up` +4. From the Docker output log, make sure Elasticsearch and Temporal started correctly. If you encounter an insufficient disk space error, try `docker system prune -a --volumes` +5. Register a local namespace and start using it. `tctl --ns samples-namespace n re` +6. Allowlist a search attribute. `tctl --ns samples-namespace adm cl asa --search_attr_key NewKey --search_attr_type 1` + +Note: starting a workflow with search attributes but without Elasticsearch will succeed as normal, but the workflow will not be searchable and will not be shown in list results.
diff --git a/versioned_docs/version-0.25.0/learn-workflows.md b/versioned_docs/version-0.25.0/learn-workflows.md new file mode 100644 index 0000000000..b81d45eb5d --- /dev/null +++ b/versioned_docs/version-0.25.0/learn-workflows.md @@ -0,0 +1,129 @@ +--- +id: learn-workflows +title: Fault-Oblivious Stateful Workflow Code +sidebar_label: Workflows +description: Temporal core abstraction is a fault-oblivious stateful workflow. The state of the workflow code, including local variables and threads it creates, is immune to process and Temporal service failures. +--- + +import ReactPlayer from 'react-player' + +## Overview + +Temporal core abstraction is a **fault-oblivious stateful workflow**. The state of the workflow code, including local variables and threads it creates, is immune to process and Temporal service failures. +This is a very powerful concept as it encapsulates state, processing threads, durable timers and event handlers. + +## Example + +Let's look at a use case. A customer signs up for an application with a trial period. After the period, if the customer has not cancelled, he should be charged once a month for the renewal. The customer has to be notified by email about the charges and should be able to cancel the subscription at any time. + +The business logic of this use case is not very complicated and can be expressed in a few dozen lines of code. But any practical implementation has to ensure that the business process is fault tolerant and scalable. There are various ways to approach the design of such a system. + +One approach is to center it around a database. An application process would periodically scan database tables for customers in specific states, execute necessary actions, and update the state to reflect that. While feasible, this approach has various drawbacks. The most obvious is that the state machine of the customer state quickly becomes extremely complicated. 
For example, charging a credit card or sending emails can fail due to a downstream system unavailability. The failed calls might need to be retried for a long time, ideally using an exponential retry policy. These calls should be throttled to not overload external systems. There should be support for poison pills to avoid blocking the whole process if a single customer record cannot be processed for whatever reason. The database-based approach also usually has performance problems. Databases are not efficient for scenarios that require constant polling for records in a specific state. + +Another commonly employed approach is to use a timer service and queues. Any update is pushed to a queue and then a worker that consumes from it updates a database and possibly pushes more messages in downstream queues. For operations that require scheduling, an external timer service can be used. This approach usually scales much better because a database is not constantly polled for changes. But it makes the programming model more complex and error prone as usually there is no transactional update between a queuing system and a database. + +With Temporal, the entire logic can be encapsulated in a simple durable function that directly implements the business logic. Because the function is stateful, the implementer doesn't need to employ any additional systems to ensure durability and fault tolerance. + +Here is an example workflow that implements the subscription management use case. It is in Java, but Go is also supported. The Python and .NET libraries are under active development. 
+ +```java +public interface SubscriptionWorkflow { + @WorkflowMethod + void execute(String customerId); +} + +public class SubscriptionWorkflowImpl implements SubscriptionWorkflow { + + private final SubscriptionActivities activities = + Workflow.newActivityStub(SubscriptionActivities.class); + + @Override + public void execute(String customerId) { + activities.sendWelcomeEmail(customerId); + try { + boolean trialPeriod = true; + while (true) { + Workflow.sleep(Duration.ofDays(30)); + activities.chargeMonthlyFee(customerId); + if (trialPeriod) { + activities.sendEndOfTrialEmail(customerId); + trialPeriod = false; + } else { + activities.sendMonthlyChargeEmail(customerId); + } + } + } catch (CancellationException e) { + activities.processSubscriptionCancellation(customerId); + activities.sendSorryToSeeYouGoEmail(customerId); + } + } +} +``` + +Again, note that this code directly implements the business logic. If any of the invoked operations (aka activities) takes a long time, the code is not going to change. It is okay to block on `chargeMonthlyFee` for a day if the downstream processing service is down that long. The same way that blocking sleep for 30 days is a normal operation inside the workflow code. + +Temporal has practically no scalability limits on the number of open workflow instances. So even if your site has hundreds of millions of consumers, the above code is not going to change. + +The commonly asked question by developers that learn Temporal is "How do I handle workflow worker process failure/restart in my workflow"? The answer is that you do not. **The workflow code is completely oblivious to any failures and downtime of workers or even the Temporal service itself**. As soon as they are recovered and the workflow needs to handle some event, like timer or an activity completion, the current state of the workflow is fully restored and the execution is continued. 
The only reason for a workflow failure is the workflow business code throwing an exception, not underlying infrastructure outages. + +Another commonly asked question is whether a worker can handle more workflow instances than its cache size or number of threads it can support. The answer is that a workflow, when in a blocked state, can be safely removed from a worker. +Later it can be resurrected on a different or the same worker when the need (in the form of an external event) arises. So a single worker can handle millions of open workflow executions, assuming it can handle the update rate. + +## State Recovery and Determinism + +The workflow state recovery utilizes event sourcing which puts a few restrictions on how the code is written. The main restriction is that the workflow code must be deterministic which means that it must produce exactly the same result if executed multiple times. This rules out any external API calls from the workflow code as external calls can fail intermittently or change their output at any time. That is why all communication with the external world should happen through activities. For the same reason, workflow code must use Temporal APIs to get current time, sleep, and create new threads. + +To understand the Temporal execution model as well as the recovery mechanism, watch the following webcast. The animation covering recovery starts at 15:50. + + + +## Id Uniqueness + +Workflow Id is assigned by a client when starting a workflow. It is usually a business level Id like customer Id or order Id. + +Temporal guarantees that there can be only one workflow (across all workflow types) with a given Id open per [namespace](/docs/learn-glossary#namespace) at any time. An attempt to start a workflow with the same Id is going to fail with a `WorkflowExecutionAlreadyStarted` error. 
+ +Whether an attempt to start a workflow succeeds when there is a completed workflow with the same Id depends on the `WorkflowIdReusePolicy` option: + +- `AllowDuplicateFailedOnly` means that it is allowed to start a workflow only if a previously executed workflow with the same Id failed. +- `AllowDuplicate` means that it is allowed to start independently of the previous workflow completion status. +- `RejectDuplicate` means that it is not allowed to start a workflow execution using the same workflow Id at all. + +The default is `AllowDuplicateFailedOnly`. + +To distinguish multiple runs of a workflow with the same workflow Id, Temporal identifies a workflow with two Ids: `Workflow Id` and `Run Id`. `Run Id` is a service-assigned UUID. To be precise, any workflow is uniquely identified by a triple: `Namespace`, `Workflow Id` and `Run Id`. + +## Child Workflow + +A workflow can execute other workflows as `child workflows`. A child workflow completion or failure is reported to its parent. + +Some reasons to use child workflows are: + +- A child workflow can be hosted by a separate set of workers which don't contain the parent workflow code. So it would act as a separate service that can be invoked from multiple other workflows. +- A single workflow has a limited size. For example, it cannot execute 100k activities. Child workflows can be used to partition the problem into smaller chunks. One parent with 1000 children each executing 1000 activities is 1 million executed activities. +- A child workflow can be used to manage some resource using its Id to guarantee uniqueness. For example, a workflow that manages host upgrades can have a child workflow per host (host name being a workflow Id) and use them to ensure that all operations on the host are serialized. +- A child workflow can be used to execute some periodic logic without blowing up the parent history size. When a parent starts a child, the child executes the periodic logic, calling continue as new as many times as needed, and then completes. 
From the parent's point of view, it is just a single child workflow invocation. + +The main limitation of a child workflow versus collocating all the application logic in a single workflow is lack of the shared state. Parent and child can communicate only through asynchronous signals. But if there is a tight coupling between them, it might be simpler to use a single workflow and just rely on a shared object state. + +We recommend starting from a single workflow implementation if your problem has bounded size in terms of number of executed activities and processed signals. It is more straightforward than multiple asynchronously communicating workflows. + +## Workflow Timeouts + +It's often necessary to limit the amount of time a specific workflow can be running. To support this, the following three parameters can be provided to workflow options: + +- `WorkflowExecutionTimeout` is the maximum time a workflow should be allowed to run including retries and continue as new. Use `WorkflowRunTimeout` to limit execution time of a single run. +- `WorkflowRunTimeout` is the maximum time a single workflow run should be allowed. +- `WorkflowTaskTimeout` is the timeout for processing a workflow task starting from the point when a worker pulled the task. If a workflow task is lost, it is retried after this timeout. + +## Workflow Retries + +Workflow code is unaffected by infrastructure level downtime and failures. But it still can fail due to business logic level failures. For example, an activity can fail due to exceeding the retry interval without the error being handled by application code, or the workflow code can have a bug. + +Some workflows require a guarantee that they keep running even in presence of such failures. To support such use cases, an optional exponential _retry policy_ can be specified when starting a workflow. When it is specified, a workflow failure restarts a workflow from the beginning after the calculated retry interval. 
Following are the retry policy parameters: + +- `InitialInterval` is a delay before the first retry. +- `BackoffCoefficient`. Retry policies are exponential. The coefficient specifies how fast the retry interval is growing. The coefficient of 1 means that the retry interval is always equal to the `InitialInterval`. +- `MaximumInterval` specifies the maximum interval between retries. Useful for coefficients of more than 1. +- `MaximumAttempts` specifies how many times to attempt to execute a workflow in the presence of failures. If this limit is exceeded, the workflow fails without retry. +- `NonRetryableErrorReasons` allows to specify errors that shouldn't be retried. For example, retrying invalid arguments error doesn't make sense in some scenarios. diff --git a/versioned_docs/version-0.25.0/license.md b/versioned_docs/version-0.25.0/license.md new file mode 100644 index 0000000000..78eda78851 --- /dev/null +++ b/versioned_docs/version-0.25.0/license.md @@ -0,0 +1,28 @@ +--- +id: license +title: MIT License +--- + +``` +Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. + +Copyright (c) 2017 Uber Technologies, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +``` \ No newline at end of file diff --git a/versioned_docs/version-0.25.0/mdx.md b/versioned_docs/version-0.25.0/mdx.md new file mode 100644 index 0000000000..1462851227 --- /dev/null +++ b/versioned_docs/version-0.25.0/mdx.md @@ -0,0 +1,17 @@ +--- +id: mdx +title: Powered by MDX +--- + +You can write JSX and use React components within your Markdown thanks to [MDX](https://mdxjs.com/). + +export const Highlight = ({children, color}) => ( {children} ); + +Docusaurus green and Facebook blue are my favorite colors. + +I can write **Markdown** alongside my _JSX_! diff --git a/versioned_docs/version-0.25.0/overview.md b/versioned_docs/version-0.25.0/overview.md new file mode 100644 index 0000000000..0d732a4da5 --- /dev/null +++ b/versioned_docs/version-0.25.0/overview.md @@ -0,0 +1,44 @@ +--- +id: overview +title: Overview +sidebar_label: Overview +description: This guide will help you build your own resilient applications using Temporal Workflow as Code™ +--- + +import ReactPlayer from 'react-player' + +A large number of use cases span beyond a single request-reply, require tracking +of a complex state, respond to asynchronous events, and communicate to external unreliable dependencies. +The usual approach to building such applications is a hodgepodge of stateless services, +databases, cron jobs, and queuing systems. This negatively impacts the developer productivity as most of the code is +dedicated to plumbing, obscuring the actual business logic behind a myriad of low-level details. Such systems frequently have availability problems as it is hard to keep all the components healthy. 
+ +The Temporal solution is a [_fault-oblivious stateful_ programming model](/docs/learn-workflows) that obscures most of the complexities of building scalable distributed applications. In essence, Temporal provides a durable virtual memory that is not +linked to a specific process, and preserves the full application state, including function stacks, with local variables across all sorts of host and software failures. +This allows you to write code using the full power of a programming language while Temporal takes care of durability, availability, and scalability of the application. + +Temporal consists of a programming framework (or client library) and a managed service (or backend). +The framework enables developers to author and coordinate tasks in familiar languages +([Go](https://github.com/temporalio/temporal-go-sdk/) and [Java](https://github.com/temporalio/temporal-java-sdk) +are supported today with some projects in [Python](https://github.com/firdaus/cadence-python) and +[C#](https://github.com/nforgeio/neonKUBE/tree/master/Lib/Neon.Cadence) +via a [proxy](https://github.com/nforgeio/neonKUBE/tree/master/Go/src/github.com/loopieio/cadence-proxy) +in development). + +The framework enables developers to author fault-oblivious code in familiar languages. +([Go](https://github.com/temporalio/temporal-go-sdk/) and [Java](https://github.com/temporalio/temporal-java-sdk) +are in production. [Python](https://github.com/firdaus/cadence-python) and +[C#](https://github.com/nforgeio/neonKUBE/tree/master/Lib/Neon.Cadence) are under development). + +The backend service is stateless and relies on a persistent store. Currently, Cassandra and MySQL stores +are supported. An adapter to any other database that provides multi-row single shard transactions +can be added. There are different service deployment models. At Uber, our team operates multitenant clusters +that are shared by hundreds of applications. 
+ +Watch Maxim's talk from the Uber Open Summit for an introduction to the Temporal programming model and value proposition. + + + +The GitHub repo for the Temporal server is [temporalio/temporal](https://github.com/temporalio/temporal). The docker +image for the Temporal server is available on Docker Hub at +[temporalio/server](https://hub.docker.com/r/temporalio/server). diff --git a/versioned_docs/version-0.25.0/use-cases-batch-job.md b/versioned_docs/version-0.25.0/use-cases-batch-job.md new file mode 100644 index 0000000000..f21343f680 --- /dev/null +++ b/versioned_docs/version-0.25.0/use-cases-batch-job.md @@ -0,0 +1,11 @@ +--- +id: use-cases-batch-job +title: Batch Job +sidebar_label: Batch Job +--- + +Not all batch jobs are pure data manipulation programs. For those that are, the existing big data frameworks are the best fit. +But if processing a record requires external API calls that might fail and potentially take a long time, Temporal might be preferable. + +One of our internal Uber customers uses Temporal for end of month statement generation. Each statement requires calls to multiple +microservices and some statements can be really large. Temporal was chosen because it provides hard guarantees around durability of the financial data and seamlessly deals with long running operations, retries, and intermittent failures. diff --git a/versioned_docs/version-0.25.0/use-cases-big-ml.md b/versioned_docs/version-0.25.0/use-cases-big-ml.md new file mode 100644 index 0000000000..061e254937 --- /dev/null +++ b/versioned_docs/version-0.25.0/use-cases-big-ml.md @@ -0,0 +1,9 @@ +--- +id: use-cases-big-ml +title: Big Data and ML +sidebar_label: Big Data and ML +--- + +A lot of companies build custom ETL and ML training and deployment solutions. Temporal is a good fit for a control plane for such applications. + +One important feature of Temporal is its ability to route task execution to a specific process or host. 
It is useful to control how ML models and other large files are allocated to hosts. For example, if an ML model is partitioned by city, the requests should be routed to hosts that contain the corresponding city model. diff --git a/versioned_docs/version-0.25.0/use-cases-cicd.md b/versioned_docs/version-0.25.0/use-cases-cicd.md new file mode 100644 index 0000000000..140b68ef14 --- /dev/null +++ b/versioned_docs/version-0.25.0/use-cases-cicd.md @@ -0,0 +1,15 @@ +--- +id: use-cases-cicd +title: CI/CD +sidebar_label: CI/CD +--- + +Implementing CI/CD pipelines and deployment of applications to containers or virtual or physical machines is a non-trivial process. +Its business logic has to deal with complex requirements around rolling upgrades, canary deployments, and rollbacks. +Temporal is a perfect platform for building a deployment solution because it provides all the necessary guarantees and abstractions +allowing developers to focus on the business logic. + +Example production systems: + +* Uber internal deployment infrastructure +* Update push to IoT devices diff --git a/versioned_docs/version-0.25.0/use-cases-dsl.md b/versioned_docs/version-0.25.0/use-cases-dsl.md new file mode 100644 index 0000000000..60c7af467b --- /dev/null +++ b/versioned_docs/version-0.25.0/use-cases-dsl.md @@ -0,0 +1,15 @@ +--- +id: use-cases-dsl +title: DSL Workflows +sidebar_label: DSL Workflows +--- + +Temporal supports implementing business logic directly in programming languages like Java and Go. But there are cases when +using a domain-specific language (DSL) is more appropriate. Or there might be a legacy system that uses some form of DSL for process definition but it is not operationally stable and scalable. This also applies to more recent systems like Apache Airflow, various BPMN engines and AWS Step Functions. + +An application that interprets the DSL definition can be written using the Temporal SDK. 
It automatically becomes highly fault tolerant, scalable, and durable when running on Temporal. Cadence has been used to deprecate several Uber internal DSL engines. The customers continue to use existing process definitions, but Temporal is used as an execution engine. + +There are multiple benefits of unifying all company workflow engines on top of Temporal. The most obvious one is that +it is more efficient to support a single product instead of many. It is also difficult to beat the scalability and stability of +Temporal, which every engine integrated with it inherits. Additionally, the ability to share activities across "engines" +might be a huge benefit in some cases. diff --git a/versioned_docs/version-0.25.0/use-cases-event-driven.md b/versioned_docs/version-0.25.0/use-cases-event-driven.md new file mode 100644 index 0000000000..b7e2cc32d0 --- /dev/null +++ b/versioned_docs/version-0.25.0/use-cases-event-driven.md @@ -0,0 +1,16 @@ +--- +id: use-cases-event-driven +title: Event Driven Application +sidebar_label: Event Driven Application +--- + +Many applications listen to multiple event sources, update the state of corresponding business entities, +and have to execute actions if some state is reached. +Temporal is a good fit for many of these. It has direct support for asynchronous events (aka signals), +has a simple programming model that obscures a lot of complexity +around state persistence, and ensures external action execution through built-in retries. 
+ +Real-world examples: + +* Fraud detection where workflow reacts to events generated by consumer behavior +* Customer loyalty program where the workflow accumulates reward points and applies them when requested diff --git a/versioned_docs/version-0.25.0/use-cases-interactive.md b/versioned_docs/version-0.25.0/use-cases-interactive.md new file mode 100644 index 0000000000..9d9517551f --- /dev/null +++ b/versioned_docs/version-0.25.0/use-cases-interactive.md @@ -0,0 +1,8 @@ +--- +id: use-cases-interactive +title: Interactive Application +sidebar_label: Interactive Application +--- + +Temporal is performant and scalable enough to support interactive applications. It can be used to track UI session state and +at the same time execute background operations. For example, while placing an order a customer might need to go through several screens while a background task evaluates the customer for fraudulent activity. diff --git a/versioned_docs/version-0.25.0/use-cases-operational-management.md b/versioned_docs/version-0.25.0/use-cases-operational-management.md new file mode 100644 index 0000000000..2e6fd820f2 --- /dev/null +++ b/versioned_docs/version-0.25.0/use-cases-operational-management.md @@ -0,0 +1,10 @@ +--- +id: use-cases-operational-management +title: Operational Management +sidebar_label: Operational Management +--- + +Imagine that you have to create a self operating database similar to Amazon RDS. Temporal is used in multiple projects +that automate managing and automatic recovery of various products like MySQL, Elasticsearch and Apache Cassandra. + +Such systems are usually a mixture of different use cases. They need to monitor the status of resources using polling. They have to execute orchestration API calls to administrative interfaces of a database. They have to provision new hardware or Docker instances if necessary. They need to push configuration updates and perform other actions like backups periodically. 
diff --git a/versioned_docs/version-0.25.0/use-cases-orchestration.md b/versioned_docs/version-0.25.0/use-cases-orchestration.md new file mode 100644 index 0000000000..1122cec0cd --- /dev/null +++ b/versioned_docs/version-0.25.0/use-cases-orchestration.md @@ -0,0 +1,18 @@ +--- +id: use-cases-orchestration +title: Microservice Orchestration and Saga +sidebar_label: Microservice Orchestration +--- + +It is common for business processes to be implemented as multiple microservice calls. +The implementation must guarantee that all of the calls eventually succeed, even in the face of prolonged downstream service failures. +In some cases, instead of trying to complete the process by retrying for a long time, compensation rollback logic should be executed. +[Saga Pattern](https://microservices.io/patterns/data/saga.html) is one way to standardize on compensation APIs. + +Temporal is a perfect fit for such scenarios. It guarantees that workflow code eventually completes, has built-in support +for unlimited exponential activity retries and simplifies coding of the compensation logic. It also gives full visibility into the state of each workflow, in contrast to an orchestration based on queues where getting a current status of each individual request is practically impossible. 
+ +Following are some real-world examples of Temporal-based service orchestration scenarios: + + * [Using Temporal workflows to spin up Kubernetes by Banzai Cloud](https://banzaicloud.com/blog/introduction-to-cadence/) + * [Improving the User Experience with Uber’s Customer Obsession Ticket Routing Workflow and Orchestration Engine](https://eng.uber.com/customer-obsession-ticket-routing-workflow-and-orchestration-engine/) diff --git a/versioned_docs/version-0.25.0/use-cases-partitioned-scan.md b/versioned_docs/version-0.25.0/use-cases-partitioned-scan.md new file mode 100644 index 0000000000..2b6b417f26 --- /dev/null +++ b/versioned_docs/version-0.25.0/use-cases-partitioned-scan.md @@ -0,0 +1,14 @@ +--- +id: use-cases-partitioned-scan +title: Storage Scan +sidebar_label: Storage Scan +--- + +It is common to have large data sets partitioned across a large number of hosts or databases, or having billions of files in an Amazon S3 bucket. +Temporal is an ideal solution for implementing the full scan of such data in a scalable and resilient way. The standard pattern +is to run an activity (or multiple parallel activities for partitioned data sets) that performs the scan and heartbeats its progress +back to Temporal. In the case of a host failure, the activity is retried on a different host and continues execution from the last reported progress. + +A real-world example: + +* Temporal internal system workflow that performs periodic scan of all workflow execution records diff --git a/versioned_docs/version-0.25.0/use-cases-periodic.md b/versioned_docs/version-0.25.0/use-cases-periodic.md new file mode 100644 index 0000000000..edf28ffecc --- /dev/null +++ b/versioned_docs/version-0.25.0/use-cases-periodic.md @@ -0,0 +1,18 @@ +--- +id: use-cases-periodic +title: Periodic Execution (aka Distributed Cron) +sidebar_label: Periodic Execution +--- + +Periodic execution, frequently referred to as distributed cron, is when you execute business logic periodically. 
The advantage of Temporal for these scenarios is that it provides guaranteed execution, sophisticated error handling, retry policies, and visibility into execution history. + +Another important dimension is scale. Some use cases require periodic execution for a large number of entities. +At Uber, there are applications that create periodic workflows per customer. +Imagine 100+ million parallel cron jobs that don't require a separate batch processing framework. + +Periodic execution is often part of other use cases. For example, once a month report generation is a periodic service orchestration. Another example is an event-driven workflow that accumulates loyalty points for a customer and applies those points once a month. + +There are many real-world examples of Temporal periodic executions, such as the following: + + * An Uber backend service that recalculates various statistics for each [hex](https://eng.uber.com/h3/) in each city once a minute. + * Monthly Uber for Business report generation. diff --git a/versioned_docs/version-0.25.0/use-cases-polling.md b/versioned_docs/version-0.25.0/use-cases-polling.md new file mode 100644 index 0000000000..4ef145497c --- /dev/null +++ b/versioned_docs/version-0.25.0/use-cases-polling.md @@ -0,0 +1,15 @@ +--- +id: use-cases-polling +title: Polling +sidebar_label: Polling +--- + +Polling is executing a periodic action checking for a state change. Examples are pinging a host, calling a REST API, or listing an Amazon S3 bucket for newly uploaded files. + +Temporal's support for long running activities and unlimited retries makes it a good fit. 
+ +Some real-world use cases: + +* Network, host and service monitoring +* Processing files uploaded to FTP or S3 +* Polling an external API for a specific resource to become available diff --git a/versioned_docs/version-0.25.0/use-cases-provisioning.md b/versioned_docs/version-0.25.0/use-cases-provisioning.md new file mode 100644 index 0000000000..0cbdd9c766 --- /dev/null +++ b/versioned_docs/version-0.25.0/use-cases-provisioning.md @@ -0,0 +1,16 @@ +--- +id: use-cases-provisioning +title: Infrastructure Provisioning +sidebar_label: Infrastructure Provisioning +--- + +Provisioning a new datacenter or a pool of machines in a public cloud is a potentially long running operation with +a lot of possibilities for intermittent failures. The scale is also a concern when tens or even hundreds of thousands of resources should be provisioned and configured. One useful feature for provisioning scenarios is Temporal support for routing activity execution to a specific process or host. + +A lot of operations require some sort of locking to ensure that no more than one mutation is executed on a resource at a time. +Temporal provides strong guarantees of uniqueness by business Id. This can be used to implement such locking behavior in a fault tolerant and scalable manner. 
+ +Some real-world use cases: + + * [Using Temporal workflows to spin up Kubernetes, by Banzai Cloud](https://banzaicloud.com/blog/introduction-to-cadence/) + * [Using Temporal to orchestrate cluster life cycle in HashiCorp Consul, by HashiCorp](https://www.youtube.com/watch?v=kDlrM6sgk2k&feature=youtu.be&t=1188) diff --git a/versioned_sidebars/version-0.25.0-sidebars.json b/versioned_sidebars/version-0.25.0-sidebars.json new file mode 100644 index 0000000000..422ac91d17 --- /dev/null +++ b/versioned_sidebars/version-0.25.0-sidebars.json @@ -0,0 +1,250 @@ +{ + "version-0.25.0/docs": [ + { + "type": "category", + "label": "Getting Started", + "items": [ + { + "type": "doc", + "id": "version-0.25.0/overview" + }, + { + "type": "doc", + "id": "version-0.25.0/license" + }, + { + "type": "doc", + "id": "version-0.25.0/installing-server" + } + ] + }, + { + "type": "category", + "label": "Use Cases", + "items": [ + { + "type": "doc", + "id": "version-0.25.0/use-cases-periodic" + }, + { + "type": "doc", + "id": "version-0.25.0/use-cases-orchestration" + }, + { + "type": "doc", + "id": "version-0.25.0/use-cases-polling" + }, + { + "type": "doc", + "id": "version-0.25.0/use-cases-event-driven" + }, + { + "type": "doc", + "id": "version-0.25.0/use-cases-partitioned-scan" + }, + { + "type": "doc", + "id": "version-0.25.0/use-cases-batch-job" + }, + { + "type": "doc", + "id": "version-0.25.0/use-cases-provisioning" + }, + { + "type": "doc", + "id": "version-0.25.0/use-cases-cicd" + }, + { + "type": "doc", + "id": "version-0.25.0/use-cases-operational-management" + }, + { + "type": "doc", + "id": "version-0.25.0/use-cases-interactive" + }, + { + "type": "doc", + "id": "version-0.25.0/use-cases-dsl" + }, + { + "type": "doc", + "id": "version-0.25.0/use-cases-big-ml" + } + ] + }, + { + "type": "category", + "label": "Learn", + "items": [ + { + "type": "doc", + "id": "version-0.25.0/learn-glossary" + }, + { + "type": "doc", + "id": "version-0.25.0/learn-workflows" + }, + { + 
"type": "doc", + "id": "version-0.25.0/learn-activities" + }, + { + "type": "doc", + "id": "version-0.25.0/learn-events" + }, + { + "type": "doc", + "id": "version-0.25.0/learn-queries" + }, + { + "type": "doc", + "id": "version-0.25.0/learn-topology" + }, + { + "type": "doc", + "id": "version-0.25.0/learn-task-lists" + }, + { + "type": "doc", + "id": "version-0.25.0/learn-server-configuration" + }, + { + "type": "doc", + "id": "version-0.25.0/learn-archival" + }, + { + "type": "doc", + "id": "version-0.25.0/learn-cross-dc-replication" + }, + { + "type": "doc", + "id": "version-0.25.0/learn-workflow-filtering" + }, + { + "type": "doc", + "id": "version-0.25.0/learn-cli" + } + ] + }, + { + "type": "category", + "label": "Java SDK", + "items": [ + { + "type": "doc", + "id": "version-0.25.0/java-quick-start" + }, + { + "type": "doc", + "id": "version-0.25.0/java-workflow-interface" + }, + { + "type": "doc", + "id": "version-0.25.0/java-implementing-workflows" + }, + { + "type": "doc", + "id": "version-0.25.0/java-starting-workflow-executions" + }, + { + "type": "doc", + "id": "version-0.25.0/java-activity-interface" + }, + { + "type": "doc", + "id": "version-0.25.0/java-implementing-activities" + }, + { + "type": "doc", + "id": "version-0.25.0/java-versioning" + }, + { + "type": "doc", + "id": "version-0.25.0/java-distributed-cron" + } + ] + }, + { + "type": "category", + "label": "Go SDK", + "items": [ + { + "type": "doc", + "id": "version-0.25.0/go-quick-start" + }, + { + "type": "doc", + "id": "version-0.25.0/go-sdk-video-tutorial" + }, + { + "type": "doc", + "id": "version-0.25.0/go-workers" + }, + { + "type": "doc", + "id": "version-0.25.0/go-create-workflows" + }, + { + "type": "doc", + "id": "version-0.25.0/go-activities" + }, + { + "type": "doc", + "id": "version-0.25.0/go-execute-activity" + }, + { + "type": "doc", + "id": "version-0.25.0/go-child-workflows" + }, + { + "type": "doc", + "id": "version-0.25.0/go-retries" + }, + { + "type": "doc", + "id": 
"version-0.25.0/go-error-handling" + }, + { + "type": "doc", + "id": "version-0.25.0/go-signals" + }, + { + "type": "doc", + "id": "version-0.25.0/go-continue-as-new" + }, + { + "type": "doc", + "id": "version-0.25.0/go-side-effect" + }, + { + "type": "doc", + "id": "version-0.25.0/go-queries" + }, + { + "type": "doc", + "id": "version-0.25.0/go-activity-async-completion" + }, + { + "type": "doc", + "id": "version-0.25.0/go-workflow-testing" + }, + { + "type": "doc", + "id": "version-0.25.0/go-versioning" + }, + { + "type": "doc", + "id": "version-0.25.0/go-sessions" + }, + { + "type": "doc", + "id": "version-0.25.0/go-distributed-cron" + }, + { + "type": "doc", + "id": "version-0.25.0/go-tracing" + } + ] + } + ] +} diff --git a/versions.json b/versions.json index 98743075aa..7f4481ad06 100644 --- a/versions.json +++ b/versions.json @@ -1,3 +1,4 @@ [ + "0.25.0", "0.23.1" ]