From dcff41a519ac9bd0e71cb8ba6175ad28280673b1 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 17 Apr 2024 05:04:45 -0500 Subject: [PATCH] Update developer diagrams (#4922) Signed-off-by: Ben Sherman --- docs/developer/diagram.md | 10 +++ docs/developer/diagrams/merge-diagrams.sh | 2 +- docs/developer/diagrams/nextflow.cache.mmd | 1 - .../diagrams/nextflow.cloud.aws.nio.mmd | 2 + docs/developer/diagrams/nextflow.config.mmd | 5 +- docs/developer/diagrams/nextflow.executor.mmd | 40 ++++------- .../developer/diagrams/nextflow.extension.mmd | 2 + docs/developer/diagrams/nextflow.script.mmd | 34 ++++----- docs/developer/diagrams/nextflow.secret.mmd | 4 +- docs/developer/diagrams/nextflow.trace.mmd | 25 +++---- docs/developer/diagrams/overview.mmd | 69 +++++++++++++++++++ docs/developer/nextflow.executor.md | 4 +- docs/developer/nextflow.k8s.md | 2 +- docs/developer/nextflow.processor.md | 6 +- docs/developer/nextflow.script.md | 2 +- docs/developer/nextflow.trace.md | 2 +- docs/index.md | 1 + 17 files changed, 132 insertions(+), 79 deletions(-) create mode 100644 docs/developer/diagram.md create mode 100644 docs/developer/diagrams/overview.mmd diff --git a/docs/developer/diagram.md b/docs/developer/diagram.md new file mode 100644 index 0000000000..9b895d0f7b --- /dev/null +++ b/docs/developer/diagram.md @@ -0,0 +1,10 @@ +(diagram-page)= + +# Workflow Diagram + +The following diagram is a high-level overview of the Nextflow source code in a similar style as the {ref}`workflow diagram ` visualization for Nextflow pipelines. Each node and subgraph is a class. Arrows depict the flow of data and/or communication between classes. + +In general, nodes with sharp corners are "record" classes that simply hold information, while nodes with rounded edges are "function" classes that transform some input into an output. Subgraphs are either long-running classes, i.e. "places where things happen", or one of the other two types for which it was useful to expand and show internal details. + +```{mermaid} diagrams/overview.mmd +``` diff --git a/docs/developer/diagrams/merge-diagrams.sh b/docs/developer/diagrams/merge-diagrams.sh index a76321ca92..087422500a 100755 --- a/docs/developer/diagrams/merge-diagrams.sh +++ b/docs/developer/diagrams/merge-diagrams.sh @@ -9,7 +9,7 @@ packages+=("nextflow.cli") # packages+=("nextflow.cloud.aws.nio") # packages+=("nextflow.cloud.azure") # packages+=("nextflow.cloud.google") -# packages+=("nextflow.config") +packages+=("nextflow.config") # packages+=("nextflow.container") packages+=("nextflow.dag") # packages+=("nextflow.executor") diff --git a/docs/developer/diagrams/nextflow.cache.mmd b/docs/developer/diagrams/nextflow.cache.mmd index 105fd444f8..6b241d3ee0 100644 --- a/docs/developer/diagrams/nextflow.cache.mmd +++ b/docs/developer/diagrams/nextflow.cache.mmd @@ -3,7 +3,6 @@ classDiagram %% nextflow.cache %% Session --* CacheDB - %% CacheFactory --> CacheDB : createInstance CacheDB --* CacheStore diff --git a/docs/developer/diagrams/nextflow.cloud.aws.nio.mmd b/docs/developer/diagrams/nextflow.cloud.aws.nio.mmd index 523c5f971a..69242f88b2 100644 --- a/docs/developer/diagrams/nextflow.cloud.aws.nio.mmd +++ b/docs/developer/diagrams/nextflow.cloud.aws.nio.mmd @@ -2,6 +2,8 @@ classDiagram %% %% nextflow.cloud.aws.nio %% + FileSystemProvider <|-- S3FileSystemProvider + S3FileSystemProvider --> S3FileSystem : newFileSystem class S3FileSystem { diff --git a/docs/developer/diagrams/nextflow.config.mmd b/docs/developer/diagrams/nextflow.config.mmd index 611c19a413..6962ab5b1b 100644 --- a/docs/developer/diagrams/nextflow.config.mmd +++ b/docs/developer/diagrams/nextflow.config.mmd @@ -2,9 +2,6 @@ classDiagram %% %% nextflow.config %% - CmdRun --> ConfigMap : run Session --* ConfigMap - - ConfigBuilder --> ConfigParser : build + CmdRun --> ConfigBuilder : run ConfigBuilder --> ConfigMap : build - ConfigParser --> ConfigBase : parse diff --git a/docs/developer/diagrams/nextflow.executor.mmd b/docs/developer/diagrams/nextflow.executor.mmd index 4044bbced4..08c33527c2 100644 --- a/docs/developer/diagrams/nextflow.executor.mmd +++ b/docs/developer/diagrams/nextflow.executor.mmd @@ -2,19 +2,16 @@ classDiagram %% %% nextflow.executor %% - ProcessDef --> Executor : run - %% ExecutorFactory --> Executor : getExecutor + ExecutorFactory --> Executor : getExecutor - TaskProcessor --* Executor - - %% class Executor { - %% name : String - %% monitor : TaskMonitor - %% } - %% Executor --* TaskMonitor - %% Executor --> TaskHandler : submit + class Executor { + name : String + monitor : TaskMonitor + } + Executor --* TaskMonitor + Executor --> TaskHandler : submit - %% TaskMonitor <|-- TaskPollingMonitor + TaskMonitor <|-- TaskPollingMonitor class TaskPollingMonitor { capacity : int @@ -23,17 +20,13 @@ classDiagram dumpInterval : Duration } - %% TaskPollingMonitor <|-- LocalPollingMonitor + TaskPollingMonitor <|-- LocalPollingMonitor class LocalPollingMonitor { maxCpus : int maxMemory : long } - %% class TaskHandler { - %% task : TaskRun - %% } - Executor <|-- AbstractGridExecutor Executor <|-- LocalExecutor %% Executor <|-- NopeExecutor @@ -49,19 +42,10 @@ classDiagram %% PbsExecutor <|-- PbsProExecutor %% SgeExecutor <|-- CrgExecutor - LocalExecutor --> LocalPollingMonitor : init - LocalExecutor --> LocalTaskHandler : submit - LocalExecutor --> NativeTaskHandler : submit - LocalTaskHandler --> BashWrapperBuilder : submit - - AbstractGridExecutor --> TaskPollingMonitor : init - AbstractGridExecutor --> GridTaskHandler : submit - GridTaskHandler --> BashWrapperBuilder : submit - %% TaskHandler <|-- CachedTaskHandler - %% TaskHandler <|-- GridTaskHandler - %% TaskHandler <|-- LocalTaskHandler - %% TaskHandler <|-- NativeTaskHandler + TaskHandler <|-- GridTaskHandler + TaskHandler <|-- LocalTaskHandler + TaskHandler <|-- NativeTaskHandler %% TaskHandler <|-- NopeTaskHandler %% TaskHandler <|-- StoredTaskHandler diff --git a/docs/developer/diagrams/nextflow.extension.mmd b/docs/developer/diagrams/nextflow.extension.mmd index 4cb817c2e7..290246b883 100644 --- a/docs/developer/diagrams/nextflow.extension.mmd +++ b/docs/developer/diagrams/nextflow.extension.mmd @@ -26,3 +26,5 @@ classDiagram OperatorImpl --> ToListOp : toList, toSortedList OperatorImpl --> TransposeOp : transpose OperatorImpl --> UntilOp : until + + WorkflowBinding --> OpCall : invokeMethod diff --git a/docs/developer/diagrams/nextflow.script.mmd b/docs/developer/diagrams/nextflow.script.mmd index aa6869d0cd..4b22d49cf7 100644 --- a/docs/developer/diagrams/nextflow.script.mmd +++ b/docs/developer/diagrams/nextflow.script.mmd @@ -9,7 +9,6 @@ classDiagram session : Session } ScriptRunner --* ScriptFile - ScriptRunner --* Session ScriptRunner --> ScriptParser : execute ScriptParser --> BaseScript : parse @@ -22,8 +21,15 @@ classDiagram projectName : String } - Session --* BaseScript - Session --* ScriptBinding + class BaseScript { + meta : ScriptMeta + entryFlow : WorkflowDef + } + BaseScript --* ScriptBinding + BaseScript --* ScriptMeta + BaseScript --> IncludeDef : include + + IncludeDef --> ScriptParser : load0 class ScriptBinding { scriptPath : Path @@ -33,16 +39,6 @@ classDiagram entryName : String } - IncludeDef --> BaseScript : load0 - - class BaseScript { - meta : ScriptMeta - entryFlow : WorkflowDef - } - BaseScript --* ScriptMeta - %% BaseScript --> ProcessDef : process - %% BaseScript --> WorkflowDef : workflow - class ScriptMeta { scriptPath : Path definitions : Map @@ -68,11 +64,9 @@ classDiagram baseName : String rawBody : Closure~BodyDef~ } - ProcessDef --> ProcessConfig : run - ProcessDef --> BodyDef : run - ProcessDef --> Executor : run - ProcessDef --> TaskProcessor : run - ProcessDef --> ChannelOut : run + ProcessDef --* ProcessConfig + ProcessDef --* BodyDef + ProcessDef --* ChannelOut class WorkflowDef { name : String @@ -82,8 +76,8 @@ classDiagram variableNames : Set~String~ } WorkflowDef --* BodyDef - WorkflowDef --> WorkflowBinding : run - WorkflowDef --> ChannelOut : run + WorkflowDef --* WorkflowBinding + WorkflowDef --* ChannelOut class ProcessConfig { configProperties : Map diff --git a/docs/developer/diagrams/nextflow.secret.mmd b/docs/developer/diagrams/nextflow.secret.mmd index 547ade7b93..1ee67a0363 100644 --- a/docs/developer/diagrams/nextflow.secret.mmd +++ b/docs/developer/diagrams/nextflow.secret.mmd @@ -2,7 +2,9 @@ classDiagram %% %% nextflow.secret %% - CmdRun --> SecretsProvider : run + ConfigBuilder --> SecretsLoader : build + BaseScript --> SecretsLoader : run + BashWrapperBuilder --> SecretsLoader : build SecretsLoader --> SecretsProvider : load SecretsProvider --> Secret : getSecret diff --git a/docs/developer/diagrams/nextflow.trace.mmd b/docs/developer/diagrams/nextflow.trace.mmd index 891cfcc698..9498988d4c 100644 --- a/docs/developer/diagrams/nextflow.trace.mmd +++ b/docs/developer/diagrams/nextflow.trace.mmd @@ -2,21 +2,12 @@ classDiagram %% %% nextflow.trace %% - direction LR + Session --> TraceObserverFactory : init - %% TraceObserverFactory "1" --> "*" TraceObserver : create - %% TraceObserver <|-- AnsiLogObserver - %% TraceObserver <|-- GraphObserver - %% TraceObserver <|-- ReportObserver - %% TraceObserver <|-- TimelineObserver - %% TraceObserver <|-- TraceFileObserver - %% TraceObserver <|-- WebLogObserver - %% TraceObserver <|-- WorkflowStatsObserver - - Session --> AnsiLogObserver : init - Session --> GraphObserver : init - Session --> ReportObserver : init - Session --> TimelineObserver : init - Session --> TraceFileObserver : init - Session --> WebLogObserver : init - Session --> WorkflowStatsObserver : init + TraceObserverFactory "1" --> "*" TraceObserver : create + TraceObserver <|-- AnsiLogObserver + TraceObserver <|-- GraphObserver + TraceObserver <|-- ReportObserver + TraceObserver <|-- TimelineObserver + TraceObserver <|-- TraceFileObserver + TraceObserver <|-- WorkflowStatsObserver diff --git a/docs/developer/diagrams/overview.mmd b/docs/developer/diagrams/overview.mmd new file mode 100644 index 0000000000..a59e924c47 --- /dev/null +++ b/docs/developer/diagrams/overview.mmd @@ -0,0 +1,69 @@ +flowchart TB + subgraph Launcher + subgraph CmdRun + subgraph AssetManager + ScriptFile + end + subgraph ConfigBuilder + ConfigParser([ConfigParser]) + ConfigBase([ConfigBase]) + end + subgraph ScriptRunner + subgraph Session + ConfigMap + DAG + ExecutorFactory([ExecutorFactory]) + subgraph TaskProcessor + TaskRun + end + subgraph Executor + subgraph TaskMonitor + TaskHandler + end + TaskBean + BashWrapperBuilder([BashWrapperBuilder]) + end + TraceRecord + CacheFactory([CacheFactory]) + CacheDB + TraceObserver([TraceObserver]) + end + ScriptParser([ScriptParser]) + BaseScript([BaseScript]) + subgraph ScriptMeta + WorkflowDef([WorkflowDef]) + ProcessDef([ProcessDef]) + FunctionDef([FunctionDef]) + end + IncludeDef([IncludeDef]) + OpCall([OpCall]) + end + ConfigParser --> ConfigBase + ConfigBase --> ConfigMap + ScriptFile --> ScriptParser + ScriptParser --> BaseScript + BaseScript --> WorkflowDef + BaseScript --> ProcessDef + BaseScript --> FunctionDef + BaseScript --> IncludeDef + IncludeDef --> ScriptParser + WorkflowDef --> OpCall + OpCall --> DAG + ProcessDef --> DAG + DAG --> TaskRun + TaskRun --> DAG + ExecutorFactory --> Executor + ConfigMap --> Executor + ProcessDef --> TaskProcessor + ConfigMap --> TaskProcessor + TaskRun --> TaskHandler + TaskRun --> TaskBean + TaskBean --> BashWrapperBuilder + BashWrapperBuilder --> TaskHandler + CacheFactory --> CacheDB + TaskHandler --> CacheDB + TaskHandler --> TraceRecord + TraceRecord --> CacheDB + TaskHandler --> TraceObserver + end + end \ No newline at end of file diff --git a/docs/developer/nextflow.executor.md b/docs/developer/nextflow.executor.md index 65c0771549..8be432393e 100644 --- a/docs/developer/nextflow.executor.md +++ b/docs/developer/nextflow.executor.md @@ -14,8 +14,8 @@ Some classes may be excluded from the above diagram for brevity. ## Notes -The `Executor` class is the base class for all Nextflow executors. The main purpose of an `Executor` is to submit tasks to an underlying compute environment, such as an HPC scheduler or cloud batch executor. It uses a `TaskMonitor` to manage the lifecycle of all tasks and `TaskHandler`s to manage each individual task. Most executors use the same polling monitor, but each executor implements its own task handler to customize it for a particular compute environment. See [nextflow.processor](nextflow.processor.md) for more details about these classes. +The `Executor` class is the base class for all Nextflow executors. The main purpose of an `Executor` is to submit tasks to an underlying compute environment, such as an HPC scheduler or cloud batch executor. It uses a `TaskMonitor` to manage the lifecycle of all tasks and a `TaskHandler` to manage each individual task. Most executors use the same polling monitor, but each executor implements its own task handler to customize it for a particular compute environment. See [nextflow.processor](nextflow.processor.md) for more details about these classes. -The built-in executors include the local executor (`LocalExecutor`) and the various grid executors (SLURM, PBS, LSF, etc), all of which extend `AbstractGridExecutor`. The `LocalExecutor` implements both "local" tasks (processes with a `script` or `shell` block) and "native" tasks (processes with an `exec` block). +The built-in executors include the local executor (`LocalExecutor`) and the various grid executors (SLURM, PBS, LSF, etc), all of which extend `AbstractGridExecutor`. The `LocalExecutor` implements both "script" tasks (processes with a `script` or `shell` block) and "native" tasks (processes with an `exec` block). The `BashWrapperBuilder` is used by executors to generate the wrapper script (`.command.run`) for a task, from a template script called `command-run.txt`, as well as the task configuration and the execution environment. diff --git a/docs/developer/nextflow.k8s.md b/docs/developer/nextflow.k8s.md index 99168b0d6c..1f51e0dac5 100644 --- a/docs/developer/nextflow.k8s.md +++ b/docs/developer/nextflow.k8s.md @@ -14,4 +14,4 @@ Some classes may be excluded from the above diagram for brevity. ## Notes -The Kubernetes integration uses the K8s REST API to interact with K8s clusters, and relies on the `kubectl` command and `~/.kube/config` file for authentication. +The Kubernetes integration uses the K8s HTTP API to interact with K8s clusters, and relies on the `kubectl` command and `~/.kube/config` file for authentication. diff --git a/docs/developer/nextflow.processor.md b/docs/developer/nextflow.processor.md index e58125954f..ec5e2fa0fa 100644 --- a/docs/developer/nextflow.processor.md +++ b/docs/developer/nextflow.processor.md @@ -14,8 +14,10 @@ Some classes may be excluded from the above diagram for brevity. ## Notes -While the [`executor`](nextflow.executor.md) package defines how tasks are submitted to a particular execution environment (such as an HPC scheduler), the `processor` package defines how tasks are created and executed. As such, these packages work closely together, and in fact several components of the `Executor` interface, specifically the `TaskHandler` and `TaskMonitor`, are defined in this package. +While the [`executor`](nextflow.executor.md) package defines how tasks are submitted to a particular execution backend (such as an HPC scheduler), the `processor` package defines how tasks are created and executed. As such, these packages work closely together, and in fact several components of the `Executor` interface, specifically the `TaskHandler` and `TaskMonitor`, are defined in this package. -The `TaskProcessor` is by far the largest and most complex class in this package. It implements both the dataflow operator for a given process as well as the task execution logic. In other words, it defines the mapping from an abstract process definition with concrete channel inputs into concrete task executions. +The `TaskProcessor` is by far the largest and most complex class in this package. It implements both the dataflow operator for a given process as well as the task execution logic. In other words, it defines the mapping from an abstract process definition with input and output channels into concrete task executions. A `TaskRun` represents a particular task execution. There is also `TaskBean`, which is a serializable representation of a task. Legends say that `TaskBean` was originally created to support a "daemon" mode in which Nextflow would run on both the head node and the worker nodes, so the Nextflow "head" would need to send tasks to the Nextflow "workers". This daemon mode was never completed, but echoes of it remain (see `CmdNode`, `DaemonLauncher`, and the `nf-ignite` plugin). + +When a `TaskProcessor` receives a set of input values, it creates a `TaskRun` and submits it to an `Executor`, which in turn submits the task to a underlying execution backend. The executor's `TaskMonitor` then monitors the status of the task, and when it is completed, returns it to the task processor for finalization. If the task completed successfully, the task processor collects the task outputs and emits them on the corresponding output channels. If the task failed, the task processor will retry it if possible, or else return a task error to the workflow run. diff --git a/docs/developer/nextflow.script.md b/docs/developer/nextflow.script.md index 2757987c3f..7c76f1c2bd 100644 --- a/docs/developer/nextflow.script.md +++ b/docs/developer/nextflow.script.md @@ -14,7 +14,7 @@ Some classes may be excluded from the above diagram for brevity. ## Notes -The execution of a Nextflow pipeline occurs in two phases. In the first phase, Nextflow parses and runs the script (using the language extensions in [nextflow.ast](nextflow.ast.md) and [nextflow.extension](nextflow.extension.md)), which constructs the workflow DAG. In the second phase, Nextflow executes the workflow. +The execution of a Nextflow pipeline occurs in two phases. In the first phase, Nextflow parses and runs the script (using the language extensions in [nextflow.ast](nextflow.ast.md) and [nextflow.extension](nextflow.extension.md)), which produces the workflow DAG. In the second phase, Nextflow executes the workflow. ```{note} In DSL1, there was no separation between workflow construction and execution -- dataflow operators were executed as soon as they were constructed. DSL2 introduced lazy execution in order to separate process definition from execution, and thereby facilitate subworkflows and modules. diff --git a/docs/developer/nextflow.trace.md b/docs/developer/nextflow.trace.md index e5e4257c97..8e86092c6e 100644 --- a/docs/developer/nextflow.trace.md +++ b/docs/developer/nextflow.trace.md @@ -14,4 +14,4 @@ Some classes may be excluded from the above diagram for brevity. ## Notes -The `TraceObserver` interface defines a set of hooks into the workflow execution, such as when a workflow starts and completes, when a task starts and completes, and when an output file is published. The `Session` maintains a list of all observers and triggers each hook when the corresponding event occurs. Implementing classes can use these hooks to perform custom behaviors. In fact, this interface is used to implemented several core features, including the various execution reports, DAG renderer, and the integration with Seqera Platform. +The `TraceObserver` interface defines a set of hooks into the workflow execution, such as when a workflow starts and completes, when a task starts and completes, and when an output file is published. The `Session` maintains a list of all observers and triggers each hook when the corresponding event occurs. Implementing classes can use these hooks to perform custom behaviors. In fact, this interface is used to implement several core features, including the various execution reports, DAG renderer, and the integration with Seqera Platform. diff --git a/docs/index.md b/docs/index.md index f18ef2b0f0..69476d91c7 100644 --- a/docs/index.md +++ b/docs/index.md @@ -121,6 +121,7 @@ flux :maxdepth: 1 developer/index +developer/diagram developer/packages developer/plugins ```