# Prefect Hello World

In [1]:
import os
os.environ['PREFECT__FLOWS__CHECKPOINTING'] = 'true'

import prefect
from prefect import task, Task, Flow, Parameter
from prefect.engine.results import LocalResult
from prefect.engine.executors import DaskExecutor
from dask.distributed import Client
from loguru import logger
import datetime as dt
from time import sleep

print('Prefect checkpoints ON:', prefect.config.flows.checkpointing)

Prefect checkpoints ON: True


## Basic flow

In [2]:
@task
def t1():
    logger.info('t1 input: -')
    return 2.

@task
def t2(x):
    logger.info('t2 input: {}'.format(x))
    return x+1.

@task
def t3(x):
    logger.info('t3 input: {}'.format(x))
    return x+1.5

@task 
def t4(x, y):
    logger.info('t3 input: {} {}'.format(x, y))
    return x+y

with Flow('big_flow') as flow:
    r1 = t1()
    r2 = t2(r1)
    r3 = t3(r2)
    r4 = t4(r2, r3)

state = flow.run()
print('Output: ', state.result[r4].result)

[2020-10-06 10:49:55] INFO - prefect.FlowRunner | Beginning Flow run for 'big_flow'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't1': Starting task run...


2020-10-06 17:49:55.133 | INFO     | __main__:t1:3 - t1 input: -


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't2': Starting task run...


2020-10-06 17:49:55.167 | INFO     | __main__:t2:8 - t2 input: 2.0


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't3': Starting task run...


2020-10-06 17:49:55.204 | INFO     | __main__:t3:13 - t3 input: 3.0


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't3': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't4': Starting task run...


2020-10-06 17:49:55.238 | INFO     | __main__:t4:18 - t3 input: 3.0 4.5


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't4': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  7.5


## Extending an existing flow

In [3]:
with Flow('small_flow') as flow1:
    r1 = t1()
    r2 = t2(r1)

with flow1.copy() as flow2:
    flow2.name = 'extended_flow'
    r3 = t3(r2)
    r4 = t4(r2, r3)

state = flow1.run()
print('Output: ', state.result[r2].result)

print()
state = flow2.run()
print('Output: ', state.result[r4].result)

[2020-10-06 10:49:55] INFO - prefect.FlowRunner | Beginning Flow run for 'small_flow'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't1': Starting task run...


2020-10-06 17:49:55.298 | INFO     | __main__:t1:3 - t1 input: -


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't2': Starting task run...


2020-10-06 17:49:55.331 | INFO     | __main__:t2:8 - t2 input: 2.0


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  3.0

[2020-10-06 10:49:55] INFO - prefect.FlowRunner | Beginning Flow run for 'extended_flow'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't1': Starting task run...


2020-10-06 17:49:55.375 | INFO     | __main__:t1:3 - t1 input: -


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't2': Starting task run...


2020-10-06 17:49:55.414 | INFO     | __main__:t2:8 - t2 input: 2.0


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't3': Starting task run...


2020-10-06 17:49:55.453 | INFO     | __main__:t3:13 - t3 input: 3.0


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't3': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't4': Starting task run...


2020-10-06 17:49:55.488 | INFO     | __main__:t4:18 - t3 input: 3.0 4.5


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't4': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  7.5


In [4]:
with Flow('small_flow') as flow1:
    r1 = t1()
    r2 = t2(r1)

with Flow('extended_flow') as flow2:
    flow2.update(flow1)
    r3 = t3(r2)
    r4 = t4(r2, r3)

state = flow1.run()
print('Output: ', state.result[r2].result)

print()
state = flow2.run()
print('Output: ', state.result[r4].result)

[2020-10-06 10:49:55] INFO - prefect.FlowRunner | Beginning Flow run for 'small_flow'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't1': Starting task run...


2020-10-06 17:49:55.550 | INFO     | __main__:t1:3 - t1 input: -


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't2': Starting task run...


2020-10-06 17:49:55.591 | INFO     | __main__:t2:8 - t2 input: 2.0


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  3.0

[2020-10-06 10:49:55] INFO - prefect.FlowRunner | Beginning Flow run for 'extended_flow'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't1': Starting task run...


2020-10-06 17:49:55.643 | INFO     | __main__:t1:3 - t1 input: -


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't2': Starting task run...


2020-10-06 17:49:55.687 | INFO     | __main__:t2:8 - t2 input: 2.0


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't3': Starting task run...


2020-10-06 17:49:55.733 | INFO     | __main__:t3:13 - t3 input: 3.0


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't3': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't4': Starting task run...


2020-10-06 17:49:55.780 | INFO     | __main__:t4:18 - t3 input: 3.0 4.5


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't4': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  7.5


## Parametrized flow

In [5]:
with Flow('param_flow') as flow:
    y = Parameter('y')
    scaling = Parameter('scaling', 1.)
    r1 = t1()
    r2 = t2(r1)
    r4 = scaling*t4(r2, y)

    
state = flow.run(dict(y=4.5))
print('Output: ', state.result[r4].result)
print()
state = flow.run(dict(y=4.5, scaling=2.))
print('Output: ', state.result[r4].result)

[2020-10-06 10:49:55] INFO - prefect.FlowRunner | Beginning Flow run for 'param_flow'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't1': Starting task run...


2020-10-06 17:49:55.837 | INFO     | __main__:t1:3 - t1 input: -


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't2': Starting task run...


2020-10-06 17:49:55.874 | INFO     | __main__:t2:8 - t2 input: 2.0


[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 'y': Starting task run...
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 'y': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 'scaling': Starting task run...
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 'scaling': finished task run for task with final state: 'Success'
[2020-10-06 10:49:55] INFO - prefect.TaskRunner | Task 't4': Starting task run...


2020-10-06 17:49:55.998 | INFO     | __main__:t4:18 - t3 input: 3.0 4.5


[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 't4': finished task run for task with final state: 'Success'
[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 'Mul': Starting task run...
[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 'Mul': finished task run for task with final state: 'Success'
[2020-10-06 10:49:56] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  7.5

[2020-10-06 10:49:56] INFO - prefect.FlowRunner | Beginning Flow run for 'param_flow'
[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 't1': Starting task run...


2020-10-06 17:49:56.080 | INFO     | __main__:t1:3 - t1 input: -


[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Success'
[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 't2': Starting task run...


2020-10-06 17:49:56.119 | INFO     | __main__:t2:8 - t2 input: 2.0


[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Success'
[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 'y': Starting task run...
[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 'y': finished task run for task with final state: 'Success'
[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 'scaling': Starting task run...
[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 'scaling': finished task run for task with final state: 'Success'
[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 't4': Starting task run...


2020-10-06 17:49:56.226 | INFO     | __main__:t4:18 - t3 input: 3.0 4.5


[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 't4': finished task run for task with final state: 'Success'
[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 'Mul': Starting task run...
[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 'Mul': finished task run for task with final state: 'Success'
[2020-10-06 10:49:56] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  15.0


## Cached flow

In [6]:
@task(cache_for=dt.timedelta(hours=1))
def t1():
    sleep(2)
    logger.info('t1++ input: -')
    return 2.

@task(cache_for=dt.timedelta(hours=1))
def t2(x):
    sleep(2)
    logger.info('t2++ input: {}'.format(x))
    return x+1.


with Flow('cached_flow') as flow:
    r1 = t1()
    r2 = t2(r1)

state = flow.run()
print('Output: ', state.result[r2].result)

print()
state = flow.run()
print('Output: ', state.result[r2].result)

[2020-10-06 10:49:56] INFO - prefect.FlowRunner | Beginning Flow run for 'cached_flow'
[2020-10-06 10:49:56] INFO - prefect.TaskRunner | Task 't1': Starting task run...


2020-10-06 17:49:58.316 | INFO     | __main__:t1:4 - t1++ input: -


[2020-10-06 10:49:58] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Cached'
[2020-10-06 10:49:58] INFO - prefect.TaskRunner | Task 't2': Starting task run...


2020-10-06 17:50:00.356 | INFO     | __main__:t2:10 - t2++ input: 2.0


[2020-10-06 10:50:00] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Cached'
[2020-10-06 10:50:00] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  3.0

[2020-10-06 10:50:00] INFO - prefect.FlowRunner | Beginning Flow run for 'cached_flow'
[2020-10-06 10:50:00] INFO - prefect.TaskRunner | Task 't1': Starting task run...
[2020-10-06 10:50:00] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Cached'
[2020-10-06 10:50:00] INFO - prefect.TaskRunner | Task 't2': Starting task run...
[2020-10-06 10:50:00] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Cached'
[2020-10-06 10:50:00] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  3.0


In [7]:
@task(checkpoint=True, result=LocalResult(dir='cache'), target="{flow_name}_{task_name}")
def t1():
    sleep(2)
    logger.info('t1++ input: -')
    return 2.

@task(checkpoint=True, result=LocalResult(dir='cache'), target="{flow_name}_{task_name}")
def t2(x):
    sleep(2)
    logger.info('t2++ input: {}'.format(x))
    return x+1.


with Flow('persisted_flow') as flow:
    r1 = t1()
    r2 = t2(r1)

state = flow.run()
print('Output: ', state.result[r2].result)

print()
state = flow.run()
print('Output: ', state.result[r2].result)

[2020-10-06 10:50:00] INFO - prefect.FlowRunner | Beginning Flow run for 'persisted_flow'
[2020-10-06 10:50:00] INFO - prefect.TaskRunner | Task 't1': Starting task run...


2020-10-06 17:50:02.513 | INFO     | __main__:t1:4 - t1++ input: -


[2020-10-06 10:50:02] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Success'
[2020-10-06 10:50:02] INFO - prefect.TaskRunner | Task 't2': Starting task run...


2020-10-06 17:50:04.561 | INFO     | __main__:t2:10 - t2++ input: 2.0


[2020-10-06 10:50:04] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Success'
[2020-10-06 10:50:04] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  3.0

[2020-10-06 10:50:04] INFO - prefect.FlowRunner | Beginning Flow run for 'persisted_flow'
[2020-10-06 10:50:04] INFO - prefect.TaskRunner | Task 't1': Starting task run...
[2020-10-06 10:50:04] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Cached'
[2020-10-06 10:50:04] INFO - prefect.TaskRunner | Task 't2': Starting task run...
[2020-10-06 10:50:04] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Cached'
[2020-10-06 10:50:04] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  3.0


In [8]:
@task(checkpoint=True, result=LocalResult(dir='cache'), target="{flow_name}_{task_name}_{parameters}")
def t1():
    sleep(2)
    logger.info('t1++ input: -')
    return 2.

@task(checkpoint=True, result=LocalResult(dir='cache'), target="{flow_name}_{task_name}_{parameters}")
def t2(x, y):
    sleep(2)
    logger.info('t2++ input: {}'.format(x))
    return x+y


with Flow('parametrized_persisted_flow') as flow:
    y = Parameter('y')
    r1 = t1()
    r2 = t2(r1, y)

state = flow.run(dict(y=1.))
print('Output: ', state.result[r2].result)

print()
state = flow.run(dict(y=1.))
print('Output: ', state.result[r2].result)

print()
state = flow.run(dict(y=2.))
print('Output: ', state.result[r2].result)

[2020-10-06 10:50:04] INFO - prefect.FlowRunner | Beginning Flow run for 'parametrized_persisted_flow'
[2020-10-06 10:50:04] INFO - prefect.TaskRunner | Task 't1': Starting task run...


2020-10-06 17:50:06.779 | INFO     | __main__:t1:4 - t1++ input: -


[2020-10-06 10:50:06] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Success'
[2020-10-06 10:50:06] INFO - prefect.TaskRunner | Task 'y': Starting task run...
[2020-10-06 10:50:06] INFO - prefect.TaskRunner | Task 'y': finished task run for task with final state: 'Success'
[2020-10-06 10:50:06] INFO - prefect.TaskRunner | Task 't2': Starting task run...


2020-10-06 17:50:08.856 | INFO     | __main__:t2:10 - t2++ input: 2.0


[2020-10-06 10:50:08] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Success'
[2020-10-06 10:50:08] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  3.0

[2020-10-06 10:50:08] INFO - prefect.FlowRunner | Beginning Flow run for 'parametrized_persisted_flow'
[2020-10-06 10:50:08] INFO - prefect.TaskRunner | Task 't1': Starting task run...
[2020-10-06 10:50:08] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Cached'
[2020-10-06 10:50:08] INFO - prefect.TaskRunner | Task 'y': Starting task run...
[2020-10-06 10:50:08] INFO - prefect.TaskRunner | Task 'y': finished task run for task with final state: 'Success'
[2020-10-06 10:50:08] INFO - prefect.TaskRunner | Task 't2': Starting task run...
[2020-10-06 10:50:08] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Cached'
[2020-10-06 10:50:08] INFO - prefect.FlowRunner | Flow run SUCCESS: all referen

2020-10-06 17:50:11.016 | INFO     | __main__:t1:4 - t1++ input: -


[2020-10-06 10:50:11] INFO - prefect.TaskRunner | Task 't1': finished task run for task with final state: 'Success'
[2020-10-06 10:50:11] INFO - prefect.TaskRunner | Task 'y': Starting task run...
[2020-10-06 10:50:11] INFO - prefect.TaskRunner | Task 'y': finished task run for task with final state: 'Success'
[2020-10-06 10:50:11] INFO - prefect.TaskRunner | Task 't2': Starting task run...


2020-10-06 17:50:13.128 | INFO     | __main__:t2:10 - t2++ input: 2.0


[2020-10-06 10:50:13] INFO - prefect.TaskRunner | Task 't2': finished task run for task with final state: 'Success'
[2020-10-06 10:50:13] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  4.0


## With Dask backend

**Start dask scheduler and workers in the shell:**
```bash
$ dask-scheduler
# start a task scheduler
>>> Scheduler at:   tcp://192.0.0.100:8786

# connect one or multiple workers (computational clusters)
$ dask-worker tcp://192.0.0.100:8786
```

**Connect to the scheduler (Dask DataFrames):**
```python
from dask.distributed import Client
# connect to the scheduler to compute Dask DataFrame
client = Client('tcp://192.0.0.100:8786')
```

**Connect to the scheduler (Prefect):**
```python
from prefect.engine.executors import DaskExecutor
executor = DaskExecutor(address="tcp://192.0.0.100:8786")
flow.run(executor=executor)
```

In [9]:
dask_server_loc = 'tcp://10.5.0.2:8786'

In [10]:
client = Client(dask_server_loc)
client

0,1
Client  Scheduler: tcp://10.5.0.2:8786  Dashboard: http://10.5.0.2:8787/status,Cluster  Workers: 1  Cores: 12  Memory: 67.15 GB


In [11]:
executor = DaskExecutor(address=dask_server_loc)

# Note: loguru logger can't be pickled
# Note: prefect logger doesn't show in stdout with DaskExecutor 
@task(log_stdout=True)
def t1():
    _logger = prefect.context.get("logger")
    _logger.info('t1 input: -')
    return 2.

@task(log_stdout=True)
def t2(x):
    _logger = prefect.context.get("logger")
    _logger.info('t2 input: {}'.format(x))
    return x+1.

with Flow('distritubuted_flow') as flow:
    r1 = t1()
    r2 = t2(r1)

state = flow.run(executor=executor)
print('Output: ', state.result[r2].result)

[2020-10-06 10:50:13] INFO - prefect.FlowRunner | Beginning Flow run for 'distritubuted_flow'
[2020-10-06 10:50:13] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
Output:  3.0
