In [None]:
# ! pip install flowcept[dask]

In [None]:
! python reset_dask_nb_exec_counts.py   
! rm -f output.log
# Workaround: this notebook runs into a glitch after the Dask cluster is started.
# The script invoked above just resets the notebook's execution counts to fix it;
# the second command deletes output.log if it exists.
# Just save the notebook (Cmd+S) after you run the Dask cluster setup.

In [None]:
# Sleeps are used because these notebooks are being tested automatically as part of the CI/CD. 
# In a normal user interaction, these sleeps would not be necessary.
from time import sleep

In [None]:
def dummy_func1(x, workflow_id=None):
    """Return ``x`` multiplied by two.

    Args:
        x: Value to double; anything that supports ``x * 2``.
        workflow_id: Accepted as a keyword but never read in the body.
            Presumably present so the workflow tag travels with the task
            when it is submitted to Dask -- TODO confirm.

    Returns:
        The result of ``x * 2``.
    """
    doubled = x * 2
    return doubled


def dummy_func2(y, workflow_id=None):
    """Return ``y`` added to itself.

    Args:
        y: Value to add to itself; anything that supports ``y + y``.
        workflow_id: Accepted as a keyword but never read in the body.

    Returns:
        The result of ``y + y``.
    """
    summed = y + y
    return summed


def calculate_batch_and_epochs(z, w, workflow_id=None):
    """Derive a batch size and an epoch count from two numbers.

    Args:
        z: First numeric input.
        w: Second numeric input; used as a divisor, so it must be non-zero.
        workflow_id: Accepted as a keyword but never read in the body.

    Returns:
        A dict with two integer entries:
        ``"batch_size"`` is ``int(z + w + 16)`` and ``"epochs"`` is
        ``int(z / w) + 1``, but never less than 2.
    """
    batch_size = int(z + w + 16)

    # The epoch count is floored at 2.
    epochs = int(z / w) + 1
    if epochs < 2:
        epochs = 2

    return {"batch_size": batch_size, "epochs": epochs}

### Set the env var pointing to the conf file where the ports, hostnames, and other conf variables are read from.

There is an example conf file available in the `resources` directory of the Flowcept repository. You can use it as is if you are running this notebook on your local laptop.

In [None]:
def setup_local_dask_cluster():
    """Start a local Dask cluster and return a client with Flowcept plugins.

    A ``LocalCluster`` with two workers is created, a ``Client`` is connected
    to its scheduler address, and the Flowcept scheduler and worker adapters
    are registered on that client, in that order.

    Returns:
        The connected ``dask.distributed.Client``.
    """
    from dask.distributed import Client, LocalCluster
    from flowcept import FlowceptDaskSchedulerAdapter, FlowceptDaskWorkerAdapter

    local_cluster = LocalCluster(n_workers=2)
    client = Client(local_cluster.scheduler.address)

    # Each adapter is instantiated right before it is registered:
    # scheduler adapter first, worker adapter second.
    for adapter_cls in (FlowceptDaskSchedulerAdapter, FlowceptDaskWorkerAdapter):
        client.register_plugin(adapter_cls())

    return client

## Start Flowcept's Consumer

In [None]:
from flowcept import Flowcept
# Build the Flowcept object with the 'dask' argument; it is started in the
# next cell and stopped near the end of the notebook.
flowcept = Flowcept('dask')

In [None]:
# Start Flowcept (what the section heading refers to as the consumer).
flowcept.start()

## Start Local Dask Cluster

In [None]:
# Create the two-worker local cluster and a client with the Flowcept adapters
# registered, using the helper defined earlier in this notebook.
dask_client = setup_local_dask_cluster()
# Bare expression: lets the notebook display the client.
dask_client

## Client.Submit-based Workflow

In [None]:
import numpy as np
from uuid import uuid4

# Single random float in [0.0, 1.0) that feeds the first task.
# (The original cell drew this value twice and discarded the first draw.)
i1 = np.random.random()

# Fresh identifier shared by every task of this run; it is reused later as the
# filter when querying the captured tasks.
wf_id = f"wf_{uuid4()}"
print(f"Workflow_Id={wf_id}")

# Three chained tasks: o1 feeds o2, and both feed o3. Futures are passed
# directly as arguments to the downstream submits.
o1 = dask_client.submit(dummy_func1, i1, workflow_id=wf_id)
o2 = dask_client.submit(dummy_func2, o1, workflow_id=wf_id)
o3 = dask_client.submit(calculate_batch_and_epochs, o1, o2, workflow_id=wf_id)
print(f"Task3_id={o3.key}")
# .result() blocks until the final task has finished.
print(f"Result={o3.result()}")

In [None]:
from dask.distributed import Client


def incr(n):
    """Return ``n`` plus one."""
    return n + 1


# ``Client()`` with no address starts its own local cluster, separate from
# ``dask_client`` (so the Flowcept adapters are not registered on it).
# Nothing later in the notebook shuts this second client down, so it is used
# as a context manager here: leaving the block closes it instead of leaking it.
with Client() as client:
    futures = client.map(incr, range(1000))
    # Gather inside the block, while the client is still connected.
    results = client.gather(futures)

In [None]:
# Pause for the automated CI run (see the note next to the `sleep` import).
# NOTE(review): presumably gives the captured task data time to be persisted
# before it is queried below -- confirm.
sleep(10)

## Start Flowcept Query API

In [None]:
from flowcept import TaskQueryAPI
# Query handle used in the "Query the database" section below.
query_api = TaskQueryAPI()

In [None]:
# Pause for the automated CI run; not needed in normal interactive use
# (see the note next to the `sleep` import).
sleep(5)

## Query the database

In [None]:
# Restrict the query to the tasks tagged with the workflow id generated in the
# submit-based workflow cell.
_filter = {"workflow_id": wf_id}
tasks = query_api.query(_filter)
# Bare expression: lets the notebook display the query result.
tasks

In [None]:
# Shut down the Dask cluster behind `dask_client`. Exceptions may be printed
# here; see the note in the cell after `flowcept.stop()`.
dask_client.shutdown()

In [None]:
# Pause for the automated CI run; not needed in normal interactive use
# (see the note next to the `sleep` import).
sleep(3)

## Stop consumer

In [None]:
# Stop the Flowcept object that was started with flowcept.start() earlier.
flowcept.stop()

In [None]:
# The exceptions thrown during execution here originate within Dask's Nannies after a client.shutdown(). They only occur when the Dask cluster is shut down, and they occur even if flowcept.stop() is not called.

In [None]:
# End the session once the demo is finished.
# NOTE(review): presumably here so automated (CI) runs terminate the kernel
# cleanly -- confirm; in interactive use this ends the running kernel session.
exit()