In [1]:
#!pip install prefect[dask] dask-jobqueue


In [2]:
#!prefect config set PREFECT_API_URL="https://ard-modeling-service.slac.stanford.edu/api" 


In [8]:
!python -m pip install jupyter-server-proxy

Defaulting to user installation because normal site-packages is not writeable
Collecting jupyter-server-proxy
  Downloading jupyter_server_proxy-4.4.0-py3-none-any.whl.metadata (8.7 kB)
Collecting aiohttp (from jupyter-server-proxy)
  Downloading aiohttp-3.10.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.5 kB)
Collecting simpervisor>=1.0.0 (from jupyter-server-proxy)
  Downloading simpervisor-1.0.0-py3-none-any.whl.metadata (4.3 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->jupyter-server-proxy)
  Downloading aiohappyeyeballs-2.4.0-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->jupyter-server-proxy)
  Downloading aiosignal-1.3.1-py3-none-any.whl.metadata (4.0 kB)
Collecting frozenlist>=1.1.1 (from aiohttp->jupyter-server-proxy)
  Downloading frozenlist-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multidict<7.0,>=4.5 (from aiohttp->jupy

2024-09-10 18:10:42,921 - distributed.core - INFO - Event loop was unresponsive in Scheduler for 6.18s.  This is often caused by long-running GIL-holding functions or moving large chunks of data. This can cause timeouts and instability.


In [6]:

from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Basic SLURMCluster configuration, kept deliberately small.
#
# Scratch notes that used to live here listed the raw directives
#   --qos=debug, --time=5, --nodes=2, --ntasks-per-node=128, --constraint=cpu
# for reference only; the values actually requested are the ones below.
cluster_kwargs = dict(
    cores=1,
    memory="10GB",
    shebang="#!/bin/bash",
    # account="sanjeevc",
    walltime="00:10:00",
    job_script_prologue=["source ~/.bashrc"],
    # Only basic directives for now; add more one at a time and test each step.
    # Each entry shows up as its own "#SBATCH ..." line in the job script.
    job_extra_directives=["-A m669", "-q regular", "--constraint=cpu"],
    # NOTE(review): asynchronous=True is usually paired with `await`; confirm
    # it is intended for the synchronous calls made later in this notebook.
    asynchronous=True,
)
cluster = SLURMCluster(**cluster_kwargs)

# Display the batch script generated from the settings above.
print(cluster.job_script())


#!/bin/bash

#SBATCH -J dask-worker
#SBATCH -n 1
#SBATCH --cpus-per-task=1
#SBATCH --mem=10G
#SBATCH -t 00:10:00
#SBATCH -A m669
#SBATCH -q regular
#SBATCH --constraint=cpu
source ~/.bashrc
/global/common/software/nersc/pe/conda-envs/24.1.0/python-3.11/nersc-python/bin/python -m distributed.cli.dask_worker tcp://<insert-scheduler-address-here>:8786 --name dummy-name --nthreads 1 --memory-limit 9.31GiB --nanny --death-timeout 60



In [4]:
#!dask-scheduler


In [5]:
#!prefect config set PREFECT_API_URL="https://ard-modeling-service.slac.stanford.edu/api" 

In [6]:
#!prefect config set PREFECT_API_URL="http://localhost:8000/api"


In [7]:
%time
slurm_jobs = 10
cluster.scale(jobs=slurm_jobs)
client = Client(cluster)
client

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.48 µs


0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster


2024-09-10 18:10:20,128 - distributed.http.proxy - INFO - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
Perhaps you already have a cluster running?
Hosting the HTTP server on port 45387 instead
2024-09-10 18:10:20,155 - distributed.scheduler - INFO - State start
2024-09-10 18:10:20,164 - distributed.scheduler - INFO -   Scheduler at:  tcp://128.55.64.38:43845
2024-09-10 18:10:20,164 - distributed.scheduler - INFO -   dashboard at:  http://128.55.64.38:45387/status
2024-09-10 18:10:20,166 - distributed.scheduler - INFO - Registering Worker plugin shuffle
2024-09-10 18:10:32,415 - distributed.scheduler - INFO - Receive client connection: Client-9cdbcada-6fda-11ef-9254-a5ba4eb77f07
2024-09-10 18:10:32,417 - distributed.core - INFO - Starting established connection to tcp://128.55.64.38:53216


In [9]:
# Print the dashboard URL reported by the client.
print(client.dashboard_link)

http://128.55.64.38:45387/status


In [9]:
from prefect import flow, get_run_logger, task
from prefect_dask.task_runners import DaskTaskRunner


@task
def say_hello(name: str) -> None:
    """Emit ``hello <name>`` at INFO level via the Prefect run logger."""
    # An earlier note here pointed at
    # https://github.com/PrefectHQ/prefect/issues/5850 ("logs not currently working").
    # NOTE(review): the recorded output of this cell does show the messages,
    # so that note looks stale; confirm before dropping the reference.
    get_run_logger().info(f"hello {name}")


@task
def say_goodbye(name: str) -> None:
    """Emit ``goodbye <name>`` at INFO level via the Prefect run logger."""
    get_run_logger().info(f"goodbye {name}")


# TODO: can the task runner be parameterised so we don't duplicate the flow with dask_kubes_flow?
# see https://github.com/PrefectHQ/prefect/issues/5560


# The task runner is handed the `cluster` object created earlier in this
# notebook rather than being asked to build a cluster of its own.
@flow(task_runner=DaskTaskRunner(cluster=cluster))
def dask(names: list[str]) -> None:
    """Submit one ``say_hello`` and one ``say_goodbye`` task per entry in ``names``.

    Nothing is returned; the submitted futures are not collected here.
    """
    # Tasks have to go through .submit() to run on dask. Called directly they
    # are still tracked, but run immediately and locally instead of async on dask.
    for person in names:
        for greeting in (say_hello, say_goodbye):
            greeting.submit(person)


# Run the flow for four example names when executed as the main module.
if __name__ == "__main__":
    dask(names=["arthur", "trillian", "ford", "marvin"])

04:23:50.774 | [36mINFO[0m    | Task run 'say_goodbye-1' - goodbye trillian
04:23:50.816 | [36mINFO[0m    | Task run 'say_goodbye-1' - Finished in state [32mCompleted[0m()
04:23:50.845 | [36mINFO[0m    | Task run 'say_hello-0' - hello arthur
04:23:50.899 | [36mINFO[0m    | Task run 'say_hello-2' - hello ford
04:23:50.905 | [36mINFO[0m    | Task run 'say_hello-0' - Finished in state [32mCompleted[0m()
04:23:50.929 | [36mINFO[0m    | Task run 'say_goodbye-2' - goodbye ford
04:23:50.959 | [36mINFO[0m    | Task run 'say_hello-2' - Finished in state [32mCompleted[0m()
04:23:50.973 | [36mINFO[0m    | Task run 'say_goodbye-0' - goodbye arthur
04:23:50.984 | [36mINFO[0m    | Task run 'say_goodbye-2' - Finished in state [32mCompleted[0m()
04:23:50.997 | [36mINFO[0m    | Task run 'say_goodbye-3' - goodbye marvin
04:23:51.039 | [36mINFO[0m    | Task run 'say_goodbye-0' - Finished in state [32mCompleted[0m()
04:23:51.045 | [36mINFO[0m    | Task run 'say_goodbye-3'

In [None]:
# Finished with the cluster used above: shut it down through the client.
client.shutdown()


# Temporary Dask Cluster
Some users may prefer to spin up a Dask cluster (i.e. a Slurm job) for each individual `@flow`. This is also possible in Prefect. (Note that the example below will complain about an open port if you have already instantiated a Dask cluster above.)



from prefect import flow, task
from prefect_dask import DaskTaskRunner

@flow(task_runner=DaskTaskRunner(cluster_class=SLURMCluster, cluster_kwargs=cluster_kwargs))
def workflow(a: float, b: float) -> float:
    """Chain ``add`` and ``mult``: submit ``add(a, b)``, then ``mult(<that>, b)``.

    The task runner is configured with ``cluster_class=SLURMCluster`` and the
    notebook's ``cluster_kwargs`` instead of an existing cluster object.

    Returns whatever ``mult.submit`` returns for the second step.
    """
    # The output of the first submission is passed straight in as an input
    # of the second one.
    return mult.submit(add.submit(a, b), b)

@task
def add(a: float, b: float) -> float:
    """Return the sum of ``a`` and ``b``."""
    total = a + b
    return total

@task
def mult(a: float, b: float) -> float:
    """Return the product of ``a`` and ``b``."""
    product = a * b
    return product

# Run the flow with a=1, b=2 and call .result() on what it returns.
# NOTE(review): this assumes the flow call returns an object with a .result()
# method rather than an already-resolved number; confirm for the installed
# Prefect version.
workflow(1, 2).result()