In [1]:
#!pip install prefect[dask] dask-jobqueue


In [2]:
#!prefect config set PREFECT_API_URL="https://ard-modeling-service.slac.stanford.edu/api" 


In [3]:

from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Simplify SLURMCluster parameters to basic working configuration

#SBATCH --qos=debug
#SBATCH --time=5
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=128
#SBATCH --constraint=cpu


# Keyword arguments for dask_jobqueue's SLURMCluster. They are kept in a
# named mapping because the temporary-cluster flow later in this notebook
# passes the same `cluster_kwargs` to DaskTaskRunner.
cluster_kwargs = dict(
    cores=1,             # rendered as "--cpus-per-task=1" in the job script
    memory="10GB",       # rendered as "--mem=10G"
    shebang="#!/bin/bash",
    # account="sanjeevc",  # disabled; the account is supplied through "-A" below
    walltime="00:10:00",  # rendered as "-t 00:10:00"
    job_script_prologue=["source ~/.bashrc"],  # emitted before the worker command
    # Each entry becomes its own "#SBATCH ..." line; add more one at a time
    # and re-check the printed script after each change.
    job_extra_directives=["-A m669", "-q regular", "--constraint=cpu"],
    asynchronous=True,
)
cluster = SLURMCluster(**cluster_kwargs)

# Show the batch script generated from the settings above so the directives
# can be checked by eye.
print(cluster.job_script())


#!/bin/bash

#SBATCH -J dask-worker
#SBATCH -n 1
#SBATCH --cpus-per-task=1
#SBATCH --mem=10G
#SBATCH -t 00:10:00
#SBATCH -A m669
#SBATCH -q regular
#SBATCH --constraint=cpu
source ~/.bashrc
/global/common/software/nersc/pe/conda-envs/24.1.0/python-3.11/nersc-python/bin/python -m distributed.cli.dask_worker tcp://<insert-scheduler-address-here>:8786 --name dummy-name --nthreads 1 --memory-limit 9.31GiB --nanny --death-timeout 60



In [4]:
#!dask-scheduler


In [5]:
#!prefect config set PREFECT_API_URL="https://ard-modeling-service.slac.stanford.edu/api" 

In [6]:
#!prefect config set PREFECT_API_URL="http://localhost:8000/api"


In [7]:
%time
slurm_jobs = 10
cluster.scale(jobs=slurm_jobs)
client = Client(cluster)
client

In [8]:
# Print the link held in the client's `dashboard_link` attribute.
print(client.dashboard_link)




In [9]:
from prefect import flow, get_run_logger, task
from prefect_dask.task_runners import DaskTaskRunner


@task
def say_hello(name: str) -> None:
    """Log ``hello <name>`` at INFO level through the Prefect run logger.

    Args:
        name: Text inserted after "hello" in the log message.
    """
    # Known-issue note kept from the original author: logs were reported as not
    # working, see https://github.com/PrefectHQ/prefect/issues/5850
    get_run_logger().info(f"hello {name}")


@task
def say_goodbye(name: str) -> None:
    """Log ``goodbye <name>`` at INFO level through the Prefect run logger.

    Args:
        name: Text inserted after "goodbye" in the log message.
    """
    get_run_logger().info(f"goodbye {name}")


# TODO: can the task runner be parameterised so we don't duplicate the flow with dask_kubes_flow?
# see https://github.com/PrefectHQ/prefect/issues/5560


# The task runner is handed the `cluster` object built earlier in this
# notebook (a SLURMCluster) through the `cluster=` argument.
@flow(task_runner=DaskTaskRunner(cluster=cluster))
def dask(names: list[str]) -> None:
    """Submit one ``say_hello`` and one ``say_goodbye`` task per entry in ``names``.

    Args:
        names: Values passed, one at a time, to both tasks.
    """
    for person in names:
        # Tasks have to go through .submit() to run on dask; called directly
        # they are still tracked but run immediately and locally instead of
        # asynchronously on dask.
        for greeting in (say_hello, say_goodbye):
            greeting.submit(person)


if __name__ == "__main__":
    # Kick off the flow with four sample names.
    dask(names=["arthur", "trillian", "ford", "marvin"])

04:23:50.774 | [36mINFO[0m    | Task run 'say_goodbye-1' - goodbye trillian
04:23:50.816 | [36mINFO[0m    | Task run 'say_goodbye-1' - Finished in state [32mCompleted[0m()
04:23:50.845 | [36mINFO[0m    | Task run 'say_hello-0' - hello arthur
04:23:50.899 | [36mINFO[0m    | Task run 'say_hello-2' - hello ford
04:23:50.905 | [36mINFO[0m    | Task run 'say_hello-0' - Finished in state [32mCompleted[0m()
04:23:50.929 | [36mINFO[0m    | Task run 'say_goodbye-2' - goodbye ford
04:23:50.959 | [36mINFO[0m    | Task run 'say_hello-2' - Finished in state [32mCompleted[0m()
04:23:50.973 | [36mINFO[0m    | Task run 'say_goodbye-0' - goodbye arthur
04:23:50.984 | [36mINFO[0m    | Task run 'say_goodbye-2' - Finished in state [32mCompleted[0m()
04:23:50.997 | [36mINFO[0m    | Task run 'say_goodbye-3' - goodbye marvin
04:23:51.039 | [36mINFO[0m    | Task run 'say_goodbye-0' - Finished in state [32mCompleted[0m()
04:23:51.045 | [36mINFO[0m    | Task run 'say_goodbye-3'

# Temporary Dask Cluster
Some users may prefer to spin up a Dask cluster (i.e., a Slurm job) for each individual `@flow`. This is also possible in Prefect. (Note that the example below will complain about an open port if you have already instantiated a Dask cluster above.)



from prefect import flow, task
from prefect_dask import DaskTaskRunner

@flow(
    task_runner=DaskTaskRunner(
        cluster_class=SLURMCluster,
        cluster_kwargs=cluster_kwargs,
    )
)
def workflow(a: float, b: float) -> float:
    """Chain two tasks: submit ``add(a, b)``, then ``mult`` of that output and ``b``.

    The task runner is configured with ``cluster_class=SLURMCluster`` and the
    notebook's ``cluster_kwargs`` rather than an existing cluster object.

    Args:
        a: First operand passed to ``add``.
        b: Second operand passed to ``add`` and reused as the multiplier.

    Returns:
        Whatever ``mult.submit`` returns.
        NOTE(review): the ``-> float`` annotation presumably describes the
        resolved value rather than the submitted-task handle -- confirm.

    NOTE(review): ``cluster_kwargs`` already contains ``"asynchronous": True``;
    confirm the installed prefect-dask does not also pass ``asynchronous=``
    when it instantiates ``cluster_class``, which would be a duplicate keyword.
    """
    summed = add.submit(a, b)
    return mult.submit(summed, b)

@task
def add(a: float, b: float) -> float:
    """Return the sum ``a + b``."""
    total = a + b
    return total

@task
def mult(a: float, b: float) -> float:
    """Return the product ``a * b``."""
    product = a * b
    return product

# Run the flow with a=1, b=2 and call .result() on what the flow call returns.
# NOTE(review): assumes the flow call returns an object exposing .result() --
# confirm against the installed Prefect version.
workflow(1, 2).result()