In [45]:
#pip install prefect[dask] dask-jobqueue
# prefect config set PREFECT_API_URL="https://ard-modeling-service.slac.stanford.edu/api" 

from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Simplify SLURMCluster parameters to basic working configuration
#slac cluster_kwargs = {
 #   "cores": 12,  
  #  "memory": "10GB",  # Correct spacing
   # "shebang": "#!/bin/bash",
   # "account": "facet",
   # "walltime": "00:10:00",
   # "job_script_prologue": ["source ~/.bashrc"],
    # Only basic directives, add more as needed and test each step
   # "job_extra_directives": ["-q debug", "--partition=milano", "--qos=preemptable"],
#}



cluster_kwargs = {
    "cores": 1,  
    "memory": "10GB",  # Correct spacing
    "shebang": "#!/bin/bash",
    "account": "sanjeevc",
    "walltime": "00:10:10",
    #"job_script_prologue": ["source ~/.bashrc"],
    # Only basic directives, add more as needed and test each step
    "job_extra_directives": ["-A m669","-q regular","--constraint=gpu"],
}

cluster = SLURMCluster(**cluster_kwargs)

cluster.scheduler_options = {"interface": "hsn0"}

print(cluster.job_script())


#!/bin/bash

#SBATCH -J dask-worker
#SBATCH -A sanjeevc
#SBATCH -n 1
#SBATCH --cpus-per-task=1
#SBATCH --mem=10G
#SBATCH -t 00:00:10
#SBATCH -A m669
#SBATCH -q regular
#SBATCH --constraint=gpu

/global/common/software/nersc/pe/conda-envs/24.1.0/python-3.11/nersc-python/bin/python -m distributed.cli.dask_worker tcp://128.55.64.34:35169 --name dummy-name --nthreads 1 --memory-limit 9.31GiB --nanny --death-timeout 60



Perhaps you already have a cluster running?
Hosting the HTTP server on port 42807 instead


In [46]:
!prefect config set PREFECT_API_URL="https://ard-modeling-service.slac.stanford.edu/api" 

Set 'PREFECT_API_URL' to 'https://ard-modeling-service.slac.stanford.edu/api'.
[32mUpdated profile 'default'.[0m


In [47]:
slurm_jobs = 10
cluster.scale(jobs=slurm_jobs)
client = Client(cluster)

In [48]:
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://128.55.64.34:42807/status,

0,1
Dashboard: http://128.55.64.34:42807/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.55.64.34:35169,Workers: 0
Dashboard: http://128.55.64.34:42807/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [27]:
# Check cores and memory per worker
for worker, info in client.scheduler_info()['workers'].items():
    print(f"Worker: {worker}, Cores: {info['nthreads']}, Memory: {info['memory_limit'] / 1e9:.2f} GB")


In [28]:
client.scheduler_info()['workers']

{}

In [23]:
from prefect import flow, task
from prefect_dask import DaskTaskRunner

@flow(task_runner=DaskTaskRunner(address=client.scheduler.address))
def workflow(a: float, b: float) -> float:
    output1 = add.submit(a, b)
    output2 = mult.submit(output1, b)
    return output2

@task
def add(a: float, b: float) -> float:
    return a + b

@task
def mult(a: float, b: float) -> float:
    return a * b

In [24]:
output = workflow(1, 2)
print(output.result())

type='unpersisted' artifact_type='result' artifact_description='Unpersisted result of type `float`'


# Temporary Dask Cluster
Some users may prefer to spin up a Dask cluster (i.e. Slurm job) for each indiviual @flow. This is also possible in Prefect. (Note that the example below will complain about an open port if you have already instantiated a Dask cluster above.)



In [None]:
from prefect import flow, task
from prefect_dask import DaskTaskRunner

@flow(task_runner=DaskTaskRunner(cluster_class=SLURMCluster, cluster_kwargs=cluster_kwargs))
def workflow(a: float, b: float) -> float:
    output1 = add.submit(a, b)
    output2 = mult.submit(output1, b)
    return output2

@task
def add(a: float, b: float) -> float:
    return a + b

@task
def mult(a: float, b: float) -> float:
    return a * b

workflow(1, 2).result()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 38719 instead
