In [1]:
# Print a fixed greeting (quick check that the kernel executes code).
print('hello')

hello


In [2]:
import dask, dask.distributed
import dask_jobqueue

In [3]:
# Describe the SLURM job that hosts a Dask worker. The inline notes name the
# line of the generated job script (see cluster.job_script()) each option maps to.
cluster = dask_jobqueue.SLURMCluster(
    # --- size of one job / worker ---
    cores=8,                  # --cpus-per-task=8 and --nthreads 8
    memory='10GB',            # --mem=10G
    processes=1,              # Dask workers per job
    # --- SLURM submission settings ---
    queue='CPU',              # partition: #SBATCH -p CPU
    walltime='00:10:00',      # time limit: #SBATCH -t 00:10:00
    # --- worker supervision ---
    nanny=False,              # workers are started with --no-nanny
    # --- worker network and temporary storage ---
    interface='ib0',          # --interface ib0
    local_directory='/tmp',   # --local-directory /tmp (alternative: '$TMPDIR')
)

# Connect a client to this cluster.
client = dask.distributed.Client(cluster)

In [4]:
# Request 6 SLURM jobs; with processes=1 that is one Dask worker per job.
# The client summary displayed right after this call still reports 0 workers,
# so workers only show up once the jobs have actually started.
cluster.scale(jobs=6)

In [5]:
# Display the client summary: dashboard address plus worker, thread and memory totals.
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://10.102.0.62:8787/status,

0,1
Dashboard: http://10.102.0.62:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.0.62:34140,Workers: 0
Dashboard: http://10.102.0.62:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [6]:
# Show the batch script generated from the SLURMCluster settings
# (#SBATCH directives followed by the dask worker command line).
print(cluster.job_script())

#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -p CPU
#SBATCH -n 1
#SBATCH --cpus-per-task=8
#SBATCH --mem=10G
#SBATCH -t 00:10:00

/gpfs/soma_fs/home/valerio/anaconda3/envs/neuron/bin/python -m distributed.cli.dask_worker tcp://10.102.0.62:34140 --nthreads 8 --memory-limit 9.31GiB --name dummy-name --no-nanny --death-timeout 60 --local-directory /tmp --interface ib0 --protocol tcp://



In [8]:
# List the current user's jobs in the SLURM queue.
!squeue -u $USER

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
             96091       CPU dask-wor  valerio  R       1:56      1 somacpu010
             96092       CPU dask-wor  valerio  R       1:56      1 somacpu011
             96093       CPU dask-wor  valerio  R       1:56      1 somacpu005
             96094       CPU dask-wor  valerio  R       1:56      1 somacpu006
             96095       CPU dask-wor  valerio  R       1:56      1 somacpu007
             96096       CPU dask-wor  valerio  R       1:56      1 somacpu008


In [10]:
# NOTE: changing the number of jobs dynamically does not work on the soma cluster.
cluster.scale(jobs=2)

In [8]:
# List the current user's SLURM jobs to compare against the requested job count.
!squeue -u $USER

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
             96048       CPU dask-wor  valerio  R       0:42      1 somacpu055
             96049       CPU dask-wor  valerio  R       0:42      1 somacpu056
             96050       CPU dask-wor  valerio  R       0:42      1 somacpu057
             96051       CPU dask-wor  valerio  R       0:42      1 somacpu058
             96052       CPU dask-wor  valerio  R       0:42      1 somacpu059
             96053       CPU dask-wor  valerio  R       0:42      1 somacpu060


In [9]:
# Display the client summary again to see how many workers are connected.
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://10.102.0.62:8787/status,

0,1
Dashboard: http://10.102.0.62:8787/status,Workers: 2
Total threads: 16,Total memory: 18.62 GiB

0,1
Comm: tcp://10.102.0.62:36558,Workers: 2
Dashboard: http://10.102.0.62:8787/status,Total threads: 16
Started: Just now,Total memory: 18.62 GiB

0,1
Comm: tcp://10.102.2.55:44663,Total threads: 8
Dashboard: http://10.102.2.55:44402/status,Memory: 9.31 GiB
Nanny: None,
Local directory: /tmp/dask-worker-space/worker-q_a_emd5,Local directory: /tmp/dask-worker-space/worker-q_a_emd5

0,1
Comm: tcp://10.102.2.56:39073,Total threads: 8
Dashboard: http://10.102.2.56:35881/status,Memory: 9.31 GiB
Nanny: None,
Local directory: /tmp/dask-worker-space/worker-km2mtxnc,Local directory: /tmp/dask-worker-space/worker-km2mtxnc


In [9]:
import numpy, dask.array

def calculate_pi(size_in_bytes, number_of_chunks):
    """Estimate pi with a Monte Carlo method on a chunked dask array.

    Points are drawn uniformly from the unit square [0, 1) x [0, 1). The
    fraction of points whose distance from the origin is below 1 approximates
    the area of a quarter unit circle, i.e. pi / 4.

    Parameters
    ----------
    size_in_bytes : int
        Requested size of the random position array. A point consists of two
        8-byte values, so ``size_in_bytes // 16`` points are drawn.
    number_of_chunks : int
        Target number of row-wise chunks. The rows per chunk are
        ``points // number_of_chunks`` (at least 1), so the actual chunk
        count can be slightly higher when the division leaves a remainder.

    Returns
    -------
    float
        The pi estimate. A short report (array size, estimate, absolute
        error against ``numpy.pi``) is printed as a side effect.

    Raises
    ------
    ValueError
        If ``number_of_chunks`` is not positive, or if ``size_in_bytes`` is
        too small to hold a single point.
    """
    values_per_point = 2   # x and y coordinate
    bytes_per_value = 8    # same 8-byte element size the original sizing assumed
    bytes_per_point = values_per_point * bytes_per_value

    if number_of_chunks <= 0:
        raise ValueError(
            f"number_of_chunks must be positive, got {number_of_chunks!r}"
        )

    # Floor division keeps the sizing exact for arbitrarily large integers.
    number_of_points = int(size_in_bytes // bytes_per_point)
    if number_of_points < 1:
        raise ValueError(
            f"size_in_bytes must be at least {bytes_per_point} "
            f"(one point), got {size_in_bytes!r}"
        )

    # Asking for more chunks than there are points used to truncate the chunk
    # height to 0, which is not a usable chunking; clamp it to one row.
    rows_per_chunk = max(1, int(number_of_points // number_of_chunks))

    # 2D random positions array using dask.array
    xy = dask.array.random.uniform(
        low=0.0, high=1.0,
        size=(number_of_points, values_per_point),
        # the chunk size determines the number of tasks
        chunks=(rows_per_chunk, values_per_point),
    )

    # Boolean mask: True where the point lies inside the unit circle.
    inside_circle = (xy ** 2).sum(axis=1) < 1

    # Still a lazy dask expression at this point.
    pi_lazy = 4 * inside_circle.sum() / inside_circle.size

    # start Dask calculation
    pi_estimate = pi_lazy.compute()

    print(f"\nfrom {xy.nbytes / 1e9} GB randomly chosen positions")
    print(f"   pi estimate: {pi_estimate}")
    print(f"   pi error: {abs(pi_estimate - numpy.pi)}\n")

    return pi_estimate

In [10]:
# Time one estimate: 10 GB of random positions split into 100 chunks
# (about 100 MB per chunk).
%time pi = calculate_pi(size_in_bytes=10_000_000_000, number_of_chunks=100) # 10 GB


from 10.0 GB randomly chosen positions
   pi estimate: 3.141632
   pi error: 3.9346410206864135e-05

CPU times: user 2.07 s, sys: 304 ms, total: 2.37 s
Wall time: 29.6 s


In [11]:
# Close the client connection once the computation is finished.
client.close()

In [12]:
# Close the cluster object.
# NOTE(review): presumably this also cancels the remaining SLURM worker jobs —
# confirm with `squeue -u $USER` afterwards.
cluster.close()