In [1]:
from dask_jobqueue import SLURMCluster
from dask.distributed import Client, progress
partition = 'milano'  # For LCLS II staff

# Describe the SLURM jobs that will host the Dask workers for this notebook.
cluster = SLURMCluster(
    queue=partition,
    account="lcls:data",
    local_directory='/sdf/home/m/monarin/tmp/',  # Local disk space for workers to use

    # Resources requested per SLURM job: 10 cores and 512 GB of memory.
    # `processes` is not passed here, so dask_jobqueue decides how many worker
    # processes share those resources.
    # NOTE(review): the recorded job script further down reports
    # --cpus-per-task=100 and --nworkers 10, which does not match cores=10;
    # the saved output looks stale — re-run to confirm.
    cores=10, memory='512GB', 
)

In [2]:
# Ask the cluster for one SLURM job's worth of workers.
cluster.scale(jobs=1)
# Last expression of the cell, so the generated batch script is displayed for inspection.
cluster.job_script()

'#!/usr/bin/env bash\n\n#SBATCH -J dask-worker\n#SBATCH -p milano\n#SBATCH -A lcls:data\n#SBATCH -n 1\n#SBATCH --cpus-per-task=100\n#SBATCH --mem=477G\n#SBATCH -t 00:30:00\n\n/sdf/group/lcls/ds/ana/sw/conda2/inst/envs/ps-4.6.1/bin/python -m distributed.cli.dask_worker tcp://172.24.49.18:36657 --nthreads 10 --nworkers 10 --memory-limit 47.68GiB --name dummy-name --nanny --death-timeout 60 --local-directory /sdf/home/m/monarin/tmp/\n'

In [3]:
# Connect this notebook session to the cluster created above.
client = Client(cluster)

In [4]:
%%bash
# Print the name of the host this notebook is running on.
hostname

sdfiana008


In [5]:
# Display the client summary (scheduler address, dashboard link, workers).
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://172.24.49.18:8787/status,

0,1
Dashboard: http://172.24.49.18:8787/status,Workers: 20
Total threads: 200,Total memory: 0.93 TiB

0,1
Comm: tcp://172.24.49.18:36657,Workers: 20
Dashboard: http://172.24.49.18:8787/status,Total threads: 200
Started: Just now,Total memory: 0.93 TiB

0,1
Comm: tcp://172.24.48.179:42111,Total threads: 10
Dashboard: http://172.24.48.179:39171/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.179:34289,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-fwcemh1x,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-fwcemh1x

0,1
Comm: tcp://172.24.48.179:40809,Total threads: 10
Dashboard: http://172.24.48.179:45925/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.179:36795,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-rfa80llu,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-rfa80llu

0,1
Comm: tcp://172.24.48.179:39417,Total threads: 10
Dashboard: http://172.24.48.179:41139/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.179:40401,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-oq7g0xt_,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-oq7g0xt_

0,1
Comm: tcp://172.24.48.179:34061,Total threads: 10
Dashboard: http://172.24.48.179:38169/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.179:34387,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-09uowqm3,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-09uowqm3

0,1
Comm: tcp://172.24.48.179:43909,Total threads: 10
Dashboard: http://172.24.48.179:46571/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.179:41103,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-x1yiv0zv,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-x1yiv0zv

0,1
Comm: tcp://172.24.48.179:46513,Total threads: 10
Dashboard: http://172.24.48.179:35319/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.179:44651,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-3htg46ao,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-3htg46ao

0,1
Comm: tcp://172.24.48.179:45121,Total threads: 10
Dashboard: http://172.24.48.179:38917/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.179:37711,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-5xo89b0f,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-5xo89b0f

0,1
Comm: tcp://172.24.48.179:36655,Total threads: 10
Dashboard: http://172.24.48.179:39065/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.179:36835,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-nvlxsisp,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-nvlxsisp

0,1
Comm: tcp://172.24.48.179:44039,Total threads: 10
Dashboard: http://172.24.48.179:41727/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.179:36659,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-goxzkyr2,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-goxzkyr2

0,1
Comm: tcp://172.24.48.179:35187,Total threads: 10
Dashboard: http://172.24.48.179:34413/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.179:43209,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-397q73ug,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-397q73ug

0,1
Comm: tcp://172.24.48.185:41039,Total threads: 10
Dashboard: http://172.24.48.185:46117/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.185:34453,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-t2yuo61h,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-t2yuo61h

0,1
Comm: tcp://172.24.48.185:41475,Total threads: 10
Dashboard: http://172.24.48.185:38413/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.185:38935,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-0e791lj5,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-0e791lj5

0,1
Comm: tcp://172.24.48.185:34535,Total threads: 10
Dashboard: http://172.24.48.185:44351/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.185:41365,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-chvuh3hk,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-chvuh3hk

0,1
Comm: tcp://172.24.48.185:42877,Total threads: 10
Dashboard: http://172.24.48.185:45533/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.185:33235,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-5uitwmd_,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-5uitwmd_

0,1
Comm: tcp://172.24.48.185:39061,Total threads: 10
Dashboard: http://172.24.48.185:35139/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.185:33805,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-f4a1m3e6,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-f4a1m3e6

0,1
Comm: tcp://172.24.48.185:33507,Total threads: 10
Dashboard: http://172.24.48.185:40713/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.185:40169,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-11cl_t2v,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-11cl_t2v

0,1
Comm: tcp://172.24.48.185:41883,Total threads: 10
Dashboard: http://172.24.48.185:35877/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.185:44241,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-otxj572d,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-otxj572d

0,1
Comm: tcp://172.24.48.185:33329,Total threads: 10
Dashboard: http://172.24.48.185:37717/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.185:36159,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-bu7oxev2,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-bu7oxev2

0,1
Comm: tcp://172.24.48.185:45909,Total threads: 10
Dashboard: http://172.24.48.185:41303/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.185:38845,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-4wmjz_6g,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-4wmjz_6g

0,1
Comm: tcp://172.24.48.185:46519,Total threads: 10
Dashboard: http://172.24.48.185:39055/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.185:39257,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-drarg6oz,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-drarg6oz


In [1]:
import h5py
import dask
import dask.array as da
import dask.dataframe as dd
import numpy as np
# Input HDF5 file, opened read-only.
# Alternative input kept for reference:
#   /sdf/data/lcls/drpsrcf/ffb/users/monarin/h5/mylargeh5.h5
f = h5py.File('/sdf/home/m/monarin/tmp/my1m.h5', mode='r')

type: [Errno 2] Unable to open file (unable to open file: name = '/sdf/data/lcls/drpsrcf/ffb/users/monarin/h5/mylargeh5.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [7]:
%%time
# Wrap the file's 'timestamp' dataset as a Dask array in chunks of 100000
# elements, then expose it as a Dask DataFrame with a single 'timestamp' column.
ts_chunks = (100000,)
da_ts = da.from_array(f['timestamp'], chunks=ts_chunks)
dd_ts = dd.from_array(da_ts, columns=['timestamp'])

CPU times: user 6.28 ms, sys: 1.12 ms, total: 7.4 ms
Wall time: 6.34 ms


In [8]:
%%time
dd_ts.sort_values('timestamp')
inds = dd_ts.index.values

CPU times: user 3.32 s, sys: 152 ms, total: 3.47 s
Wall time: 8.41 s


In [11]:
%%time
dask.config.set(**{'array.slicing.split_large_chunks': True})
sorted_ts = da_ts[inds]

CPU times: user 4.2 ms, sys: 25 µs, total: 4.22 ms
Wall time: 3.67 ms


  p = blockwise(


In [10]:
%%time
out_f = h5py.File('/sdf/data/lcls/drpsrcf/ffb/users/monarin/h5/parallel_test.hdf5', 'w')
out_f.create_dataset('timestamp', data=sorted_ts)

CPU times: user 18.5 s, sys: 25.3 s, total: 43.8 s
Wall time: 1min 2s


<HDF5 dataset "timestamp": shape (1000000000,), type "<i8">

In [64]:
# Wrap the file's 'calib' dataset as a Dask array, chunked as 100000 x 1 blocks.
calib_chunks = (100000,1)
da_calib = da.from_array(f['calib'], chunks=calib_chunks)

In [65]:
%%time
# Index 'calib' along its first axis with `inds`, the same index array used
# for the timestamps.
sorted_calib = da_calib[inds]

CPU times: user 1.09 ms, sys: 2.08 ms, total: 3.18 ms
Wall time: 3.06 ms


  p = blockwise(


In [None]:
%%time
# compute() evaluates the Dask array and returns the whole result to this
# process, which is then written to the 'calib' dataset of the output file.
out_f.create_dataset('calib', data=sorted_calib.compute())

2023-12-12 11:14:22,684 - tornado.application - ERROR - Exception in callback <bound method BokehTornado._keep_alive of <bokeh.server.tornado.BokehTornado object at 0x7fb866622160>>
Traceback (most recent call last):
  File "/sdf/group/lcls/ds/ana/sw/conda2/inst/envs/ps-4.6.1/lib/python3.9/site-packages/tornado/ioloop.py", line 919, in _run
    val = self.callback()
  File "/sdf/group/lcls/ds/ana/sw/conda2/inst/envs/ps-4.6.1/lib/python3.9/site-packages/bokeh/server/tornado.py", line 781, in _keep_alive
    c.send_ping()
  File "/sdf/group/lcls/ds/ana/sw/conda2/inst/envs/ps-4.6.1/lib/python3.9/site-packages/bokeh/server/connection.py", line 93, in send_ping
    self._socket.ping(str(self._ping_count).encode("utf-8"))
  File "/sdf/group/lcls/ds/ana/sw/conda2/inst/envs/ps-4.6.1/lib/python3.9/site-packages/tornado/websocket.py", line 439, in ping
    raise WebSocketClosedError()
tornado.websocket.WebSocketClosedError


In [38]:
# Close the output file once all datasets have been written.
out_f.close()

In [None]:
# Close the input file; Dask arrays built from it should not be used afterwards.
f.close()