In [1]:
from dask_jobqueue import SLURMCluster
from dask.distributed import Client, progress
partition = 'milano'  # For LCLS II staff

cluster = SLURMCluster(
    queue=partition,
    account="lcls:data",
    local_directory='/sdf/home/m/monarin/tmp/',  # Local disk space for workers to use

    # Resources per SLURM job (per node, the way SLURM is configured on Roma)
    # processes=16 runs 16 Dask workers in a job, so each worker has 1 core & 32 GB RAM.
    cores=100, memory='512GB', 
)

In [2]:
cluster.scale(jobs=1)
cluster.job_script()

'#!/usr/bin/env bash\n\n#SBATCH -J dask-worker\n#SBATCH -p milano\n#SBATCH -A lcls:data\n#SBATCH -n 1\n#SBATCH --cpus-per-task=100\n#SBATCH --mem=477G\n#SBATCH -t 00:30:00\n\n/sdf/group/lcls/ds/ana/sw/conda2/inst/envs/ps-4.6.1/bin/python -m distributed.cli.dask_worker tcp://172.24.49.11:43405 --nthreads 10 --nworkers 10 --memory-limit 47.68GiB --name dummy-name --nanny --death-timeout 60 --local-directory /sdf/home/m/monarin/tmp/\n'

In [3]:
client = Client(cluster)

In [4]:
%%bash
hostname

sdfiana001


In [5]:
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://172.24.49.11:8787/status,

0,1
Dashboard: http://172.24.49.11:8787/status,Workers: 10
Total threads: 100,Total memory: 476.80 GiB

0,1
Comm: tcp://172.24.49.11:43405,Workers: 10
Dashboard: http://172.24.49.11:8787/status,Total threads: 100
Started: Just now,Total memory: 476.80 GiB

0,1
Comm: tcp://172.24.48.144:33901,Total threads: 10
Dashboard: http://172.24.48.144:37941/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.144:34725,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-r0unm354,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-r0unm354

0,1
Comm: tcp://172.24.48.144:43105,Total threads: 10
Dashboard: http://172.24.48.144:41871/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.144:39579,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-u93wmvvk,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-u93wmvvk

0,1
Comm: tcp://172.24.48.144:37883,Total threads: 10
Dashboard: http://172.24.48.144:45141/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.144:36219,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-4xxo16_s,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-4xxo16_s

0,1
Comm: tcp://172.24.48.144:37717,Total threads: 10
Dashboard: http://172.24.48.144:40017/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.144:36211,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-czh736yv,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-czh736yv

0,1
Comm: tcp://172.24.48.144:41167,Total threads: 10
Dashboard: http://172.24.48.144:34563/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.144:36301,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-pfv_6x59,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-pfv_6x59

0,1
Comm: tcp://172.24.48.144:40093,Total threads: 10
Dashboard: http://172.24.48.144:36743/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.144:34649,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-7o32pz3d,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-7o32pz3d

0,1
Comm: tcp://172.24.48.144:45189,Total threads: 10
Dashboard: http://172.24.48.144:33589/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.144:35091,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-q6jqrnk2,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-q6jqrnk2

0,1
Comm: tcp://172.24.48.144:43699,Total threads: 10
Dashboard: http://172.24.48.144:43649/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.144:33875,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-3yub8_7c,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-3yub8_7c

0,1
Comm: tcp://172.24.48.144:46383,Total threads: 10
Dashboard: http://172.24.48.144:41211/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.144:39443,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-kma61rlq,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-kma61rlq

0,1
Comm: tcp://172.24.48.144:35905,Total threads: 10
Dashboard: http://172.24.48.144:37977/status,Memory: 47.68 GiB
Nanny: tcp://172.24.48.144:32871,
Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-00f9olk2,Local directory: /sdf/home/m/monarin/tmp/dask-scratch-space/worker-00f9olk2


In [6]:
import h5py
import dask
import dask.array as da
import dask.dataframe as dd
import numpy as np
f=h5py.File('/sdf/data/lcls/drpsrcf/ffb/users/monarin/h5/mylargeh5.h5', 'r')

In [7]:
%%time
ts_chunks = (10000000,)
da_ts = da.from_array(f['timestamp'], chunks=ts_chunks)
dd_ts = dd.from_array(da_ts, columns=['timestamp'])

CPU times: user 3.73 ms, sys: 971 µs, total: 4.7 ms
Wall time: 3.87 ms


In [8]:
%%time
dd_ts.sort_values('timestamp')

CPU times: user 1.14 s, sys: 59.8 ms, total: 1.2 s
Wall time: 4.78 s


Unnamed: 0_level_0,timestamp
npartitions=100,Unnamed: 1_level_1
,int64
,...
...,...
,...
,...


In [9]:
%%time
inds = dd_ts.index.values

CPU times: user 1.57 ms, sys: 56 µs, total: 1.62 ms
Wall time: 1.64 ms


In [12]:
type(inds)

dask.array.core.Array

In [11]:
%%time
dask.config.set(**{'array.slicing.split_large_chunks': True})
sorted_ts = da_ts[inds]

CPU times: user 4.2 ms, sys: 25 µs, total: 4.22 ms
Wall time: 3.67 ms


  p = blockwise(


In [10]:
%%time
out_f = h5py.File('/sdf/data/lcls/drpsrcf/ffb/users/monarin/h5/inds.hdf5', 'w')
out_f.create_dataset('sorted_indices', data=inds)

KeyboardInterrupt: 

In [64]:
calib_chunks = (10000000,1)
da_calib = da.from_array(f['calib'], chunks=calib_chunks)

In [65]:
%%time
sorted_calib = da_calib[inds]

CPU times: user 1.09 ms, sys: 2.08 ms, total: 3.18 ms
Wall time: 3.06 ms


  p = blockwise(


In [None]:
%%time
out_f.create_dataset('calib', data=sorted_calib.compute())

2023-12-12 11:14:22,684 - tornado.application - ERROR - Exception in callback <bound method BokehTornado._keep_alive of <bokeh.server.tornado.BokehTornado object at 0x7fb866622160>>
Traceback (most recent call last):
  File "/sdf/group/lcls/ds/ana/sw/conda2/inst/envs/ps-4.6.1/lib/python3.9/site-packages/tornado/ioloop.py", line 919, in _run
    val = self.callback()
  File "/sdf/group/lcls/ds/ana/sw/conda2/inst/envs/ps-4.6.1/lib/python3.9/site-packages/bokeh/server/tornado.py", line 781, in _keep_alive
    c.send_ping()
  File "/sdf/group/lcls/ds/ana/sw/conda2/inst/envs/ps-4.6.1/lib/python3.9/site-packages/bokeh/server/connection.py", line 93, in send_ping
    self._socket.ping(str(self._ping_count).encode("utf-8"))
  File "/sdf/group/lcls/ds/ana/sw/conda2/inst/envs/ps-4.6.1/lib/python3.9/site-packages/tornado/websocket.py", line 439, in ping
    raise WebSocketClosedError()
tornado.websocket.WebSocketClosedError


In [38]:
out_f.close()

In [None]:
f.close()