In [1]:
import glob
from os.path import join, expanduser

import xarray as xr
import dask
import datetime as dt

from dask.distributed import Client
from dask_jobqueue import PBSCluster

## Find your files:

In [2]:
# Root directory of the daily CESM2-LE atmosphere time series, and the variable to read.
sim_path = "/glade/campaign/cgd/cesm/CESM2-LE/atm/proc/tseries/day_1/"
variable = "PRECT"

# Work with a single ensemble member as an example.
ensemble_member = "011"

# Glob pattern matching every file of this member inside the variable's sub-directory.
file_pattern = "b.e21.BHISTsmbb.f09_g17.LE2-1301." + ensemble_member + ".*.nc"
sim_files = glob.glob(join(sim_path, variable, file_pattern))
sim_files.sort()

# Report how many files matched, then list them one per line.
print(len(sim_files))
print(*sim_files, sep="\n")


17
/glade/campaign/cgd/cesm/CESM2-LE/atm/proc/tseries/day_1/PRECT/b.e21.BHISTsmbb.f09_g17.LE2-1301.011.cam.h1.PRECT.18500101-18591231.nc
/glade/campaign/cgd/cesm/CESM2-LE/atm/proc/tseries/day_1/PRECT/b.e21.BHISTsmbb.f09_g17.LE2-1301.011.cam.h1.PRECT.18600101-18691231.nc
/glade/campaign/cgd/cesm/CESM2-LE/atm/proc/tseries/day_1/PRECT/b.e21.BHISTsmbb.f09_g17.LE2-1301.011.cam.h1.PRECT.18700101-18791231.nc
/glade/campaign/cgd/cesm/CESM2-LE/atm/proc/tseries/day_1/PRECT/b.e21.BHISTsmbb.f09_g17.LE2-1301.011.cam.h1.PRECT.18800101-18891231.nc
/glade/campaign/cgd/cesm/CESM2-LE/atm/proc/tseries/day_1/PRECT/b.e21.BHISTsmbb.f09_g17.LE2-1301.011.cam.h1.PRECT.18900101-18991231.nc
/glade/campaign/cgd/cesm/CESM2-LE/atm/proc/tseries/day_1/PRECT/b.e21.BHISTsmbb.f09_g17.LE2-1301.011.cam.h1.PRECT.19000101-19091231.nc
/glade/campaign/cgd/cesm/CESM2-LE/atm/proc/tseries/day_1/PRECT/b.e21.BHISTsmbb.f09_g17.LE2-1301.011.cam.h1.PRECT.19100101-19191231.nc
/glade/campaign/cgd/cesm/CESM2-LE/atm/proc/tseries/day_1/PR

## Using Dask: 
Let's scale everything up using Dask.

In [3]:
# Build the PBS-backed Dask cluster; every worker is submitted as its own PBS job
# described by the settings below.
cluster = PBSCluster(
    # --- how the job appears to the scheduler ---
    job_name='dask-wk23-hpc',
    queue='casper',
    walltime='30:00',
    resource_spec='select=1:ncpus=1:mem=4GB',
    # --- what each Dask worker job gets ---
    cores=1,
    processes=1,
    memory='4GiB',
    # --- node-local scratch directory used by the workers (spill path) ---
    local_directory='/local_scratch/pbs.$PBS_JOBID/dask/spill',
    # --- network interface the scheduler and workers communicate over ---
    interface='ext',
)

In [4]:
# Connect this notebook session to the cluster's scheduler.
client = Client(cluster)
# Display the client summary (includes the dashboard link).
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/negins/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/negins/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.106:42951,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/negins/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [5]:
# Number of Dask workers to request from the cluster.
num_workers = 32 
cluster.scale(num_workers)

# Block until that many workers have connected to the scheduler.
client.wait_for_workers(num_workers)

In [14]:
# Shell out to PBS to list the current user's jobs (the Dask workers show up here).
!qstat -u $USER

                                                            Req'd  Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time  S Time
--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
2081278.casper* negins   htc      STDIN        8965   1   1    4gb 24:00 R 00:56
2081294.casper* negins   jhublog* cr-login-*  32756   1   1    4gb 720:0 R 00:53
2081757.casper* negins   htc      STDIN       71473   1   2   64gb 02:00 R 00:25
2082058.casper* negins   htc      dask-wk23*  28758   1   1    4gb 00:30 R 00:01
2082059.casper* negins   htc      dask-wk23*   5138   1   1    4gb 00:30 R 00:00
2082060.casper* negins   htc      dask-wk23*   5151   1   1    4gb 00:30 R 00:00
2082061.casper* negins   htc      dask-wk23*   5164   1   1    4gb 00:30 R 00:00
2082062.casper* negins   htc      dask-wk23*   5175   1   1    4gb 00:30 R 00:00
2082063.casper* negins   htc      dask-wk23*   5189   1   1    4gb 00:30 R 00:00
2082064.casper* negins   htc  

## Read in all Files: 

In [6]:
%%time
ds = xr.open_mfdataset(
    sorted(sim_files),
    # concatenate files in the order provided
    combine="by_coords",
    # parallelize the reading of individual files using dask
    # This means the returned arrays will be dask arrays
    parallel=True,
    # these are netCDF4 files, use the h5netcdf package to read them
    engine="h5netcdf",
    # hold off on decoding time
    decode_cf=False,
    # specify that data should be automatically chunked
    chunks="auto",
)
ds = xr.decode_cf(ds)


CPU times: user 1.86 s, sys: 313 ms, total: 2.18 s
Wall time: 20.9 s


In [7]:
# Show the combined dataset; the arrays are lazy Dask arrays at this point.
ds

Unnamed: 0,Array,Chunk
Bytes,88.22 MiB,5.35 MiB
Shape,"(60226, 192)","(3650, 192)"
Dask graph,17 chunks in 53 graph layers,17 chunks in 53 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 88.22 MiB 5.35 MiB Shape (60226, 192) (3650, 192) Dask graph 17 chunks in 53 graph layers Data type float64 numpy.ndarray",192  60226,

Unnamed: 0,Array,Chunk
Bytes,88.22 MiB,5.35 MiB
Shape,"(60226, 192)","(3650, 192)"
Dask graph,17 chunks in 53 graph layers,17 chunks in 53 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,14.70 MiB,912.50 kiB
Shape,"(60226, 32)","(3650, 32)"
Dask graph,17 chunks in 52 graph layers,17 chunks in 52 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 14.70 MiB 912.50 kiB Shape (60226, 32) (3650, 32) Dask graph 17 chunks in 52 graph layers Data type float64 numpy.ndarray",32  60226,

Unnamed: 0,Array,Chunk
Bytes,14.70 MiB,912.50 kiB
Shape,"(60226, 32)","(3650, 32)"
Dask graph,17 chunks in 52 graph layers,17 chunks in 52 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,14.70 MiB,912.50 kiB
Shape,"(60226, 32)","(3650, 32)"
Dask graph,17 chunks in 52 graph layers,17 chunks in 52 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 14.70 MiB 912.50 kiB Shape (60226, 32) (3650, 32) Dask graph 17 chunks in 52 graph layers Data type float64 numpy.ndarray",32  60226,

Unnamed: 0,Array,Chunk
Bytes,14.70 MiB,912.50 kiB
Shape,"(60226, 32)","(3650, 32)"
Dask graph,17 chunks in 52 graph layers,17 chunks in 52 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.16 MiB,0.92 MiB
Shape,"(60226, 33)","(3650, 33)"
Dask graph,17 chunks in 52 graph layers,17 chunks in 52 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.16 MiB 0.92 MiB Shape (60226, 33) (3650, 33) Dask graph 17 chunks in 52 graph layers Data type float64 numpy.ndarray",33  60226,

Unnamed: 0,Array,Chunk
Bytes,15.16 MiB,0.92 MiB
Shape,"(60226, 33)","(3650, 33)"
Dask graph,17 chunks in 52 graph layers,17 chunks in 52 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.16 MiB,0.92 MiB
Shape,"(60226, 33)","(3650, 33)"
Dask graph,17 chunks in 52 graph layers,17 chunks in 52 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.16 MiB 0.92 MiB Shape (60226, 33) (3650, 33) Dask graph 17 chunks in 52 graph layers Data type float64 numpy.ndarray",33  60226,

Unnamed: 0,Array,Chunk
Bytes,15.16 MiB,0.92 MiB
Shape,"(60226, 33)","(3650, 33)"
Dask graph,17 chunks in 52 graph layers,17 chunks in 52 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,235.26 kiB,14.26 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 235.26 kiB 14.26 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 35 graph layers Data type int32 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,235.26 kiB,14.26 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,235.26 kiB,14.26 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 235.26 kiB 14.26 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 35 graph layers Data type int32 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,235.26 kiB,14.26 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.92 MiB,57.03 kiB
Shape,"(60226, 2)","(3650, 2)"
Dask graph,17 chunks in 36 graph layers,17 chunks in 36 graph layers
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 0.92 MiB 57.03 kiB Shape (60226, 2) (3650, 2) Dask graph 17 chunks in 36 graph layers Data type object numpy.ndarray",2  60226,

Unnamed: 0,Array,Chunk
Bytes,0.92 MiB,57.03 kiB
Shape,"(60226, 2)","(3650, 2)"
Dask graph,17 chunks in 36 graph layers,17 chunks in 36 graph layers
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 36 graph layers,17 chunks in 36 graph layers
Data type,|S8 numpy.ndarray,|S8 numpy.ndarray
"Array Chunk Bytes 470.52 kiB 28.52 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 36 graph layers Data type |S8 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 36 graph layers,17 chunks in 36 graph layers
Data type,|S8 numpy.ndarray,|S8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 36 graph layers,17 chunks in 36 graph layers
Data type,|S8 numpy.ndarray,|S8 numpy.ndarray
"Array Chunk Bytes 470.52 kiB 28.52 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 36 graph layers Data type |S8 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 36 graph layers,17 chunks in 36 graph layers
Data type,|S8 numpy.ndarray,|S8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,235.26 kiB,14.26 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 235.26 kiB 14.26 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 35 graph layers Data type int32 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,235.26 kiB,14.26 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,235.26 kiB,14.26 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 235.26 kiB 14.26 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 35 graph layers Data type int32 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,235.26 kiB,14.26 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 470.52 kiB 28.52 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 35 graph layers Data type float64 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 470.52 kiB 28.52 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 35 graph layers Data type float64 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 470.52 kiB 28.52 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 35 graph layers Data type float64 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 470.52 kiB 28.52 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 35 graph layers Data type float64 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 470.52 kiB 28.52 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 35 graph layers Data type float64 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 470.52 kiB 28.52 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 35 graph layers Data type float64 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,470.52 kiB,28.52 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,235.26 kiB,14.26 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 235.26 kiB 14.26 kiB Shape (60226,) (3650,) Dask graph 17 chunks in 35 graph layers Data type int32 numpy.ndarray",60226  1,

Unnamed: 0,Array,Chunk
Bytes,235.26 kiB,14.26 kiB
Shape,"(60226,)","(3650,)"
Dask graph,17 chunks in 35 graph layers,17 chunks in 35 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,12.41 GiB,127.83 MiB
Shape,"(60226, 192, 288)","(606, 192, 288)"
Dask graph,116 chunks in 35 graph layers,116 chunks in 35 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 12.41 GiB 127.83 MiB Shape (60226, 192, 288) (606, 192, 288) Dask graph 116 chunks in 35 graph layers Data type float32 numpy.ndarray",288  192  60226,

Unnamed: 0,Array,Chunk
Bytes,12.41 GiB,127.83 MiB
Shape,"(60226, 192, 288)","(606, 192, 288)"
Dask graph,116 chunks in 35 graph layers,116 chunks in 35 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [8]:
# Pull the PRECT variable out of the combined dataset and display it.
precip = ds["PRECT"]
precip

Unnamed: 0,Array,Chunk
Bytes,12.41 GiB,127.83 MiB
Shape,"(60226, 192, 288)","(606, 192, 288)"
Dask graph,116 chunks in 35 graph layers,116 chunks in 35 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 12.41 GiB 127.83 MiB Shape (60226, 192, 288) (606, 192, 288) Dask graph 116 chunks in 35 graph layers Data type float32 numpy.ndarray",288  192  60226,

Unnamed: 0,Array,Chunk
Bytes,12.41 GiB,127.83 MiB
Shape,"(60226, 192, 288)","(606, 192, 288)"
Dask graph,116 chunks in 35 graph layers,116 chunks in 35 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## Calculate Maximum Precip Per Year Per Ensemble Member:

Note the output dimensions (year: 166, lat: 192, lon: 288): the daily `time` axis has been reduced to one value per calendar year.

In [9]:
# Annual maximum: group the values by the calendar year of their time stamp
# and keep the largest value in each group.
max_precip = precip.groupby("time.year").max()
max_precip

Unnamed: 0,Array,Chunk
Bytes,35.02 MiB,648.00 kiB
Shape,"(166, 192, 288)","(3, 192, 288)"
Dask graph,99 chunks in 338 graph layers,99 chunks in 338 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 35.02 MiB 648.00 kiB Shape (166, 192, 288) (3, 192, 288) Dask graph 99 chunks in 338 graph layers Data type float32 numpy.ndarray",288  192  166,

Unnamed: 0,Array,Chunk
Bytes,35.02 MiB,648.00 kiB
Shape,"(166, 192, 288)","(3, 192, 288)"
Dask graph,99 chunks in 338 graph layers,99 chunks in 338 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## Now you want to save the maximum Precip per year for each ensemble member:

In [10]:
%%time
# Run the lazy annual-maximum computation and bring the result into memory.
# Check out the dashboard while this compute-heavy step runs.
save_precip = max_precip.compute()

CPU times: user 2.13 s, sys: 455 ms, total: 2.59 s
Wall time: 54.8 s


In [11]:
# Build the output name from the ensemble member that was actually processed, so
# the file is labelled with the right member and each member gets its own file.
output_file = (
    "b.e21.BHISTsmbb.f09_g17.LE2-1301."
    + ensemble_member
    + ".cam.h1.Rx1day.18500101-20141231.nc"
)
# Now, save the annual maxima time series for this ensemble member
save_precip.to_netcdf(output_file)