# Rechunking AMPS output: a working example



In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import os

import gcsfs
from tqdm import tqdm
import fsspec
import xarray as xr


In [2]:
# Connect to the Dask Gateway configured for this environment and request a
# new cluster. Both `gateway` and `cluster` are reused by later cells.
from dask.distributed import Client
import dask_gateway
gateway = dask_gateway.Gateway()
cluster = gateway.new_cluster()

In [3]:
# Request a fixed pool of workers, then attach a client so that subsequent
# dask/xarray operations run on this cluster.
n_workers = 20
cluster.scale(n_workers)
client = Client(cluster)

# Last expression of the cell: shows the cluster widget.
cluster

VBox(children=(HTML(value='<h2>GatewayCluster</h2>'), HBox(children=(HTML(value='\n<div>\n<style scoped>\n    …

### Set up runtime parameters

In [4]:
# Bucket and directory layout of the AMPS output on Google Cloud Storage.
gcsdir = 'gs://ldeo-glaciology'
ampsdir = 'AMPS'
amps_ver = 'WRF_24'

# Domain sub-directory and the glob that selects the files to load.
domain = 'domain_03'
filepattern = 'wrfout_d03_20190930*'

# Object-store URLs always use '/' as the separator, so join explicitly rather
# than with os.path.join, which would insert backslashes on Windows.
pattern = '/'.join([gcsdir, ampsdir, amps_ver, domain, filepattern])
print(pattern)

gs://ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_20190930*


### List the NetCDF files

In [5]:
# Options for the GCS filesystem handle used by the rest of the notebook.
# NOTE(review): `mode` and `cache_timeout` are passed through unchanged from the
# original call; confirm against the installed gcsfs version that both are honoured.
gcs_options = {
    'project': 'ldeo-glaciology',
    'mode': 'ab',
    'cache_timeout': 0,
}
fs = gcsfs.GCSFileSystem(**gcs_options)

In [8]:
# Expand the glob on the bucket; the returned paths carry no 'gs://' prefix.
NCs = fs.glob(pattern)

n_matched = len(NCs)
print(f"Total of {n_matched} wrf files.\n")

# Last expression of the cell: display the matched paths.
NCs

Total of 8 wrf files.



['ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093000_f003.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093000_f006.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093000_f009.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093000_f012.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093012_f003.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093012_f006.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093012_f009.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093012_f012.nc']

### Open and concatenate the files (in my experience, only about 4 files can be handled at a time)

In [9]:
# Upper bound on how many files are opened and concatenated in one go.
MAX_FILES = 4

# Chunk spec shared by every file: -1 requests a single chunk along that dimension.
WRF_CHUNKS = {'south_north': -1, 'west_east': -1, 'Time': -1}

# NOTE(review): '#mode=bytes' looks like the netCDF-C byte-range suffix; it is kept
# because the recorded run opened these URLs successfully -- confirm it is needed.
NCs_urls = ['gs://' + x + '#mode=bytes' for x in NCs]
if not NCs_urls:
    # Fail with a clear message instead of an IndexError on NCs_urls[0].
    raise FileNotFoundError(f"No files matched {pattern}")
print(NCs_urls[0])

# Open each file as a lazily loaded, dask-backed dataset. Slicing (rather than a
# fixed range) keeps this working when fewer than MAX_FILES files are available.
# The file objects are left open on purpose -- presumably the lazy arrays still
# read from them when the data is eventually computed.
datasets = [
    xr.open_dataset(fs.open(url, mode='rb'), engine='h5netcdf', chunks=WRF_CHUNKS)
    for url in tqdm(NCs_urls[:MAX_FILES])
]

# A single concat over the whole list instead of growing `ds` pairwise in a loop.
ds = xr.concat(datasets, 'Time')

print(f"Total size for {ds.DX[0]/1000:.2f} km output: {ds.nbytes / 1e9:.3f} Gb\n")

gs://ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093000_f003.nc#mode=bytes


100%|██████████| 3/3 [00:04<00:00,  1.54s/it]

Total size for 2.67 km output: 10.722 Gb






### Remove some variables

In [12]:
# Variables to keep, assembled group by group in the original order.
# Two further groups were listed but disabled in the original cell:
#   'U', 'V', 'W', 'PH', 'PHB', 'T', 'P', 'PB', 'P_HYD'
#   'QVAPOR', 'QICE', 'QCLOUD', 'QRAIN', 'QSNOW'
vars_save = (
    ['Q2', 'T2', 'PSFC', 'U10', 'V10']
    + ['LANDMASK', 'HGT']
    + ['SST', 'SSTSK', 'TSK']
    + ['RAINC', 'RAINSH', 'RAINNC']
    + ['QFX', 'HFX', 'LH', 'PBLH', 'ACSNOM', 'SWDOWN', 'LWDNB', 'GLW']
    + ['SNOW', 'SNOWH']
)

# Selecting with a list of names yields a Dataset holding only those variables.
ds_stripped = ds[vars_save]

# Report the reduced size next to the original size.
print(f'{ds_stripped.nbytes/1e9} Gb')
print(f'Originally {ds.nbytes/1e9} Gb')

0.279450032 Gb
Originally 10.721515004 Gb


## Persist the reduced dataset on the cluster

In [88]:
# Persist the reduced dataset before rechunking. Per the dask/xarray docs this
# starts the computation on the cluster and keeps the results in worker memory.
ds_stripped = ds_stripped.persist()

## Rechunk to group the 4 timesteps together



In [13]:
# Merge all time steps into one chunk along 'Time' (-1 = a single chunk spanning
# the dimension); the repr shown further down reports chunks of (4, 1035, 675).
ds_stripped = ds_stripped.chunk({"Time": -1})

## Persist the rechunked result

In [14]:
# Persist again so that the rechunked arrays are what is held on the cluster.
ds_stripped = ds_stripped.persist()

In [91]:
# Display the dataset repr to check the resulting chunk layout.
ds_stripped

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,32 B,32 B
Shape,"(4,)","(4,)"
Count,1 Tasks,1 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 32 B 32 B Shape (4,) (4,) Count 1 Tasks 1 Chunks Type datetime64[ns] numpy.ndarray",4  1,

Unnamed: 0,Array,Chunk
Bytes,32 B,32 B
Shape,"(4,)","(4,)"
Count,1 Tasks,1 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.18 MB 11.18 MB Shape (4, 1035, 675) (4, 1035, 675) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  4,

Unnamed: 0,Array,Chunk
Bytes,11.18 MB,11.18 MB
Shape,"(4, 1035, 675)","(4, 1035, 675)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray


distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
_GatheringFuture exception was never retrieved
future: <_GatheringFuture finished exception=CancelledError()>
asyncio.exceptions.CancelledError
Exception in callback None()
handle: <Handle cancelled>
Traceback (most recent call last):
  File "/srv/conda/envs/notebook/lib/python3.8/site-packages/tornado/iostream.py", line 1391, in _do_ssl_handshake
    self.socket.do_handshake()
  File "/srv/conda/envs/notebook/lib/python3.8/ssl.py", line 1309, in do_handshake
    self._sslobj.do_handshake()
ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate (_ssl.c:1124)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/srv/conda/envs/notebook/lib/python3.8/asyncio/events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "/srv/conda/envs/notebook/lib/p

In [69]:
# Ask the scheduler to cancel the futures behind ds_stripped.
# NOTE(review): presumably done to free the cluster before the attempt on the
# full dataset below -- confirm this is still wanted on a fresh top-to-bottom run.
client.cancel(ds_stripped)

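The cells below fail: rechunking and persisting the full dataset (about 10.7 GB, all variables) ends with the client losing its connection to the scheduler.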

In [52]:
# Same rechunking as applied to ds_stripped, but on the full dataset with every
# variable kept (one chunk along 'Time').
ds = ds.chunk({"Time": -1})


In [53]:
# Known failure in the recorded run: persisting the full dataset (~10.7 GB
# according to ds.nbytes) ended with "Failed to reconnect to scheduler".
ds = ds.persist()

distributed.batched - INFO - Batched Comm Closed: in <closed TLS>: Stream is closed
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
_GatheringFuture exception was never retrieved
future: <_GatheringFuture finished exception=CancelledError()>
asyncio.exceptions.CancelledError
Exception in callback None()
handle: <Handle cancelled>
Traceback (most recent call last):
  File "/srv/conda/envs/notebook/lib/python3.8/site-packages/tornado/iostream.py", line 1391, in _do_ssl_handshake
    self.socket.do_handshake()
  File "/srv/conda/envs/notebook/lib/python3.8/ssl.py", line 1309, in do_handshake
    self._sslobj.do_handshake()
ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate (_ssl.c:1124)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/srv/conda/envs/notebook/lib/python3.8/asyncio/events.py", line 81, in _run
    s

In [40]:
# Size of the full dataset in GB (1e9 bytes).
ds.nbytes/1e9

10.721515004