# Appending WRF data to the ldeo-glaciology bucket without a cluster

This notebook reproduces the problem reported in [xarray issue #5878](https://github.com/pydata/xarray/issues/5878), this time using real WRF data.


In [2]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import os

import gcsfs
from tqdm import tqdm
import fsspec
import xarray as xr
xr.set_options(display_style="html");
import json
import pandas as pd


### list the netcdf files

In [3]:
# Filesystem handle used below to list and open the source files in the bucket.
# NOTE(review): cache_timeout=0 presumably turns off gcsfs's listing/metadata
# cache for this handle — confirm against the gcsfs documentation.
fs = gcsfs.GCSFileSystem(
    project='ldeo-glaciology',
    mode='ab',
    cache_timeout=0,
)

In [4]:
# Location of the WRF output inside the bucket, one path component per variable.
gcsdir = 'gs://ldeo-glaciology'
ampsdir = 'AMPS'
amps_ver = 'WRF_24'
domain = 'domain_03'
filepattern = 'wrfout_d03_20190930*'

# Bucket paths always use '/', so join the components explicitly instead of
# with os.path.join, whose separator depends on the local operating system.
pattern = '/'.join([gcsdir, ampsdir, amps_ver, domain, filepattern])

# List every object matching the pattern and report how many were found.
NCs = fs.glob(pattern)
print(f"Total of {len(NCs)} wrf files.\n")
NCs

Total of 8 wrf files.



['ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093000_f003.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093000_f006.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093000_f009.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093000_f012.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093012_f003.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093012_f006.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093012_f009.nc',
 'ldeo-glaciology/AMPS/WRF_24/domain_03/wrfout_d03_2019093012_f012.nc']

### load two netcdfs

In [6]:
def open_wrf_file(path):
    """Open one WRF netCDF file from the bucket as a chunked dataset.

    Parameters
    ----------
    path : str
        Bucket path without the ``gs://`` prefix, in the form returned by
        ``fs.glob``.

    Returns
    -------
    xarray.Dataset
        Dataset read with the h5netcdf engine, using a single chunk along
        each of the ``south_north`` and ``west_east`` dimensions.
    """
    # The handle is intentionally not closed here: the dataset is opened with
    # chunks, so the data are presumably still read through this handle later.
    remote_file = fs.open(f'gs://{path}', mode='rb')
    return xr.open_dataset(
        remote_file,
        engine='h5netcdf',
        chunks={'south_north': -1, 'west_east': -1},
    )


# Only the first two files are needed to reproduce the append problem.
ds1 = open_wrf_file(NCs[0])
ds2 = open_wrf_file(NCs[1])

### remove some of the variables

In [7]:
# Names of the variables to keep when the datasets are stripped down.
# Not selected (kept here as comments):
#   'U', 'V', 'W', 'PH', 'PHB', 'T', 'P', 'PB', 'P_HYD',
#   'QVAPOR', 'QICE', 'QCLOUD', 'QRAIN', 'QSNOW',
vars_save = [
    'Q2', 'T2', 'PSFC', 'U10', 'V10',
    'LANDMASK', 'HGT',
    'SST', 'SSTSK', 'TSK',
    'RAINC', 'RAINSH', 'RAINNC',
    'QFX', 'HFX', 'LH', 'PBLH',
    'ACSNOM', 'SWDOWN', 'LWDNB', 'GLW',
    'SNOW', 'SNOWH',
]

In [8]:
# Subset each dataset to the variables named in vars_save.
ds1_stripped = ds1[vars_save]
ds2_stripped = ds2[vars_save]

# Request a single chunk along Time. This doesn't change anything here,
# because each dataset is only one step long in the Time dimension.
time_chunking = {"Time": -1}
ds1_stripped_rechunked = ds1_stripped.chunk(time_chunking)
ds2_stripped_rechunked = ds2_stripped.chunk(time_chunking)

# Report the size (from nbytes) of the first dataset after and before stripping.
print(f'stripped ds is {ds1_stripped_rechunked.nbytes/1e9} Gb')
print(f'Original ds was {ds1.nbytes/1e9} Gb')

stripped ds is 0.069862508 Gb
Original ds was 2.680378751 Gb


### compute the results (does not need a cluster because they are small)

In [9]:
# `.compute()` returns a new, loaded dataset and leaves the one it is called on
# lazy, so calling it without keeping the result throws the work away.
# `.persist()` also triggers the computation but keeps the variables as dask
# arrays with the same chunks; re-binding the names lets the later writes use
# the data already held in memory.
ds1_stripped_rechunked = ds1_stripped_rechunked.persist()
ds2_stripped_rechunked = ds2_stripped_rechunked.persist()

### write the first ds

In [10]:
# Destination directory for the Zarr store. Bucket paths always use '/', so
# join the components explicitly rather than with the OS-dependent os.path.join.
outdir = '/'.join([gcsdir, ampsdir, amps_ver, domain, 'zarr-raw/'])

# Read the credentials token from a local JSON file
# (presumably a service-account key — confirm).
with open('../secrets/ldeo-glaciology-bc97b12df06b.json') as token_file:
    token = json.load(token_file)

# Key/value mapper over the target store, authenticated with the token.
# NOTE(review): this mapper is created through fsspec rather than from `fs`,
# so the cache_timeout=0 passed when `fs` was built presumably does not apply
# to it — worth confirming when investigating the append problem.
amps_mapper = fsspec.get_mapper(outdir + 'wrfout_d03_201909_2D_rechunked_appendingTest5.zarr', mode='a',
                            token=token)

# Write the first dataset with mode='w'. `consolidated` is passed as the
# boolean True (not the string 'True'), matching the append call.
ds1_stripped_rechunked.to_zarr(amps_mapper,
                    mode='w',
                    consolidated=True);

### append the 2nd ds

In [11]:
# Append the second dataset to the same store along the Time dimension.
# Left as the cell's last expression so the returned store object is shown.
ds2_stripped_rechunked.to_zarr(
    amps_mapper,
    mode="a",
    append_dim="Time",
    consolidated=True,
)

<xarray.backends.zarr.ZarrStore at 0x7f50ae742520>

### Load the result and see how many time steps it has

In [12]:
# Re-open the store through the same mapper and display it to inspect the
# length of the Time dimension.
both_stripped_rechunked_reloaded = xr.open_zarr(store=amps_mapper)
both_stripped_rechunked_reloaded

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(1,)","(1,)"
Count,2 Tasks,1 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 8 B 8 B Shape (1,) (1,) Count 2 Tasks 1 Chunks Type datetime64[ns] numpy.ndarray",1  1,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(1,)","(1,)"
Count,2 Tasks,1 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.79 MB 2.79 MB Shape (1, 1035, 675) (1, 1035, 675) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",675  1035  1,

Unnamed: 0,Array,Chunk
Bytes,2.79 MB,2.79 MB
Shape,"(1, 1035, 675)","(1, 1035, 675)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray


### Comparing the XTIME coordinate of the reloaded Zarr store with those of the two original datasets suggests that the second dataset is not being written

In [76]:
# Print the XTIME values of the reloaded store first, then those of the two
# datasets that were written to it, one line per dataset.
for dataset in (
    both_stripped_rechunked_reloaded,
    ds1_stripped_rechunked,
    ds2_stripped_rechunked,
):
    print(dataset.XTIME.values)


['2019-09-30T03:00:00.000000000']
['2019-09-30T03:00:00.000000000']
['2019-09-30T06:00:00.000000000']
