# Tess

## Install and import things
These installs only need to be re-run when the notebook server restarts, not when just the kernel restarts.

In [None]:
!pip install fsspec==0.8.4
!pip install 'git+git://github.com/tjcrone/gdrivefs.git@540a55fae06cbed4f7d9f8d540ac29fcb640c38a'

In [1]:
import xarray as xr
import pandas as pd
import gdrivefs
import cftime

## Read dataset and rename dims
Here I'm working with just sp.cdf.

In [50]:
# Open the raw file with time left as plain numbers; the time axis is decoded
# explicitly in a later cell.
ds = xr.open_dataset('sp.cdf', decode_times=False)
# X is the longitude axis and Y the latitude axis. This matches the array shape
# shown further down, (488, 181, 360): 181 latitudes and 360 longitudes.
ds = ds.rename({'T': 'time', 'X': 'lon', 'Y': 'lat'})
# NOTE(review): this sets a dataset-level attribute. If the unit is meant to
# describe the `sp` variable, it probably belongs on ds['sp'].attrs -- confirm.
ds.attrs['units'] = 'mbar'
ds

## Convert time to datetime64
Converting to proper datetime64s will allow us to use the datetime functionality of Xarray.

In [51]:
# Step 1: interpret the raw numeric time values as month offsets from
# 1979-01-01 on a 360-day calendar, giving cftime date objects.
cf_dates = cftime.num2date(
    ds['time'],
    units='months since 1979-01-01',
    calendar='360_day',
)
ds['time'] = cf_dates

# Step 2: replace the cftime-based index with a pandas DatetimeIndex so the
# coordinate is stored as datetime64.
datetime_index = ds.indexes['time'].to_datetimeindex()
ds['time'] = datetime_index
ds

  


## Load and chunk into Dask array
Chunking into one-time-step chunks (one month each, since the time axis is monthly) seems reasonable.

In [69]:
# Read the whole dataset into memory first ...
ds = ds.load()
# ... then split it into Dask chunks holding a single time step each.
ds = ds.chunk({'time': 1})
ds

Unnamed: 0,Array,Chunk
Bytes,127.19 MB,260.64 kB
Shape,"(488, 181, 360)","(1, 181, 360)"
Count,489 Tasks,488 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 127.19 MB 260.64 kB Shape (488, 181, 360) (1, 181, 360) Count 489 Tasks 488 Chunks Type float32 numpy.ndarray",360  181  488,

Unnamed: 0,Array,Chunk
Bytes,127.19 MB,260.64 kB
Shape,"(488, 181, 360)","(1, 181, 360)"
Count,489 Tasks,488 Chunks
Type,float32,numpy.ndarray


## Save locally as Zarr

In [53]:
# Name of the Zarr store: written locally below, and reused as the path inside Google Drive.
filename = 'sp.zarr'

In [54]:
# mode='w' replaces any store left over from a previous run, so this cell can be
# re-executed (e.g. Restart & Run All) without failing on an existing 'sp.zarr'.
ds.to_zarr(filename, mode='w', consolidated=True, compute=True)

<xarray.backends.zarr.ZarrStore at 0x7f73300e9fb0>

#### test open
This works.

In [55]:
# Re-open the store just written to check the round trip. open_zarr is always
# Zarr-backed, so no `engine` argument is passed (same call shape as the
# Google Drive open further down).
ds_local = xr.open_zarr(filename, consolidated=True, chunks='auto')

In [56]:
# Display the dataset re-opened from the local Zarr store.
ds_local

Unnamed: 0,Array,Chunk
Bytes,127.19 MB,260.64 kB
Shape,"(488, 181, 360)","(1, 181, 360)"
Count,489 Tasks,488 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 127.19 MB 260.64 kB Shape (488, 181, 360) (1, 181, 360) Count 489 Tasks 488 Chunks Type float32 numpy.ndarray",360  181  488,

Unnamed: 0,Array,Chunk
Bytes,127.19 MB,260.64 kB
Shape,"(488, 181, 360)","(1, 181, 360)"
Count,489 Tasks,488 Chunks
Type,float32,numpy.ndarray


## Zip Zarr

In [75]:
!zip -r sp.zarr.zip sp.zarr

  adding: sp.zarr/ (stored 0%)
  adding: sp.zarr/sp/ (stored 0%)
  adding: sp.zarr/sp/387.0.0 (deflated 6%)
  adding: sp.zarr/sp/455.0.0 (deflated 6%)
  adding: sp.zarr/sp/297.0.0 (deflated 8%)
  adding: sp.zarr/sp/168.0.0 (deflated 6%)
  adding: sp.zarr/sp/60.0.0 (deflated 6%)
  adding: sp.zarr/sp/337.0.0 (deflated 6%)
  adding: sp.zarr/sp/334.0.0 (deflated 6%)
  adding: sp.zarr/sp/87.0.0 (deflated 6%)
  adding: sp.zarr/sp/281.0.0 (deflated 6%)
  adding: sp.zarr/sp/95.0.0 (deflated 6%)
  adding: sp.zarr/sp/429.0.0 (deflated 6%)
  adding: sp.zarr/sp/288.0.0 (deflated 8%)
  adding: sp.zarr/sp/312.0.0 (deflated 6%)
  adding: sp.zarr/sp/343.0.0 (deflated 6%)
  adding: sp.zarr/sp/487.0.0 (deflated 6%)
  adding: sp.zarr/sp/283.0.0 (deflated 6%)
  adding: sp.zarr/sp/365.0.0 (deflated 6%)
  adding: sp.zarr/sp/369.0.0 (deflated 6%)
  adding: sp.zarr/sp/307.0.0 (deflated 6%)
  adding: sp.zarr/sp/24.0.0 (deflated 6%)
  adding: sp.zarr/sp/356.0.0 (deflated 6%)
  adding: sp.zarr/sp/406.0.0 (deflat

## Save to Google Drive as Zarr
It looks like there is a bug in the save function of gdrivefs. Nothing in this section appears to work correctly.

In [57]:
# Google Drive ID passed as `root_file_id` when the Drive filesystem is created below.
gdfid = '1PCBDhk5f3v5PoPCY3Rdcqgy4S_Yj2kCC'

In [76]:
# Filesystem object rooted at the Drive ID held in `gdfid`.
# NOTE(review): token='cache' presumably reuses previously stored credentials --
# confirm against the gdrivefs documentation.
gdfs = gdrivefs.GoogleDriveFileSystem(
    root_file_id=gdfid,
    token='cache',
)

In [70]:
# Mapper for the path `filename` on the Drive filesystem; it is handed to
# to_zarr / open_zarr below as the store.
mapper = gdfs.get_mapper(filename)

In [None]:
# NOTE(review): this cell has no execution count, so it appears never to have run.
# It writes to the same store as the following cell; running both would write the
# data twice (append_dim appends along 'time'). Confirm which variant is intended.
ds.to_zarr(mapper, consolidated=True, append_dim='time')

In [64]:
# Load the data into memory, then write it to the Drive-backed store.
ds.load().to_zarr(mapper, consolidated=True)
# Alternative that was tried here: the same to_zarr call with append_dim='time' and without .load().

<xarray.backends.zarr.ZarrStore at 0x7f7330419830>

#### test open
I downloaded the Zarr zip file, unzipped it locally, and then dragged the unzipped store into my Google Drive. Opening that copy worked.

In [73]:
# Open the store through the Google Drive mapper, using consolidated metadata
# and automatically chosen Dask chunks.
ds_gdrive = xr.open_zarr(
    mapper,
    consolidated=True,
    chunks='auto',
)

In [74]:
# Display the dataset opened from Google Drive.
ds_gdrive

Unnamed: 0,Array,Chunk
Bytes,127.19 MB,260.64 kB
Shape,"(488, 181, 360)","(1, 181, 360)"
Count,489 Tasks,488 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 127.19 MB 260.64 kB Shape (488, 181, 360) (1, 181, 360) Count 489 Tasks 488 Chunks Type float32 numpy.ndarray",360  181  488,

Unnamed: 0,Array,Chunk
Bytes,127.19 MB,260.64 kB
Shape,"(488, 181, 360)","(1, 181, 360)"
Count,489 Tasks,488 Chunks
Type,float32,numpy.ndarray
