# ERA5: netCDF into Zarr

- A short bit of code showing the transformation of ERA5 data into a Zarr store
- When I try to write the ERA5 data directly to Zarr, the kernel resets
- Rechunking the data fixes this, so I rechunk then write

In [None]:
# Suppress all warning messages so they do not clutter the notebook output
import warnings 
warnings.filterwarnings("ignore") 

# Libraries used by the notebook
import datetime as dt
import xarray as xr
import fsspec
import s3fs
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
# Make datasets display nicely (HTML representation)
xr.set_options(display_style="html")  

# IPython magics: embed static images of the plots in the notebook
%matplotlib inline  
# Default figure size for every plot, given as (width, height)
plt.rcParams['figure.figsize'] = 12, 6
# Ask the inline backend for 'retina' format figures
%config InlineBackend.figure_format = 'retina' 

## A function to read either all the data or just a single variable

In [None]:
def get_era5(var, lyr):
    """Open one year of ERA5 netCDF files from the ``era5-pds`` S3 bucket.

    Parameters
    ----------
    var : str
        File-name prefix to match inside each ``data/`` folder of the year,
        or ``'all'`` to match every ``.nc`` file directly inside each
        sub-folder of the year.
    lyr : int
        Year to read; zero-padded to four characters to build the S3 prefix.

    Returns
    -------
    xarray.Dataset
        The matched files combined by ``xr.open_mfdataset`` with
        ``combine='nested'`` along the ``'time0'`` dimension.

    Notes
    -----
    Values are returned as stored in the files; no unit conversion is
    applied here.
    """
    year_prefix = 's3://era5-pds/' + str(lyr).zfill(4)
    # anon=True: connect to the bucket without credentials.
    s3 = s3fs.S3FileSystem(anon=True)
    pattern = '/*/*.nc' if var == 'all' else '/*/data/' + var + '*.nc'
    matched_paths = s3.glob(year_prefix + pattern)
    # open_mfdataset is handed open file objects rather than path strings.
    open_files = []
    for path in matched_paths:
        open_files.append(s3.open(path))
    return xr.open_mfdataset(open_files, combine='nested', concat_dim='time0')

## Loop through years, appending to the Zarr store

In [None]:
%%time
for lyr in range(1979,2018):
    ds = get_era5('sea_surface_temperature',lyr) 
    _, index = np.unique(ds['time0'], return_index=True) #remove any duplicates
    ds = ds.isel(time0=index)
    dy = ds.resample(time0='1D').mean(keep_attrs=True,skipna=False)
    dy = dy.chunk({'lat':100,'lon':100,'time0':100})
    if lyr==1979:
        dy.to_zarr('./../../data/era5zarr_all')
    else:
        dy.to_zarr('./../../data/era5zarr_all',append_dim='time0')


## Test opening the data
- Because it is a local Zarr store of the data, you don't need to create an fsspec mapping

In [None]:
# Open the local Zarr store directly from its path on disk.
zarr_path = './../../data/era5zarr_all'
ds = xr.open_zarr(zarr_path)
# Bare expression on the last line so the notebook displays the dataset.
ds


In [None]:
# Plot the last slice along the first dimension of the SST variable
# (presumably time0 -- confirm against the dataset's dimension order).
last_step = ds['sea_surface_temperature'][-1, :, :]
last_step.plot()