In [None]:
#!conda install intake fsspec intake-xarray intake-thredds -c conda-forge -y

In [25]:
#!conda install eccodes cfgrib -c conda-forge -y

In [None]:
#!pip install climetlab --quiet

# GEFS 6h forecasts

In [1]:
import fsspec
import intake
# Configure fsspec's `simplecache` protocol: downloaded files are stored in
# ../my_caching_folder and keep their original file names (`same_names`),
# so they can later be found again by name in that folder.
fsspec.config.conf['simplecache'] = {'cache_storage': '../my_caching_folder', 'same_names': True}
import numpy as np
import intake_xarray
import xarray as xr
import climpred
import climetlab



## Find the data

GEFS output can be found on a `THREDDS` server: https://www.ncei.noaa.gov/thredds/catalog/model-gefs-003/202008/20200831/catalog.html

Here, we use `intake-thredds` to access the files and cache them with `fsspec`. However, you can also download the files manually, e.g. with `wget`. Documentation of the tools used here:

- https://intake.readthedocs.io/en/latest/
- https://intake-thredds.readthedocs.io/en/latest/
- https://filesystem-spec.readthedocs.io/en/latest/

In [None]:
# Open the THREDDS catalog of the 2020-08-31 GEFS run; displaying `cat`
# shows the metadata of that catalog.
cat = intake.open_thredds_cat(
    'https://www.ncei.noaa.gov/thredds/catalog/model-gefs-003/202008/20200831/catalog.html',
    name='name',
)
cat



Opening a file without `backend_kwargs` raises a `DatasetBuildError`: for `grib` files, the variable to load has to be selected with `filter_by_keys`.

DatasetBuildError: multiple values for unique key, try re-open the file with one of:

- filter_by_keys={'typeOfLevel': 'isobaricInhPa'}
- filter_by_keys={'typeOfLevel': 'surface'}
- filter_by_keys={'typeOfLevel': 'depthBelowLandLayer'}
- filter_by_keys={'typeOfLevel': 'heightAboveGround'}
- filter_by_keys={'typeOfLevel': 'atmosphereSingleLayer'}
- filter_by_keys={'typeOfLevel': 'atmosphere'}
- filter_by_keys={'typeOfLevel': 'nominalTop'}
- filter_by_keys={'typeOfLevel': 'pressureFromGroundLayer'}
- filter_by_keys={'typeOfLevel': 'meanSea'}

In [None]:
# Open one sample GRIB file through the `simplecache` protocol and plot its
# 2 m temperature. `filter_by_keys` selects a single variable, see
# https://github.com/ecmwf/cfgrib/issues/170
(
    intake_xarray.NetCDFSource(
        'simplecache::https://www.ncei.noaa.gov/thredds/fileServer/model-gefs-003/202008/20200831/gens-a_3_20200831_1800_000_20.grb2',
        xarray_kwargs={
            'engine': 'cfgrib',
            'backend_kwargs': {
                'filter_by_keys': {'typeOfLevel': 'heightAboveGround', 'shortName': '2t'},
            },
        },
    )
    .to_dask()
    .t2m
    .plot()
)

## Download the forecasts

In [22]:
# Initialization time of day (HHMM) as it appears in the GEFS file names.
inits_time='0000'
inits = ['20200829','20200830','20200831'] # three initialization dates (YYYYMMDD), each started at `inits_time`
members = range(5) # 5 ensemble members; they map to the file suffixes 00 ... 04
leads = np.arange(0, 6 * 4 * 2 + 1, 6) # lead hours 0, 6, ..., 48: 9 leads (8 non-zero) in 6h steps

In [24]:
%%time
for init in inits:
    for lead in leads:
        for member in members:
            try:
                #print(f'download init = {init}, lead = {lead}, member = {member}')
                intake_xarray.NetCDFSource(f'simplecache::https://www.ncei.noaa.gov/thredds/fileServer/model-gefs-003/202008/{init}/gens-a_3_{init}_{inits_time}_{str(lead).zfill(3)}_{str(member).zfill(2)}.grb2',
                                           xarray_kwargs=dict(engine='cfgrib', backend_kwargs=dict(filter_by_keys={'typeOfLevel': 'heightAboveGround', 'shortName':'2t'})),
                ).to_dask()
            except Exception as e:
                print('failed',type(e).__name__,e)

CPU times: user 25.7 s, sys: 5.15 s, total: 30.9 s
Wall time: 43min 3s


In [25]:
def _open_members(init_date, lead):
    """Open the cached GRIB files of one init date and one lead.

    The glob matches every member file present in the cache folder for that
    init date / lead; the files are stacked along a new 'member' dimension.
    """
    return xr.open_mfdataset(
        f'../my_caching_folder/gens-a_3_{init_date}_{inits_time}_{str(lead).zfill(3)}_*.grb2',
        concat_dim='member',
        combine='nested',
        engine='cfgrib',
        backend_kwargs={
            'filter_by_keys': {'typeOfLevel': 'heightAboveGround', 'shortName': '2t'},
        },
    )


# For each init date concatenate all leads along 'step', then concatenate the
# init dates along 'time'.
init = xr.concat(
    [
        xr.concat([_open_members(init_date, lead) for lead in leads], dim='step')
        for init_date in inits
    ],
    dim='time',
)
init.coords

Coordinates:
    number             (member) int64 0 1 2 3 4
  * time               (time) datetime64[ns] 2020-08-29 2020-08-30 2020-08-31
  * step               (step) timedelta64[ns] 0 days 00:00:00 ... 2 days 00:0...
    heightAboveGround  float64 2.0
  * latitude           (latitude) float64 90.0 89.0 88.0 ... -88.0 -89.0 -90.0
  * longitude          (longitude) float64 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0
    valid_time         (time, step) datetime64[ns] 2020-08-29 ... 2020-09-02

In [26]:
# save time when reproducing
init = init.compute()
init.to_netcdf('tmp_GEFS_a.nc')

In [2]:
# Re-open the forecasts from the netCDF file written by the previous cell.
init = xr.open_dataset('tmp_GEFS_a.nc')

In [3]:
# rename to climpred dims
init = init.rename({'step':'lead','number':'member','time':'init'}) #.drop('valid_time')

In [4]:
# set climpred lead units
init['lead']=np.arange(0,6*init.lead.size,6)
init.lead.attrs['units']='hours'

## Download observations

`climetlab` wraps `cdsapi` to download from the Copernicus Climate Data Store (CDS):

- https://cds.climate.copernicus.eu/cdsapp#!/home
- https://climetlab.readthedocs.io/en/latest/
- https://github.com/ecmwf/cdsapi/

In [5]:
# ERA5 2 m temperature as observations. The forecasts need verification data
# up to the last init (2020-08-31 00:00) plus 48 h; the request is 6-hourly
# on a [1.0, 1.0] grid.
obs = climetlab.load_source(
    "cds",
    "reanalysis-era5-single-levels",
    product_type="reanalysis",
    time=['00:00', '06:00', '12:00', '18:00'],
    grid=[1.0, 1.0],
    param="2t",
    date=[
        "2020-08-29", "2020-08-30", "2020-08-31",
        '2020-09-01', '2020-09-02', '2020-09-03',
    ],
).to_xarray()

In [6]:
# climetlab or cds switch logging to INFO; raise the root logger's threshold
# to ERROR to keep the output of the following cells quiet.
import logging

logger = logging.getLogger()  # no name given -> root logger
logger.setLevel(level=logging.ERROR)

In [7]:
# Observations should only carry `time`: remove the coordinates that describe
# number/member, step/lead, surface and valid_time.
# `Dataset.drop` is deprecated for dropping variables; `drop_vars` is the
# explicit replacement and removes the same names.
obs = obs.drop_vars(['number','step','surface','valid_time'])

## Forecast skill verification with `climpred.HindcastEnsemble`

In [103]:
# Alignment strategy passed to the verification call below.
alignment='same_inits'
# Build the hindcast from the forecasts without `valid_time` and attach the
# observations. `drop_vars` replaces the deprecated `Dataset.drop`.
hindcast = climpred.HindcastEnsemble(init.drop_vars('valid_time')).add_observations(obs)

In [None]:
# still experimental https://github.com/pangeo-data/climpred/issues/605
#hindcast = hindcast.remove_bias(alignment=alignment, cross_validate=False, how='mean')

In [None]:
# CRPS of the ensemble members against the observations, reduced over
# inits and members. Positional lead indices 1-8 are used, i.e. the first
# lead is skipped.
skill = (
    hindcast
    .isel(lead=range(1, 9))
    .verify(
        metric='crps',
        comparison='m2o',
        alignment=alignment,
        dim=['init', 'member'],
    )
)

# One map per lead, four panels per row.
skill.t2m.plot(col='lead', col_wrap=4, robust=True)

In [None]:
# Zoom into North America. The latitude slice runs from 70 down to 15 because
# the latitude coordinate is stored in descending order.
skill_north_america = skill.sel(longitude=slice(200, 320), latitude=slice(70, 15))
skill_north_america.t2m.plot(col='lead', col_wrap=4, robust=True, aspect=2.5)