In [None]:
#!conda install intake fsspec intake-xarray intake-thredds -c conda-forge -y

In [25]:
#!conda install eccodes cfgrib -c conda-forge -y

# GEFS 6h forecasts

In [1]:
import fsspec
import intake
# Configure fsspec's `simplecache`: downloaded files are stored in ../my_caching_folder
# and keep their original file names (`same_names: True`)
fsspec.config.conf['simplecache'] = {'cache_storage': '../my_caching_folder', 'same_names': True}
import numpy as np
import intake_xarray
import xarray as xr
import climpred



## Find the data

GEFS output can be found on a `THREDDS` server: https://www.ncei.noaa.gov/thredds/catalog/model-gefs-003/202008/20200831/catalog.html

Here, we use `intake-thredds` to access the files and cache them with `fsspec`. However, you can also download the files manually, e.g. with `wget`.
- https://intake.readthedocs.io/en/latest/
- https://intake-thredds.readthedocs.io/en/latest/
- https://filesystem-spec.readthedocs.io/en/latest/

In [36]:
# Open the THREDDS catalog of the GEFS run from 2020-08-31; displaying `cat`
# shows all the metadata that comes with it
cat = intake.open_thredds_cat(
    'https://www.ncei.noaa.gov/thredds/catalog/model-gefs-003/202008/20200831/catalog.html',
    name='name',
)
cat



name:
  args:
    name: name
    url: https://www.ncei.noaa.gov/thredds/catalog/model-gefs-003/202008/20200831/catalog.html
  description: ''
  driver: intake_thredds.cat.ThreddsCatalog
  metadata:
    authority:
    - gov.noaa.ncdc
    contributor:
      Scientific Contact:
      - Zoltar Toth
      Technical Contact:
      - Yuejian Zhu
    creator:
    - {}
    dataType: GRID
    date:
    - type: created
      value: '2007-03-27'
    - type: issued
      value: '2007-10-01'
    documentation:
      abstract:
      - The Global Ensemble Forecast System (GEFS) is a weather forecast model made
        up of 21 separate forecasts, or ensemble members. The National Centers for
        Environmental Prediction (NCEP) started the GEFS to address the nature of
        uncertainty in weather observations, which are used to initialize weather
        forecast models. The proverbial butterfly flapping her wings can have a cascading
        effect leading to wind gusts thousands of miles away.

`filter_by_keys` needs to be specified when opening these `grib` files, otherwise `cfgrib` raises:

DatasetBuildError: multiple values for unique key, try re-open the file with one of:
-    filter_by_keys={'typeOfLevel': 'isobaricInhPa'}
-    filter_by_keys={'typeOfLevel': 'surface'}
-    filter_by_keys={'typeOfLevel': 'depthBelowLandLayer'}
-    filter_by_keys={'typeOfLevel': 'heightAboveGround'}
-    filter_by_keys={'typeOfLevel': 'atmosphereSingleLayer'}
-    filter_by_keys={'typeOfLevel': 'atmosphere'}
-    filter_by_keys={'typeOfLevel': 'nominalTop'}
-    filter_by_keys={'typeOfLevel': 'pressureFromGroundLayer'}
-    filter_by_keys={'typeOfLevel': 'meanSea'}

In [35]:
import cfgrib

# Open a single GRIB2 file through the fsspec cache and plot its 2m temperature.
# `filter_by_keys` restricts cfgrib to one level type / variable, see
# https://github.com/ecmwf/cfgrib/issues/170
(
    intake_xarray.NetCDFSource(
        'simplecache::https://www.ncei.noaa.gov/thredds/fileServer/model-gefs-003/202008/20200831/gens-a_3_20200831_1800_000_20.grb2',
        xarray_kwargs=dict(
            engine='cfgrib',
            backend_kwargs=dict(
                filter_by_keys={'typeOfLevel': 'heightAboveGround', 'shortName': '2t'}
            ),
        ),
    )
    .to_dask()
    .t2m
    .plot()
)

# Download the data

In [40]:
inits = list(range(0, 24, 6))  # the four initialization hours (00, 06, 12, 18) of that one day
members = range(3)  # only the first 3 ensemble members
leads = np.arange(0, 48 + 1, 6)  # lead hours 0, 6, ..., 48 -> 9 leads at 6h spacing

In [None]:
%%time
# Open every (init, lead, member) file once through the `simplecache::` protocol so
# that it gets cached locally; a failing file is reported and the loop carries on.
for init in inits:
    for lead in leads:
        for member in members:
            try:
                # file name: gens-a_3_20200831_<HH>00_<lead, 3 digits>_<member, 2 digits>.grb2
                intake_xarray.NetCDFSource(
                    'simplecache::https://www.ncei.noaa.gov/thredds/fileServer/model-gefs-003/202008/20200831/'
                    f'gens-a_3_20200831_{init:02d}00_{lead:03d}_{member:02d}.grb2',
                    xarray_kwargs=dict(
                        engine='cfgrib',
                        backend_kwargs=dict(
                            filter_by_keys={'typeOfLevel': 'heightAboveGround', 'shortName': '2t'}
                        ),
                    ),
                ).to_dask()
            except Exception as e:
                print('failed', type(e).__name__, e)

In [134]:
def _open_members(init_hour, lead_hour):
    """Open all cached member files of one init hour and lead, stacked along 'member'."""
    return xr.open_mfdataset(
        f'../my_caching_folder/gens-a_3_20200831_{str(init_hour).zfill(2)}00_{str(lead_hour).zfill(3)}_*.grb2',
        concat_dim='member',
        combine='nested',
        engine='cfgrib',
        backend_kwargs=dict(
            filter_by_keys={'typeOfLevel': 'heightAboveGround', 'shortName': '2t'}
        ),
    )


# For each init hour, concatenate the leads along 'step'; then concatenate the
# init hours along 'time'.
init = xr.concat(
    [
        xr.concat([_open_members(hour, lead) for lead in leads], dim='step')
        for hour in inits
    ],
    dim='time',
)
init.coords

Coordinates:
    number             (member) int64 0 1 2
  * time               (time) datetime64[ns] 2020-08-31 ... 2020-08-31T18:00:00
  * step               (step) timedelta64[ns] 0 days 00:00:00 ... 2 days 00:0...
    heightAboveGround  float64 2.0
  * latitude           (latitude) float64 90.0 89.0 88.0 ... -88.0 -89.0 -90.0
  * longitude          (longitude) float64 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0
    valid_time         (time, step) datetime64[ns] 2020-08-31 ... 2020-09-02T...

In [137]:
# Save time when reproducing: evaluate the dataset once with `compute()` and write
# the result to a NetCDF file that later runs can load directly.
init = init.compute()
init.to_netcdf('tmp_GEFS_a.nc')

In [138]:
# reload the forecasts from the intermediate NetCDF file written by `to_netcdf`
init = xr.open_dataset('tmp_GEFS_a.nc')

In [139]:
# rename to climpred dims: step -> lead, number -> member, time -> init
# NOTE(review): `number` is a coordinate on the already existing `member` dimension;
# newer xarray versions may not turn the renamed coordinate into an index
# automatically - confirm when upgrading xarray
init = init.rename({'step':'lead','number':'member','time':'init'}) #.drop('valid_time')

In [140]:
# climpred lead convention: replace the timedelta leads by integer hours
# (0, 6, 12, ...) and declare the unit in the attributes
n_leads = init.lead.size
init['lead'] = 6 * np.arange(n_leads)
init['lead'].attrs['units'] = 'hours'

## Hindcast

In [141]:
# need observed 2m temperature from 2020-08-31 00:00 up to the last valid time (18:00 init + 48h = 2020-09-02 18:00)

In [105]:
!pip install climetlab --quiet

In [89]:
import climetlab

# Observations to verify against: 2m temperature ("2t") from the
# "reanalysis-era5-single-levels" dataset of the "cds" source on a 1.0 x 1.0 grid,
# at 00/06/12/18h of the three days covered by the forecasts.
obs = climetlab.load_source(
    "cds",
    "reanalysis-era5-single-levels",
    product_type="reanalysis",
    time=["00:00", "06:00", "12:00", "18:00"],
    grid=[1.0, 1.0],
    param="2t",
    date=["2020-08-31", "2020-09-01", "2020-09-02"],
).to_xarray()

In [90]:
import logging

# climetlab or cds switch logging to INFO; raise the threshold of the root logger
# to ERROR to keep the following cell outputs quiet
logger = logging.getLogger()  # called without a name, this returns the root logger
logger.setLevel(logging.ERROR)

In [91]:
# Remove variables/coordinates of the download that are not needed for verification.
# `drop_vars` replaces the deprecated `Dataset.drop`.
obs = obs.drop_vars(['number', 'step', 'surface', 'valid_time'])

# bias reduction by hand

In [150]:
# Manual stand-in for `hindcast = hindcast.remove_bias('same_inits')`, which does not work here.
# bias = (ensemble mean forecast - observations at the forecast valid times), averaged over init
ensemble_mean = init.mean('member')
obs_at_valid_time = obs.rename({'time': 'valid_time'}).sel(valid_time=init.valid_time)
bias = (ensemble_mean - obs_at_valid_time).mean('init')
#bias.t2m.plot(col='lead', robust=True)

In [143]:
# Subtract the bias from the forecasts.
# Caution: this overwrites `init`, so running the cell again subtracts the bias a
# second time; recreate `init` before re-running.
init = init - bias

# skill

In [144]:
# wrap the bias-corrected forecasts in a climpred HindcastEnsemble,
# then attach the observations to verify against
hindcast = climpred.HindcastEnsemble(init)
hindcast = hindcast.add_observations(obs)

In [145]:
# hindcast = hindcast.remove_bias('same_inits')

In [146]:
# RMSE of the 2m temperature forecasts with climpred's 'e2o' comparison
# (presumably ensemble mean vs. observations - see the climpred comparison docs),
# reduced over the 'init' dimension
skill = hindcast[['t2m']].verify(
    metric='rmse',
    comparison='e2o',
    alignment='same_inits',
    dim='init',
)

In [None]:
# plot the t2m skill for the first five leads, one panel per lead
skill.t2m.isel(lead=range(5)).plot(col='lead', robust=True)

In [None]:
# zooming into north america
# skill.sel(longitude=slice(200,320), latitude=slice(60,15)).t2m.plot(col='lead', col_wrap=4, robust=True, aspect=2.5)