# Evaluate forecasts

In [1]:
# Re-import edited modules automatically before each cell runs
# (mode 2 = reload every module, not only those marked with %aimport).
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import seaborn as sns
from src.score import *

In [3]:
# Global seaborn plot defaults for this notebook: 'darkgrid' style, 'notebook' context.
sns.set_style('darkgrid')
sns.set_context('notebook')

In [4]:
# Root directories for the input data and the stored predictions.
# The trailing slash is required: the cells below build file paths by plain
# string concatenation (e.g. f'{DATADIR}geopotential_500').
DATADIR = '/data/weather-benchmark/5.625deg/'
PREDDIR = '/data/weather-benchmark/predictions/'

## Load data

In [5]:
# Load the validation subset of the data: 2017 and 2018
# The second argument is the variable name passed to the loader ('z' / 't').
# NOTE(review): the loader is called `load_test_data` (presumably provided by
# `from src.score import *`) while the results are named `*_valid` — confirm
# which split is meant.
z500_valid = load_test_data(f'{DATADIR}geopotential_500', 'z')
t850_valid = load_test_data(f'{DATADIR}temperature_850', 't')

In [6]:
# Baseline predictions stored as netCDF files in PREDDIR:
# persistence, climatology and weekly climatology.
persistence = xr.open_dataset(f'{PREDDIR}persistence.nc')
climatology = xr.open_dataset(f'{PREDDIR}climatology.nc')
weekly_climatology = xr.open_dataset(f'{PREDDIR}weekly_climatology.nc')

In [8]:
# Stored `fc_lr_*` forecasts from PREDDIR.
# NOTE(review): presumably linear-regression forecasts for 3-day and 5-day lead
# times plus an iterated 6-hourly variant — confirm against the notebook that
# wrote these files.
lr_3d = xr.open_dataset(f'{PREDDIR}fc_lr_3d.nc')
lr_5d = xr.open_dataset(f'{PREDDIR}fc_lr_5d.nc')
lr_6h_iter = xr.open_dataset(f'{PREDDIR}fc_lr_6h_iter.nc')

## TIGGE and IFS T21 data

For these we need to do some preprocessing to get the data into the same format as our ERA5 files.

In [72]:
# Open every file in the TIGGE 5.625deg directory as one dataset; squeeze()
# drops length-1 dimensions. (The geopotential height -> geopotential
# conversion itself happens in the next cell.)
# NOTE(review): this relies on the deprecated implicit combine behaviour of
# open_mfdataset (see the FutureWarning in the output) — pass `combine=`
# explicitly when upgrading xarray.
tigge = xr.open_mfdataset('/data/weather-benchmark/tigge/5.625deg/*').squeeze()

will change. To retain the existing behavior, pass
combine='nested'. To use future default behavior, pass
combine='by_coords'. See
http://xarray.pydata.org/en/stable/combining.html#combining-multi

  
future, to continue concatenating without supplying dimension
coordinates, please use the new `combine_nested` function (or the
`combine='nested'` option to open_mfdataset.The datasets supplied require both concatenation and merging. From
xarray version 0.14 this will operation will require either using the
new `combine_nested` function (or the `combine='nested'` option to
open_mfdataset), with a nested list structure such that you can combine
along the dimensions None. Alternatively if your datasets have global
dimension coordinates then you can use the new `combine_by_coords`
function.
  from_openmfds=True,


In [73]:
# Convert geopotential height (gh) to geopotential: z = gh * g, with g ~ 9.807 m/s^2.
# The result is added to the dataset as a new variable 'z'; 'gh' is kept.
tigge['z'] = tigge.gh * 9.807
tigge

<xarray.Dataset>
Dimensions:  (lat: 32, lon: 64, time: 12768)
Coordinates:
    lev      float64 5e+04
  * lat      (lat) float64 -87.19 -81.56 -75.94 -70.31 ... 75.94 81.56 87.19
  * lon      (lon) float64 0.0 5.625 11.25 16.88 ... 337.5 343.1 348.8 354.4
  * time     (time) datetime64[ns] 2017-01-01 ... 2017-11-05T12:00:00
Data variables:
    gh       (time, lat, lon) float32 dask.array<chunksize=(1302, 32, 64), meta=np.ndarray>
    t        (time, lat, lon) float32 dask.array<chunksize=(1302, 32, 64), meta=np.ndarray>
    z        (time, lat, lon) float32 dask.array<chunksize=(1302, 32, 64), meta=np.ndarray>
Attributes:
    CDI:          Climate Data Interface version ?? (http://mpimet.mpg.de/cdi)
    Conventions:  CF-1.4
    history:      Tue Dec 03 13:14:18 2019: cdo -f nc copy /data/weather-benc...
    institution:  European Centre for Medium-Range Weather Forecasts
    CDO:          Climate Data Operators version 1.7.0 (http://mpimet.mpg.de/...

In [74]:
# Open all IFS T21 files matching output_pl_5.625*.nc as one dataset.
# NOTE(review): absolute path on a machine-specific mount, outside DATADIR /
# PREDDIR — move the files or introduce a path constant before sharing.
# NOTE(review): relies on the deprecated implicit combine behaviour of
# open_mfdataset (see the FutureWarning in the output).
t21 = xr.open_mfdataset('/media/rasp/Elements/weather-benchmark/IFS_T21/raw/output_pl_5.625*.nc')

will change. To retain the existing behavior, pass
combine='nested'. To use future default behavior, pass
combine='by_coords'. See
http://xarray.pydata.org/en/stable/combining.html#combining-multi

  """Entry point for launching an IPython kernel.
future, to continue concatenating without supplying dimension
coordinates, please use the new `combine_nested` function (or the
`combine='nested'` option to open_mfdataset.
  from_openmfds=True,


In [75]:
# Inspect the T21 grid: latitude runs north to south and longitude spans
# -180 ... 174.4, unlike the TIGGE dataset above (south to north, 0 ... 354.4).
t21

<xarray.Dataset>
Dimensions:  (lat: 32, lev: 4, lon: 64, time: 42340)
Coordinates:
  * lev      (lev) float64 1e+05 8.5e+04 5e+04 2e+04
  * lat      (lat) float64 87.19 81.56 75.94 70.31 ... -75.94 -81.56 -87.19
  * lon      (lon) float64 -180.0 -174.4 -168.8 -163.1 ... 163.1 168.8 174.4
  * time     (time) datetime64[ns] 2017-01-01 ... 2018-01-07T18:00:00
Data variables:
    z        (time, lev, lat, lon) float32 dask.array<chunksize=(20996, 4, 32, 64), meta=np.ndarray>
    t        (time, lev, lat, lon) float32 dask.array<chunksize=(20996, 4, 32, 64), meta=np.ndarray>
    r        (time, lev, lat, lon) float32 dask.array<chunksize=(20996, 4, 32, 64), meta=np.ndarray>
    u        (time, lev, lat, lon) float32 dask.array<chunksize=(20996, 4, 32, 64), meta=np.ndarray>
    v        (time, lev, lat, lon) float32 dask.array<chunksize=(20996, 4, 32, 64), meta=np.ndarray>
Attributes:
    CDI:          Climate Data Interface version ?? (http://mpimet.mpg.de/cdi)
    Conventions:  CF-1.4
    

In [76]:
# Bring the T21 grid onto the same conventions as the TIGGE / ERA5 files.

# Latitude: T21 is stored north -> south, TIGGE south -> north. Overwrite the T21
# latitude labels with the negated TIGGE latitudes (same north -> south order),
# presumably so that both datasets carry exactly the same float labels.
t21['lat'] = -tigge.lat

# Longitude: T21 starts at -180, TIGGE at 0. Rolling by half of the 64 grid
# points moves longitude 0 to the front. roll_coords=True keeps every label
# attached to its data column; the implicit default is deprecated and later
# flips to False, which would shift the data relative to its labels.
t21 = t21.roll(lon=32, roll_coords=True)

# roll() only reorders, it never relabels: after it the labels read
# 0 ... 174.4, -180 ... -5.6. Wrap them into [0, 360) so they match the
# 0 ... 354.4 labels of the other datasets; otherwise label-based alignment
# would silently discard the western hemisphere.
t21 = t21.assign_coords(lon=t21.lon % 360)

  


In [77]:
def convert_time(ds, nlead_time, dt_h=6):
    """Reshape a flat forecast time axis into (init time, lead time).

    The leading ``time`` axis of every data variable is split into blocks of
    ``nlead_time`` consecutive entries. The first entry of each block becomes
    the new ``time`` coordinate (initialization time); the position inside the
    block becomes the new ``lead_time`` coordinate, expressed in hours.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset whose data variables all have ``time`` as their first
        dimension (the reshape below splits axis 0).
    nlead_time : int
        Number of consecutive time steps that belong to one forecast.
    dt_h : int, optional
        Spacing between two forecast steps in hours. Defaults to 6.

    Returns
    -------
    xr.Dataset
        Dataset with the same variables, each with dimensions
        ``('time', 'lead_time', ...)``.

    Raises
    ------
    ValueError
        If the length of ``ds.time`` is not a multiple of ``nlead_time``.
    """
    n_steps = len(ds.time)
    if nlead_time < 1 or n_steps % nlead_time:
        # Fail early with a readable message instead of an opaque reshape error.
        raise ValueError(
            f'time axis of length {n_steps} cannot be split into forecasts '
            f'of {nlead_time} steps each'
        )

    init_times = ds.time[::nlead_time]
    lead_hours = np.arange(0, dt_h * nlead_time, dt_h)
    lead_times = xr.DataArray(
        lead_hours, dims=['lead_time'], coords={'lead_time': lead_hours})

    # Re-use all existing coordinates, replacing 'time' by the init times.
    coords = dict(ds.coords)
    coords.pop('time')
    coords['time'] = init_times
    # The key must equal the dimension name, otherwise 'lead_time' ends up
    # without an index coordinate and cannot be selected by label.
    coords['lead_time'] = lead_times

    split_shape = (len(init_times), len(lead_times))
    # NOTE: .values loads each variable fully into memory before reshaping.
    return xr.merge([
        xr.DataArray(
            ds[var].values.reshape(split_shape + ds[var].shape[1:]),
            dims=('time', 'lead_time') + ds[var].dims[1:],
            coords=coords,
            name=var,
        )
        for var in ds.data_vars
    ])

In [78]:
# Split the flat time axis into (init time, lead time):
# 21 steps per TIGGE forecast, 29 steps per T21 forecast.
# NOTE: not idempotent — re-running this cell converts already converted data.
tigge = convert_time(tigge, 21)
# The result used to be bound to the misspelled name `tiggea` only, so the
# save cell below wrote the *unconverted* `tigge`. Keep the old name as an
# alias in case a later cell still refers to it; remove once nothing does.
tiggea = tigge
t21 = convert_time(t21, 29)

In [79]:
# Persist the TIGGE and T21 datasets as netCDF under PREDDIR for later usage
for dataset, target in (
    (tigge, f'{PREDDIR}/tigge_5.625deg.nc'),
    (t21, f'{PREDDIR}/t21_5.625deg.nc'),
):
    dataset.to_netcdf(target)

## Compute RMSE