In [2]:
import xarray as xr
import pandas as pd

In [3]:
# Input locations, judging by the path names: a single SEAS5 forecast file,
# a glob pattern for the ERA5-Land reference files, and the SEAS5
# calibration Zarr store.
fcst_data = '/bg/data/s2s/TABN/01_raw_forecasts/target_resolution/SEAS5_t2m_202501_0.1.nc'
ref_data = '/bg/data/s2s/TABN/02_reference/target_resolution/ERA5_Land_t2m_*'
mdl_data = '/bg/data/s2s/TABN/01_raw_forecasts/zarr_stores/SEAS5_t2m_01_0.1_calib_linechunks.zarr'

In [18]:
# Open the three inputs: the forecast file, every reference file matching
# the glob pattern (combined into one dataset), and the model Zarr store.
t2m_fcst = xr.open_dataset(fcst_data)
t2m_ref = xr.open_mfdataset(ref_data)
t2m_mdl = xr.open_zarr(mdl_data)

At this point, we have ensemble temperature forecasts for 35 years. The next steps are:
- removal of ensemble members
- truncation of the calibration period to 2000–2016
- truncation to a smaller domain across the Ethiopian Highlands

This substantially reduces the file size of the demo data.

Discard all but the first two ensemble members while keeping the ensemble dimension.

In [19]:
# Keep only positions 0 and 1 along 'ens'. The indexer is a slice rather
# than a scalar, so 'ens' remains a dimension of the result.
ens_subset = {'ens': slice(0, 2)}
t2m_mdl_trunc = t2m_mdl.isel(ens_subset, drop=False)
t2m_fcst_trunc = t2m_fcst.isel(ens_subset, drop=False)

Reduce the calibration period to 2000-01-01 through 2016-08-02 (roughly 16.5 years).

In [6]:
# One shared label-based time window (both end dates inclusive) applied to
# the reference data and to the already ensemble-reduced model data.
calib_window = slice('2000-01-01', '2016-08-02')
t2m_ref_trunc = t2m_ref.sel(time=calib_window)
t2m_mdl_trunc = t2m_mdl_trunc.sel(time=calib_window)

Truncate the domain to the Ethiopian Highlands

In [7]:
# Bounding box used to crop all datasets; values are in the units of the
# lat/lon coordinates (presumably degrees north / degrees east — confirm).
bbox = dict(lat_min=10, lat_max=13, lon_min=36, lon_max=39)

In [11]:
# Crop model, reference and forecast data to the same bounding box using a
# single label-based indexer.
# NOTE(review): slice(min, max) only selects data when lat/lon are stored in
# ascending order — confirm this holds for all three inputs.
spatial_window = {
    'lat': slice(bbox['lat_min'], bbox['lat_max']),
    'lon': slice(bbox['lon_min'], bbox['lon_max']),
}
t2m_mdl_trunc = t2m_mdl_trunc.sel(spatial_window)
t2m_ref_trunc = t2m_ref_trunc.sel(spatial_window)
t2m_fcst_trunc = t2m_fcst_trunc.sel(spatial_window)

In [8]:
# On-disk encoding handed to every to_netcdf call below.
# - t2m is packed into int16 with scale 0.01 and offset 273.15, compressed
#   with zlib level 1; -9999 marks missing values.
# - lat/lon are written as floats without a fill value.
# - time is written as int32 days since 1950-01-01 without a fill value.
encoding = {
    't2m': dict(
        zlib=True,
        complevel=1,
        _FillValue=-9999,
        scale_factor=0.01,
        add_offset=273.15,
        dtype='int16',
    ),
    'lat': dict(_FillValue=None, dtype="float"),
    'lon': dict(_FillValue=None, dtype="float"),
    'time': dict(
        _FillValue=None,
        units='days since 1950-01-01 00:00:00',
        dtype="int32",
    ),
}


In [14]:
# Round the horizontal coordinates to one decimal place so the coordinate
# labels are clean multiples of 0.1.
# The forecast dataset is rounded here as well: it is written out alongside
# the model data, and leaving its lat/lon unrounded would let its grid labels
# differ from the rounded model/reference grids by floating-point noise.
# assign_coords returns a new dataset instead of mutating the existing one,
# which keeps this cell safe to re-run.
t2m_mdl_trunc = t2m_mdl_trunc.assign_coords(
    lon=t2m_mdl_trunc.lon.round(decimals=1),
    lat=t2m_mdl_trunc.lat.round(decimals=1),
)
t2m_fcst_trunc = t2m_fcst_trunc.assign_coords(
    lon=t2m_fcst_trunc.lon.round(decimals=1),
    lat=t2m_fcst_trunc.lat.round(decimals=1),
)

In [15]:
# Round the reference grid's lon/lat labels to one decimal place.
t2m_ref_trunc = t2m_ref_trunc.assign_coords(
    lon=t2m_ref_trunc.lon.round(decimals=1),
    lat=t2m_ref_trunc.lat.round(decimals=1),
)

In [16]:
# Write the truncated model dataset to NetCDF with the shared encoding.
mdl_out_nc = '/bg/home/lorenz-c/SEAS5_t2m_Jan_2000_to_2016.nc'
t2m_mdl_trunc.to_netcdf(mdl_out_nc, encoding=encoding)

In [17]:
# Write the truncated reference dataset to NetCDF with the shared encoding.
ref_out_nc = '/bg/home/lorenz-c/ERA5_Land_t2m_2000_to_2016.nc'
t2m_ref_trunc.to_netcdf(ref_out_nc, encoding=encoding)

In [21]:
# Write the truncated forecast dataset to NetCDF with the shared encoding.
fcst_out_nc = '/bg/home/lorenz-c/SEAS5_t2m_202501.nc'
t2m_fcst_trunc.to_netcdf(fcst_out_nc, encoding=encoding)

In [22]:
# Re-open the three NetCDF files from disk (reference, model, forecast, in
# that order) so their stored contents can be inspected.
ref_loaded, mdl_loaded, fcst_loaded = [
    xr.open_dataset(nc_path)
    for nc_path in (
        '/bg/home/lorenz-c/ERA5_Land_t2m_2000_to_2016.nc',
        '/bg/home/lorenz-c/SEAS5_t2m_Jan_2000_to_2016.nc',
        '/bg/home/lorenz-c/SEAS5_t2m_202501.nc',
    )
]

In [24]:
# Show the re-opened forecast dataset as this cell's output.
fcst_loaded

In [99]:
# Show the re-opened model dataset as this cell's output.
mdl_loaded