In [1]:
# builtins
import sys
import os
import time
import logging
from datetime import timedelta
from logging.config import dictConfig
import numpy as np
import datetime
import pathlib
import pandas as pd
import joblib

# externals
import xarray as xr

from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# locals
from downscaleml.core.dataset import ERA5Dataset, NetCDFDataset

from downscaleml.main.config import (NET, ERA5_PLEVELS, ERA5_PREDICTORS, PREDICTAND,
                                     CALIB_PERIOD, VALID_PERIOD, DOY, NORM,
                                     OVERWRITE, DEM, DEM_FEATURES, STRATIFY,
                                     WET_DAY_THRESHOLD, VALID_SIZE, 
                                     start_year, end_year, CHUNKS)

from downscaleml.main.inputoutput import (ERA5_PATH, OBS_PATH, DEM_PATH, MODEL_PATH, TARGET_PATH)

from downscaleml.core.constants import (ERA5_P_VARIABLES, ERA5_P_VARIABLES_SHORTCUT, ERA5_P_VARIABLE_NAME,
                                        ERA5_S_VARIABLES, ERA5_S_VARIABLES_SHORTCUT, ERA5_S_VARIABLE_NAME,
                                        ERA5_VARIABLES, ERA5_VARIABLE_NAMES, ERA5_PRESSURE_LEVELS,
                                        PREDICTANDS, ERA5_P_VARIABLES, ERA5_S_VARIABLES)

from downscaleml.core.utils import NAMING_Model, normalize, search_files, LogConfig
from downscaleml.core.logging import log_conf
    
# module level logger: named after the running module, which is '__main__'
# when this code is executed directly (the log records emitted from the
# notebook cells carry the '__main__:' prefix)
LOGGER = logging.getLogger(__name__)

def stacker(xarray_dataset):
    """Turn a dataset into a transposed array of its variables and log it.

    The dataset is passed through ``stack()``, converted with ``to_array()``
    and the underlying ``.data`` array is transposed with ``.T``.

    NOTE(review): ``stack()`` is called without naming any dimensions, so it
    is not evident that the lat/lon axes are actually combined here; the
    "(spatial, time, variables)" layout announced in the log message should
    be confirmed against the logged shape.

    Parameters
    ----------
    xarray_dataset :
        Object providing ``stack()``; presumably an ``xarray.Dataset`` whose
        variables share the same dimensions -- TODO confirm against callers.

    Returns
    -------
    transposed :
        The transposed ``.data`` array of all variables.

    """
    # one array holding every variable, with the axis order reversed
    transposed = xarray_dataset.stack().to_array().data.T

    # the message embeds both the array representation and its shape
    message = 'Shape of the {} is in (spatial, time, variables):{}'.format(
        transposed, transposed.shape)
    LogConfig.init_log(message)

    return transposed

if __name__ == '__main__':

    # initialize timing
    start_time = time.monotonic()

    # initialize network filename
    state_file = NAMING_Model.state_file(
        NET, PREDICTAND, ERA5_PREDICTORS, ERA5_PLEVELS, WET_DAY_THRESHOLD, dem=DEM,
        dem_features=DEM_FEATURES, doy=DOY, stratify=STRATIFY)

    # model state lives under MODEL_PATH/<predictand>, the downscaled
    # product under TARGET_PATH/<predictand>
    state_file = MODEL_PATH.joinpath(PREDICTAND, state_file)
    target = TARGET_PATH.joinpath(PREDICTAND)

    # check if output path exists
    if not target.exists():
        target.mkdir(parents=True, exist_ok=True)

    # initialize logging: the log file sits next to the model state file and
    # is removed first, so every run starts with an empty log
    log_file = state_file.with_name(state_file.name + "_log.txt")
    if log_file.exists():
        log_file.unlink()
    dictConfig(log_conf(log_file))

    # check if target dataset already exists; stop unless overwriting is
    # explicitly requested
    target = target.joinpath(state_file.name + '.nc')
    if target.exists() and not OVERWRITE:
        LogConfig.init_log('{} already exists.'.format(target))
        sys.exit()

    LogConfig.init_log('Initializing downscaling for period: {}'.format(
        ' - '.join([str(CALIB_PERIOD[0]), str(CALIB_PERIOD[-1])])))

    # initialize ERA5 predictor dataset
    LogConfig.init_log('Initializing ERA5 predictors.')
    Era5 = ERA5Dataset(ERA5_PATH.joinpath('ERA5'), ERA5_PREDICTORS,
                       plevels=ERA5_PLEVELS)
    Era5_ds = Era5.merge(chunks=CHUNKS)

    # use the same spatial dimension names ('y', 'x') for predictors and
    # observations
    Era5_ds = Era5_ds.rename({'longitude': 'x', 'latitude': 'y'})

    # initialize OBS predictand dataset
    LogConfig.init_log('Initializing observations for predictand: {}'
                       .format(PREDICTAND))

    # read in-situ gridded observations
    Obs_ds = search_files(OBS_PATH.joinpath(PREDICTAND), '.nc$').pop()
    Obs_ds = xr.open_dataset(Obs_ds)
    Obs_ds = Obs_ds.rename({'longitude': 'x', 'latitude': 'y'})

    # whether to use digital elevation model
    if DEM:
        # digital elevation model: Copernicus EU-Dem v1.1
        dem = search_files(DEM_PATH, '^interTwin_dem.nc$').pop()

        # read elevation and compute slope and aspect
        dem = ERA5Dataset.dem_features(
            dem, {'y': Era5_ds.y, 'x': Era5_ds.x},
            add_coord={'time': Era5_ds.time})

        # check whether to use slope and aspect
        if not DEM_FEATURES:
            dem = dem.drop_vars(['slope', 'aspect'])

        # add dem to set of predictor variables; chunk it once, exactly like
        # the predictors, before merging
        dem = dem.chunk(Era5_ds.chunks)
        Era5_ds = xr.merge([Era5_ds, dem])

    # initialize training data
    LogConfig.init_log('Initializing training data.')

    # split calibration period into training and validation period
    if PREDICTAND == 'pr' and STRATIFY:
        # stratify training and validation dataset by number of
        # observed wet days for precipitation
        wet_days = (Obs_ds.sel(time=CALIB_PERIOD).mean(dim=('y', 'x'))
                    >= WET_DAY_THRESHOLD).to_array().values.squeeze()

        # NOTE(review): no random_state is passed, so the split is expected
        # to differ from run to run -- confirm whether that is intended
        train, valid = train_test_split(
            CALIB_PERIOD, stratify=wet_days, test_size=VALID_SIZE)

        # sort chronologically
        train, valid = sorted(train), sorted(valid)
    else:
        LogConfig.init_log('We are not calculating Stratified Precipitation based on Wet Days here!')

        # without stratification the configured periods are used as they are
        train, valid = CALIB_PERIOD, VALID_PERIOD

    # training and validation dataset: selected once from whichever time
    # index the branch above produced, so that a stratified split is not
    # overwritten by the configured periods
    Era5_train, Obs_train = Era5_ds.sel(time=train), Obs_ds.sel(time=train)
    Era5_valid, Obs_valid = Era5_ds.sel(time=valid), Obs_ds.sel(time=valid)

downscaleml.core.utils: --------------------------------------------------------------------------------
downscaleml.core.utils: 2023-11-27T12:22:02: Initializing downscaling for period: 1985-01-02 - 2015-12-31
downscaleml.core.utils: --------------------------------------------------------------------------------
downscaleml.core.utils: --------------------------------------------------------------------------------
downscaleml.core.utils: 2023-11-27T12:22:02: Initializing ERA5 predictors.
downscaleml.core.utils: --------------------------------------------------------------------------------
downscaleml.core.utils: Searching: /mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/larger_alps/REANALYSIS/ERA5/geopotential, pattern: .nc$
downscaleml.core.utils: Searching: /mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/larger_alps/REANALYSIS/ERA5/temperature, pattern: .nc$
downscaleml.core.utils: Searching: /mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/larger_alps/REANALYSIS/ERA5/u_compon

In [2]:
# inspect the training predictors: a bare last expression shows the
# dataset representation as the cell output
Era5_train

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 3 graph layers,32 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 3 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 3 graph layers,32 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 3 graph layers,32 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 3 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 3 graph layers,32 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 556.02 MiB 17.93 MiB Shape (11321, 103, 125) (365, 103, 125) Dask graph 32 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  11321,

Unnamed: 0,Array,Chunk
Bytes,556.02 MiB,17.93 MiB
Shape,"(11321, 103, 125)","(365, 103, 125)"
Dask graph,32 chunks in 4 graph layers,32 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [3]:
# inspect the validation predictors: a bare last expression shows the
# dataset representation as the cell output
Era5_valid

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.41 MiB 10.41 MiB Shape (212, 103, 125) (212, 103, 125) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",125  103  212,

Unnamed: 0,Array,Chunk
Bytes,10.41 MiB,10.41 MiB
Shape,"(212, 103, 125)","(212, 103, 125)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [4]:
import torch
import dask.array as da
from datetime import date


class EoDataset(torch.utils.data.Dataset):
    """Static helpers that encode the day of the year as cyclical layers.

    Every member of this class is a static method; none of them reads or
    writes instance state.

    """

    @staticmethod
    def encode_cyclical_features(feature, max_val):
        """Encode a cyclical feature as a sine and a cosine component.

        Parameters
        ----------
        feature : array-like
            The cyclical feature to encode.
        max_val : `float`
            Maximum physically possible value of ``feature``; used as the
            period of the encoding.

        Returns
        -------
        encoded : `tuple`
            ``(sin, cos)`` of ``2 * pi * feature / max_val``, both cast to
            ``float32``; each component lies in the range [-1, 1].

        """
        # the phase angle is shared by both components
        phase = 2 * np.pi * feature / max_val
        sine = da.sin(phase).astype(np.float32)
        cosine = da.cos(phase).astype(np.float32)
        return (sine, cosine)

    @staticmethod
    def add_coordinates(array, dims=('time', 'y', 'x')):
        """Pair ``array`` with its dimension names.

        Parameters
        ----------
        array :
            The data to label.
        dims : `tuple` [`str`]
            Names of the dimensions of ``array``.

        Returns
        -------
        labelled : `tuple`
            The two-element tuple ``(dims, array)``.

        """
        labelled = (dims, array)
        return labelled

    @staticmethod
    def repeat_along_axis(array, repeats, axis):
        """Repeat the elements of ``array`` along ``axis``.

        Parameters
        ----------
        array : array-like
            The values to repeat; converted with ``da.array`` first.
        repeats : `int`
            Number of repetitions of each element.
        axis : `int`
            Axis along which to repeat.

        Returns
        -------
        repeated :
            The result of ``da.repeat``.

        """
        as_dask = da.array(array)
        return da.repeat(as_dask, repeats, axis)

    @staticmethod
    def encode_doys(ds, dims=('time', 'y', 'x'), chunks=None):
        """Build sine/cosine day-of-year layers matching the grid of ``ds``.

        Parameters
        ----------
        ds :
            Object providing ``time``, ``y`` and ``x``; ``ds.time.values``
            must be convertible to ``datetime64[D]``.
        dims : `tuple` [`str`]
            Dimension names attached to the encoded layers. They are also
            used to translate the keys of ``chunks`` into axis positions.
        chunks : `dict` or `None`
            Mapping from dimension name to chunk specification. If not
            ``None``, both layers are rechunked accordingly; every key has
            to be contained in ``dims``.

        Returns
        -------
        encoded : `dict`
            ``{'sin_doy': (dims, array), 'cos_doy': (dims, array)}`` where
            each array has shape ``(len(time), len(y), len(x))``.

        """
        # compute day of the year
        LOGGER.info('Encoding day of the year to cyclical feature ...')
        days = ds.time.values.astype('datetime64[D]')
        day_numbers = da.asarray(
            [day.astype(date).timetuple().tm_yday for day in days])

        # the encoded values are expanded from (t,) to (t, y, x): every
        # time step is repeated once for each pixel (y, x)
        n_time, n_y, n_x = len(day_numbers), len(ds.y), len(ds.x)
        grid_shape = (n_time, n_y, n_x)
        pixels = int(n_x * n_y)

        # encode day of the year as cyclical feature
        sin_doy, cos_doy = EoDataset.encode_cyclical_features(day_numbers, 365)

        # lazily repeat the encoded values and bring them into grid shape
        sin_doy = EoDataset.repeat_along_axis(
            sin_doy, pixels, 0).reshape(grid_shape)
        cos_doy = EoDataset.repeat_along_axis(
            cos_doy, pixels, 0).reshape(grid_shape)

        # chunk data for parallel loading
        if chunks is not None:
            # translate dimension names to axis positions
            layout = {dims.index(name): size for name, size in chunks.items()}
            sin_doy = sin_doy.rechunk(layout)
            cos_doy = cos_doy.rechunk(layout)

        layers = (('sin_doy', sin_doy), ('cos_doy', cos_doy))
        return {name: EoDataset.add_coordinates(layer, dims)
                for name, layer in layers}

def doy_encoding(X, y=None, doy=False):
    """Optionally add the cyclically encoded day of the year to ``X``.

    Parameters
    ----------
    X :
        The predictors. When ``doy`` is set, ``X`` has to provide ``chunks``
        and ``assign`` and is handed to ``EoDataset.encode_doys``.
    y :
        Accepted but not used by this function.
    doy : `bool`
        Whether to add the encoded day of the year to ``X``.

    Returns
    -------
    predictors :
        ``X`` itself if ``doy`` is false, otherwise the result of
        ``X.assign`` with the encoded layers. The returned object is also
        printed.

    """
    predictors = X

    # whether to include the day of the year as predictor variable
    if doy:
        LOGGER.info('Adding day of the year to predictor variables ...')
        doy_layers = EoDataset.encode_doys(predictors, chunks=predictors.chunks)
        predictors = predictors.assign(doy_layers)

    print(predictors)
    return predictors

In [5]:
# add the encoded day of the year to the training and validation predictors
# if DOY is set; both names are rebound to the returned datasets
Era5_train = doy_encoding(Era5_train, Obs_train, doy=DOY)
Era5_valid = doy_encoding(Era5_valid, Obs_valid, doy=DOY)


__main__: Adding day of the year to predictor variables ...
__main__: Encoding day of the year to cyclical feature ...
__main__: Adding day of the year to predictor variables ...
__main__: Encoding day of the year to cyclical feature ...


<xarray.Dataset>
Dimensions:    (x: 125, y: 103, time: 11321)
Coordinates:
  * x          (x) float64 3.715 3.822 3.93 4.038 ... 16.75 16.85 16.96 17.07
  * y          (y) float64 50.52 50.45 50.37 50.3 ... 43.27 43.2 43.13 43.05
  * time       (time) datetime64[ns] 1985-01-02 1985-01-03 ... 2015-12-31
Data variables: (12/17)
    z_500      (time, y, x) float32 dask.array<chunksize=(363, 103, 125), meta=np.ndarray>
    z_850      (time, y, x) float32 dask.array<chunksize=(363, 103, 125), meta=np.ndarray>
    t_500      (time, y, x) float32 dask.array<chunksize=(363, 103, 125), meta=np.ndarray>
    t_850      (time, y, x) float32 dask.array<chunksize=(363, 103, 125), meta=np.ndarray>
    u_500      (time, y, x) float32 dask.array<chunksize=(363, 103, 125), meta=np.ndarray>
    u_850      (time, y, x) float32 dask.array<chunksize=(363, 103, 125), meta=np.ndarray>
    ...         ...
    tp         (time, y, x) float32 dask.array<chunksize=(363, 103, 125), meta=np.ndarray>
    elevation  