# Postprocessing trained downscaling models

In [63]:
import os, sys
sys.path.append("../models/")
sys.path.append("../utils/")
sys.path.append("../handle_data/")
import tensorflow as tf
import tensorflow.keras as keras
from handle_data_unet import *
from handle_data_class import  *
from plotting import *
import datetime as dt
import numpy as np
import xarray as xr
import json as js
import gc

from collections import OrderedDict

## Base directories for test dataset and model

Adapt `datadir`, `model_base_dir` and `model_name`.
 - `datadir`: directory where the test dataset is stored
 - `model_base_dir`: top-level directory where trained downscaling models are saved
 - `model_name`: name of trained model
 - `lztar`: flag if high-resolved (target) topography is part of the input data

In [80]:
data_dir = "/p/scratch/deepacf/maelstrom/maelstrom_data/ap5_michael/preprocessed_era5_crea6/netcdf_data/all_files/"
model_base_dir = "/p/home/jusers/langguth1/juwels/downscaling_maelstrom/downscaling_jsc_repo/downscaling_ap5/trained_models"
# name of the model to be postprocessed
model_name = "wgan_era5_to_crea6_epochs40_supervision_ztar2in_noes"
lztar = True

# constrct model directory paths
model_base = os.path.join(model_base_dir, model_name)
model_dir = os.path.join(model_base, f"{model_name}_generator")

Next, we load the model and also retrieve the testing dataset by reading the corresponding netCDF-file.

In [83]:
print(f"Load model '{model_name}'")
trained_model = keras.models.load_model(model_dir, compile=False)
print(f"Read training dataset from {datadir}") 
ds_test = xr.open_dataset(os.path.join(data_dir, "preproc_era5_crea6_test.nc"))

Load model 'wgan_era5_to_crea6_epochs40_supervision_ztar2in_noes'
Read training dataset from /p/scratch/deepacf/maelstrom/maelstrom_data/ap5_michael/preprocessed_era5_crea6/netcdf_data/all_files/


## Data preprocessing

After retrieving the reference data (i.e. the ground truth data)...

In [29]:
ground_truth = ds_test["t_2m_tar"]

... we preprocess the input from the test dataset. For this, the data is reshaped into a xarray DataArray whose last dimension corresponds to the variables (the feature channels).

In [32]:
# Get the normalization parameters from saved json file
js_norm = os.path.join(model_dir, "..", "z_norm_dict.json")

try:
    with open(js_norm, "r") as f:
        norm_dict = js.load(f)
except FileNotFoundError as e:
    raise FileNotFoundError(f"Could not find '{js_norm}'. Please check model-directory '{model_dir}'.")

train_vars = list(ds_test.keys())
mu_train, std_train = np.asarray(norm_dict["mu"]), np.asarray(norm_dict["std"])
da_test = HandleDataClass.reshape_ds(ds_test)
da_test = HandleUnetData.z_norm_data(da_test, norm_method="norm", save_path=model_base)

Read parameters for normalization from file /p/home/jusers/langguth1/juwels/downscaling_maelstrom/downscaling_jsc_repo/downscaling_ap5/trained_models/wgan_era5_to_crea6_epochs40_supervision_ztar2in_noes2/z_norm_dict.json...


In [35]:
# Split the inputs and the target data
da_test_in, da_test_tar = HandleDataClass.split_in_tar(da_test)
if lztar:
    print("Add high-resolved target topography to input features.")
    da_test_in = xr.concat([da_test_in, da_test_tar.sel({"variables": "hsurf_tar"})], dim="variables")

Add high-resolved target topography to input features.


## Create predictions from trained model

The preprocessed data is fed into the trained model to obtain the downscalted 2m tmepertaure which is subject to evaluation later.

In [84]:
print("Start inference from trained model...")
y_pred_trans =  trained_model.predict(da_test_in.squeeze().values, batch_size=32, verbose=1)

Start inference from trained model...


For evaluation, we have to denormalize the data. 

In [85]:
# get coordinates and dimensions from target data
coords = da_test_tar.isel(variables=0).squeeze().coords
dims = da_test_tar.isel(variables=0).squeeze().dims
y_pred = xr.DataArray(y_pred_trans[0].squeeze(), coords=coords, dims=dims)
# perform denormalization
y_pred = HandleUnetData.denormalize(y_pred.squeeze(), 
                                    norm_dict["mu"]["t_2m_tar"], 
                                    norm_dict["std"]["t_2m_tar"])
y_pred = xr.DataArray(y_pred, coords=coords, dims=dims)

Parameters for normalization are parsed directly to the method.


## Evaluation

Subsequently, the produced downscaling product is evaluated using the following scores
- MSE
- Bias
- Horizontal gradient ratio

In [54]:
print(y_pred.sel(time=dt.time.season("JJA")))

AttributeError: type object 'datetime.time' has no attribute 'season'

In [86]:
seasons = ["DJF", "MAM", "JJA", "SON"]
mse_mean, mse_std = np.zeros(24), np.zeros(24)
mse_mean_sea, mse_std_sea = np.zeros((4, 24)), np.zeros((4, 24))

for i, hh in enumerate(np.arange(0, 24)):
    mse_all = ((y_pred.sel(time=dt.time(hh)) - ground_truth.sel(time=dt.time(hh)))**2).mean(dim=["rlat", "rlon"])
    mse_mean[i], mse_std[i] = mse_all.mean().values, mse_all.std().values
    mse_mean_sea[:,i], mse_std_sea[:,i] = mse_all.groupby("time.season").mean().values, \
                                          mse_all.groupby("time.season").std().values 
    
mse_mean = xr.DataArray(mse_mean, coords = {"daytime": np.arange(0,24)}, dims=["daytime"]) 
mse_std = xr.DataArray(mse_std, coords = {"daytime": np.arange(0,24)}, dims=["daytime"])


mse_mean_sea = xr.DataArray(mse_mean_sea, coords = {"season": seasons, "daytime": np.arange(0,24)},
                            dims = ["season", "daytime"])
mse_std_sea = xr.DataArray(mse_std_sea, coords = {"season": seasons, "daytime": np.arange(0,24)},
                            dims = ["season", "daytime"])

# Visualize the results

In [87]:
def create_line_plot(data: xr.DataArray, data_std: xr.DataArray, model_name: str, metric: dict,
                     filename: str):
    
    fig, (ax) = plt.subplots(1,1)
    ax.plot(data["daytime"].values, data.values, 'k-', label=model_name)
    ax.fill_between(data["daytime"].values, data.values-data_std.values, data.values+data_std.values, facecolor="blue", alpha=0.2)
    ax.set_ylim(0.,4.)
    # label axis
    ax.set_xlabel("daytime [UTC]", fontsize=16)
    metric_name, metric_unit = list(metric.keys())[0], list(metric.values())[0]
    ax.set_ylabel(f"{metric_name} T2m [{metric_unit}]", fontsize=16)
    ax.tick_params(axis="both", which="both", direction="out", labelsize=14)

    ax.legend(handles, labels, loc='upper right', numpoints=1)
    # save plot to file
    fig.savefig(filename)

In [89]:
plot_dir = os.path.join(".", model_name)
os.makedirs(plot_dir, exist_ok=True)

create_line_plot(np.sqrt(mse_mean), np.sqrt(mse_std), "WGAN",
                 {"RMSE": "K"}, os.path.join(plot_dir, "downscaling_wgan_rmse.png"))

for sea in seasons:
    create_line_plot(np.sqrt(mse_mean_sea.sel({"season": sea})), 
                     np.sqrt(mse_std_sea.sel({"season": sea})),
                     "WGAN", {"RMSE": "K"},
                     os.path.join(plot_dir, f"downscaling_wgan_rmse_{sea}.png"))

In [None]:
# choose a time index
tind = 380

y_pred_eval = y_pred_trans#.sel(time=dt.time(12))

# plot the full 2m temperature
plt_fname_exp = "./plot_temp_pred_real"
create_plots(y_pred_eval.isel(time=tind), ds_test["t2m_tar"].isel(time=tind), plt_fname_exp,
             opt_plot={"title1": "downscaled T2m", "title2": "target T2m", "levels": np.arange(-3, 27., 1.)})

plt_fname_diff = "./plot_temp_diff"
diff_in_tar = ds_test["2t_in"].isel(time=tind)-ds_test["t2m_tar"].isel(time=tind) + 273.15
diff_down_tar = y_pred_eval.isel(time=tind)-ds_test["t2m_tar"].isel(time=tind) + 273.15
create_plots(diff_in_tar, diff_down_tar, plt_fname_diff,
             opt_plot={"title1": "diff. input-target", "title2": "diff. downscaled-target",
                       "levels": np.arange(-3., 3.1, .2)})