# Notebook to add constant variables to competing AtmoRep downscaling data

This notebook processes the files generated with `preprocees_data_atmorep.sh` and adds the surface topography from the ERA5 and COSMO REA6 datasets to them. Both topographies are invariant fields and therefore have to be expanded to include a time dimension before they can be merged with the time-dependent data.

In [None]:
! pip install findlibs

In [None]:
import os
import sys
import glob
from tqdm import tqdm

import pandas as pd
import numpy as np
import xarray as xr
import cfgrib

Parameters:

In [None]:
# Directory that holds the 'downscaling_atmorep*.nc' files to be processed
data_dir = "/p/scratch/deepacf/maelstrom/maelstrom_data/ap5/competing_atmorep/"

# netCDF-file providing the ERA5 orography
invar_file_era5 = "/p/scratch/deepacf/maelstrom/maelstrom_data/ap5/competing_atmorep/reanalysis_orography.nc"

# netCDF-file providing the COSMO REA6 orography
invar_file_crea6 = "/p/scratch/atmo-rep/data/cosmo_rea6/static/cosmo_rea6_orography.nc"

The file 'invar_file_era5' has been generated with the following CDO-command:
``` 
cdo --reduce_dim -t ecmwf -f nc copy -remapbil,~/downscaling_maelstrom/downscaling_jsc_repo/downscaling_ap5/grid_des/crea6_reg_grid reanalysis_orography.grib reanalysis_orography.nc
``` 
where the original grib-file was obtained from AtmoRep (```/p/scratch/atmo-rep/data/era5/static```).

In [None]:
# Collect all data files in the data directory that match the AtmoRep naming pattern
search_pattern = os.path.join(data_dir, "downscaling_atmorep*.nc")
file_list = glob.glob(search_pattern)

# Abort early if there is nothing to process
if not file_list:
    raise FileNotFoundError(f"Could not find any datafiles under '{data_dir}'...")

In [None]:
# Invariant ERA5 data (see the CDO-command above for how this file was created)
ds_invar_era5 = xr.open_dataset(invar_file_era5)

# Invariant COSMO REA6 data: select the lat/lon-coordinates of the ERA5 dataset
# and discard the variable 'FR_LAND', which is not added to the data files.
ds_invar_crea6 = (
    xr.open_dataset(invar_file_crea6)
    .sel(lat=ds_invar_era5["lat"], lon=ds_invar_era5["lon"])
    .drop_vars("FR_LAND")
)

In [None]:
# Add the missing invariant fields (and harmonize the variable name of the 2m temperature)
# for every data file. Modified datasets are written to a separate '<name>_new.nc' file,
# i.e. the input files themselves are never overwritten.
for f in tqdm(file_list):
    # read current file
    print(f"Process data-file '{f}'...")
    # the context manager ensures that the file handle is closed after processing
    with xr.open_dataset(f) as ds_now:
        var_list = list(ds_now.data_vars)
        # Always start from the data of the current file. Without this, 'ds_all' is undefined
        # (or still refers to the previous file) whenever the ERA5-branch below is skipped.
        ds_all = ds_now
        lchange = False

        if "z_in" not in var_list:
            print("Add surface topography from ERA5...")
            # expand the invariant field along the time-dimension of the current file
            dst = ds_invar_era5.expand_dims(time=ds_now["time"])
            dst = dst.rename({"Z": "z_in"})

            ds_all = xr.merge([ds_all, dst])
            lchange = True

        if "hsurf_tar" not in var_list:
            print("Add surface topography from CREA6...")
            # expand the invariant field along the time-dimension of the current file
            dst = ds_invar_crea6.expand_dims(time=ds_now["time"])
            dst = dst.rename({"z": "hsurf_tar"})

            ds_all = xr.merge([ds_all, dst])
            lchange = True

        if "t2m_ml0_tar" in var_list:
            ds_all = ds_all.rename({"t2m_ml0_tar": "t2m_tar"})
            lchange = True

        if lchange:
            # only touch the file extension (str.replace would hit every '.nc' in the path)
            f_root, f_ext = os.path.splitext(f)
            f_out = f"{f_root}_new{f_ext}"
            print(f"Write modified dataset to '{f_out}'...")
            # must happen inside the with-block, since the data is read from the open file
            ds_all.to_netcdf(f_out)
        else:
            print(f"No changes to data from '{f}' applied. Continue...")