# Prepare data for Figure "DGVM" - Regrid other data

In [None]:
# Libraries
import os, shutil
import numpy as np
import xarray as xr

In [None]:
# Directories
# NOTE(review): dir01 is not referenced by any cell visible in this notebook — confirm it is still needed
dir01 = '../paper_deficit/output/01_prep/'
# Base directory for this figure; the cells below read and write the
# per-dataset subfolders (luh2/, pot/, erb/, walker/, mo/, sanderman*/) under it.
# Keep the trailing slash: some cells build paths by plain string
# concatenation (e.g. f'{dir05x}{m}/') rather than os.path.join.
dir05x = '../paper_deficit/output/05_prep_other/fig_dgvm/'

---

### Create tifs for regridding

In [None]:
# Export the LUH2 primary data variable (prim_1700) as tif
luh2_nc = os.path.join(dir05x, 'luh2/ds_luh2_prim_1700.nc')
luh2_tif = os.path.join(dir05x, 'luh2/da_luh2_prim_1700.tif')
# NOTE(review): `.rio` is the rioxarray accessor; rioxarray is not imported
# explicitly in this notebook — confirm it is loaded before this cell runs
xr.open_dataset(luh2_nc).prim_1700.rio.to_raster(luh2_tif)

In [None]:
# Write one tif per prepared variable, plus the land sea mask(s), for every dataset
def _land_mask_to_tif(zarr_path, tif_path):
    """Export the 'land_sea_mask' variable of a zarr store as a float32 tif."""
    mask = xr.open_zarr(zarr_path)['land_sea_mask'].astype('float32')
    # Dimensions are renamed from lat/lon to y/x before the export
    mask.rename(lat='y', lon='x').rio.to_raster(tif_path)


datasets = ['pot', 'erb', 'walker', 'mo',
            'sanderman030', 'sanderman100', 'sanderman200']
for m in datasets:
    dir_m = os.path.join(dir05x, m)
    ds = xr.open_zarr(os.path.join(dir_m, f'ds_{m}.zarr'))

    # One tif per data variable, named after the variable
    for i in list(ds.data_vars):
        da = ds[i].rename(lat='y', lon='x')
        # pot is cast to float32 first (to_raster changes dtype for csoil otherwise)
        da = da.astype('float32') if m == 'pot' else da
        da.rio.to_raster(os.path.join(dir_m, f'da_{i}.tif'))

    # Land sea mask belonging to this dataset
    _land_mask_to_tif(os.path.join(dir_m, f'ds_{m}_land.zarr'),
                      os.path.join(dir_m, f'da_{m}_land.tif'))

# mo has two additional land sea masks (land_act, land_pot)
dir_mo = os.path.join(dir05x, 'mo')
for mask_name in ['land_act', 'land_pot']:
    _land_mask_to_tif(os.path.join(dir_mo, f'ds_mo_{mask_name}.zarr'),
                      os.path.join(dir_mo, f'da_mo_{mask_name}.tif'))

---

### Regrid to LUH2 resolution

In [None]:
# Import regridding function
from regrid_high_res_v1_01 import regrid_high_res, prep_tif

In [None]:
def regrid_da(f_source, dir_target, dir_source, dir_out,
              size_tiles, fill_value=None, olap=1):
    """Regrid a source .tif onto the grid of the LUH2 prim_1700 .tif.

    Thin wrapper around `prep_tif` and `regrid_high_res` from
    `regrid_high_res_v1_01`: both rasters are loaded with `prep_tif` and handed
    to `regrid_high_res`, which is asked to export zarr and keep its
    intermediate files (`type_export='zarr'`, `del_interm=False`).

    Args:
        f_source (str): Filename (without extension) of the source .tif file
            to be regridded; also passed to `prep_tif` as the variable name.
        dir_target (str): Base directory that contains the target grid file
            'luh2/da_luh2_prim_1700.tif'.
        dir_source (str): Directory containing the source .tif file.
        dir_out (str): Directory to store the output and intermediate files.
        size_tiles (int): Size of the regridding tiles in degrees.
        fill_value (float, optional): Fill value to use in the regridding
            process. Defaults to None.
        olap (float, optional): Overlap size in degrees for regridding tiles.
            Defaults to 1.

    Returns:
        None. The result of `regrid_high_res` is not returned; the merge cells
        of this notebook read the output back from `dir_out`.
    """
    # Build both paths with os.path.join so that directories work with or
    # without a trailing separator, and honour dir_target instead of relying
    # on the notebook-level dir05x.
    path_target = os.path.join(dir_target, 'luh2/da_luh2_prim_1700.tif')
    path_source = os.path.join(dir_source, f_source + '.tif')

    # Prepare the target and source data arrays from the tif files
    da_target = prep_tif(path_target, 'prim_1700')
    da_source = prep_tif(path_source, f_source)

    # Regrid source array to target grid
    # NOTE(review): account/partition are hard-coded; presumably job settings
    # for the compute cluster — confirm against regrid_high_res
    regrid_high_res(da_target, da_source, dir_out,
                    account='bm0891', partition='compute',
                    size_tiles=size_tiles, olap=olap, fill_value=fill_value,
                    type_export='zarr', del_interm=False)

---

In [None]:
# Regridd pot
for i in ['s2_cveg', 's3_cveg', 's2_csoil', 's3_csoil', 'land']:
    f_source = f'da_pot_{i}'
    dir_target = dir05x
    dir_source = os.path.join(dir05x, 'pot/')
    dir_out = os.path.join(dir05x, 'pot/')
    %time regrid_da(f_source, dir_target, dir_source, dir_out, 45, np.nan, olap=0.5)

In [None]:
# Regridd Erb
for i in['s2_cveg', 's3_cveg', 'land']:
    f_source = f'da_erb_{i}'
    dir_target = dir05x
    dir_source = os.path.join(dir05x, 'erb/')
    dir_out = os.path.join(dir05x, 'erb/')
    %time regrid_da(f_source, dir_target, dir_source, dir_out, 45, np.nan, olap=0.5)

In [None]:
for str_sand in ['sanderman030', 'sanderman100', 'sanderman200']:
    for i in ['s2_csoil', 's3_csoil', 'land']:
        f_source = f'da_{str_sand}_{i}'
        dir_target = dir05x
        dir_source = os.path.join(dir05x, f'{str_sand}/')
        dir_out = os.path.join(dir05x, f'{str_sand}/')
        %time regrid_da(f_source, dir_target, dir_source, dir_out, 45, -32767, olap=0.5)

In [None]:
# Regridd Erb
for i in['s2_cveg', 's3_cveg', 's2_csoil', 's3_csoil', 'land']:
    f_source = f'da_walker_{i}'
    dir_target = dir05x
    dir_source = os.path.join(dir05x, 'walker/')
    dir_out = os.path.join(dir05x, 'walker/')
    %time regrid_da(f_source, dir_target, dir_source, dir_out, 25, -32768, olap=0.5)

In [None]:
# Regridd Mo    
for i in['s2_cveg', 's3_cveg', 'land', 'land_act', 'land_pot']:
    f_source = f'da_mo_{i}'
    dir_target = dir05x
    dir_source = os.path.join(dir05x, 'mo/')
    dir_out = os.path.join(dir05x, 'mo/')
    %time regrid_da(f_source, dir_target, dir_source, dir_out, 25, np.nan, olap=0.5)

---

### Regrid primary land data to other data resolution

In [None]:
def regrid_da(f_target, f_source, dir_target, dir_source, dir_out,
              size_tiles, fill_value=None, olap=1):
    """Regrid a source .tif onto the grid of an arbitrary target .tif.

    This redefines the `regrid_da` declared earlier in the notebook with an
    extra leading `f_target` parameter, so cells must be run in order. Both
    rasters are loaded with `prep_tif` and handed to `regrid_high_res`, which
    is asked to export zarr and keep its intermediate files.

    Args:
        f_target (str): Name of the target grid; the file read is
            'da_<f_target>.tif' inside `dir_target`.
        f_source (str): Name of the source data to be regridded; the file read
            is 'da_<f_source>.tif' inside `dir_source`.
        dir_target (str): Directory containing the target grid .tif file.
        dir_source (str): Directory containing the source .tif file.
        dir_out (str): Directory to store the output and intermediate files.
        size_tiles (int): Size of the regridding tiles in degrees.
        fill_value (float, optional): Fill value to use in the regridding
            process. Defaults to None.
        olap (float, optional): Overlap size in degrees for regridding tiles.
            Defaults to 1.

    Returns:
        None. The result of `regrid_high_res` is not returned; later cells of
        this notebook read the output back from `dir_out`.
    """
    # Pass directory and filename as separate arguments so os.path.join
    # inserts a separator when the directory has no trailing slash.
    path_target = os.path.join(dir_target, f'da_{f_target}.tif')
    path_source = os.path.join(dir_source, f'da_{f_source}.tif')

    # Prepare the target and source data arrays from the tif files
    da_target = prep_tif(path_target, f_target)
    da_source = prep_tif(path_source, f_source)

    # Regrid source array to target grid
    # NOTE(review): account/partition are hard-coded; presumably job settings
    # for the compute cluster — confirm against regrid_high_res
    regrid_high_res(da_target, da_source, dir_out,
                    account='bm0891', partition='compute',
                    size_tiles=size_tiles, olap=olap, fill_value=fill_value,
                    type_export='zarr', del_interm=False)

In [None]:
# Regridd primary land data to resolution of other data
for m in ['pot', 'mo', 'erb', 'sanderman030', 'sanderman100', 'sanderman200', 
          'walker']:

    f_target = f'{m}_land'
    f_source = 'luh2_prim_1700'
    dir_target = f'{dir05x}{m}/'
    dir_source = f'{dir05x}luh2/'
    dir_out = dir_target
    
    if m != 'walker':
        size_tiles = 45
    if m == 'walker':
        size_tiles = 25

    %time regrid_da(f_target, f_source, dir_target, dir_source, dir_out, \
                    size_tiles, fill_value=None, olap=1)

---

### Merge regridded other data

In [None]:
# Combine the regridded stores of each dataset (luh2 resolution) into netCDF files
prefix = 'regridded_da_'
for m in ['pot', 'erb', 'walker', 'mo',
          'sanderman030', 'sanderman100', 'sanderman200']:
    dir_m = os.path.join(dir05x, m)

    # Open every regridded store of this dataset as a single dataset
    ds = xr.open_mfdataset(
        os.path.join(dir_m, f'ds_regridded_da_{m}_*.zarr'),
        engine='zarr')

    # Copy all variables without 'land' in their name, dropping the
    # leading 'regridded_da_' (13 characters) from the variable name
    ds_out = xr.Dataset()
    for name in ds.data_vars:
        if 'land' in name:
            continue
        ds_out[name[len(prefix):]] = ds[name]
    ds_out.to_netcdf(os.path.join(dir_m, f'ds_{m}_luh2res.nc'), mode='w')

    # The land sea mask is written to its own file
    da_land = ds[f'regridded_da_{m}_land'].rename('land_sea_mask')
    da_land.to_netcdf(os.path.join(dir_m, f'ds_{m}_land_luh2res.nc'), mode='w')

In [None]:
# Store the regridded primary data of each dataset under the name prim_1700
for m in ['pot', 'erb', 'walker', 'mo',
          'sanderman030', 'sanderman100', 'sanderman200']:
    dir_m = os.path.join(dir05x, m)
    # Primary data regridded to this dataset's resolution
    ds_prim = xr.open_zarr(os.path.join(dir_m, 'ds_regridded_luh2_prim_1700.zarr'))
    da_prim = ds_prim.regridded_luh2_prim_1700.rename('prim_1700')
    # mode='w' overwrites an existing store
    da_prim.to_zarr(os.path.join(dir_m, f'ds_{m}_prim.zarr'), mode='w')

---