# Prepare data for Figure "Example"

In [None]:
# Root folder of the raw input datasets; the loaders below read from its
# subfolders (walker2022/, mo2023/, erb2018/, sanderman2017/)
dir_data = '../data/'
# Output folder; results are written to its 'fig_example/' subfolder
dir05 = '../paper_deficit/output/05_prep_other/'

In [None]:
import os
import rioxarray
import xarray as xr

---

In [None]:
# Libraries
from dask_jobqueue import SLURMCluster
from dask.distributed import Client
import dask

# Initialize dask: describes the SLURM job that is submitted for each
# requested unit of workers
cluster = SLURMCluster(
    queue='compute',                      # SLURM queue to use
    cores=12,                             # Number of CPU cores per job
    memory='256 GB',                      # Memory per job
    account='bm0891',                     # Account allocation
    interface="ib0",                      # Network interface for communication
    walltime='02:00:00',                  # Maximum runtime per job
    local_directory='../dask/',           # Directory for local storage
    job_extra_directives=[                # Additional SLURM directives for logging
        '-o ../dask/LOG_worker_%j.o',     # Output log
        '-e ../dask/LOG_worker_%j.e'      # Error log
    ]
)

# Scale dask cluster
cluster.scale(jobs=3)

# Configure dashboard url: set the link template through the public dask
# configuration API instead of mutating the nested config dict by hand
dask.config.set(
    {'distributed.dashboard.link':
         '{JUPYTERHUB_SERVICE_PREFIX}/proxy/{port}/status'}
)

# Create client
client = Client(cluster)

client

---

### Walker

In [None]:
# Get data
def open_walker_orig(fstr):
    """Lazily open one GeoTIFF from the 'walker2022' folder below dir_data.

    Parameters
    ----------
    fstr : str
        File name without the '.tif' extension.

    Returns
    -------
    The object returned by rioxarray.open_rasterio, opened with chunks of
    5000 cells along y and x.
    """
    tif_path = os.path.join(dir_data, f'walker2022/{fstr}.tif')
    chunk_sizes = {'y': 5000, 'x': 5000}
    return rioxarray.open_rasterio(tif_path, chunks=chunk_sizes)

# Potential ("Pot") rasters
da_walker_agb_pot = open_walker_orig('Base_Pot_AGB_MgCha_500m')
da_walker_bgb_pot = open_walker_orig('Base_Pot_BGB_MgCha_500m')
da_walker_soc_pot = open_walker_orig('Base_Pot_SOC_MgCha_500m')
# Current ("Cur") rasters
da_walker_agb_cur = open_walker_orig('Base_Cur_AGB_MgCha_500m')
da_walker_bgb_cur = open_walker_orig('Base_Cur_BGB_MgCha_500m')
da_walker_soc_cur = open_walker_orig('Base_Cur_SOC_MgCha_500m')

# Value treated as "no data" in the rasters summed below
_WALKER_NODATA = -32768


def _sum_masked_int16(da_first, da_second):
    """Add two rasters cell by cell, ignoring no-data cells.

    Cells equal to _WALKER_NODATA are masked in both inputs before adding;
    the second input is added as a bare array (no coordinate alignment).
    Masked cells in the sum are written back as _WALKER_NODATA and the result
    is cast to int16.
    """
    first_valid = da_first.where(da_first != _WALKER_NODATA)
    second_valid = da_second.where(da_second != _WALKER_NODATA)
    total = first_valid + second_valid.data
    return total.fillna(_WALKER_NODATA).astype('int16')


# Create agbc_act and agbc_pot arrays (AGB + BGB)
da_walker_agbc_pot = _sum_masked_int16(da_walker_agb_pot, da_walker_bgb_pot)
da_walker_agbc_act = _sum_masked_int16(da_walker_agb_cur, da_walker_bgb_cur)

# Create dataset with all data. The first variable is assigned as a DataArray
# and so brings its coordinates along; the remaining ones are attached as bare
# arrays on the same three dimensions.
ds_walker = xr.Dataset()
ds_walker['walker_cveg_pot'] = da_walker_agbc_pot
walker_dims = ('band', 'y', 'x')
for var_name, da_src in [
        ('walker_cveg_act', da_walker_agbc_act),
        ('walker_csoil_pot', da_walker_soc_pot),
        ('walker_csoil_act', da_walker_soc_cur)]:
    ds_walker[var_name] = (walker_dims, da_src.data)

# Export dataset
#ds_walker.to_zarr(dirx + 'data_eval/examples/ds_temp_walker.zarr', mode='w')
# Import dataset
#ds_walker = xr.open_zarr(dirx + 'data_eval/examples/ds_temp_walker.zarr')

# Reproject each variable to EPSG:4326 and write it to a temporary GeoTIFF.
# The raster writer is not expected to create missing parent folders, so make
# sure the output folder exists before the first write.
os.makedirs(os.path.join(dir05, 'fig_example'), exist_ok=True)
for i in list(ds_walker.data_vars):
    ds_walker[i] \
        .rio.reproject('epsg:4326') \
        .rio.to_raster(
            os.path.join(dir05, f'fig_example/da_temp_{i}.tif'))
    
# Reprojected tifs to dataset
def open_da_temp_x_tif(fstr):
    """Lazily open one temporary reprojected Walker GeoTIFF.

    Parameters
    ----------
    fstr : str
        Variable suffix; the file read is
        'fig_example/da_temp_walker_<fstr>.tif' below dir05.

    Returns
    -------
    The object returned by rioxarray.open_rasterio, opened with chunks of
    5000 cells along y and x.
    """
    tif_path = os.path.join(dir05, f'fig_example/da_temp_walker_{fstr}.tif')
    chunk_sizes = {'y': 5000, 'x': 5000}
    return rioxarray.open_rasterio(tif_path, chunks=chunk_sizes)

# Collect the four reprojected rasters in one dataset
ds_walker_reproj = xr.Dataset()
for fstr in ('cveg_act', 'cveg_pot', 'csoil_act', 'csoil_pot'):
    ds_walker_reproj[fstr] = open_da_temp_x_tif(fstr)

# Process and export as zarr: rename the axes to lat/lon, drop the
# length-one band dimension and the coordinates that are no longer needed
(
    ds_walker_reproj
    .rename(y='lat', x='lon')
    .squeeze('band')
    .drop_vars(['band', 'spatial_ref'])
    .to_zarr(os.path.join(dir05, 'fig_example/ds_walker_preped.zarr'),
             mode='w')
);

# Delete intermediate files (one temporary GeoTIFF per dataset variable)
for i in ds_walker.data_vars:
    temp_tif = os.path.join(dir05, f'fig_example/da_temp_{i}.tif')
    os.remove(temp_tif)

---

### Mo

In [None]:
def prep_mo_data(fstr, pot_act):

    """Get and prepare Mo et al. 2023 potential or actual data.

    Parameters
    ----------
    fstr : str
        File name prefix (e.g. 'SD1'); the suffixes
        '_Full_TGB_carbon_density_Map_Merged.tif' and
        '_Net_TGB_carbon_density_Map_Merged.tif' are appended to it.
    pot_act : {'pot', 'act'}
        'pot' returns the "Full" raster; 'act' returns the "Full" raster
        minus the "Net" raster.

    Returns
    -------
    xarray.DataArray
        First band of the raster with fill values replaced by NaN,
        dimensions renamed to lat/lon and the 'band' and 'spatial_ref'
        coordinates dropped.

    Raises
    ------
    ValueError
        If pot_act is neither 'pot' nor 'act'.
    """

    # Validate before touching any file so that a typo fails fast and clearly
    if pot_act not in ('pot', 'act'):
        raise ValueError(
            f"pot_act must be 'pot' or 'act', got {pot_act!r}")

    def _open_masked(file_name):
        # Open the first band lazily and set fill-value cells to NaN
        da = rioxarray.open_rasterio(
            os.path.join(dir_data, f'mo2023/v1_1/{file_name}'),
            chunks=dict(y=5000, x=5000))[0]
        return da.where(da != da.attrs['_FillValue'])

    # Get potential data
    da_pot = _open_masked(fstr + '_Full_TGB_carbon_density_Map_Merged.tif')

    if pot_act == 'pot':
        da_out = da_pot
    else:
        # Get net data. The mask must be applied to the net raster itself;
        # masking the potential raster here would make the difference zero.
        da_net = _open_masked(fstr + '_Net_TGB_carbon_density_Map_Merged.tif')
        # Subtract as a bare array (no coordinate alignment)
        da_out = da_pot - da_net.data

    # Return
    return da_out \
        .rename(y='lat', x='lon') \
        .drop_vars(['band', 'spatial_ref'])

def prep_mo_data_act(fstr):

    """Get and prepare Mo et al. 2023 present data.

    Parameters
    ----------
    fstr : str
        File name prefix; '_Present_TGB_Density_Map_Merged.tif' is appended.

    Returns
    -------
    xarray.DataArray
        First band of the raster with fill values replaced by NaN,
        dimensions renamed to lat/lon and the 'band' and 'spatial_ref'
        coordinates dropped.
    """

    # Build the path and open the first band lazily
    fstr_act = fstr + '_Present_TGB_Density_Map_Merged.tif'
    tif_path = os.path.join(dir_data, f'mo2023/v1_1/{fstr_act}')
    first_band = rioxarray.open_rasterio(
        tif_path, chunks={'y': 5000, 'x': 5000})[0]

    # Cells equal to the file's fill value become NaN
    fill_value = first_band.attrs['_FillValue']
    masked = first_band.where(first_band != fill_value)

    renamed = masked.rename(y='lat', x='lon')
    return renamed.drop_vars(['band', 'spatial_ref'])

# Create Dataset with potential and actual data; the grid (lat/lon) is taken
# from one of the potential rasters
da_mo_grid = prep_mo_data('SD1', 'pot')
ds_mo = xr.Dataset(dict(lat=da_mo_grid.lat, lon=da_mo_grid.lon))

mo_dims = ('lat', 'lon')

# Variable name -> file name prefix
# NOTE(review): the 'l'/'u' suffixes pair with Max/Mean for the actual maps
# but with Mean/Max for the potential maps -- confirm which labelling is
# intended.
mo_act_prefixes = {
    'act_gsl': 'GS_Max',
    'act_gsu': 'GS_Mean',
    'act_h': 'HM',
    'act_e': 'SD',
    'act_w': 'WK',
}
mo_pot_prefixes = {
    'pot_gs1l': 'GS_Mean1',
    'pot_gs2l': 'GS_Mean2',
    'pot_gs1u': 'GS_Max1',
    'pot_gs2u': 'GS_Max2',
    'pot_h1': 'HM1',
    'pot_h2': 'HM2',
    'pot_e1': 'SD1',
    'pot_e2': 'SD2',
    'pot_w1': 'WK1',
    'pot_w2': 'WK2',
}

# Attach every map as a bare array on the shared (lat, lon) grid
for var_name, file_prefix in mo_act_prefixes.items():
    ds_mo[var_name] = (mo_dims, prep_mo_data_act(file_prefix).data)

for var_name, file_prefix in mo_pot_prefixes.items():
    ds_mo[var_name] = (mo_dims, prep_mo_data(file_prefix, 'pot').data)


# Create arrays with mean of actual and potential: stack the matching
# variables along a helper dimension 's' and average over it
mo_act_names = [name for name in ds_mo.data_vars if 'act' in name]
mo_pot_names = [name for name in ds_mo.data_vars if 'pot' in name]
da_mo_cveg_act = ds_mo[mo_act_names].to_array(dim='s').mean('s')
da_mo_cveg_pot = ds_mo[mo_pot_names].to_array(dim='s').mean('s')

# Create dataset holding only the actual and potential mean arrays
da_mo_ref = prep_mo_data('SD1', 'pot')
ds_mo_out = xr.Dataset(dict(lat=da_mo_ref.lat, lon=da_mo_ref.lon))
ds_mo_out = ds_mo_out.assign(mo_cveg_act=da_mo_cveg_act,
                             mo_cveg_pot=da_mo_cveg_pot)

# Export as zarr
mo_zarr_path = os.path.join(dir05, 'fig_example/ds_mo_preped.zarr')
ds_mo_out.to_zarr(mo_zarr_path, mode='w');

---

### Erb

In [None]:
def prep_erb_tif(file):

    """Get and prepare erb data.

    Parameters
    ----------
    file : str
        File name inside the 'erb2018' folder below dir_data.

    Returns
    -------
    xarray.DataArray
        First band of the raster with fill values replaced by NaN, values
        multiplied by 0.01, the 'band' and 'spatial_ref' coordinates dropped
        and the dimensions renamed to lat/lon.
    """

    # Get data; pass the path components separately so the result does not
    # depend on dir_data ending with a path separator
    da = rioxarray.open_rasterio(os.path.join(dir_data, 'erb2018', file))
    # Set cells equal to the fill value to nan
    da_masked = da.where(da != da.attrs['_FillValue'])
    # NOTE(review): the 0.01 factor presumably converts gC m-2 (files are
    # named '*_gcm') to MgC ha-1 -- confirm
    return (da_masked * 0.01)[0] \
        .drop_vars(['band', 'spatial_ref']) \
        .rename(dict(y='lat', x='lon'))

# Dataset with actual carbon densities (files 'Fig3A' ... 'Fig3G')
ds_erb_act = xr.Dataset()
for i in 'ABCDEFG':
    ds_erb_act[f'erb2018_fig3{i.lower()}'] = \
        prep_erb_tif(f'ExtDat_Fig3{i}_gcm.tif')

# Dataset with potential carbon densities (files 'Fig4A' ... 'Fig4F')
ds_erb_pot = xr.Dataset()
for i in 'ABCDEF':
    ds_erb_pot[f'erb2018_fig4{i.lower()}'] = \
        prep_erb_tif(f'ExtDat_Fig4{i}_gcm.tif')

# Calculate mean of actual and potential across the stacked variables
da_erb_cveg_act = ds_erb_act.to_array().mean('variable')
da_erb_cveg_pot = ds_erb_pot.to_array().mean('variable')

# Dataset with mean arrays (added one after the other)
ds_erb_out = xr.Dataset() \
    .assign(erb_cveg_act=da_erb_cveg_act) \
    .assign(erb_cveg_pot=da_erb_cveg_pot)

# Export
erb_zarr_path = os.path.join(dir05, 'fig_example/ds_erb_preped.zarr')
ds_erb_out.to_zarr(erb_zarr_path, mode='w');

---

### Sanderman

In [None]:
def prep_sand_tif(base_dir, file_name):

    """Get and prepare Sanderman et al. 2017 data.

    Parameters
    ----------
    base_dir : str
        Folder containing the GeoTIFF.
    file_name : str
        Name of the GeoTIFF inside base_dir.

    Returns
    -------
    xarray.DataArray
        First band of the raster with fill values replaced by NaN, the
        'band' and 'spatial_ref' coordinates dropped and the dimensions
        renamed to lat/lon.
    """
    raster = rioxarray.open_rasterio(os.path.join(base_dir, file_name))
    # Cells equal to the file's fill value become NaN
    fill_value = raster.attrs['_FillValue']
    first_band = raster.where(raster != fill_value)[0]
    trimmed = first_band.drop_vars(['band', 'spatial_ref'])
    return trimmed.rename(dict(y='lat', x='lon'))

# Base directory for Sanderman data
sanderman_dir = os.path.join(dir_data, 'sanderman2017')

# Load datasets: the 2010AD raster is used as "actual", the NoLU raster as
# "potential"
da_sand_act = prep_sand_tif(sanderman_dir, 'SOCS_0_30cm_year_2010AD_10km.tif')
da_sand_pot = prep_sand_tif(sanderman_dir, 'SOCS_0_30cm_year_NoLU_10km.tif')

# Combine into a single dataset
ds_sand_out = xr.Dataset(dict(sand_soc_act=da_sand_act,
                              sand_soc_pot=da_sand_pot))

# Export
sand_zarr_path = os.path.join(dir05, 'fig_example/ds_sand_preped.zarr')
ds_sand_out.to_zarr(sand_zarr_path, mode='w');

---

### Check Walker vs. Sanderman (not in paper)

In [None]:
# Libraries
import matplotlib.pyplot as plt
import rioxarray

In [None]:
# Directories
# Same value as the dir_data defined at the top of the notebook
# NOTE(review): presumably repeated so this check section can be run on its
# own -- confirm
dir_data = '../data/'

In [None]:
# Helper shared by the two Sanderman rasters below
def _open_sand_check(rel_path):
    """Open a raster below dir_data lazily, rename y/x to lat/lon and set
    cells equal to the file's fill value to NaN."""
    raster = rioxarray.open_rasterio(
        dir_data + rel_path,
        chunks={'y': 5000, 'x': 5000})
    raster = raster.rename(y='lat', x='lon')
    return raster.where(raster != raster.attrs['_FillValue'])


# sanderman actual
da_sand_act = _open_sand_check(
    'sanderman2017/SOCS_0_200cm_year_2010AD_10km.tif')

# sanderman potential
da_sand_pot = _open_sand_check(
    'sanderman2017/SOCS_0_200cm_year_NoLU_10km.tif')

In [None]:
# Helper shared by the two Walker rasters below
def _open_walk_check(rel_path):
    """Open a raster below dir_data, reproject it to EPSG:4326, re-chunk it,
    rename y/x to lat/lon and set cells equal to the fill value to NaN."""
    chunk_sizes = {'y': 5000, 'x': 5000}
    raster = rioxarray.open_rasterio(dir_data + rel_path, chunks=chunk_sizes)
    raster = raster.rio.reproject('epsg:4326')
    raster = raster.chunk(chunk_sizes)
    raster = raster.rename(y='lat', x='lon')
    return raster.where(raster != raster.attrs['_FillValue'])


# walker actual
da_walk_act = _open_walk_check('walker2022/Base_Cur_SOC_MgCha_500m.tif')

# walker potential
da_walk_pot = _open_walk_check('walker2022/Base_Pot_SOC_MgCha_500m.tif')

In [None]:
# function to plot walker and sanderman
def plot_sand_walk(latmin, latmax, lonmin, lonmax):
    """Plot Sanderman (left column) next to Walker (right column) for a
    lat/lon window.

    Rows show, top to bottom: actual, potential, and potential minus actual
    (colour scale starting at 0). Reads the module-level arrays da_sand_act,
    da_sand_pot, da_walk_act and da_walk_pot.

    Parameters
    ----------
    latmin, latmax : float
        Latitude bounds; the selection slices from latmax to latmin.
    lonmin, lonmax : float
        Longitude bounds; the selection slices from lonmin to lonmax.
    """
    window = dict(lat=slice(latmax, latmin), lon=slice(lonmin, lonmax))

    # First band of each raster, cut to the requested window
    sand_act = da_sand_act[0].sel(**window)
    sand_pot = da_sand_pot[0].sel(**window)
    walk_act = da_walk_act[0].sel(**window)
    walk_pot = da_walk_pot[0].sel(**window)

    fig, axs = plt.subplots(figsize=(10, 10), ncols=2, nrows=3)
    axs = axs.ravel()

    # Rows 1-2: actual and potential maps
    for ax, panel in zip(axs[:4], (sand_act, walk_act, sand_pot, walk_pot)):
        panel.plot.imshow(ax=ax)

    # Row 3: potential minus actual
    (sand_pot - sand_act).plot.imshow(ax=axs[4], vmin=0)
    (walk_pot - walk_act).plot.imshow(ax=axs[5], vmin=0)

    plt.tight_layout()

In [None]:
# Window of 10 x 10 degrees
plot_sand_walk(latmin=40, latmax=50, lonmin=5, lonmax=15)

In [None]:
# Window of 2 x 2 degrees
plot_sand_walk(latmin=46, latmax=48, lonmin=9, lonmax=11)

In [None]:
# Window of 0.25 x 0.25 degrees
plot_sand_walk(latmin=46, latmax=46.25, lonmin=10, lonmax=10.25)

In [None]:
# Disconnect the client first, then shut down the cluster it was attached to
client.close()
cluster.close()