# Calculate global carbon stocks for 1700 primary land area

In [None]:
# Libraries
import os
import pandas as pd
import xarray as xr

In [None]:
# Directories
# Folder the combined NetCDF file is read from and the summary CSV is written
# to; the per-model input folders used further below are resolved relative to it
dir05x = '../paper_deficit/output/05_prep_other/fig_dgvm/'

---

In [None]:
# Get data
# Combined dataset used by prep_ds_carbon_sum_luh2res, which reads the
# '<model>_...cveg' / '<model>_...csoil' pool variables, '<model>_land',
# 'luh2_primary_1700' and 'grid_cell_area_ha' from it
# NOTE(review): presumably all variables share the LUH2 grid (per the file
# name) - confirm
ds = xr.open_dataset(os.path.join(dir05x, 'dgvms_and_other_luh2res.nc'))

In [None]:
def prep_ds_carbon_sum_luh2res(m):
    """Sum the carbon stocks of one model or data set over primary land.

    Reads the module-level dataset ``ds``. Every variable whose name starts
    with ``m`` and ends with ``cveg`` or ``csoil`` is multiplied by the
    ``<m>_land`` variable, the ``luh2_primary_1700`` variable, a 0/1 mask of
    where that variable is defined, and ``grid_cell_area_ha``; the product is
    summed over ``lat`` and ``lon`` and scaled by 1e-9.

    Parameters
    ----------
    m : str
        Name prefix of the model or data set inside ``ds``.

    Returns
    -------
    pandas.DataFrame
        A single row with the column ``model`` followed by one float32 column
        per pool (``s2_cveg``, ``s3_cveg``, ``s2_csoil``, ``s3_csoil``,
        ``s2_clit``, ``s3_clit``) that is present in the summed dataset.
    """
    # Pool variables that belong to this model
    pool_vars = [name for name in ds.data_vars
                 if name.startswith(m) and name.endswith(('cveg', 'csoil'))]
    pools = ds[pool_vars]
    land = ds[f'{m}_land']
    primary = ds['luh2_primary_1700']
    cell_area = ds['grid_cell_area_ha']

    # 1 where the primary land variable holds a value, 0 where it is NaN
    # (NaN is the only value that does not compare equal to itself)
    primary_defined = xr.where(primary == primary, 1, 0)

    # Weight the pools, add up all grid cells and rescale
    # NOTE(review): the factor 1e-9 is presumably a unit conversion
    # (e.g. Mg to Pg) - confirm against the input units
    weighted = pools * land * primary * primary_defined * cell_area
    totals = weighted.sum(['lat', 'lon']) * 1e-9

    # One-row dataframe; pools missing from `totals` are simply left out
    row = pd.DataFrame()
    row['model'] = [m]
    for pool in ('s2_cveg', 's3_cveg', 's2_csoil', 's3_csoil',
                 's2_clit', 's3_clit'):
        var_name = f'{m}_{pool}'
        if var_name not in totals.data_vars:
            continue
        row[pool] = [totals[var_name].values]
        row[pool] = row[pool].astype('float32')
    return row

In [None]:
# DGVMs included in the comparison
list_dgvm = [
    'cablepop', 'classic', 'clm', 'dlem', 'ibis', 'isam', 'jsbach',
    'jules', 'lpjguess', 'lpjwsl', 'orchidee',
]
# Other data sets first, followed by the DGVMs
list_m = [
    'ganzenmueller', 'erb', 'walker', 'mo', 'sanderman030',
    'sanderman100', 'sanderman200',
] + list_dgvm

# One row of primary-land carbon stocks per entry of list_m
# (dgvms, this study, walker, erb, and sanderman)
df_sum = pd.concat([prep_ds_carbon_sum_luh2res(name) for name in list_m])

# Append a row holding the mean of every stock column over the DGVMs only
df_sum_dgvm = df_sum.loc[df_sum['model'].isin(list_dgvm)]
new_row = ['dgvm_mean'] + list(df_sum_dgvm.iloc[:, 1:].mean(axis=0))
df_sum.loc[len(df_sum)] = new_row

# Vegetation carbon: absolute difference (s3 - s2) ...
df_sum['cveg_d'] = df_sum['s3_cveg'] - df_sum['s2_cveg']
# ... and relative deficit 1 - s3/s2 (a fraction, not multiplied by 100)
df_sum['cveg_dp'] = 1 - df_sum['s3_cveg'] / df_sum['s2_cveg']
# Soil carbon: absolute difference (s3 - s2) ...
df_sum['csoil_d'] = df_sum['s3_csoil'] - df_sum['s2_csoil']
# ... and relative deficit 1 - s3/s2 (a fraction, not multiplied by 100)
df_sum['csoil_dp'] = 1 - df_sum['s3_csoil'] / df_sum['s2_csoil']

# Keep the relevant columns in their final order, renumber the rows and
# round to two decimals
df_sum = (
    df_sum[['model',
            's2_cveg', 's3_cveg', 'cveg_d', 'cveg_dp',
            's2_csoil', 's3_csoil', 'csoil_d', 'csoil_dp']]
    .reset_index(drop=True)
    .round(2)
)

# Export
df_sum.to_csv(os.path.join(dir05x, 'data_dgvm_global.csv'), index=False)

df_sum

---

### Recalculate based on each data set's original resolution (results should match the LUH2-resolution calculation above)

In [None]:
# Libraries
from dask_jobqueue import SLURMCluster
from dask.distributed import Client
import dask

# Initialize dask: each SLURM job submitted by the cluster uses these settings
cluster = SLURMCluster(
    queue='compute',                      # SLURM queue to use
    cores=24,                             # Number of CPU cores per job
    memory='256 GB',                      # Memory per job
    account='bm0891',                     # Account allocation
    interface="ib0",                      # Network interface for communication
    walltime='02:00:00',                  # Maximum runtime per job
    local_directory='../dask/',           # Directory for local storage
    job_extra_directives=[                # Additional SLURM directives for logging
        '-o ../dask/LOG_worker_%j.o',     # Output log
        '-e ../dask/LOG_worker_%j.e'      # Error log
    ]
)

# Scale dask cluster to two SLURM jobs
cluster.scale(jobs=2)

# Configure the dashboard url template. dask.config.set is the public way to
# change a nested setting; it does not fail with an AttributeError on None
# when an intermediate key is missing, unlike chained .get() calls on the
# raw config dictionary.
dask.config.set(
    {'distributed.dashboard.link':
     '{JUPYTERHUB_SERVICE_PREFIX}/proxy/{port}/status'}
)

# Create client
client = Client(cluster)

client

In [None]:
def prep_ds_carbon_sum(m):
    """Calculate carbon stocks on primary land from the per-model input files.

    Entries of ``list_other`` are read from zarr stores (re-chunked to
    5000 x 5000 along ``lat``/``lon``), entries of ``list_dgvm`` from NetCDF
    files; both live in ``<dir05x>/<m>/``. Every variable of the carbon
    dataset is multiplied by the land mask, the ``prim_1700`` variable, a
    second mask and the grid cell area, summed over ``lat`` and ``lon`` and
    scaled by 1e-9.

    Parameters
    ----------
    m : str
        Name of the model or data set; must be listed in ``list_dgvm`` or
        ``list_other``.

    Returns
    -------
    pandas.DataFrame
        A single row with the column ``model`` followed by one float32 column
        per pool (``s2_cveg``, ``s3_cveg``, ``s2_csoil``, ``s3_csoil``,
        ``s2_clit``, ``s3_clit``) that is present in the summed dataset.

    Raises
    ------
    ValueError
        If ``m`` is in neither ``list_dgvm`` nor ``list_other``.
    """
    opened = []  # every dataset opened here; all are closed before returning

    def _open(opener, file_name):
        """Open `file_name` from the folder of `m` and register it for closing."""
        dataset = opener(os.path.join(dir05x, m, file_name))
        opened.append(dataset)
        return dataset

    try:
        # Get data. list_other is tested first so that a name listed in both
        # lists keeps using the zarr inputs, as it did before.
        if m in list_other:
            chunks = dict(lat=5000, lon=5000)
            ds = _open(xr.open_zarr, f'ds_{m}.zarr').chunk(chunks)
            ds_land = _open(xr.open_zarr, f'ds_{m}_land.zarr').chunk(chunks)
            ds_prim = _open(xr.open_zarr, f'ds_{m}_prim.zarr').chunk(chunks)
            ds_area = _open(xr.open_zarr,
                            f'ds_{m}_grid_cell_area.zarr').chunk(chunks)
            # No separate mask file here: use 1 where prim_1700 holds a value
            # and 0 where it is NaN (NaN never compares equal to itself)
            da_prim_land = xr.where(ds_prim == ds_prim, 1, 0) \
                .prim_1700.rename('land_sea_mask')
        elif m in list_dgvm:
            ds = _open(xr.open_dataset, f'ds_{m}.nc')
            ds_land = _open(xr.open_dataset, f'ds_{m}_land.nc')
            ds_prim = _open(xr.open_dataset, f'ds_{m}_prim.nc')
            ds_prim_land = _open(xr.open_dataset, f'ds_luh2_land_{m}res.nc')
            ds_area = _open(xr.open_dataset, f'ds_{m}_grid_cell_area.nc')
            da_prim_land = ds_prim_land.land_sea_mask
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError that did not mention the offending name
            raise ValueError(
                f'Unknown model or data set {m!r}: expected an entry of '
                'list_dgvm or list_other'
            )

        # Create arrays from datasets
        da_land = ds_land.land_sea_mask
        da_prim = ds_prim.prim_1700
        da_area = ds_area.grid_cell_area_ha

        # Calculate carbon stocks. The factors are passed as raw arrays
        # (.data), so they are combined by position, not by coordinate labels.
        # NOTE(review): this assumes all inputs share the same grid shape and
        # orientation, and that 1e-9 is a unit conversion (e.g. Mg to Pg) -
        # confirm
        ds_sum = (ds * da_land.data * da_prim.data * da_prim_land.data
                  * da_area.data).sum(['lat', 'lon']) * 0.000000001

        # Create dataframe with carbon stocks of vegetation, soil and litter
        # of s2 and s3 simulations; .values materialises each total, so
        # nothing refers to the open files once the loop has finished
        df_sum = pd.DataFrame()
        df_sum['model'] = [m]
        for i in ['s2_cveg', 's3_cveg', 's2_csoil', 's3_csoil', 's2_clit',
                  's3_clit']:
            if m + '_' + i in ds_sum.data_vars:
                df_sum[i] = [ds_sum[m + '_' + i].values]
                df_sum[i] = df_sum[i].astype('float32')
        return df_sum
    finally:
        # Release the file handles instead of keeping them open for every
        # model processed in the session
        for dataset in opened:
            dataset.close()

In [None]:
# DGVMs included in the comparison
list_dgvm = [
    'cablepop', 'classic', 'clm', 'dlem', 'ibis', 'isam', 'jsbach',
    'jules', 'lpjguess', 'lpjwsl', 'orchidee',
]
# Other data sets
list_other = [
    'pot', 'erb', 'walker', 'mo', 'sanderman030', 'sanderman100',
    'sanderman200',
]
# Other data sets first, followed by the DGVMs
list_m = list_other + list_dgvm

# One row of primary-land carbon stocks per entry of list_m
# (dgvms, this study, walker, erb, and sanderman)
df_sum = pd.concat([prep_ds_carbon_sum(name) for name in list_m])

# Append a row holding the mean of every stock column over the DGVMs only
df_sum_dgvm = df_sum.loc[df_sum['model'].isin(list_dgvm)]
new_row = ['dgvm_mean'] + list(df_sum_dgvm.iloc[:, 1:].mean(axis=0))
df_sum.loc[len(df_sum)] = new_row

# Vegetation carbon: absolute difference (s3 - s2) ...
df_sum['cveg_d'] = df_sum['s3_cveg'] - df_sum['s2_cveg']
# ... and relative deficit 1 - s3/s2 (a fraction, not multiplied by 100)
df_sum['cveg_dp'] = 1 - df_sum['s3_cveg'] / df_sum['s2_cveg']
# Soil carbon: absolute difference (s3 - s2) ...
df_sum['csoil_d'] = df_sum['s3_csoil'] - df_sum['s2_csoil']
# ... and relative deficit 1 - s3/s2 (a fraction, not multiplied by 100)
df_sum['csoil_dp'] = 1 - df_sum['s3_csoil'] / df_sum['s2_csoil']
# Litter carbon: absolute difference (s3 - s2) ...
df_sum['clit_d'] = df_sum['s3_clit'] - df_sum['s2_clit']
# ... and relative deficit 1 - s3/s2 (a fraction, not multiplied by 100)
df_sum['clit_dp'] = 1 - df_sum['s3_clit'] / df_sum['s2_clit']

# Keep the relevant columns in their final order, renumber the rows and
# round to two decimals
df_sum = (
    df_sum[['model',
            's2_cveg', 's3_cveg', 'cveg_d', 'cveg_dp',
            's2_csoil', 's3_csoil', 'csoil_d', 'csoil_dp',
            's2_clit', 's3_clit', 'clit_d', 'clit_dp']]
    .reset_index(drop=True)
    .round(2)
)
# Export left disabled: enabling it would write to the same
# 'data_dgvm_global.csv' path as the LUH2-resolution export
#df_sum.to_csv(os.path.join(dir05x, 'data_dgvm_global.csv'), index=False)

df_sum