# Calculate ecozone carbon stocks for 1700 primary land area

In [None]:
# Directories
# dir_data: root folder of the input data; the ecozone shapefile is read
#           from a sub-folder of it.
# dir05x:   folder holding the prepared dataset that is opened below; the
#           CSV produced by this notebook is written to the same folder.
dir_data = '../data/'
dir05x = '../paper_deficit/output/05_prep_other/fig_dgvm/'

In [None]:
# Libraries
import os
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd

---

In [None]:
# Get data
# Dataset from which the per-model carbon variables (`<model>_*cveg`,
# `<model>_*csoil`), `<model>_land`, `luh2_primary_1700` and
# `grid_cell_area_ha` are read further down.
# NOTE(review): judging by the file name the data are on the LUH2 grid
# resolution - confirm.
ds = xr.open_dataset(os.path.join(dir05x, 'dgvms_and_other_luh2res.nc'))

In [None]:
# Ecozone shapefile
shp = gpd.read_file(dir_data + 'fao2010gez/gez_2010_wgs84.shp')

# Group regions: `gez_code` values that are merged into each ecozone class
list_trop1 = [11]
list_trop2 = [12, 13, 14, 15, 16]
list_subt = [21, 22, 23, 24, 25]
list_temp = [31, 32, 33, 34, 35]
list_bore = [41, 42, 43]
list_pola = [50]

# (gez codes, numeric class id, class name) for every ecozone class
ecozone_groups = [
    (list_trop1, 0, 'trop1'),
    (list_trop2, 1, 'trop2'),
    (list_subt, 2, 'subt'),
    (list_temp, 3, 'temp'),
    (list_bore, 4, 'bore'),
    (list_pola, 5, 'pola'),
]

# Assign unique ecozone values and names; features whose `gez_code` is in
# none of the groups keep NaN in both `code2` and `name`
shp = shp.assign(code2 = np.nan)
for gez_codes, group_id, group_name in ecozone_groups:
    in_group = shp.gez_code.isin(gez_codes)
    shp.loc[in_group, 'code2'] = group_id
    shp.loc[in_group, 'name'] = group_name

In [None]:
def prep_ds_carbon_sum_luh2res(m, cat):
    """Sum the carbon stocks of one data source on 1700 primary land.

    Reads the notebook-level dataset ``ds`` and ecozone table ``shp``.

    Parameters
    ----------
    m : str
        Prefix of the data source in ``ds``. Every variable that starts with
        ``m`` and ends with ``cveg`` or ``csoil`` is processed, together with
        the variable ``f'{m}_land'``.
    cat : str
        Ecozone name; the geometries of the ``shp`` rows whose ``name``
        column equals ``cat`` are used to clip the data.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``model`` and ``cat`` plus whichever of
        ``s2_cveg``, ``s3_cveg``, ``s2_csoil`` and ``s3_csoil`` exist for
        ``m``, each rounded to two decimals.
    """
    # Carbon variables (vegetation and soil) of this data source
    carbon_vars = [name for name in ds.data_vars
                   if name.startswith(m) and name.endswith(('cveg', 'csoil'))]
    dsm = ds[carbon_vars]

    # Weights applied to every grid cell
    # NOTE(review): presumably land fraction, 1700 primary-land fraction and
    # cell area in ha - confirm against the dataset's metadata.
    da_land = ds[f'{m}_land']
    da_prim = ds.luh2_primary_1700
    da_area = ds.grid_cell_area_ha

    # No separate NaN mask for primary land is needed: cells where `da_prim`
    # is NaN stay NaN in the product and are skipped by the sum below.
    ds_weighted = dsm * da_land * da_prim * da_area

    # Clip to the ecozone and sum over space.
    # NOTE(review): the `.rio` accessor is provided by rioxarray, which none
    # of the visible cells imports explicitly - confirm it is registered.
    # NOTE(review): the 1e-9 factor looks like a unit conversion of the
    # summed stocks - confirm the input and output units.
    ds_sum = (
        ds_weighted
        .rio.write_crs('4326')
        .rename(lat='y', lon='x')
        .rio.clip(shp[shp['name'] == cat].geometry, all_touched=False)
        .sum(['y', 'x'])
    ) * 1e-9

    # One-row dataframe with the carbon stocks of vegetation and soil of the
    # s2 and s3 simulations (only the variables that exist for this source)
    row = pd.DataFrame({'model': [m], 'cat': [cat]})
    for suffix in ['s2_cveg', 's3_cveg', 's2_csoil', 's3_csoil']:
        var_name = f'{m}_{suffix}'
        if var_name in ds_sum.data_vars:
            row[suffix] = [ds_sum[var_name].values.astype(float).round(2)]

    return row

In [None]:
# List of dgvms
list_dgvm = ['cablepop', 'classic', 'clm', 'dlem', 'ibis', 'isam', 'jsbach',
             'jules', 'lpjguess', 'lpjwsl', 'orchidee']

# List of dgvms and other data
list_m = ['ganzenmueller', 'erb', 'walker', 'mo', 'sanderman030',
          'sanderman100', 'sanderman200', *list_dgvm]

# List of ecozones
list_cat = ['trop1', 'trop2', 'subt', 'temp', 'bore', 'pola']

# Create dataframe with carbon stocks on primary land data of
# dgvms, this study, walker, erb, and sanderman: one row per
# (data source, ecozone). `ignore_index=True` gives the rows a unique
# 0..n-1 index, so the label-based append below never has to enlarge a
# frame whose rows are all labelled 0.
df_sum = pd.concat(
    [prep_ds_carbon_sum_luh2res(m, cat)
     for m in list_m
     for cat in list_cat],
    ignore_index=True)

df_sum_dgvm = df_sum[df_sum.model.isin(list_dgvm)]

# Add rows for dgvm mean (one per ecozone); the values follow the column
# order of `df_sum`, whose first two columns are `model` and `cat`
for cat in np.unique(df_sum.cat):
    new_row = ['dgvm_mean', cat,
               *df_sum_dgvm[df_sum_dgvm.cat == cat].iloc[:, 2:].mean(axis=0)]
    df_sum.loc[len(df_sum)] = new_row

# Column vegetation deficit (absolute, s3 - s2)
df_sum['cveg_d'] = df_sum.s3_cveg - df_sum.s2_cveg
# Column vegetation deficit relative to s2 (fraction, 1 - s3 / s2)
df_sum['cveg_dp'] = 1 - (df_sum.s3_cveg / df_sum.s2_cveg)
# Column soil deficit (absolute, s3 - s2)
df_sum['csoil_d'] = df_sum.s3_csoil - df_sum.s2_csoil
# Column soil deficit relative to s2 (fraction, 1 - s3 / s2)
df_sum['csoil_dp'] = 1 - (df_sum.s3_csoil / df_sum.s2_csoil)

# Select relevant columns. The result has to be assigned back, otherwise the
# column order and the rounding are lost before the export.
df_sum = (
    df_sum[['model', 'cat', 's2_cveg', 's3_cveg', 'cveg_d', 'cveg_dp',
            's2_csoil', 's3_csoil', 'csoil_d', 'csoil_dp']]
    .reset_index(drop=True)
    .round(2)
)

# Export
df_sum.to_csv(os.path.join(dir05x, 'data_dgvm_ecozone.csv'), index=False)

df_sum

---

### Check difference between global values and sum of ecozone values

In [None]:
# Get data: global totals and the per-ecozone table written by this notebook
df_g = pd.read_csv(os.path.join(dir05x, 'data_dgvm_global.csv'))
df_e = pd.read_csv(os.path.join(dir05x, 'data_dgvm_ecozone.csv'))

# Calculate differences: global value minus the sum over all ecozones,
# per model
df_g = df_g.set_index('model')
# Drop the ecozone label before aggregating so that only numeric columns are
# summed (instead of string-concatenating `cat` and dropping it afterwards)
df_e2 = df_e.drop(columns='cat').groupby('model').sum().round(2)
# NOTE: `cveg_dp` / `csoil_dp` are ratios; where present, their sum across
# ecozones is not expected to match a global ratio.
df_g.subtract(df_e2)[df_g.columns]
# differences are quite small for deficit estimates