In [None]:
import os
import subprocess
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr


In [None]:
# Directory configuration for the whole notebook. All paths are absolute and end with '/',
# because file names are appended by plain string concatenation further down.

# --- GCB2021 tree -----------------------------------------------------------------------
# Gridded BLUE run output (the "CurrentCPools" netCDF files)
dir_ELUC = '/GCB2021/Data/Output_GCB2021/202109_GCB2021_run/run_v1_1700-2021/'
# Peat data (not referenced in the cells visible here)
dir_peat = '/GCB2021/Data/Peat_data/'

# --- Trendy tree ------------------------------------------------------------------------
# Country code tables and the country ISO-code map
dir_ctrs = '/Trendy/Data/data_ancillary/info_countries/'
# Grid description files used for regridding
dir_grids = '/Trendy/Data/grids/'
# Scratch directory for regridded intermediate files
dir_tmp = '/Trendy/Data/tmp/'

# --- GCB2021 commentary tree ------------------------------------------------------------
# Country-level BLUE tables (input) and gridded ELUC files (output)
dir_ELUC_2021 = '/GCB2021_commentary/Data/ELUC_countries/'
# Country-level H&N2021 tables
dir_ELUC_NGHGI = '/GCB2021_commentary/Data/data_ELUC_NGHGI/'


## Calculate gridded ELUC

In [None]:
#Select time (as strings: used for label-based slicing of the time axis and in the file names)
time_sta = '2000'
time_end = '2020'

#Define necessary variables
variables = ['CD_A', 'CD_A_c', 'CD_A_p', 'CD_A_dec_h', 'CD_A_deg_d', 'CD_A_a', 'CD_A_reg_h', 'CD_A_deg_rec_d', 'cell_area']

#Read data
file_2021_HIS = dir_ELUC + '202109_GCB2021_run_v1_1700-2021__CurrentCPools_CD_PerHistoryType.nc'
file_2021_COV = dir_ELUC + '202109_GCB2021_run_v1_1700-2021__CurrentCPools_CD_PerCoverType.nc'
file_2021_SUM = dir_ELUC + '202109_GCB2021_run_v1_1700-2021__CurrentCPools_CD.nc'
data_2021_HIS = xr.open_dataset(file_2021_HIS)
data_2021_COV = xr.open_dataset(file_2021_COV)
data_2021_SUM = xr.open_dataset(file_2021_SUM)

#Select in time period. The end is left open on purpose: the period is only cut to
#time_end after differencing along time (see _pool_change_to_ELUC below).
data_2021_HIS = data_2021_HIS.sel(time=slice(time_sta, None))
data_2021_COV = data_2021_COV.sel(time=slice(time_sta, None))
data_2021_SUM = data_2021_SUM.sel(time=slice(time_sta, None))

#Keep only the needed variables in each of the three files
variables_drop_HIS = [variab for variab in data_2021_HIS.data_vars if variab not in variables]
variables_drop_COV = [variab for variab in data_2021_COV.data_vars if variab not in variables]
variables_drop_SUM = [variab for variab in data_2021_SUM.data_vars if variab not in variables]

#drop_vars replaces the deprecated Dataset.drop
data_2021_HIS = data_2021_HIS.drop_vars(variables_drop_HIS)
data_2021_COV = data_2021_COV.drop_vars(variables_drop_COV)
data_2021_SUM = data_2021_SUM.drop_vars(variables_drop_SUM)

#Sum the pool variables that make up sources and sinks; the net term is read directly
data_sources = data_2021_COV['CD_A_c'] + data_2021_COV['CD_A_p'] + data_2021_HIS['CD_A_dec_h'] + data_2021_HIS['CD_A_deg_d']
data_sinks   = data_2021_HIS['CD_A_a'] + data_2021_HIS['CD_A_reg_h'] + data_2021_HIS['CD_A_deg_rec_d']
data_net     = data_2021_SUM['CD_A']


def _pool_change_to_ELUC(pool):
    """Turn a pool time series into a dataset of step-to-step changes named 'ELUC'.

    The change between consecutive time steps is computed, the time coordinate of the
    result is shifted back by 1, the period time_sta..time_end is selected and the
    result is wrapped in a dataset with a single variable 'ELUC'.

    Parameters
    ----------
    pool : xarray.DataArray
        Pool values with a 'time' dimension.

    Returns
    -------
    xarray.Dataset
        Dataset holding the variable 'ELUC' with the attribute unit='Tg C/year'.
    """
    flux = pool.diff('time')
    #NOTE(review): the shift by 1 presumably relabels each difference with the earlier
    #of its two time steps; what "1" means depends on the dtype of the time axis - confirm
    flux['time'] = flux['time'] - 1
    flux = flux.sel(time=slice(time_sta, time_end))
    flux = flux.to_dataset(name='ELUC')
    flux.ELUC.attrs['unit'] = 'Tg C/year'
    return flux


#Calculate ELUC, convert to dataset and set units
BLUE_ELUC_sources = _pool_change_to_ELUC(data_sources)
BLUE_ELUC_sinks   = _pool_change_to_ELUC(data_sinks)
BLUE_ELUC_net     = _pool_change_to_ELUC(data_net)

#Define compression level
comp = dict(zlib=True, complevel=2)

#Save in file (an existing file of the same name is removed first)
fname_out_sources = dir_ELUC_2021 + 'ELUC_BLUE_GCB2021_ELUC-sources-density_' + time_sta + '-' + time_end + '.nc'
fname_out_sinks = dir_ELUC_2021 + 'ELUC_BLUE_GCB2021_ELUC-sinks-density_' + time_sta + '-' + time_end + '.nc'
fname_out_net = dir_ELUC_2021 + 'ELUC_BLUE_GCB2021_ELUC-net-density_' + time_sta + '-' + time_end + '.nc'
for _fname_out, _BLUE_ELUC in ((fname_out_sources, BLUE_ELUC_sources),
                               (fname_out_sinks, BLUE_ELUC_sinks),
                               (fname_out_net, BLUE_ELUC_net)):
    if os.path.exists(_fname_out): os.remove(_fname_out)
    _BLUE_ELUC.to_netcdf(_fname_out, encoding={var: comp for var in _BLUE_ELUC.data_vars})


## Read ELUC BLUE (GCB2021, without peat)

In [None]:
#Analysis period as integer years: the country tables read below are filtered by
#comparing their index against these integers
time_sta = 2000
time_end = 2020
time_len = time_end - time_sta + 1

#Read country-level BLUE ELUC sinks and sources (first column becomes the index)
fname_BLUE_snk = dir_ELUC_2021 + 'ELUC_BLUE_countries-ISOcode_ELUC-sinks_vRemapCountries_2000-2020.xlsx'
fname_BLUE_src = dir_ELUC_2021 + 'ELUC_BLUE_countries-ISOcode_ELUC-sources_vRemapCountries_2000-2020.xlsx'
ELUC_BLUE_snk = pd.read_excel(fname_BLUE_snk, header=0, index_col=0)
ELUC_BLUE_src = pd.read_excel(fname_BLUE_src, header=0, index_col=0)

#Select time
ELUC_BLUE_snk = ELUC_BLUE_snk.loc[(ELUC_BLUE_snk.index>=time_sta) & (ELUC_BLUE_snk.index<=time_end)]
ELUC_BLUE_src = ELUC_BLUE_src.loc[(ELUC_BLUE_src.index>=time_sta) & (ELUC_BLUE_src.index<=time_end)]

#Check time selection. The length is tested first and `or` short-circuits, so an empty
#selection produces the intended message instead of an IndexError on index[0].
for _ELUC_BLUE_check in (ELUC_BLUE_snk, ELUC_BLUE_src):
    _index_check = _ELUC_BLUE_check.index
    if (len(_index_check)!=time_len) or (_index_check[0]!=time_sta) or (_index_check[-1]!=time_end):
        sys.exit('Check time selection of BLUE!')

# UNITS: Tg C / year


## Read ELUC H&N2021 (GCB2021, without peat)

In [None]:
#Read country-level H&N2021 ELUC sinks and sources (first column becomes the index).
#This cell relies on time_sta, time_end and time_len (integer years) being set by the
#"Read ELUC BLUE" cell above.
fname_HN21_snk = dir_ELUC_NGHGI + 'HN2021_ELUC-sinks_GCB2021_countries.xlsx'
fname_HN21_src = dir_ELUC_NGHGI + 'HN2021_ELUC-sources_GCB2021_countries.xlsx'
ELUC_HN21_snk = pd.read_excel(fname_HN21_snk, header=0, index_col=0)
ELUC_HN21_src = pd.read_excel(fname_HN21_src, header=0, index_col=0)

#Select time
ELUC_HN21_snk = ELUC_HN21_snk.loc[(ELUC_HN21_snk.index>=time_sta) & (ELUC_HN21_snk.index<=time_end)]
ELUC_HN21_src = ELUC_HN21_src.loc[(ELUC_HN21_src.index>=time_sta) & (ELUC_HN21_src.index<=time_end)]

#Check time selection. The length is tested first and `or` short-circuits, so an empty
#selection produces the intended message instead of an IndexError on index[0].
for _ELUC_HN21_check in (ELUC_HN21_snk, ELUC_HN21_src):
    _index_check = _ELUC_HN21_check.index
    if (len(_index_check)!=time_len) or (_index_check[0]!=time_sta) or (_index_check[-1]!=time_end):
        sys.exit('Check time selection of H&N2021!')

# UNITS: Tg C / year


## Prepare data

In [None]:
#Select time. NOTE: this switches time_sta/time_end back from integers to strings; the
#strings are used below to build file names.
time_sta = '2000'
time_end = '2020'

#Read ISO codes for countries, IPCC countries, and conversions between ISO alpha-3 codes from IPCC and ISO numeric
fname_ctrs_ISO   = dir_ctrs + 'wrld_cntrs_BLUE_TN_upd.nc'
fname_IPCC_codes = dir_ctrs + 'IPCC_regions.xlsx'
fname_ISO_num    = dir_ctrs + 'iso_codes_alpha_numeric.xlsx'
fname_cntrs_3LET = dir_ctrs + 'Country codes 3 letters.xlsx'
data_IPCC_codes = pd.read_excel(fname_IPCC_codes, sheet_name='region_classification', header=0, usecols=[0, 1, 3])
data_alph_num   = pd.read_excel(fname_ISO_num, header=0)
data_cntrs_3LET = pd.read_excel(fname_cntrs_3LET, sheet_name=0, header=None, index_col=0)

#Regrid country ISO code to model grid with the external `cdo` tool (operator remaplaf)
file_grid      = dir_grids + 'grid_xy_BLUE'
fname_ctrs_reg = dir_tmp + 'wrld_cntrs_BLUE_TN_upd_on_BLUE_grid.nc'
if os.path.exists(fname_ctrs_reg): os.remove(fname_ctrs_reg)
#check=True raises CalledProcessError if cdo fails, instead of silently carrying on and
#failing later when the regridded file is opened. Passing a list avoids shell quoting.
subprocess.run(['cdo', 'remaplaf,' + file_grid, fname_ctrs_ISO, fname_ctrs_reg], check=True)

#Read regridded country ISO data
data_ctrs_ISO = xr.open_dataset(fname_ctrs_reg)

#Read BLUE ELUC on grid (sources and sinks), as written by the "Calculate gridded ELUC" cell
fname_BLUE_snk_grid = dir_ELUC_2021 + 'ELUC_BLUE_GCB2021_ELUC-sinks-density_' + time_sta + '-' + time_end + '.nc'
fname_BLUE_src_grid = dir_ELUC_2021 + 'ELUC_BLUE_GCB2021_ELUC-sources-density_' + time_sta + '-' + time_end + '.nc'
data_BLUE_snk_grid = xr.open_dataset(fname_BLUE_snk_grid)
data_BLUE_src_grid = xr.open_dataset(fname_BLUE_src_grid)

#Read area of BLUE grid (only the cell_area variable is kept)
fname_area = dir_ELUC + '202109_GCB2021_run_v1_1700-2021__CurrentCPools_CD.nc'
data_area = xr.open_dataset(fname_area)
data_area = data_area.cell_area

#Get ISO values of all countries (unique codes on the grid, NaN cells excluded)
ISO_values = np.unique(data_ctrs_ISO.ISOcode.values)
ISO_values = ISO_values[~np.isnan(ISO_values)]


## Calculation

In [None]:
#Output variant: '' scales every time step separately, '_vMean' scales the 2001-2015 means
version = ''

#List for collecting missing H&N countries
ctr_missing_HN = []
ctr_check = pd.DataFrame(columns=['test_perc', 'test_abs'])

#Country names as they appear in the 3-letter-code table (matched as substrings, first
#match wins) paired with the spelling used for the same country in the H&N2021 tables
HN2021_name_map = (
    ("Bolivia (Plurinational State of)",                     "Bolivia"),
    ("Congo, Democratic Republic of the",                    "Democratic Republic of the Congo"),
    ("Côte d'Ivoire",                                        "Cote d'Ivoire"),
    ("Czechia",                                              "Czech Republic"),
    ("Guinea-Bissau",                                        "Guinea Bissau"),
    ("Iran (Islamic Republic of)",                           "Iran"),
    ("Korea (Democratic People's Republic of)",              "Democratic People's Republic of the Korea"),
    ("Korea, Republic of",                                   "Republic of Korea"),
    ("North Macedonia",                                      "The former Yugoslav Republic of Macedonia"),
    ("Moldova, Republic of",                                 "Republic of Moldova"),
    ("Tanzania, United Republic of",                         "United Republic of Tanzania"),
    ("United Kingdom of Great Britain and Northern Ireland", "United Kingdom"),
    ("Venezuela (Bolivarian Republic of)",                   "Venezuela"),
    ("Cabo Verde",                                           "Cape Verde"),
)

#Loop over all countries/ISO values
#create / create2 flag the first iteration, in which the accumulators are initialised
create = 1
create2 = 1
for i, ISO_val in enumerate(ISO_values):
    
    #Progress output: start a new line every 10 countries
    if np.mod(i,10)==0:
        print('')
        print('Run ' + str(i+1) + ' of ' + str(len(ISO_values)), end=': ')
        
    print(int(ISO_val), end=', ')
    
    #Get country names
    country   = data_alph_num[data_alph_num['Numeric']==ISO_val]
    ctr_short = country['Alpha-3 code'].values[0]
    ctr_long  = data_cntrs_3LET.loc[ctr_short].item()

    #Adjust some country names to fit H&N2021 naming
    for name_ISO, name_HN2021 in HN2021_name_map:
        if name_ISO in ctr_long:
            ctr_long = name_HN2021
            break

    #Read H&N2021 country data
    try:
        sel_HN2021_snk = ELUC_HN21_snk[ctr_long].to_frame(ctr_short)
        sel_HN2021_src = ELUC_HN21_src[ctr_long].to_frame(ctr_short)
    except KeyError:
        #Country has no column in the H&N2021 tables: use all-NaN placeholders on the BLUE
        #time index and remember the country. Only KeyError is caught so that any other
        #error is not silently turned into "country missing".
        sel_HN2021_snk = pd.DataFrame(np.nan, columns=[ctr_short], index=ELUC_BLUE_snk.index)
        sel_HN2021_src = pd.DataFrame(np.nan, columns=[ctr_short], index=ELUC_BLUE_src.index)
        sel_HN2021_snk = sel_HN2021_snk.rename_axis('index')
        sel_HN2021_src = sel_HN2021_src.rename_axis('index')
        ctr_missing_HN.append(ctr_long)
        
    #Read BLUE country data (Taiwan and Hongkong are added to China)
    if ctr_short=='CHN':
        sel_BLUE_snk = ELUC_BLUE_snk['CHN'] + ELUC_BLUE_snk['TWN'] + ELUC_BLUE_snk['HKG']
        sel_BLUE_src = ELUC_BLUE_src['CHN'] + ELUC_BLUE_src['TWN'] + ELUC_BLUE_src['HKG']
        
    else:
        sel_BLUE_snk = ELUC_BLUE_snk[ctr_short].copy(deep=True)
        sel_BLUE_src = ELUC_BLUE_src[ctr_short].copy(deep=True)
    
    #Convert to data frame
    sel_BLUE_snk = sel_BLUE_snk.to_frame(ctr_short)
    sel_BLUE_src = sel_BLUE_src.to_frame(ctr_short)

    #Calculate scale factor (H&N2021 / BLUE per time step); infinite ratios become NaN
    scale_fac_snk = sel_HN2021_snk / sel_BLUE_snk
    scale_fac_src = sel_HN2021_src / sel_BLUE_src
    scale_fac_snk[np.isinf(scale_fac_snk)] = np.nan
    scale_fac_src[np.isinf(scale_fac_src)] = np.nan
    
    #Rename scale factor and set NaNs to 0
    scale_fac_snk = scale_fac_snk.rename(columns={ctr_short: 'scale_factor'})
    scale_fac_src = scale_fac_src.rename(columns={ctr_short: 'scale_factor'})
    scale_fac_snk[np.isnan(scale_fac_snk)] = 0
    scale_fac_src[np.isnan(scale_fac_src)] = 0
    
    #convert scale factor to xarray
    scale_snk_xr = scale_fac_snk.to_xarray()
    scale_src_xr = scale_fac_src.to_xarray()
    scale_snk_xr = scale_snk_xr.rename({'index': 'time'})
    scale_src_xr = scale_src_xr.rename({'index': 'time'})
    
    #Get gridded data in selected country (add Taiwan and Hongkong to China)
    if ctr_short=='CHN':
        sel1 = data_ctrs_ISO.ISOcode==data_alph_num[data_alph_num['Alpha-3 code']=='CHN']['Numeric'].values[0]
        sel2 = data_ctrs_ISO.ISOcode==data_alph_num[data_alph_num['Alpha-3 code']=='TWN']['Numeric'].values[0]
        sel3 = data_ctrs_ISO.ISOcode==data_alph_num[data_alph_num['Alpha-3 code']=='HKG']['Numeric'].values[0]
        sel_country = sel1 + sel2 + sel3
    else:
        sel_country = data_ctrs_ISO.ISOcode==ISO_val
        
    #Cells outside the country are set to 0 so that the per-country grids can be summed
    sel_BLUE_grid_snk = data_BLUE_snk_grid.where(sel_country, 0)
    sel_BLUE_grid_src = data_BLUE_src_grid.where(sel_country, 0)

    #Collect country selection for overall check
    if create2==1:
        data_mask_all = 1 * (data_ctrs_ISO.ISOcode==ISO_val)
        create2 = 0
    else:
        data_mask_all = data_mask_all + 1 * (data_ctrs_ISO.ISOcode==ISO_val)
    
    #Apply scale factor
    if version=='_vMean':
        sel_BLUE_grid_snk = sel_BLUE_grid_snk.sel(time=slice('2001', '2015'))
        sel_BLUE_grid_src = sel_BLUE_grid_src.sel(time=slice('2001', '2015'))
        scale_snk_xr      = scale_snk_xr.sel(time=slice('2001', '2015'))
        scale_src_xr      = scale_src_xr.sel(time=slice('2001', '2015'))
        data_HN21_snk_scaled = sel_BLUE_grid_snk.mean('time') * scale_snk_xr.scale_factor.mean('time')
        data_HN21_src_scaled = sel_BLUE_grid_src.mean('time') * scale_src_xr.scale_factor.mean('time')
    else:
        data_HN21_snk_scaled = sel_BLUE_grid_snk * scale_snk_xr.scale_factor
        data_HN21_src_scaled = sel_BLUE_grid_src * scale_src_xr.scale_factor
    
    #Distribute HN2021 ELUC sinks equally in country if BLUE ELUC is 0
    if np.any(sel_BLUE_snk==0):
        
        #Get country area, calculate sink density in country and select times when BLUE is 0
        area_ctr = (data_area * sel_country).sum(('lat', 'lon')).values.item()
        density_snk = sel_HN2021_snk / area_ctr * 1e6 # unit: tC / ha (per original note; depends on the unit of cell_area - TODO confirm)
        density_snk[np.isnan(density_snk)] = 0
        density_snk_xr = density_snk.to_xarray().rename({'index': 'time'})
        density_snk_grid = density_snk_xr * sel_country
        density_snk_grid = density_snk_grid.rename({ctr_short: 'ELUC'})        
        select_time_snk = (sel_BLUE_snk!=0).to_xarray()
        select_time_snk = select_time_snk.rename({ctr_short: 'ELUC', [name for name in select_time_snk.coords][0]:'time'})
        
        #Add calculated density to grid: keep the scaled values where BLUE is non-zero,
        #use the uniform density where BLUE is 0
        data_HN21_snk_scaled = data_HN21_snk_scaled.where(select_time_snk, density_snk_grid)
        
    #Distribute HN2021 ELUC sources equally in country if BLUE ELUC is 0
    if np.any(sel_BLUE_src==0):
        
        #Get country area, calculate source density in country and select times when BLUE is 0
        area_ctr = (data_area * sel_country).sum(('lat', 'lon')).values.item()
        density_src = sel_HN2021_src / area_ctr * 1e6 # unit: tC / ha (per original note; depends on the unit of cell_area - TODO confirm)
        density_src[np.isnan(density_src)] = 0
        density_src_xr = density_src.to_xarray().rename({'index': 'time'})
        density_src_grid = density_src_xr * sel_country
        density_src_grid = density_src_grid.rename({ctr_short: 'ELUC'})
        select_time_src = (sel_BLUE_src!=0).to_xarray()
        select_time_src = select_time_src.rename({ctr_short: 'ELUC', [name for name in select_time_src.coords][0]:'time'})
        
        #Add calculated density to grid: keep the scaled values where BLUE is non-zero,
        #use the uniform density where BLUE is 0
        data_HN21_src_scaled = data_HN21_src_scaled.where(select_time_src, density_src_grid)
        
    #Collect in dataset: the per-country grids are summed into one global grid
    if create==1:
        data_HN2021_grid_snk = data_HN21_snk_scaled
        data_HN2021_grid_src = data_HN21_src_scaled
        create = 0
    else:
        #Sinks accumulate the scaled sink grid of this country (mirrors the sources line)
        data_HN2021_grid_snk = data_HN2021_grid_snk + data_HN21_snk_scaled
        data_HN2021_grid_src = data_HN2021_grid_src + data_HN21_src_scaled

    #Sanity check: stop as soon as the accumulated sink grid holds only NaN at the first
    #time step, naming the country that was just added
    if np.sum(~np.isnan(data_HN2021_grid_snk.isel(time=0).ELUC))==0:
        raise RuntimeError('Accumulated H&N2021 sink grid contains only NaN after adding country ' + str(ctr_short))

        
#Define output file name
fname_out_snk = dir_ELUC_2021 + 'ELUC_H&N_GCB2021_ELUC-sinks-density_' + str(time_sta) + '-' + str(time_end) + version + '.nc'
fname_out_src = dir_ELUC_2021 + 'ELUC_H&N_GCB2021_ELUC-sources-density_' + str(time_sta) + '-' + str(time_end) + version + '.nc'
if os.path.exists(fname_out_snk): os.remove(fname_out_snk)
if os.path.exists(fname_out_src): os.remove(fname_out_src)
data_HN2021_grid_snk.to_netcdf(fname_out_snk)
data_HN2021_grid_src.to_netcdf(fname_out_src)
        