In [None]:
import os
import subprocess
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr


In [None]:
#Directories used throughout the notebook
#NOTE(review): these are hard-coded absolute paths -- adjust them to the local setup before running

#BLUE GCB2021 run output (the CurrentCPools netCDF files are read from here)
dir_ELUC  = '/GCB2021/Data/Output_GCB2021/202109_GCB2021_run/run_v1_1700-2021/'
#Country information (country ISO code mask and the IPCC/ISO code tables)
dir_ctrs  = '/Trendy/Data/data_ancillary/info_countries/'
#Grid directory (not referenced in the cells shown here)
dir_grids = '/Trendy/Data/grids/'
#Output directory (grid description, ELUC netCDF files and country xlsx files are written here)
dir_out   = '/GCB2021_commentary/Data/ELUC_countries/'
#Temporary files (the regridded country mask is written here and removed again)
dir_tmp   = '/Trendy/Data/tmp/'


## Create BLUE grid

In [None]:
#Create reference grid: store the CDO grid description of variable CD_A in a text file
fname_in = dir_ELUC + '202109_GCB2021_run_v1_1700-2021__CurrentCPools_CD.nc'
fname_grid = dir_out + 'grid_xy_BLUE'

#Run cdo without a shell and with check=True, so that a failing cdo call raises an error
#instead of silently leaving an empty or incomplete grid file behind
with open(fname_grid, 'w') as file_grid_out:
    subprocess.run(['cdo', 'griddes', '-selvar,CD_A', fname_in], stdout=file_grid_out, check=True)


## Calculate ELUC

In [None]:
# #Read ELUC data
# fname = dir_ELUC + '202109_GCB2021_run_v1_1700-2021__CurrentCPools_CD.nc'
# data_read = xr.open_dataset(fname)

# #Select time
# time_sta = '2000'
# time_end = '2020'

# #Select time period and variable
# data_read = data_read.sel(time=slice(time_sta, None))
# data_ELUC = data_read['CD_A']

# #Calculate ELUC
# data_ELUC = data_ELUC.diff('time')
# data_ELUC['time'] = data_ELUC['time'] - 1
# data_ELUC = data_ELUC.sel(time=slice(time_sta, time_end))

# #Convert from tC/ha to Tg C
# data_ELUC = data_ELUC * data_read.cell_area / 1e6
# data_ELUC = data_ELUC.to_dataset(name='ELUC')
# data_ELUC.ELUC.attrs['unit'] = 'Tg C/year'

# #Define compression level
# comp = dict(zlib=True, complevel=2)
# encoding = {var: comp for var in data_ELUC.data_vars}

# #Save in file
# fname_out = dir_out + 'ELUC_BLUE_GCB2021_' + time_sta + '-' + time_end + '.nc'
# data_ELUC.to_netcdf(fname_out, encoding=encoding)


In [None]:
#Select time
time_sta = '2000'
time_end = '2020'

#Define necessary variables
variables = ['CD_A', 'CD_A_c', 'CD_A_p', 'CD_A_dec_h', 'CD_A_deg_d', 'CD_A_a', 'CD_A_reg_h', 'CD_A_deg_rec_d', 'cell_area']


def read_BLUE_pools(fname, time_first, variables_keep):
    """Open a BLUE carbon pool file and reduce it to the requested time steps and variables.

    fname:          path of the netCDF file
    time_first:     first year to keep (string); all later time steps are kept as well,
                    because the difference to the following time step is taken afterwards
    variables_keep: names of the data variables to keep, all other data variables are dropped

    Returns the reduced xarray Dataset.
    """
    data = xr.open_dataset(fname).sel(time=slice(time_first, None))
    variables_drop = [variab for variab in data.data_vars if variab not in variables_keep]
    #drop_vars is the replacement for the deprecated Dataset.drop when removing variables
    return data.drop_vars(variables_drop)


def calc_ELUC(data_pools, cell_area, time_first, time_last):
    """Convert a time series of pool values into a Dataset with the single variable 'ELUC'.

    data_pools: DataArray with a 'time' dimension
    cell_area:  DataArray that the differences are multiplied with
    time_first: first year of the output (string)
    time_last:  last year of the output (string)

    Returns a Dataset with variable 'ELUC' and attribute unit='Tg C/year'.
    """
    #Difference between consecutive time steps (xarray labels it with the later time step)
    data_ELUC = data_pools.diff('time')

    #Shift the time coordinate by 1
    #NOTE(review): presumably this moves each difference to the preceding year; the effect
    #depends on the type/units of the time axis -- confirm against the input files
    data_ELUC['time'] = data_ELUC['time'] - 1
    data_ELUC = data_ELUC.sel(time=slice(time_first, time_last))

    #Convert from tC/ha to Tg C (assumes cell_area is given in ha -- TODO confirm)
    data_ELUC = (data_ELUC * cell_area / 1e6).to_dataset(name='ELUC')
    data_ELUC.ELUC.attrs['unit'] = 'Tg C/year'

    return data_ELUC


#Read data (selected in time and reduced to the necessary variables)
file_2021_HIS = dir_ELUC + '202109_GCB2021_run_v1_1700-2021__CurrentCPools_CD_PerHistoryType.nc'
file_2021_COV = dir_ELUC + '202109_GCB2021_run_v1_1700-2021__CurrentCPools_CD_PerCoverType.nc'
file_2021_SUM = dir_ELUC + '202109_GCB2021_run_v1_1700-2021__CurrentCPools_CD.nc'
data_2021_HIS = read_BLUE_pools(file_2021_HIS, time_sta, variables)
data_2021_COV = read_BLUE_pools(file_2021_COV, time_sta, variables)
data_2021_SUM = read_BLUE_pools(file_2021_SUM, time_sta, variables)

#Combine variables to sources, sinks, and net
data_sources = data_2021_COV['CD_A_c'] + data_2021_COV['CD_A_p'] + data_2021_HIS['CD_A_dec_h'] + data_2021_HIS['CD_A_deg_d']
data_sinks   = data_2021_HIS['CD_A_a'] + data_2021_HIS['CD_A_reg_h'] + data_2021_HIS['CD_A_deg_rec_d']
data_net     = data_2021_SUM['CD_A']

#Calculate ELUC
BLUE_ELUC_sources = calc_ELUC(data_sources, data_2021_SUM.cell_area, time_sta, time_end)
BLUE_ELUC_sinks   = calc_ELUC(data_sinks, data_2021_SUM.cell_area, time_sta, time_end)
BLUE_ELUC_net     = calc_ELUC(data_net, data_2021_SUM.cell_area, time_sta, time_end)

#Define compression level
comp = dict(zlib=True, complevel=2)

#Save in file
fname_out_sources = dir_out + 'ELUC_BLUE_GCB2021_ELUC-sources_' + time_sta + '-' + time_end + '.nc'
fname_out_sinks = dir_out + 'ELUC_BLUE_GCB2021_ELUC-sinks_' + time_sta + '-' + time_end + '.nc'
fname_out_net = dir_out + 'ELUC_BLUE_GCB2021_ELUC-net_' + time_sta + '-' + time_end + '.nc'
for data_save, fname_save in [(BLUE_ELUC_sources, fname_out_sources),
                              (BLUE_ELUC_sinks, fname_out_sinks),
                              (BLUE_ELUC_net, fname_out_net)]:
    data_save.to_netcdf(fname_save, encoding={var: comp for var in data_save.data_vars})


## Calculate country-level ELUC

In [None]:
#Select time
time_sta = '2000'
time_end = '2020'

#Read ISO codes for countries, IPCC countries, and conversions between ISO alpha-3 codes from IPCC and ISO numeric
fname_ctrs_ISO   = dir_ctrs + 'wrld_cntrs_BLUE_TN_upd.nc'
fname_IPCC_codes = dir_ctrs + 'IPCC_regions.xlsx'
fname_ISO_num    = dir_ctrs + 'iso_codes_alpha_numeric.xlsx'
data_IPCC_codes = pd.read_excel(fname_IPCC_codes, sheet_name='region_classification', header=0, usecols=[0, 1, 3])
data_alph_num   = pd.read_excel(fname_ISO_num, header=0)

#Regrid country ISO code to model grid
#(this does not depend on the selection, so it is done once before the loop)
file_grid      = dir_out + 'grid_xy_BLUE'
fname_ctrs_reg = dir_tmp + 'wrld_cntrs_BLUE_TN_upd_on_BLUE_grid.nc'
try:
    #check=True raises an error if cdo fails instead of continuing with a missing or broken file
    subprocess.run(['cdo', 'remaplaf,' + file_grid, fname_ctrs_ISO, fname_ctrs_reg], check=True)

    #Read regridded data (load_dataset reads everything into memory and closes the file,
    #so that the temporary file can be removed safely afterwards)
    data_ctrs_ISO_reg = xr.load_dataset(fname_ctrs_reg)
finally:
    #Remove temporarily regridded file with ISO country codes (also if an error occurred)
    if os.path.exists(fname_ctrs_reg): os.remove(fname_ctrs_reg)

#Define sources, sinks, and net
selections = ['sources', 'sinks', 'net']

#Loop over sources, sinks, and net
for selection in selections:

    #Define output file name (an existing file is removed so that no outdated output is left if the run fails)
    fname_out = dir_out + 'ELUC_BLUE_countries-ISOcode_ELUC-' + selection + '_vRemapCountries_' + time_sta + '-' + time_end + '.xlsx'
    if os.path.exists(fname_out): os.remove(fname_out)

    #Create dicts for storing data
    ELUC_ctrs = dict()

    #Read ELUC data
    fname = dir_out + 'ELUC_BLUE_GCB2021_ELUC-' + selection + '_' + time_sta + '-' + time_end + '.nc'
    data_ELUC = xr.load_dataset(fname)

    #Start from the regridded country mask (it may be re-indexed below)
    data_ctrs_ISO = data_ctrs_ISO_reg

    #Get lat and lon names
    if 'latitude' in data_ELUC.dims:  lat_name, lon_name = 'latitude', 'longitude'
    else:                             lat_name, lon_name = 'lat', 'lon'

    #Check that model grid and country grid agree
    #(ValueError instead of sys.exit, so that the notebook shows a regular error message)
    check_lat = np.max(np.abs(data_ELUC[lat_name].values - data_ctrs_ISO[lat_name].values))
    check_lon = np.max(np.abs(data_ELUC[lon_name].values - data_ctrs_ISO[lon_name].values))
    if check_lat>0.01:  raise ValueError('Latitudes do not agree')
    if check_lon>0.01:  raise ValueError('Longitudes do not agree')

    #Re-index if there are small deviations in lat and lon
    if (check_lat!=0) or (check_lon!=0):
        print('Country data are re-indexed for ELUC-' + selection)
        data_ctrs_ISO = data_ctrs_ISO.reindex({lat_name: data_ELUC[lat_name], lon_name: data_ELUC[lon_name]}, method='nearest')

    #Loop over all country codes
    for i, iso_alpha3 in enumerate(data_IPCC_codes['ISO']):

        if np.mod(i,20)==0:
            print('  -run ' + str(i+1) + ' of ' + str(len(data_IPCC_codes['ISO'])))

        #Get numeric ISO code of country
        iso_matches = data_alph_num['Numeric'][data_alph_num['Alpha-3 code']==iso_alpha3].values
        if len(iso_matches)==0:
            raise ValueError('No numeric ISO code found for alpha-3 code ' + str(iso_alpha3))
        iso_numeric = iso_matches[0]

        #Select country in country mask
        mask_ISO = data_ctrs_ISO.ISOcode==iso_numeric

        #Get ELUC sum in selected country
        data_sel = data_ELUC.where(mask_ISO).sum((lat_name, lon_name))

        #Save values in dict
        ELUC_ctrs[iso_alpha3] = data_sel.ELUC.values

    #Special cases for certain IPCC countries
    ELUC_ctrs['SXM'] = ELUC_ctrs['MAF']                                        # Saint Martin is French part of island with Sint Maarten (Dutch part) -> same values are counted for both
    ELUC_ctrs['ANT'] = ELUC_ctrs['BES'] + ELUC_ctrs['CUW'] + ELUC_ctrs['SXM']  # Netherlands Antilles (Bonaire, Saint Eustatius & Saba + Curacao + Sint Maarten)

    #Convert data to data frame (and sort by country code)
    ELUC_ctrs_df = pd.DataFrame(ELUC_ctrs, index=data_ELUC.time)
    ELUC_ctrs_df = ELUC_ctrs_df.reindex(sorted(ELUC_ctrs_df.columns), axis=1)

    #Add units in first cell
    ELUC_ctrs_df = ELUC_ctrs_df.rename_axis('unit: Tg C/year')

    #Create xlsx-file and store country data
    #(the file is only opened once the data are complete, so that an error above does not leave a broken file)
    with pd.ExcelWriter(fname_out) as writer:
        ELUC_ctrs_df.to_excel(writer, sheet_name='BLUE_ELUC_IPCC_ctrs', index=True, header=True, float_format='%.6f')
