In [None]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


## Define folders

In [None]:
#Common root of all data folders
dir_root = '/Data/'

#Input folders
dir_ELUC_NGHGI = dir_root + 'data_ELUC_NGHGI/'                   #H&N2021, OSCAR, NGHGI, Indonesia and REDD+ files
dir_SLAND      = dir_root + 'SLAND_Trendy-v10_S2_countries/'     #SLAND workbooks (one sheet per model)
dir_ctrs       = dir_root + 'data_ancillary/info_countries/'     #country code table
dir_ELUC_2021  = dir_root + 'ELUC_countries/'                    #BLUE ELUC per country
dir_peat       = dir_root + 'Peat_data/'                         #peat drainage and peat fire emissions

#Output folder for the collected pickle files
dir_out        = dir_root + 'data_ELUC_NGHGI_SLAND_plot/'


## Read country names

In [None]:
#Country code table: first sheet, no header row, first column used as index
#(the table is used further down to look up a long country name by its code)
fname_cntrs = dir_ctrs + 'Country codes 3 letters.xlsx'
data_cntrs  = pd.read_excel(
    fname_cntrs,
    index_col=0,
    header=None,
    sheet_name=0,
)


## Define time

In [None]:
#Analysis period; both bounds are inclusive in all time selections below
time_sta, time_end = 2001, 2015

#Number of years in the period and the label used in output file names
time_len = (time_end + 1) - time_sta
time_str = '{}-{}'.format(time_sta, time_end)

print(time_str)


## Read SLAND data

In [None]:
#Models whose sheets are read from the SLAND workbooks (not used: 'CLASSIC-N', 'ORCHIDEE')
models_SLAND = ['CABLE-POP', 'CLASSIC', 'CLM5.0', 'DLEM', 'IBIS', 'ISAM', 'ISBA-CTRIP', 'JSBACH', 'JULES-ES-1.1',
                'LPJ-GUESS', 'LPJwsl', 'LPX-Bern', 'OCN', 'ORCHIDEEv3', 'SDGVM', 'VISIT', 'YIBs']

#Define parameters to select datasets (they are only used to build the file name below)
selection      = 'non-intact-forests'
data_set       = 'DGVMs-Forests-PFTs'
Han2013_thresh = 0.20

#Define file names
fname_SLAND_total_weighted       = dir_SLAND + 'SLAND-S2-countries_Weights-DGVMs-Forests-PFTs_total-SLAND_vRemapToForestMask-Hansen2010_IFL_2013_v2.xlsx'
fname_SLAND_forest2013_nonintact = dir_SLAND + 'SLAND-S2-countries_' + selection + '_Weights-' + data_set + '_MaxForCover' + '{:.2f}'.format(Han2013_thresh) + '_vRemapToForestMask-Hansen2010_IFL_2013_v2.xlsx'


#Collect all file names and names for dictionary
SLAND_versions= dict()
SLAND_versions['SLAND_total-weighted']             = fname_SLAND_total_weighted
SLAND_versions['SLAND_non-intact-forest_mask2013'] = fname_SLAND_forest2013_nonintact

#Dictionary for storing data: one dataframe per SLAND version,
#with one column per model and the sheet columns as index
data_SLAND = dict()

for name_out, fname in SLAND_versions.items():

    print('Read data for ' + name_out, end=': ')

    #Create empty dataframe
    data_SLAND_coll = pd.DataFrame()

    #Open the workbook once and parse one sheet per model from it
    with pd.ExcelFile(fname) as workbook:

        #Loop over models
        for model in models_SLAND:

            print(model, end=', ')

            #Read SLAND data
            data_SLAND_read = pd.read_excel(workbook, sheet_name=model + '_SLAND_IPCC_ctrs', header=0, index_col=0)

            #Select time
            data_SLAND_read = data_SLAND_read.loc[(data_SLAND_read.index>=time_sta) & (data_SLAND_read.index<=time_end)]

            #Check time selection (length first, so that an empty selection
            #gives the message below instead of an IndexError on index[0])
            if (len(data_SLAND_read.index)!=time_len) or (data_SLAND_read.index[0]!=time_sta) or (data_SLAND_read.index[-1]!=time_end):
                sys.exit('Check time selection of SLAND!')

            #Calculate time average
            data_SLAND_read = data_SLAND_read.mean(axis=0)

            #Set correct sign for SLAND (the time average is negated)
            data_SLAND_coll[model] = -data_SLAND_read

    #Save in dictionary
    data_SLAND[name_out] = data_SLAND_coll

    #Print the multi-model mean of the sum over all columns, scaled by 44/12/1000
    print('')
    print('{:.1f}'.format(data_SLAND_coll.sum(axis=0).mean() * 44 / 12 / 1000) + ' Pg CO2 / year')
    print('')

## UNITS
#data_SLAND has units: Tg C / year


## Read peat data

In [None]:
#Read peat data (1: peat drainage, 2: peat fires)
fname_peat_drai = dir_peat + 'Country_emissions_peat-drainage_FAO_1850-2021.xlsx'
fname_peat_fire = dir_peat + 'Country_emissions_peat-fires_GFED4_1997-2021.xlsx'
data_peat_in_1 = pd.read_excel(fname_peat_drai, header=0, index_col=0)
data_peat_in_2 = pd.read_excel(fname_peat_fire, header=0, index_col=0)

#Select time
data_peat_in_1 = data_peat_in_1.loc[(data_peat_in_1.index>=time_sta) & (data_peat_in_1.index<=time_end)]
data_peat_in_2 = data_peat_in_2.loc[(data_peat_in_2.index>=time_sta) & (data_peat_in_2.index<=time_end)]

#Check time selection (length first, so that an empty selection
#gives the message below instead of an IndexError on index[0])
if (len(data_peat_in_1.index)!=time_len) or (data_peat_in_1.index[0]!=time_sta) or (data_peat_in_1.index[-1]!=time_end):
    sys.exit('Check time selection of peat 1!')
if (len(data_peat_in_2.index)!=time_len) or (data_peat_in_2.index[0]!=time_sta) or (data_peat_in_2.index[-1]!=time_end):
    sys.exit('Check time selection of peat 2!')


## Read data BLUE (LUH2021) and add peat

In [None]:
#Read ELUC data
fname_ELUC  = dir_ELUC_2021 + 'ELUC_BLUE_countries-ISOcode_ELUC-net_vRemapCountries_2000-2020.xlsx'
data_BLUE_read = pd.read_excel(fname_ELUC, header=0, index_col=0)

#Select time
data_BLUE_read = data_BLUE_read.loc[(data_BLUE_read.index>=time_sta) & (data_BLUE_read.index<=time_end)]

#Check time selection (length first, so that an empty selection
#gives the message below instead of an IndexError on index[0])
if (len(data_BLUE_read.index)!=time_len) or (data_BLUE_read.index[0]!=time_sta) or (data_BLUE_read.index[-1]!=time_end):
    sys.exit('Check time selection of BLUE!')

#Loop over all countries
for col in data_BLUE_read.columns:

    #Sum ELUC and peat (drainage and fires) for countries that have peat data;
    #note that data_BLUE_read itself includes peat afterwards
    if col in data_peat_in_1.columns:
        data_BLUE_read[col] = data_BLUE_read[col] + data_peat_in_1[col]
    if col in data_peat_in_2.columns:
        data_BLUE_read[col] = data_BLUE_read[col] + data_peat_in_2[col]

#Keep the yearly values (ELUC incl. peat) of all countries in a separate dataframe
data_ELUC_BLUE_2021_all = data_BLUE_read.copy()

#Calculate time average
data_ELUC_BLUE_2021 = data_ELUC_BLUE_2021_all.mean(axis=0)

#Convert to dataframe (no unit conversion is applied here)
data_ELUC_BLUE_2021 = data_ELUC_BLUE_2021.to_frame(name='ELUC')

## UNITS
#data_ELUC_BLUE_2021 has units: Tg C / year


## Read ELUC H&N2021

In [None]:
#Read ELUC data (which already include peat)
fname_ELUC = dir_ELUC_NGHGI + 'HN2021_ELUC-net-with-peat_GCB2021_countries.xlsx'
data_HN2021_read  = pd.read_excel(fname_ELUC, header=0, index_col=0)

#Select time
data_HN2021_read = data_HN2021_read.loc[(data_HN2021_read.index>=time_sta) & (data_HN2021_read.index<=time_end)]

#Check time selection (length first, so that an empty selection
#gives the message below instead of an IndexError on index[0])
if (len(data_HN2021_read.index)!=time_len) or (data_HN2021_read.index[0]!=time_sta) or (data_HN2021_read.index[-1]!=time_end):
    sys.exit('Check time selection of H&N2021!')

#Calculate time average
data_ELUC_HN_2021 = data_HN2021_read.mean(axis=0)

#Convert to dataframe (no unit conversion is applied here)
data_ELUC_HN_2021 = data_ELUC_HN_2021.to_frame(name='ELUC')

## UNITS
#data_ELUC_HN_2021 has units: Tg C / year


## Correct peak in 2010 for DR Congo

In [None]:
#Switch: set to 1 to replace the BLUE values of COD from 2010 onwards by the H&N2021 values
correct_COD = 0
if correct_COD==1:

    print(data_ELUC_BLUE_2021_all['COD'].mean())

    #Overwrite in a single .loc call on the dataframe; the chained form
    #df['COD'].loc[...] = ... may write to a temporary copy and leave the dataframe unchanged.
    #The values are assigned by position, so both selections must have the same length.
    data_ELUC_BLUE_2021_all.loc[2010:, 'COD'] = data_HN2021_read.loc[2010:, 'Democratic Republic of the Congo'].values

    #Update the time average of COD with the corrected series
    print(data_ELUC_BLUE_2021_all['COD'].mean())
    data_ELUC_BLUE_2021.loc['COD', 'ELUC'] = data_ELUC_BLUE_2021_all['COD'].mean()

    #Suffix for the output file name
    outname = '_CODcorrected'
else:
    outname = ''

## Read OSCAR data

In [None]:
#Read ELUC data
fname_ELUC = dir_ELUC_NGHGI + 'OSCAR_ELUC-net-with-peat_GCB2021_IPCCcountries.xlsx'
data_OSCAR_read = pd.read_excel(fname_ELUC, header=0, index_col=0)

#Select time
data_OSCAR_read = data_OSCAR_read.loc[(data_OSCAR_read.index>=time_sta) & (data_OSCAR_read.index<=time_end)]

#Check time selection (length first, so that an empty selection
#gives the message below instead of an IndexError on index[0])
if (len(data_OSCAR_read.index)!=time_len) or (data_OSCAR_read.index[0]!=time_sta) or (data_OSCAR_read.index[-1]!=time_end):
    sys.exit('Check time selection of OSCAR!')

#Calculate time average
data_ELUC_OSCAR_2021 = data_OSCAR_read.mean(axis=0)

#Convert to dataframe (no unit conversion is applied here)
data_ELUC_OSCAR_2021 = data_ELUC_OSCAR_2021.to_frame(name='ELUC')

## UNITS
#data_ELUC_OSCAR_2021 has units: Tg C / year


## Read data NGHGI

In [None]:
#Read data
fname_NGHGI = dir_ELUC_NGHGI + 'Grassi_Giacomo_NGHGI_2021_11_preliminary.xlsx'
data_NGHGI  = pd.read_excel(fname_NGHGI, sheet_name='summary DB2', header=2, index_col=0)

#Read new data for Indonesia
fname_IDN = dir_ELUC_NGHGI + 'LULUCF_Indonesia_UNFCCC_BUR3.xlsx'
data_IDN  = pd.read_excel(fname_IDN, header=0, index_col=0)

#Get column names from the first data row and index names from the 'ISO' column
data_NGHGI.columns = data_NGHGI.iloc[0]
data_NGHGI.index = data_NGHGI['ISO']

#Select data by position: rows 1-195 and columns 12-15
#(presumably the country rows and the four periods 2001-2005, ..., 2016-2020 - confirm against the workbook)
data_NGHGI = data_NGHGI.iloc[1:196, 12:16]
data_NGHGI = data_NGHGI.astype(float)

#Prepare data for China: values are only given for single years (divided by 1000 here)
data_China = pd.DataFrame(columns=['NGHGI'], index=np.arange(2000, 2021))
# data_China.loc[1994] =  -407479 / 1000
data_China.loc[2005] =  -803000 / 1000
data_China.loc[2010] = -1029720 / 1000
data_China.loc[2012] =  -575848 / 1000
data_China.loc[2014] = -1150910 / 1000
data_China = data_China.astype(float)

#Fill the years without values (between and outside the given years)
data_China = data_China.interpolate(limit_direction='both')

#Add period means for China to NGHGI (one period per column, in column order).
#The values are written with a single .iloc call on the dataframe and as scalars; the chained form
#data_NGHGI.loc['CHN'][i] = ... may write to a temporary row and assigned a one-element Series.
#get_loc raises a KeyError if 'CHN' is missing from the index.
periods_China = [(2001, 2005), (2006, 2010), (2011, 2015), (2016, 2020)]
row_CHN = data_NGHGI.index.get_loc('CHN')
for i_col, (year_sta, year_end) in enumerate(periods_China):
    data_NGHGI.iloc[row_CHN, i_col] = data_China.loc[year_sta:year_end, 'NGHGI'].mean()

#Define index for time selection (start)
if time_sta==2001:
    i_sta = 0
else:
    sys.exit('Start time is not well defined for NGHGI')

#Define index for time selection (end, exclusive column position)
if time_end==2010:
    i_end = 2
elif time_end==2015:
    i_end = 3
elif time_end==2020:
    i_end = 4
else:
    sys.exit('End time is not well defined for NGHGI')

#Select time and calculate average over the selected period columns
data_NGHGI = data_NGHGI.iloc[:, i_sta:i_end].mean(axis=1).to_frame(name='NGHGI')

#Calculate average for Indonesia in selected period and replace old NGHGI data of IDN
data_IDN  = data_IDN[data_IDN.columns[(data_IDN.columns>=time_sta) & (data_IDN.columns<=time_end)]].mean(axis=1)
data_NGHGI.loc['IDN', 'NGHGI'] = data_IDN['LULUCF flux'] / 1000

#Convert from Mt CO2 yr-1 to Mt C yr-1 (=Tg C / year)
data_NGHGI = 12 / 44 * data_NGHGI

## UNITS
#data_NGHGI has units: Tg C / year


## Add REDD+ data for DR Congo

In [None]:
#Read the REDD+ workbook (header in the second row, index taken from the second column)
fname_REDD = dir_ELUC_NGHGI + 'REDDplus_DRC.xlsx'
data_REDD  = pd.read_excel(fname_REDD, index_col=1, header=1)

#Keep the first three rows without the first column and pick the row labelled 'Net'
data_REDD = data_REDD.iloc[:3, 1:].loc['Net']

#Average over the entries whose label lies in the selected period
data_REDD = data_REDD.loc[(data_REDD.index>=time_sta) & (data_REDD.index<=time_end)].mean()

#Convert from kt CO2 yr-1 to Mt C yr-1 (=Tg C / year)
data_REDD = 12 / 44 / 1000 * data_REDD

#Store as an additional row in the NGHGI data
data_NGHGI.loc['COD_REDD+', 'NGHGI'] = data_REDD


## Extract data

In [None]:
#Get list of countries
countries_BLUE       = data_ELUC_BLUE_2021.index
countries_HN_2021    = data_ELUC_HN_2021.index
countries_OSCAR_2021 = data_ELUC_OSCAR_2021.index
countries_NGHGI      = data_NGHGI.index

#Bookkeeping sets: an entry is removed as soon as the corresponding lookup in the loop below succeeds,
#so the remaining entries are the ones that were never matched
countries_check1 = set(countries_BLUE)
countries_check3 = set(countries_HN_2021)
countries_check4 = set(countries_OSCAR_2021)
countries_check5 = set(countries_BLUE)

#Create empty dataframes (one row per BLUE country)
data_collect_ELUC_NGHGI = pd.DataFrame(index=countries_BLUE)
data_collect_SLAND = {SLAND_name: pd.DataFrame(index=countries_BLUE) for SLAND_name in data_SLAND.keys()}

#Define country list for EU27 + UK
ctrs_EU = ['AUT', 'BEL', 'BGR', 'HRV', 'CYP', 'CZE', 'DNK', 'EST', 'FIN', 'FRA', 'DEU', 'GRC', 'HUN', 'IRL', 'ITA', 'LVA', 'LTU', 'LUX', 'MLT', 'NLD', 'POL', 'PRT', 'ROU', 'SVK', 'SVN', 'ESP', 'SWE', 'GBR']

#Long country names that are replaced (key: text searched in the long name, value: name used instead).
#The first matching key wins; the resulting name is used for the H&N2021 lookup and in messages.
ctr_long_replacements = {
    "Bolivia (Plurinational State of)":                     "Bolivia",
    "Congo, Democratic Republic of the":                    "Democratic Republic of the Congo",
    "Côte d'Ivoire":                                        "Cote d'Ivoire",
    "Czechia":                                              "Czech Republic",
    "Guinea-Bissau":                                        "Guinea Bissau",
    "Iran (Islamic Republic of)":                           "Iran",
    "Korea (Democratic People's Republic of)":              "Democratic People's Republic of the Korea",
    "Korea, Republic of":                                   "Republic of Korea",
    "North Macedonia":                                      "The former Yugoslav Republic of Macedonia",
    "Moldova, Republic of":                                 "Republic of Moldova",
    "Tanzania, United Republic of":                         "United Republic of Tanzania",
    "United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
    "Venezuela (Bolivarian Republic of)":                   "Venezuela",
    "Cabo Verde":                                           "Cape Verde",
}

#Loop over all BLUE countries
for country in countries_BLUE:

    #Get long name for country
    ctr_long = data_cntrs.loc[country].item()

    #Adjust some country names
    for name_part, name_new in ctr_long_replacements.items():
        if name_part in ctr_long:
            ctr_long = name_new
            break

    #Extract NGHGI values (NaN if the country is not available)
    try:
        NGHGI_val = data_NGHGI.loc[country].item()
        countries_check1.discard(country)
    except (KeyError, ValueError):
        NGHGI_val = np.nan
        #Only report missing countries with a non-negligible BLUE ELUC
        if np.abs(data_ELUC_BLUE_2021.loc[country].item())>0.05:
            print('No NGHGI data for ' + ctr_long)

    #Extract ELUC for H&N2021 (indexed by long country name)
    try:
        ELUC_HN_2021_val = data_ELUC_HN_2021.loc[ctr_long].item()
        countries_check3.discard(ctr_long)
    except (KeyError, ValueError):
        ELUC_HN_2021_val = np.nan

    #Extract ELUC for OSCAR (GCB2021)
    try:
        ELUC_OSCAR_2021_val = data_ELUC_OSCAR_2021.loc[country].item()
        countries_check4.discard(country)
    except (KeyError, ValueError):
        ELUC_OSCAR_2021_val = np.nan

    #Extract SLAND values of all models and save them in the SLAND dataframes
    for SLAND_name in data_SLAND.keys():

        try:
            SLAND_val = data_SLAND[SLAND_name].loc[country]
            countries_check5.discard(country)
        except KeyError:
            SLAND_val = pd.Series([np.nan] * len(models_SLAND), index=models_SLAND)

        #Add SLAND of single models to dataframe
        for model, value in SLAND_val.items():
            data_collect_SLAND[SLAND_name].loc[country, model] = value

    #Extract ELUC for BLUE
    ELUC_BLUE_2021_val  = data_ELUC_BLUE_2021.loc[country].item()

    #Add values for Serbia and Montenegro for NGHGI
    if country=='SCG':
        NGHGI_val = data_NGHGI.loc['SRB'].item() + data_NGHGI.loc['MNE'].item()

    #Add values for China, Hong Kong, and Taiwan for BLUE ELUC
    if country=='CHN':
        ELUC_BLUE_2021_val  = data_ELUC_BLUE_2021.loc['CHN'].item() + data_ELUC_BLUE_2021.loc['TWN'].item() + data_ELUC_BLUE_2021.loc['HKG'].item()

    #Calculate gap between NGHGI and ELUC (mean of BLUE and H&N2021)
    GAP_val = NGHGI_val - (ELUC_BLUE_2021_val + ELUC_HN_2021_val)/2

    #Save ELUC and NGHGI data in dataframe
    data_collect_ELUC_NGHGI.loc[country, 'NGHGI']           = NGHGI_val
    data_collect_ELUC_NGHGI.loc[country, 'ELUC_BLUE_2021']  = ELUC_BLUE_2021_val
    data_collect_ELUC_NGHGI.loc[country, 'ELUC_HN_2021']    = ELUC_HN_2021_val
    data_collect_ELUC_NGHGI.loc[country, 'ELUC_OSCAR_2021'] = ELUC_OSCAR_2021_val
    data_collect_ELUC_NGHGI.loc[country, 'GAP']             = GAP_val

#Sort data and calculate the sum of all columns for the EU
data_collect_ELUC_NGHGI = data_collect_ELUC_NGHGI.sort_index()
data_collect_ELUC_NGHGI.loc['EU27_UK'] = data_collect_ELUC_NGHGI.loc[ctrs_EU].sum(axis=0)

#Add REDD+ data for COD to NGHGI
data_collect_ELUC_NGHGI.loc['COD_REDD+', 'NGHGI'] = data_NGHGI.loc['COD_REDD+', 'NGHGI']

#Save ELUC and NGHGI in file
file_name = dir_out + 'Collection_ELUC_NGHGI_' + time_str + outname + '.pickle'
data_collect_ELUC_NGHGI.to_pickle(file_name)

#Loop over different calculation methods for SLAND
for SLAND_name in data_SLAND.keys():

    #Sort data and calculate SLAND for EU
    data_SLAND_out = data_collect_SLAND[SLAND_name].sort_index()
    data_SLAND_out.loc['EU27_UK'] = data_SLAND_out.loc[ctrs_EU].sum(axis=0)

    #Save SLAND in file
    file_name = dir_out + 'Collection_' + SLAND_name + '_' + time_str + '.pickle'
    data_SLAND_out.to_pickle(file_name)
