In [None]:
import os
import subprocess
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr


In [None]:
#All folders live below one common root. Every directory string keeps its
#trailing slash because file names are appended by plain string concatenation.
_data_root = '/Data/'

dir_forest = _data_root + 'forest_masks/'
dir_ctrs   = _data_root + 'data_ancillary/' + 'info_countries/'
dir_grids  = _data_root + 'grids/'
dir_LSM    = _data_root + 'LSMs/'
dir_out    = _data_root + 'forest_area/'
dir_tmp    = _data_root + 'tmp/'


## Compute grid-cell area for the Hansen dataset

In [None]:
#Create reference grid: let cdo write the grid-cell areas of the Hansen forest mask
fname_in   = dir_forest + 'Hansen2010_IFL_2013.nc'
fname_area = dir_forest + 'Hansen2010_IFL_gridarea.nc'

#check=True raises CalledProcessError if cdo fails, so that a missing or stale
#area file from an earlier run is never picked up silently further down.
#Passing an argument list (no shell) keeps the call safe for any path string.
subprocess.run(['cdo', 'gridarea', fname_in, fname_area], check=True)


## Calculate forest fraction and interpolate to grid of forest mask 

In [None]:
#Define file names
fname_area_for = dir_forest + 'ForestArea_0.5deg_2013.nc'
fname_area_tot = dir_forest + 'gridarea_ForestArea_0.5deg_2013.nc'
fname_for_frac = dir_forest + 'ForestFraction_0.5deg_2013.nc'
fname_for_frac_regr = dir_forest + 'ForestFraction_0.5deg_2013_regrid360x720-ForestMask.nc'

#Remove outputs of earlier runs so that stale files can never be mistaken for new ones
for fname_old in (fname_area_tot, fname_for_frac, fname_for_frac_regr):
    if os.path.exists(fname_old): os.remove(fname_old)

#Get cell area of forest cover map (check=True aborts the cell if cdo fails)
subprocess.run(['cdo', 'gridarea', fname_area_for, fname_area_tot], check=True)

#Calculate forest fraction = forest area / total cell area.
#The datasets are opened as context managers so that both files are closed
#again before the temporary cell-area file is deleted below.
with xr.open_dataset(fname_area_for) as data_area_for, xr.open_dataset(fname_area_tot) as data_area_tot:

    #Cells without a forest area value (NaN) are treated as zero forest area
    area_for = data_area_for.Band1.fillna(0)

    for_fraction = area_for / data_area_tot.cell_area
    for_fraction = for_fraction.to_dataset(name='forest_fraction')

    #Write inside the with-block: the input data are only read from disk on demand
    for_fraction.to_netcdf(fname_for_frac)

#Interpolate forest fraction conservatively to grid of forest mask
fname_grid = dir_grids + 'grid_xy_360x720-ForestMask'
subprocess.run(['cdo', 'remapcon,' + fname_grid, fname_for_frac, fname_for_frac_regr], check=True)

#Remove temporary files
os.remove(fname_area_tot)


## Calculate intact, non-intact and total forest area per country

In [None]:
#Forest categories that are evaluated (the labels are also used as row names of the output table)
forest_sel = ['intact', 'non-intact', 'all']

#Read country list (ISO alpha-3 codes from IPCC) and conversion table between ISO alpha-3 and ISO numeric codes
fname_IPCC_codes = dir_ctrs + 'IPCC_regions.xlsx'
fname_ISO_num    = dir_ctrs + 'iso_codes_alpha_numeric.xlsx'
data_IPCC_codes = pd.read_excel(fname_IPCC_codes, sheet_name='region_classification', header=0, usecols=[0, 1, 3])
data_alph_num   = pd.read_excel(fname_ISO_num, header=0)

#Read gridded country mask (variable ISOcode is compared against numeric ISO codes further down)
fname_ctrs_ISO = dir_ctrs + 'wrld_cntrs_BLUE_TN_upd.nc'
data_ctrs_ISO  = xr.open_dataset(fname_ctrs_ISO)

#Create NetCDF of global land area (1 where a country code > 0 is set, 0 elsewhere)
fname_landarea_ISO = dir_ctrs + 'wrld_land-area_BLUE_TN_upd.nc'
if os.path.exists(fname_landarea_ISO): os.remove(fname_landarea_ISO)
data_landarea_ISO = 1 * (data_ctrs_ISO.ISOcode>0)
data_landarea_ISO = data_landarea_ISO.to_dataset(name='land_fraction')
data_landarea_ISO.to_netcdf(fname_landarea_ISO)

#Conservatively regrid global land area to grid of forest mask.
#check=True raises if cdo fails instead of continuing without a regridded file;
#the argument list avoids building a shell command string.
fname_landarea_ISO_regr = dir_tmp + 'land-area_cntrs_BLUE_TN_upd_regrid_Hansen_tmp.nc'
if os.path.exists(fname_landarea_ISO_regr): os.remove(fname_landarea_ISO_regr)
file_grid = dir_grids + 'grid_xy_360x720-ForestMask'
subprocess.run(['cdo', '-s', 'remapcon,' + file_grid, fname_landarea_ISO, fname_landarea_ISO_regr], check=True)

#Read regridded land area file (from here on data_landarea_ISO holds the regridded land fraction)
data_landarea_ISO = xr.open_dataset(fname_landarea_ISO_regr)

#Read cell area
fname_area = dir_forest + 'Hansen2010_IFL_gridarea.nc'
data_area = xr.open_dataset(fname_area)

#Read forest fraction
fname_for_frac_regr = dir_forest + 'ForestFraction_0.5deg_2013_regrid360x720-ForestMask.nc'
data_for_frac = xr.open_dataset(fname_for_frac_regr)

#Get lat and lon names
if 'latitude' in data_for_frac.dims:  lat_name, lon_name = 'latitude', 'longitude'
else:                                 lat_name, lon_name = 'lat', 'lon'

#Check that the grids of forest fraction, land area (1) and cell area (2) agree
check_lat1 = np.max(np.abs(data_for_frac[lat_name].values - data_landarea_ISO[lat_name].values))
check_lon1 = np.max(np.abs(data_for_frac[lon_name].values - data_landarea_ISO[lon_name].values))
check_lat2 = np.max(np.abs(data_for_frac[lat_name].values - data_area[lat_name].values))
check_lon2 = np.max(np.abs(data_for_frac[lon_name].values - data_area[lon_name].values))
if check_lat1>0.001 or check_lon1>0.001:  sys.exit('Coordinates do not agree')
if check_lat2>0.001 or check_lon2>0.001:  sys.exit('Coordinates do not agree')

#Re-index if there are small deviations in lat and lon. xarray matches
#coordinates exactly in arithmetic, so even tiny differences have to be removed.
if (check_lat1!=0) or (check_lon1!=0):
    data_landarea_ISO = data_landarea_ISO.reindex({lat_name: data_for_frac[lat_name], lon_name: data_for_frac[lon_name]}, method='nearest')
if (check_lat2!=0) or (check_lon2!=0):
    data_area = data_area.reindex({lat_name: data_for_frac[lat_name], lon_name: data_for_frac[lon_name]}, method='nearest')

#Calculate forest area per grid cell (forest fraction * cell area)
data_for_area = data_for_frac.forest_fraction * data_area.cell_area
    
#Create dict for storing data: one {ISO alpha-3 code: forest area} dict per selection
FOREST_by_sel = {selection: dict() for selection in forest_sel}

#Define output file name and remove the result of an earlier run
fname_out = dir_out + 'HansenPotapov_forest_area_intact_non-intact_2013_v2.xlsx'
if os.path.exists(fname_out): os.remove(fname_out)

#Read forest mask once (it is the same for every selection) and close the file again
fname_in = dir_forest + 'Hansen2010_IFL_2013.nc'
with xr.open_dataset(fname_in) as data_mask:
    forest_class = data_mask.Band1.load()

#Check that forest mask grid and forest area grid agree
check_lat = np.max(np.abs(forest_class[lat_name].values - data_for_area[lat_name].values))
check_lon = np.max(np.abs(forest_class[lon_name].values - data_for_area[lon_name].values))
if check_lat>0.01:  sys.exit('Latitudes do not agree')
if check_lon>0.01:  sys.exit('Longitudes do not agree')

#Re-index if there are small deviations in lat and lon
if (check_lat!=0) or (check_lon!=0):
    forest_class = forest_class.reindex({lat_name: data_for_area[lat_name], lon_name: data_for_area[lon_name]}, method='nearest')

#Forest area per grid cell for every selection (class value 2 is used as 'intact')
data_forest_sel = dict()
for selection in forest_sel:

    if selection=='non-intact':
        #NOTE(review): NaN != 2 evaluates to True, so cells without a class value
        #are counted as non-intact here - confirm that this is intended
        indicator = 1 * (forest_class!=2)
    elif selection=='intact':
        indicator = 1 * (forest_class==2)
    elif selection=='all':
        #NOTE(review): cells without a class value stay NaN here and are presumably
        #skipped by the sums below, unlike in the 'non-intact' case - confirm
        indicator = 1 + 0 * forest_class
    else:
        raise ValueError('Unknown forest selection: ' + str(selection))

    #Multiply with area
    data_forest_sel[selection] = indicator * data_for_area

#Loop over all country codes. The country mask is regridded only once per country
#and then reused for all selections, as it does not depend on the selection.
file_grid = dir_grids + 'grid_xy_360x720-ForestMask'
n_ctrs = len(data_IPCC_codes['ISO'])
for i, iso_alpha3 in enumerate(data_IPCC_codes['ISO']):

    if np.mod(i, 20)==0:
        print('  -run ' + str(i+1) + ' of ' + str(n_ctrs))

    #Get numeric ISO code of country
    iso_matches = data_alph_num['Numeric'][data_alph_num['Alpha-3 code']==iso_alpha3].values
    if len(iso_matches)==0:
        raise IndexError('No numeric ISO code found for alpha-3 code ' + str(iso_alpha3))
    iso_numeric = iso_matches[0]

    #Select country in country mask
    mask_ISO = 1 * (data_ctrs_ISO.ISOcode==iso_numeric)
    mask_ISO = mask_ISO.to_dataset(name='country_fraction')

    #Define temporary file names for selecting and regridding country
    fname_tmp      = dir_tmp + 'country_fraction_' + iso_alpha3 + '_Hansen_FOREST_tmp.nc'
    fname_tmp_regr = dir_tmp + 'country_fraction_' + iso_alpha3 + '_Hansen_regr_FOREST_tmp.nc'
    if os.path.exists(fname_tmp):       os.remove(fname_tmp)
    if os.path.exists(fname_tmp_regr):  os.remove(fname_tmp_regr)

    try:

        #Save country fraction file in NetCDF
        mask_ISO.to_netcdf(fname_tmp)

        #Conservatively regrid country fraction to grid of forest mask.
        #An argument list is used because the ISO code comes from an Excel file and
        #must not be interpreted by a shell. The exit status is deliberately not
        #enforced here: a missing output file still fails when it is opened below.
        subprocess.run(['cdo', '-s', 'remapcon,' + file_grid, fname_tmp, fname_tmp_regr])

        #Read regridded country fraction file (closed again before it is deleted)
        with xr.open_dataset(fname_tmp_regr) as mask_ISO_regr:

            #Perform calculation (if country fraction is not 0 everywhere)
            if len(mask_ISO_regr.data_vars)!=0:

                country_fraction = mask_ISO_regr.country_fraction

                #Bring country fraction onto exactly the same coordinates as the forest
                #area. xarray only combines cells with identical coordinates, so small
                #deviations would otherwise silently drop cells from the sums.
                dev_lat = np.max(np.abs(country_fraction[lat_name].values - data_for_area[lat_name].values))
                dev_lon = np.max(np.abs(country_fraction[lon_name].values - data_for_area[lon_name].values))
                if dev_lat>0.001 or dev_lon>0.001:  sys.exit('Coordinates do not agree')
                if (dev_lat!=0) or (dev_lon!=0):
                    country_fraction = country_fraction.reindex({lat_name: data_for_area[lat_name], lon_name: data_for_area[lon_name]}, method='nearest')

                #Create weighting factor (= fraction of country / land fraction)
                weights = country_fraction / data_landarea_ISO.land_fraction

                #Get sum of forest area in selected country for every selection
                for selection in forest_sel:
                    data_sel = (data_forest_sel[selection] * weights).sum((lat_name, lon_name))
                    FOREST_by_sel[selection][iso_alpha3] = data_sel.values

            else:

                #Set area to 0 if country is too small
                for selection in forest_sel:
                    FOREST_by_sel[selection][iso_alpha3] = 0

    finally:

        #Remove temporary files (also if this iteration failed)
        for fname_old in (fname_tmp, fname_tmp_regr):
            if os.path.exists(fname_old): os.remove(fname_old)

#Kept for backward compatibility: country values of the last selection
FOREST_ctrs = FOREST_by_sel[forest_sel[-1]]

#Convert data to data frame: one row per selection, columns sorted by ISO alpha-3 code
FOREST_coll = pd.concat([pd.DataFrame(FOREST_by_sel[selection], index=[selection]) for selection in forest_sel], axis=0)
FOREST_coll = FOREST_coll.reindex(sorted(FOREST_coll.columns), axis=1)

#Add units in first cell
FOREST_coll = FOREST_coll.rename_axis('unit: m2')

#Write xlsx-file only after all calculations succeeded
with pd.ExcelWriter(fname_out) as writer:
    FOREST_coll.to_excel(writer, index=True, header=True, float_format='%.2f')

#Remove temporarily regridded land area file (close it first, it is still open)
data_landarea_ISO.close()
os.remove(fname_landarea_ISO_regr)
