In [2]:
import h5py
import iris
import numpy as np
import xarray as xr
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm_notebook as tqdm
plt.rcParams['mathtext.default'] = 'regular'

From GFED README:

'The emissions group contains, for 12 months (01,02,..,12), 5 datasets: emissions (carbon with units of g C m-2 month-1 and DM with units of kg DM m-2 month-1), small_fire_fraction (unitless), daily fraction (unitless), diurnal cycle (unitless), as well as a group partitioning which contains, for both C and DM, the datasets (all unitless):
- SAVA (Savanna, grassland and shrubland fires)
- BORF (Boreal forest fires)
- TEMF (Temperature forest fires)
- DEFO (Tropical forest fires [deforestation and degradation])
- PEAT (Peat fires)
- AGRI (Agricultural waste burning)'

Burdned area group contains two variables: burned_fraction and source. burned_fraction is the fraction of each grid cell that burned in that month according to the GFED4s burned area data, burned_area_source indicates what data was used to construct the burned area maps excluding small fires'.

In [3]:
# Read data
path_to_gfed = Path('../../external_data/GFED')
path_to_ukca = Path('../data')
ukca = iris.load_cube(str(path_to_ukca / 'xmeka' / 'xmeka_o3.nc'))

In [4]:
# Extract data on total dry matter emissions and source partitioning
group_name = 'emissions'
main_var = 'DM'
auxi_vars = ['AGRI', 'BORF', 'DEFO', 'PEAT', 'SAVA', 'TEMF', ]

monthly_dict = {}
monthly_dict[main_var.lower()] = []
for auxi_var in auxi_vars:
    monthly_dict[f'{main_var}_{auxi_var}'.lower()] = []

for fpath in tqdm(sorted(path_to_gfed.glob('*hdf5')), desc='files'):
    arr_dict = {}
    arr_dict[main_var.lower()] = []
    for auxi_var in auxi_vars:
        arr_dict[f'{main_var}_{auxi_var}'.lower()] = []

    with h5py.File(fpath, 'r') as f:
        lons = f['lon'].value[0, :]
        lats = f['lat'].value[:, 0]
        grid_cell_area = f['/ancill/grid_cell_area'].value
        xr_coord_dims = dict(coords=dict(lon=lons, lat=lats), dims=('lat', 'lon'))

        for month in f[group_name].keys():
            # Read dry matter monthly emissions
            arr_dict[main_var.lower()].append(xr.DataArray(f[group_name][month][main_var].value, 
                                                   **xr_coord_dims))
            # Read monthly data on the fraction the specific source contributes to dry matter emissions
            for auxi_var in auxi_vars:
                arr_dict[f'{main_var}_{auxi_var}'.lower()].append(xr.DataArray(f[group_name][month]['partitioning'][f'{main_var}_{auxi_var}'].value,
                                                       **xr_coord_dims))
        for key, val in arr_dict.items():
            monthly_dict[key].append(xr.concat(val, dim='month'))

    yearly_dict = {}
    for key, val in monthly_dict.items():
        yearly_dict[key] = xr.concat(val, dim='year')
        
# Combine all data into an xarray dataset
gfed = xr.Dataset(yearly_dict)
gfed['month'] = np.arange(1,13,1)
gfed['year'] = np.arange(1997, 2017, 1)

HBox(children=(IntProgress(value=0, description='files', max=20, style=ProgressStyle(description_width='initia…




In [6]:
gfed_multiyear_monthly_mean = gfed.mean(dim='year')
gfed_multiyear_monthly_mean.rename({'month':'time'}, inplace=True)
gfed_multiyear_monthly_mean.rename({'lat':'latitude'}, inplace=True)
gfed_multiyear_monthly_mean.rename({'lon':'longitude'}, inplace=True)
# FutureWarning: The inplace argument has been deprecated and will be removed in xarray 0.12.0.

  


<xarray.Dataset>
Dimensions:  (lat: 720, lon: 1440, time: 12)
Coordinates:
  * lon      (lon) float32 -179.875 -179.625 -179.375 ... 179.625 179.875
  * lat      (lat) float32 89.875 89.625 89.375 ... -89.375 -89.625 -89.875
  * time     (time) int64 1 2 3 4 5 6 7 8 9 10 11 12
Data variables:
    dm       (time, lat, lon) float32 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
    dm_agri  (time, lat, lon) float32 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
    dm_borf  (time, lat, lon) float32 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
    dm_defo  (time, lat, lon) float32 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
    dm_peat  (time, lat, lon) float32 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
    dm_sava  (time, lat, lon) float32 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
    dm_temf  (time, lat, lon) float32 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0

In [41]:
gfed_multiyear_monthly_mean.dm.attrs['long_name'] = 'dry matter emissions'
gfed_multiyear_monthly_mean.dm.attrs['units'] = 'kg/m2/month'
gfed_multiyear_monthly_mean.dm_agri.attrs['long_name'] = 'fractional contribution of agricultural waste burning to dry matter emissions'
gfed_multiyear_monthly_mean.dm_borf.attrs['long_name'] = 'fractional contribution of boreal forest fires to dry matter emissions'
gfed_multiyear_monthly_mean.dm_defo.attrs['long_name'] = 'fractional contribution of deforestation and degradation to dry matter emissions'
gfed_multiyear_monthly_mean.dm_peat.attrs['long_name'] = 'fractional contribution of peatland fires to dry matter emissions'
gfed_multiyear_monthly_mean.dm_sava.attrs['long_name'] = 'fractional contribution of savanna, grassland and shrubland fires to dry matter emissions'
gfed_multiyear_monthly_mean.dm_temf.attrs['long_name'] = 'fractional contribution of temperate forest fires to dry matter emissions'
gfed_multiyear_monthly_mean.dm_agri.attrs['units'] = '1'
gfed_multiyear_monthly_mean.dm_borf.attrs['units'] = '1'
gfed_multiyear_monthly_mean.dm_defo.attrs['units'] = '1'
gfed_multiyear_monthly_mean.dm_peat.attrs['units'] = '1'
gfed_multiyear_monthly_mean.dm_sava.attrs['units'] = '1'
gfed_multiyear_monthly_mean.dm_temf.attrs['units'] = '1'

In [42]:
gfed_multiyear_monthly_mean['grid_cell_area'] = (('lat', 'lon'), grid_cell_area)
gfed_multiyear_monthly_mean.grid_cell_area.attrs['long_name'] = 'grid cell area'
gfed_multiyear_monthly_mean.grid_cell_area.attrs['units'] = 'm2'

In [43]:
gfed_multiyear_monthly_mean.to_netcdf(path=path_to_gfed / 'GFED4.1s_1997-2016_multiyear_monthly_mean_dm_emissions.nc', mode='w', format='NETCDF4')