This notebook is similar to the previous one, but it builds a dataset containing only a single year of data (the monthly fields together with their yearly means) and saves it to a NetCDF file.

In [1]:
import pickle
from datetime import datetime as DT

import numpy as np
from numpy import testing as npt
from tqdm import tqdm_notebook
from netCDF4 import Dataset as ds
import xarray as xr

In [2]:
# Load the dictionary of SMI file listings prepared earlier.
# NOTE: pickle.load can execute arbitrary code -- only open pickles that
# were produced by a trusted source.
with open('../PklJar/smi_dicts_2018_9km.pkl', 'rb') as pkl_file:
    files_dict = pickle.load(pkl_file)

In [3]:
# List the keys available in the loaded dictionary.
files_dict.keys()

dict_keys(['aqua_chl_mc', 'aqua_chl_mo', 'aqua_sst_mc', 'aqua_sst_mo', 'aqua_bbp_mc', 'aqua_bbp_mo'])

In [4]:
def get_nc_data(file, prod):
    """Read one product and its lat/lon variables from a netCDF file.

    Parameters
    ----------
    file : path of the netCDF file to open.
    prod : name of the variable to read.

    Returns
    -------
    tuple ``(data_prod, lat, lon)`` holding the fully sliced contents of
    the ``prod``, ``'lat'`` and ``'lon'`` variables.
    """
    with ds(file) as nc_handle:
        # Slice with [:] while the file is still open so the values are
        # read before the handle is closed.
        grid, latitudes, longitudes = (
            nc_handle[var_name][:] for var_name in (prod, 'lat', 'lon')
        )
    return grid, latitudes, longitudes
        
            
def get_monthlies(data_dict, prod='chlor_a', **kwargs):
    """Stack one product's monthly grids into a single (time, lat, lon) array.

    Parameters
    ----------
    data_dict : dict
        Maps a month label to the netCDF file holding that month's data.
        Labels are parsed with the ``'%b'`` format, so they are presumably
        abbreviated month names such as ``'Jan'`` -- verify against the
        pickle contents.
    prod : str
        Name of the variable to read from every file.
    **kwargs
        lat_dim, lon_dim : int
            Grid size used to allocate the output (defaults 2160 x 4320).
        time_dim : int
            Number of time slices to allocate (default: number of entries
            in ``data_dict``). A value different from the number of months
            yields a data array whose length disagrees with ``'time'``.
        year : int
            Year stamped on the time coordinate. The default is 2016, so
            pass it explicitly for any other year, and pass the same value
            for every product that will share a dataset.

    Returns
    -------
    dict
        ``{prod: masked array, 'lat': ..., 'lon': ..., 'time': datetime64[ns]}``;
        ``lat`` and ``lon`` are taken from the last file read.

    Raises
    ------
    ValueError
        If ``data_dict`` has no entries (there would be no lat/lon to return).
    """
    # Materialise the keys once: they are iterated twice below.
    months = list(data_dict.keys())
    if not months:
        raise ValueError('data_dict is empty: no monthly files to read')

    lat_dim = kwargs.pop('lat_dim', 2160)
    lon_dim = kwargs.pop('lon_dim', 4320)
    time_dim = kwargs.pop('time_dim', len(months))
    year = kwargs.pop('year', 2016)

    # Masked container so each month's mask is kept when it is assigned.
    data_prod = np.ma.zeros((time_dim, lat_dim, lon_dim))
    for i, month in enumerate(tqdm_notebook(months)):
        data_prod[i], lat, lon = get_nc_data(data_dict[month], prod)

    # One timestamp per month: first day of that month in `year`.
    time_array = np.array([DT.strptime('%s-%d' % (month, year), '%b-%Y')
                           for month in months],
                          dtype='datetime64[ns]')
    return {prod: data_prod, 'lat': lat,
            'lon': lon, 'time': time_array}


def make_dataarray(data_dict, prod='chlor_a', **kwargs):
    """Wrap one product from ``data_dict`` in a named xarray DataArray.

    Parameters
    ----------
    data_dict : dict holding the product array under ``prod`` and one
        entry per coordinate name.
    prod : key of the data array inside ``data_dict``.
    **kwargs
        units  : stored in ``attrs['units']`` when truthy (default None).
        name   : name given to the DataArray (default: ``prod``).
        coords : ordered coordinate/dimension names
                 (default ``['time', 'lat', 'lon']``).

    Returns
    -------
    The constructed DataArray.
    """
    units = kwargs.pop('units', None)
    name = kwargs.pop('name', prod)
    coords = kwargs.pop('coords', ['time', 'lat', 'lon'])

    # Coordinate values are looked up by name; the same names, in the same
    # order, label the dimensions.
    coord_values = [data_dict[coord_name] for coord_name in coords]
    dim_names = list(coords)
    data_array = xr.DataArray(data_dict[prod],
                              coords=coord_values,
                              dims=dim_names)
    if units:
        data_array.attrs['units'] = units
    data_array.name = name
    return data_array


def make_dataset(*data_array_list):
    """Combine DataArrays into one xarray Dataset keyed by each array's name.

    Every positional argument must expose a ``name`` attribute; when two
    arrays share a name, the later one replaces the earlier one.
    """
    named_arrays = {}
    for array in data_array_list:
        named_arrays[array.name] = array
    return xr.Dataset(named_arrays)

In [5]:
current_year = 2018
# make dictionary of monthlies
# NOTE: pass year=current_year for every product. get_monthlies defaults to
# 2016, and products stamped with different years no longer share a time
# axis once they are combined into a single dataset.
#viirs_chl_dict = get_monthlies(files_dict['viirs_chl_mo'], year=current_year)
aqua_sst_dict = get_monthlies(files_dict['aqua_sst_mo'], prod='sst', year=current_year)
aqua_chl_dict = get_monthlies(files_dict['aqua_chl_mo'], year=current_year)
aqua_bbp_dict = get_monthlies(files_dict['aqua_bbp_mo'], prod='bbp_443_giop',
                              year=current_year)

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12), HTML(value='')))




In [8]:
# Wrap each product's monthly stack in a named DataArray (default
# time/lat/lon coordinates) and attach its units.
xr_aqua_sst_da = make_dataarray(aqua_sst_dict, prod='sst', units='deg C')
#xr_vi_chl_da = make_dataarray(viirs_chl_dict, prod='chlor_a', units='mg m^-3')
xr_aqua_chl_da = make_dataarray(aqua_chl_dict, prod='chlor_a', units='mg m^-3')
xr_aqua_bbp_da = make_dataarray(aqua_bbp_dict, prod='bbp_443_giop', units='m^-1')

In [9]:
# Compute the yearly mean of each product by averaging over the 'time'
# dimension.
xr_aqua_sst_time_mean = xr_aqua_sst_da.mean(dim='time')
#xr_vi_chl_time_mean = xr_vi_chl_da.mean(dim='time')
xr_aqua_chl_time_mean = xr_aqua_chl_da.mean(dim='time')
xr_aqua_bbp_time_mean = xr_aqua_bbp_da.mean(dim='time')

In [10]:
# Give each yearly mean its own name (distinct from the monthly arrays, so
# both can live in one dataset) and set its units.
# NOTE(review): units are set again here, presumably because .mean() does
# not carry attrs over by default -- confirm.
#xr_vi_chl_time_mean.name='year_avg_viirs_chl'
#xr_vi_chl_time_mean.attrs['units'] = 'mg m^-3'
xr_aqua_chl_time_mean.name='year_avg_aqua_chl'
xr_aqua_chl_time_mean.attrs['units'] = 'mg m^-3'
xr_aqua_sst_time_mean.name='year_avg_aqua_sst'
xr_aqua_sst_time_mean.attrs['units'] = 'deg C'
xr_aqua_bbp_time_mean.name='year_avg_aqua_bbp_443'
xr_aqua_bbp_time_mean.attrs['units'] = 'm^-1'

In [11]:
# Aggregate the monthly arrays and their yearly means into one dataset;
# each array is stored under its .name.
xr_ds = make_dataset(xr_aqua_sst_da, #xr_vi_chl_da,
                     xr_aqua_chl_da, #xr_vi_chl_time_mean,
                     xr_aqua_bbp_da, xr_aqua_bbp_time_mean,
                     xr_aqua_sst_time_mean, xr_aqua_chl_time_mean)

In [12]:
# save
# Use a compact timestamp in the file name: str(DT.now()) contains a space,
# colons and microseconds, which are awkward in shells and not valid in
# Windows file names.
save_stamp = DT.now().strftime('%Y%m%dT%H%M%S')
xr_ds.to_netcdf('./xr_sst_chl_%s.nc' % save_stamp)