In a previous notebook I prepared and pickled dictionaries keyed by month and containing file paths for sst/chl monthlies/climatology for aqua and chl monthlies for viirs. Here I 
- load all files, 
- compute aqua chl anomalies, aqua sst anomalies, and viirs chl anomalies (the viirs anomalies are computed against the aqua chl climatology),
- stick these in xarray [dataarrays](http://xarray.pydata.org/en/stable/data-structures.html#dataarray),
- put all anomaly dataarrays in an xarray [dataset](http://xarray.pydata.org/en/stable/data-structures.html#dataset),
- save it to netcdf.

In [1]:
import pickle
from datetime import datetime as DT

from netCDF4 import Dataset as ds
import numpy as np
from numpy import testing as npt
from tqdm import tqdm_notebook
import xarray as xr
from IPython.core.display import HTML, display

In [2]:
# Year passed to get_monthly_anoms (as `year`) to build the time coordinate.
current_year = 2018

In [3]:
def make_dataarray(data_dict, prod='chlor_a', **kwargs):
    """Wrap one entry of ``data_dict`` in a named xarray DataArray.

    ``data_dict[prod]`` supplies the values. Recognised keyword arguments:

    coords : list of keys of ``data_dict`` holding the coordinate values, in
        dimension order; the same keys are used as dimension names
        (default ``['time', 'lat', 'lon']``).
    name : name given to the DataArray (default: ``prod``).
    units : stored in ``attrs['units']`` when truthy (default: None).
    """
    dim_names = kwargs.pop('coords', ['time', 'lat', 'lon'])
    array_name = kwargs.pop('name', prod)
    unit_label = kwargs.pop('units', None)

    coord_values = [data_dict[dim] for dim in dim_names]
    xr_data = xr.DataArray(data_dict[prod],
                           coords=coord_values,
                           dims=list(dim_names))
    # Only annotate units when a non-empty value was supplied.
    if unit_label:
        xr_data.attrs['units'] = unit_label
    xr_data.name = array_name
    return xr_data


def make_dataset(*data_array_list):
    """Build an xarray Dataset from DataArrays, keyed by each array's ``name``.

    Every DataArray passed in needs its ``name`` attribute set; arrays that
    share a name overwrite one another (the last one wins).
    """
    named_arrays = {}
    for data_array in data_array_list:
        named_arrays[data_array.name] = data_array
    return xr.Dataset(named_arrays)

 
def make_anom_sign(chl_an_avg, sst_an_avg):
    """
    Returns the sign-relation array of a chl and an sst anomaly field:
    0: both are neg
    1: neg chl, pos sst
    2: pos chl, neg sst
    3: pos chl, pos sst
    Any value that is not < 0 (zero included) is coded as positive.
    Cells masked in either input are masked in the result.
    """
    # chl contributes 0/2 and sst contributes 0/1, so the sum is the code above.
    chl_code = np.ma.where(chl_an_avg < 0, 0, 2).astype('int')
    sst_code = np.ma.where(sst_an_avg < 0, 0, 1).astype('int')
    return chl_code + sst_code

    
def get_monthly_anoms(mc_dict, mo_dict, prod='chlor_a',
                      verbose=False, **kwargs):
    """Make anomalies from monthlies and climatology.

    For every month key of ``mo_dict`` the variable ``prod`` is read from the
    monthly file and from the climatology file stored under the same key in
    ``mc_dict``. The anomaly is ``monthly - climatology``; for
    ``prod == 'chlor_a'`` it is expressed as a percentage of the climatology.

    Parameters
    ----------
    mc_dict : dict
        Month abbreviation ('Jan', 'Feb', ...) -> climatology file.
    mo_dict : dict
        Month abbreviation -> monthly file. Its keys drive the processing
        order and the time coordinate.
    prod : str
        Name of the variable to read from both files.
    verbose : bool
        Print the month and both file names before they are read.
    **kwargs
        lat_dim, lon_dim : grid size of the output (defaults 2160, 4320).
        time_dim : number of time slabs to allocate. Defaults to
            ``len(mo_dict)`` so the array matches the returned time
            coordinate; a larger explicit value leaves trailing zero slabs.
        year : year used to build the time coordinate (default 2016).

    Returns
    -------
    dict
        ``'<prod>_anom'`` (masked array, time x lat x lon), ``'lat'``,
        ``'lon'`` (taken from the last climatology file read) and ``'time'``
        (datetime64[ns] array, first day of each month).

    Raises
    ------
    ValueError
        If ``mo_dict`` is empty.
    KeyError
        If a month of ``mo_dict`` is missing from ``mc_dict``.
    AssertionError
        If lat/lon of a monthly file and its climatology differ.
    """
    # Snapshot the keys once so the data slabs and the time coordinate are
    # built from the same ordered sequence.
    months = list(mo_dict.keys())
    if not months:
        # Otherwise the return statement fails with an UnboundLocalError
        # on lat_mc, which hides the real cause.
        raise ValueError('mo_dict is empty: no monthly files to compute anomalies from')
    # process kwargs
    lat_dim = kwargs.pop('lat_dim', 2160)
    lon_dim = kwargs.pop('lon_dim', 4320)
    time_dim = kwargs.pop('time_dim', len(months))
    year = kwargs.pop('year', 2016)
    # setup data structures
    anom_array = np.ma.zeros((time_dim, lat_dim, lon_dim))
    for i, month in enumerate(tqdm_notebook(months)):
        if verbose:
            print(f'processing {month}...')
            print(f'processing mc: {mc_dict[month]}')
            print(f'processing mo: {mo_dict[month]}')
        with ds(mo_dict[month]) as mo_ds:
            mo_prod = mo_ds[prod][:]
            lat_mo = mo_ds['lat'][:]
            lon_mo = mo_ds['lon'][:]
        with ds(mc_dict[month]) as mc_ds:
            mc_prod = mc_ds[prod][:]
            lat_mc = mc_ds['lat'][:]
            lon_mc = mc_ds['lon'][:]
        # check lat/lon correspondence
        npt.assert_array_equal(lat_mo, lat_mc)
        npt.assert_array_equal(lon_mo, lon_mc)
        # compute anomalies
        anom_array[i] = mo_prod - mc_prod
        if prod == 'chlor_a':
            # chlorophyll anomalies are reported in percent of the climatology
            anom_array[i] = anom_array[i] * 100 / mc_prod
    # convert time data into array of datetime objects
    time_array = np.array([DT.strptime('%s-%d' % (month, year), '%b-%Y') for month in months],
                          dtype='datetime64[ns]')
    return {'%s_anom' % prod: anom_array,
            'lat': lat_mc, 'lon': lon_mc,
            'time': time_array,
           }

In [4]:
# Widen the notebook container to 90% of the browser window.
display(HTML("<style>.container {width: 90% !important}</style>"))

<u>Load serialized dictionary from previous notebook</u>

In [5]:
# NOTE: pickle.load can execute arbitrary code; only load a pickle you produced yourself.
with open('../PklJar/smi_dicts_2018_9km.pkl', 'rb') as fp:
    files_dict = pickle.load(fp)

In [6]:
# Display the loaded dictionary of file paths.
files_dict

{'aqua_chl_mc': {'Jan': PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/chlor_a/A20030012011031.L3m_MC_CHL_chlor_a_9km.nc'),
  'Feb': PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/chlor_a/A20030322011059.L3m_MC_CHL_chlor_a_9km.nc'),
  'Mar': PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/chlor_a/A20030602011090.L3m_MC_CHL_chlor_a_9km.nc'),
  'Apr': PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/chlor_a/A20030912011120.L3m_MC_CHL_chlor_a_9km.nc'),
  'May': PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/chlor_a/A20031212011151.L3m_MC_CHL_chlor_a_9km.nc'),
  'Jun': PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/chlor_a/A20031522011181.L3m_MC_CHL_chlor_a_9km.nc'),
  'Jul': PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/chlor_a/A20031822011212.L3m_MC_CHL_chlor_a_9km.nc'),
  'Aug': PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/chlor_a/A20032132011243.L3m_MC_CHL_chlor_a_9km.nc'),
  'Sep': PosixPath('/accounts/ekarakoy/DA

<u>Compute anomalies and store</u>

Anomaly data is stored in dictionaries of dictionaries with sensor (aqua/viirs) as higher key, and 
 - '[product]_anom', 
 - 'lat', 
 - 'lon', 
 - 'time' 
 
 as lower keys.

In [8]:
# Aqua chlorophyll anomalies; one time slab per available monthly file.
chl_anom_dict = {'viirs': {}, 'aqua': {}}
months_available = len(files_dict['aqua_chl_mo'])
chl_anom_dict['aqua'] = get_monthly_anoms(
    files_dict['aqua_chl_mc'],
    files_dict['aqua_chl_mo'],
    year=current_year,
    time_dim=months_available,
)

HBox(children=(IntProgress(value=0, max=9), HTML(value='')))




In [26]:
# VIIRS chlorophyll anomalies are computed against the Aqua chlorophyll
# climatology ('aqua_chl_mc').
months_available = len(files_dict['viirs_chl_mo'])
chl_anom_dict['viirs'] = get_monthly_anoms(
    files_dict['aqua_chl_mc'],
    files_dict['viirs_chl_mo'],
    year=current_year,
    time_dim=months_available,
)

HBox(children=(IntProgress(value=0, max=9), HTML(value='')))

In [27]:
# Aqua SST anomalies (plain difference, no percent scaling since prod != 'chlor_a').
months_available = len(files_dict['aqua_sst_mo'])
sst_anom_dict = get_monthly_anoms(
    files_dict['aqua_sst_mc'],
    files_dict['aqua_sst_mo'],
    prod='sst',
    year=current_year,
    time_dim=months_available,
)

HBox(children=(IntProgress(value=0, max=9), HTML(value='')))

<u>Consistency tests</u>

In [28]:
# SST and Aqua chlorophyll anomalies must share the same time coordinate.
npt.assert_array_equal(sst_anom_dict['time'], chl_anom_dict['aqua']['time'])

In [29]:
# SST and Aqua chlorophyll anomalies must share the same latitudes.
npt.assert_array_equal(sst_anom_dict['lat'], chl_anom_dict['aqua']['lat'])

In [30]:
# SST and Aqua chlorophyll anomalies must share the same longitudes.
# (Comparing sst_anom_dict['lon'] with itself would always pass.)
npt.assert_array_equal(sst_anom_dict['lon'], chl_anom_dict['aqua']['lon'])

<u>Stick anomalies in xarray [dataarrays](http://xarray.pydata.org/en/stable/data-structures.html#dataarray) and dataarrays into an xarray [dataset](http://xarray.pydata.org/en/stable/data-structures.html#dataset)</u>

In [31]:
# Shape of the Aqua chlorophyll anomaly array: (time, lat, lon).
chl_anom_dict['aqua']['chlor_a_anom'].shape

(9, 2160, 4320)

In [32]:
# Each DataArray is named after `prod`, so the Aqua and VIIRS chlorophyll arrays
# share the name 'chlor_a_anom'; they are told apart by their Dataset keys.
xr_aqua_chl_anom = make_dataarray(chl_anom_dict['aqua'], prod='chlor_a_anom', units='%')
xr_viirs_chl_anom = make_dataarray(chl_anom_dict['viirs'], prod='chlor_a_anom', units='%')
xr_sst_anom = make_dataarray(sst_anom_dict, prod='sst_anom', units='deg C')

In [33]:
# Explicit keys are required: both chlorophyll DataArrays carry the same name.
ds_anom = xr.Dataset(dict(
    aqua_chl_anom=xr_aqua_chl_anom,
    viirs_chl_anom=xr_viirs_chl_anom,
    aqua_sst_anom=xr_sst_anom,
))

Compute the mean over the available months of the year and add it to the dataset

In [34]:
# Average each anomaly over the time dimension (the first axis of every array).
ds_anom['time_avg_aqua_chl_anom'] = ds_anom['aqua_chl_anom'].mean(dim='time')
ds_anom['time_avg_viirs_chl_anom'] = ds_anom['viirs_chl_anom'].mean(dim='time')
ds_anom['time_avg_sst_anom'] = ds_anom['aqua_sst_anom'].mean(dim='time')

Compute SST/CHL sign relation


In [35]:
# Time means taken on the numpy masked arrays (axis 0 is time), as input to make_anom_sign.
aqua_chl_anom_mean = chl_anom_dict['aqua']['chlor_a_anom'].mean(axis=0)
viirs_chl_anom_mean = chl_anom_dict['viirs']['chlor_a_anom'].mean(axis=0)
sst_anom_mean = sst_anom_dict['sst_anom'].mean(axis=0)

In [36]:
# Sign-relation codes (0-3, see make_anom_sign); both sensors are paired with the Aqua SST mean.
aqua_chl_sst_anom_sign = make_anom_sign(aqua_chl_anom_mean, sst_anom_mean)
viirs_chl_sst_anom_sign = make_anom_sign(viirs_chl_anom_mean, sst_anom_mean)

In [37]:
# The sign maps are 2-D (lat, lon), so the default 'time' coordinate is dropped.
aqua_chl_sst_anom_dict = {
    'aqua_chl_sst_anom_sign': aqua_chl_sst_anom_sign,
    'lat': chl_anom_dict['aqua']['lat'],
    'lon': chl_anom_dict['aqua']['lon'],
}
xr_aqua_chl_sst_anom_sign = make_dataarray(
    aqua_chl_sst_anom_dict,
    prod='aqua_chl_sst_anom_sign',
    coords=['lat', 'lon'],
)

viirs_chl_sst_anom_dict = {
    'viirs_chl_sst_anom_sign': viirs_chl_sst_anom_sign,
    'lat': chl_anom_dict['viirs']['lat'],
    'lon': chl_anom_dict['viirs']['lon'],
}
xr_viirs_chl_sst_anom_sign = make_dataarray(
    viirs_chl_sst_anom_dict,
    prod='viirs_chl_sst_anom_sign',
    coords=['lat', 'lon'],
)

In [38]:
# Add the two sign-relation maps to the dataset.
ds_anom['aqua_chl_sst_anom_sign'] = xr_aqua_chl_sst_anom_sign
ds_anom['viirs_chl_sst_anom_sign'] = xr_viirs_chl_sst_anom_sign

Inspect dataset

In [39]:
# Display the dataset summary (dimensions, coordinates, variables).
ds_anom

<xarray.Dataset>
Dimensions:                  (lat: 2160, lon: 4320, time: 9)
Coordinates:
  * time                     (time) datetime64[ns] 2018-01-01 2018-02-01 ...
  * lat                      (lat) float32 89.958336 89.875 89.79167 ...
  * lon                      (lon) float32 -179.95833 -179.875 -179.79166 ...
Data variables:
    aqua_chl_anom            (time, lat, lon) float64 nan nan nan nan nan ...
    viirs_chl_anom           (time, lat, lon) float64 nan nan nan nan nan ...
    aqua_sst_anom            (time, lat, lon) float64 nan nan nan nan nan ...
    time_avg_aqua_chl_anom   (lat, lon) float64 nan nan nan nan nan nan nan ...
    time_avg_viirs_chl_anom  (lat, lon) float64 nan nan nan nan nan nan nan ...
    time_avg_sst_anom        (lat, lon) float64 nan nan nan nan nan nan nan ...
    aqua_chl_sst_anom_sign   (lat, lon) float64 nan nan nan nan nan nan nan ...
    viirs_chl_sst_anom_sign  (lat, lon) float64 nan nan nan nan nan nan nan ...

Save to netCDF4

In [40]:
# Derive the output name from current_year so it cannot drift from the year
# the anomalies were computed for (yields './xr_anom_2018.nc' for 2018).
ds_anom.to_netcdf(f'./xr_anom_{current_year}.nc')