## Preprocess OISST data into seasonal means using the seasons defined in Danielson et al. 2020
Build monthly means for each year of available OISST data.

In [1]:
import warnings
warnings.filterwarnings('ignore')
from glob import glob

## Set input paths

In [2]:
# Root directory of the NCEI OISST (AVHRR-only, v2.1) archive
base = '/uda/NCEI_OISST/AVHRR-only/v2.1/'
# Sub-directories directly under the root (the trailing '/' in the pattern
# restricts the match to directories). glob returns matches in arbitrary
# order, so sort them to process the directories in a reproducible order.
dir_yrs = sorted(glob(f'{base}*/'))

In [None]:
# Scratch cell: group DS_sst by whether each time stamp's month is 10, 11 or
# 12, average each group, and keep the group for which that test is True.
# NOTE(review): DS_sst is not defined in any visible cell - confirm where it
# comes from before running this.
(
    DS_sst
    .groupby(DS_sst.time.dt.month.isin((10, 11, 12)))
    .mean()
    .sel(month=True)
)

## Preprocess algorithm

In [3]:
def preproc_OISST_to_monthly(in_dir, out_dir='/work/Marion.Alberty/data/OISST_pp/'):
    """Write one netCDF file of monthly-mean SST per year directory.

    For every directory in ``in_dir`` all ``*.nc`` files are opened as a
    single dataset, the ``sst`` variable is averaged by calendar month, and
    the result is written to
    ``<out_dir>/NCEI_OISST_monthly_SST_<year>.nc``.

    Parameters
    ----------
    in_dir : str or sequence of str
        Year directory, or a sequence of year directories. The last four
        characters of each directory name are used as the year label in the
        output file name. A trailing path separator is optional.
    out_dir : str, optional
        Directory that receives the output files; created if it does not
        exist. Defaults to the location used originally in this notebook.

    Notes
    -----
    Directories that contain no ``*.nc`` files are skipped with a printed
    message so that one empty directory does not abort the whole batch.
    Relies on ``glob`` being imported at file level.
    """
    # import pkgs
    import os
    import xarray as xr
    from dask.diagnostics import ProgressBar

    # Accept a single directory as well as a sequence of directories;
    # iterating a bare string would otherwise walk it character by character.
    if isinstance(in_dir, (str, os.PathLike)):
        in_dir = [in_dir]

    os.makedirs(out_dir, exist_ok=True)

    # Time average and write data for each year
    for yr_dir in in_dir:
        yr_dir = os.fspath(yr_dir)

        # Year label: last four characters of the directory name. normpath
        # strips any trailing separator, so '.../1981/' and '.../1981' agree.
        year = os.path.basename(os.path.normpath(yr_dir))[-4:]

        # Paths for this year directory, sorted for a reproducible order
        flist = sorted(glob(os.path.join(yr_dir, '*.nc')))
        if not flist:
            print(f'No netCDF files found in {yr_dir}; skipping.')
            continue

        # The dataset is opened as a context manager so its files are
        # closed before moving on to the next year.
        with ProgressBar(), xr.open_mfdataset(
                flist, use_cftime=True, engine='netcdf4') as ds_year:
            # Drop length-1 dimensions, rechunk along time, keep only SST
            sst = ds_year.squeeze().chunk({'time': 25})['sst']

            # Time coordinate of the output: mean time stamp of each month
            time = sst.time.groupby('time.month').mean().values

            # Monthly average, re-indexed by the mean time stamps instead of
            # the integer month number
            monthly = sst.groupby('time.month').mean().chunk(
                {'lat': 721, 'lon': 1440, 'month': 12}).assign_coords(
                {"time": ("month", time)}).swap_dims({'month': 'time'})

            # Output to netcdf (this is where the lazy computation runs)
            monthly.to_netcdf(
                os.path.join(out_dir, f'NCEI_OISST_monthly_SST_{year}.nc'))

## Set up DASK cluster
In the Dask JupyterLab extension, click +NEW to create a cluster. Click the < > widget, scale the cluster to 16 workers, and then run the cell below.

## Process and write variables

In [4]:
# Build and write the monthly-mean SST file for every year directory found
preproc_OISST_to_monthly(in_dir=dir_yrs)

[########################################] | 100% Completed |  5.5s
[########################################] | 100% Completed | 21.2s
[########################################] | 100% Completed | 17.9s
[########################################] | 100% Completed | 16.7s
[########################################] | 100% Completed | 17.1s
[########################################] | 100% Completed | 17.1s
[########################################] | 100% Completed | 20.1s
[########################################] | 100% Completed | 20.3s
[########################################] | 100% Completed | 21.6s
[########################################] | 100% Completed | 19.2s
[########################################] | 100% Completed | 20.5s
[########################################] | 100% Completed | 26.5s
[########################################] | 100% Completed | 22.7s
[########################################] | 100% Completed | 21.5s
[########################################] | 100