This notebook shows how to put the data together into a coherent structure before creating the CHL, CHL anomaly, SST anomaly and CHL-SST anomaly sign relation maps. The specific operations described here include:
    * Build lists containing the full paths of the NC files; one list per sensor (aqua/viirs) and type (monthly/climatology)
    * Build dictionaries of NC files keyed by month
    * Collect all dictionaries in a single dictionary
    * Pickle (serialize) the dictionary into a binary file

In [5]:
import pathlib
import re
from datetime import datetime as dt
import pickle
from IPython.core.display import HTML, display

In [6]:
# Ask IPython to render matplotlib figures inline, below the cell that creates them.
%matplotlib inline
# Inject a CSS rule that widens elements of class `container` to 90% of the
# browser window, so long output lines (e.g. the file paths printed in the
# cells below) wrap less.
# NOTE(review): newer IPython releases expose HTML/display via `IPython.display`;
# the `IPython.core.display` import used at the top may emit a deprecation warning — confirm.
display(HTML("<style>.container {width: 90% !important}</style>"))

In [30]:
# HELPER FUNCTIONS

def get_string(res, return_str=True):
    """Reformat year + day-of-year stamps as abbreviated month, day and year.

    Parameters
    ----------
    res : iterable of str
        Date stamps in ``'%Y%j'`` form (4-digit year followed by the 3-digit
        day of year), e.g. ``'2017031'``.
    return_str : bool, default True
        If True, return a single ``'<first> - <second>'`` range string built
        from the first two stamps; otherwise return every converted stamp.

    Returns
    -------
    str or list of str
        Dates rendered as ``'%b-%d-%Y'`` (e.g. ``'Jan-31-2017'``).

    Raises
    ------
    ValueError
        If a stamp does not parse with ``'%Y%j'``.
    IndexError
        If ``return_str`` is True and fewer than two stamps were supplied.
    """
    formatted = []
    for stamp in res:
        parsed = dt.strptime(stamp, '%Y%j')
        formatted.append(parsed.strftime('%b-%d-%Y'))
    if not return_str:
        return formatted
    # Only the first two entries (range start and end) make up the range string.
    start, end = formatted[0], formatted[1]
    return f'{start} - {end}'


def build_monthly_dict(file_list, verbose=True):
    """Build a dictionary of data files keyed by abbreviated month name.

    Each file name is expected to embed the start and end of its date range as
    two 7-digit year + day-of-year stamps, optionally preceded by a sensor
    letter ``A`` or ``V`` (e.g. ``A20030012011031.L3m_MC_SST_sst_9km.nc``).

    Parameters
    ----------
    file_list : iterable of pathlib.Path
        Files to index (a list of climatologies or a list of monthlies).
    verbose : bool, default True
        If True, print the decoded date range and the path of every file.

    Returns
    -------
    dict
        Maps the 3-letter month abbreviation (``'Jan'``, ``'Feb'``, ...) to the
        corresponding path. If several files decode to the same month, the
        last one encountered in ``file_list`` replaces the earlier ones.

    Raises
    ------
    AssertionError
        If the month abbreviation of the start date does not occur exactly
        twice in the decoded date-range string (i.e. start and end dates are
        not in the same month).

    Errors raised by ``get_string`` for missing or malformed date stamps
    propagate unchanged.
    """
    # regex to retrieve the 7-digit date stamps from a file name
    date_pat = re.compile(r'[AV]?(\d{7})')
    monthly_dict = {}
    for file in file_list:
        # Search the file name only, not the full path: a 7-digit run in a
        # parent directory name would otherwise be picked up as a date stamp.
        dates = date_pat.findall(file.name)
        # convert the date range stamps to abbrev. month, day, year
        date_rng_str = get_string(dates)
        month = date_rng_str[:3]
        # both ends of the range must carry the same abbreviated month
        assert date_rng_str.count(month) == 2, (
            f'start and end dates are not in the same month: {date_rng_str} ({file.name})'
        )
        # set dictionary key as abbreviated month name
        monthly_dict[month] = file
        if verbose:
            print(f' {date_rng_str} => {file.as_posix()}')
    return monthly_dict

In [7]:
# Root of the local data tree, anchored at the current user's home directory
home = pathlib.Path.home()
data_main_dir = home.joinpath('DATA/SOC')

# One directory per sensor
aqua_dir = data_main_dir.joinpath('Aqua_9km')
viirs_dir = data_main_dir.joinpath('VIIRS_9km')

# Each sensor directory is split by product: chlorophyll-a and sst
aqua_chl_dir, aqua_sst_dir = aqua_dir.joinpath('chlor_a'), aqua_dir.joinpath('sst')
viirs_chl_dir, viirs_sst_dir = viirs_dir.joinpath('chlor_a'), viirs_dir.joinpath('sst')

<u>Build lists of paths</u>

In [8]:
# Aqua climatologies (file names containing 'MC'): sst, then chl
aqua_mc_sst_list = sorted(aqua_sst_dir.glob('*MC*'))
aqua_mc_chl_list = sorted(aqua_chl_dir.glob('*MC*'))
# Aqua monthlies (file names containing 'MO'): sst, then chl
aqua_mo_sst_list = sorted(aqua_sst_dir.glob('*MO*'))
aqua_mo_chl_list = sorted(aqua_chl_dir.glob('*MO*'))
# VIIRS monthlies: chl only
viirs_mo_chl_list = sorted(viirs_chl_dir.glob('*MO*'))

In [9]:
# Visual check of the sorted Aqua SST climatology paths collected above
aqua_mc_sst_list

[PosixPath('/accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20030012011031.L3m_MC_SST_sst_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20030322011059.L3m_MC_SST_sst_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20030602011090.L3m_MC_SST_sst_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20030912011120.L3m_MC_SST_sst_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20031212011151.L3m_MC_SST_sst_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20031522011181.L3m_MC_SST_sst_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20031822011212.L3m_MC_SST_sst_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20032132011243.L3m_MC_SST_sst_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20032442011273.L3m_MC_SST_sst_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20032742011304.L3m_MC_SST_sst_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A200330

<u>Build dictionaries of NC files</u>

The `verbose` option of `build_monthly_dict` is on by default, so each call below prints the decoded date range and file path for visual inspection.

In [35]:
# Aqua chl climatology dictionary: abbreviated month name -> climatology file
# (verbose output is left on so the decoded date ranges can be checked by eye)
a_mc_chl_dict = build_monthly_dict(aqua_mc_chl_list)

 Jan-01-2003 - Jan-31-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20030012011031.L3m_MC_CHL_chlor_a_9km.nc
 Feb-01-2003 - Feb-28-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20030322011059.L3m_MC_CHL_chlor_a_9km.nc
 Mar-01-2003 - Mar-31-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20030602011090.L3m_MC_CHL_chlor_a_9km.nc
 Apr-01-2003 - Apr-30-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20030912011120.L3m_MC_CHL_chlor_a_9km.nc
 May-01-2003 - May-31-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20031212011151.L3m_MC_CHL_chlor_a_9km.nc
 Jun-01-2003 - Jun-30-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20031522011181.L3m_MC_CHL_chlor_a_9km.nc
 Jul-01-2003 - Jul-31-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20031822011212.L3m_MC_CHL_chlor_a_9km.nc
 Aug-01-2003 - Aug-31-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20032132011243.L3m_MC_CHL_chlor_a_9km.nc
 Sep-01-2003 - Sep-30-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9

In [36]:
# VIIRS chl monthlies dictionary: abbreviated month name -> monthly file
v_mo_chl_dict = build_monthly_dict(viirs_mo_chl_list)

 Jan-01-2017 - Jan-31-2017 => /accounts/ekarakoy/DATA/SOC/VIIRS_9km/chlor_a/V20170012017031.L3m_MO_SNPP_CHL_chlor_a_9km.nc
 Feb-01-2017 - Feb-28-2017 => /accounts/ekarakoy/DATA/SOC/VIIRS_9km/chlor_a/V20170322017059.L3m_MO_SNPP_CHL_chlor_a_9km.nc
 Mar-01-2017 - Mar-31-2017 => /accounts/ekarakoy/DATA/SOC/VIIRS_9km/chlor_a/V20170602017090.L3m_MO_SNPP_CHL_chlor_a_9km.nc
 Apr-01-2017 - Apr-30-2017 => /accounts/ekarakoy/DATA/SOC/VIIRS_9km/chlor_a/V20170912017120.L3m_MO_SNPP_CHL_chlor_a_9km.nc
 May-01-2017 - May-31-2017 => /accounts/ekarakoy/DATA/SOC/VIIRS_9km/chlor_a/V20171212017151.L3m_MO_SNPP_CHL_chlor_a_9km.nc
 Jun-01-2017 - Jun-30-2017 => /accounts/ekarakoy/DATA/SOC/VIIRS_9km/chlor_a/V20171522017181.L3m_MO_SNPP_CHL_chlor_a_9km.nc
 Jul-01-2017 - Jul-31-2017 => /accounts/ekarakoy/DATA/SOC/VIIRS_9km/chlor_a/V20171822017212.L3m_MO_SNPP_CHL_chlor_a_9km.nc
 Aug-01-2017 - Aug-31-2017 => /accounts/ekarakoy/DATA/SOC/VIIRS_9km/chlor_a/V20172132017243.L3m_MO_SNPP_CHL_chlor_a_9km.nc
 Sep-01-2017 - S

In [37]:
# Aqua chl monthlies dictionary: abbreviated month name -> monthly file
a_mo_chl_dict = build_monthly_dict(aqua_mo_chl_list)

 Jan-01-2017 - Jan-31-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20170012017031.L3m_MO_CHL_chlor_a_9km.nc
 Feb-01-2017 - Feb-28-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20170322017059.L3m_MO_CHL_chlor_a_9km.nc
 Mar-01-2017 - Mar-31-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20170602017090.L3m_MO_CHL_chlor_a_9km.nc
 Apr-01-2017 - Apr-30-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20170912017120.L3m_MO_CHL_chlor_a_9km.nc
 May-01-2017 - May-31-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20171212017151.L3m_MO_CHL_chlor_a_9km.nc
 Jun-01-2017 - Jun-30-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20171522017181.L3m_MO_CHL_chlor_a_9km.nc
 Jul-01-2017 - Jul-31-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20171822017212.L3m_MO_CHL_chlor_a_9km.nc
 Aug-01-2017 - Aug-31-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/chlor_a/A20172132017243.L3m_MO_CHL_chlor_a_9km.nc
 Sep-01-2017 - Sep-30-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9

In [38]:
# Aqua SST climatology dictionary: abbreviated month name -> climatology file
a_mc_sst_dict = build_monthly_dict(aqua_mc_sst_list)

 Jan-01-2003 - Jan-31-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20030012011031.L3m_MC_SST_sst_9km.nc
 Feb-01-2003 - Feb-28-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20030322011059.L3m_MC_SST_sst_9km.nc
 Mar-01-2003 - Mar-31-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20030602011090.L3m_MC_SST_sst_9km.nc
 Apr-01-2003 - Apr-30-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20030912011120.L3m_MC_SST_sst_9km.nc
 May-01-2003 - May-31-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20031212011151.L3m_MC_SST_sst_9km.nc
 Jun-01-2003 - Jun-30-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20031522011181.L3m_MC_SST_sst_9km.nc
 Jul-01-2003 - Jul-31-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20031822011212.L3m_MC_SST_sst_9km.nc
 Aug-01-2003 - Aug-31-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20032132011243.L3m_MC_SST_sst_9km.nc
 Sep-01-2003 - Sep-30-2011 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20032442011273.L3m_MC_SST_sst_9km.nc
 Oct-01-2003 - Oct-

In [39]:
# Aqua SST monthlies dictionary: abbreviated month name -> monthly file.
# No pairing with the climatology happens here; both dictionaries simply
# share the same month-abbreviation keys.
a_mo_sst_dict = build_monthly_dict(aqua_mo_sst_list)

 Jan-01-2017 - Jan-31-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20170012017031.L3m_MO_SST_sst_9km.nc
 Feb-01-2017 - Feb-28-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20170322017059.L3m_MO_SST_sst_9km.nc
 Mar-01-2017 - Mar-31-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20170602017090.L3m_MO_SST_sst_9km.nc
 Apr-01-2017 - Apr-30-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20170912017120.L3m_MO_SST_sst_9km.nc
 May-01-2017 - May-31-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20171212017151.L3m_MO_SST_sst_9km.nc
 Jun-01-2017 - Jun-30-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20171522017181.L3m_MO_SST_sst_9km.nc
 Jul-01-2017 - Jul-31-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20171822017212.L3m_MO_SST_sst_9km.nc
 Aug-01-2017 - Aug-31-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20172132017243.L3m_MO_SST_sst_9km.nc
 Sep-01-2017 - Sep-30-2017 => /accounts/ekarakoy/DATA/SOC/Aqua_9km/sst/A20172442017273.L3m_MO_SST_sst_9km.nc
 Oct-01-2017 - Oct-

Collect the dictionaries built with the code above (including Aqua_MC and VIIRS_MO), each of which is keyed by month, into a single dictionary and pickle it.

In [14]:
# Assemble the complete payload BEFORE opening the destination file: opening
# with 'wb' truncates it, so a failure while building the dictionary (e.g. one
# of the per-month dictionaries not being defined in the current kernel
# session) would otherwise leave an empty pickle behind.
smi_dict = {
    'aqua_chl_mc': a_mc_chl_dict,
    'viirs_chl_mo': v_mo_chl_dict,
    'aqua_chl_mo': a_mo_chl_dict,
    'aqua_sst_mc': a_mc_sst_dict,
    'aqua_sst_mo': a_mo_sst_dict,
}
# The path is relative to the kernel's working directory; open() does not
# create the PklJar directory, so it must already exist.
# NOTE(review): the pickled values are the path objects returned by
# build_monthly_dict; pickled PosixPath objects may not load on Windows — confirm
# if the file is meant to be shared across platforms.
with open('../PklJar/smi_dicts_2017_9km.pkl', 'wb') as fp:
    pickle.dump(smi_dict, fp, protocol=pickle.HIGHEST_PROTOCOL)