This notebook shows how to put the data together into a coherent structure before creating the CHL, CHL anomaly, SST anomaly and CHL-SST anomaly sign relation maps. The specific operations described here include:
* Build lists containing full_paths of NC files; one list per sensor (aqua/viirs) and type (monthly/climatology)
* Build Dictionaries of .NC files keyed by month
* Collect all dictionaries in a single dictionary
* Pickle (serialize) the dictionary into a binary file

In [1]:
import pathlib
import re
from datetime import datetime as dt
import pickle
from IPython.core.display import HTML, display
import os

In [2]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline
# Inject CSS so elements with the `container` class use 90% of the available width
display(HTML("<style>.container {width: 90% !important}</style>"))

In [3]:
# HELPER FUNCTIONS

def get_string(res, return_str=True):
    """Reformat 'YYYYDDD' (year + day-of-year) strings as 'Mon-DD-YYYY'.

    Parameters
    ----------
    res : iterable of str
        Date strings made of a 4-digit year followed by a 3-digit day of year.
    return_str : bool, default True
        If true, return a single ``'<first> - <second>'`` range string built
        from the first two converted dates; otherwise return the list of all
        converted dates.

    Returns
    -------
    str or list of str
    """
    formatted = []
    for year_doy in res:
        parsed = dt.strptime(year_doy, '%Y%j')
        formatted.append(parsed.strftime('%b-%d-%Y'))
    if not return_str:
        return formatted
    # only the first two entries (range start and end) go into the string form
    start, end = formatted[0], formatted[1]
    return f'{start} - {end}'


def build_monthly_dict(file_list, verbose=True):
    """Map abbreviated month names to the files covering that month.

    Each file name is expected to embed a start and an end date as two
    7-digit ``YYYYDDD`` (year + day-of-year) groups, optionally preceded by
    a sensor letter ``A`` or ``V`` (e.g. ``A20180012018031.L3m_MO_...nc``).

    Parameters
    ----------
    file_list : iterable of path objects
        Objects exposing ``.name`` and ``.as_posix()`` (e.g. ``pathlib.Path``).
    verbose : bool, default True
        When true, print one ``<date range> => <path>`` line per file.

    Returns
    -------
    dict
        ``{'Jan': path, 'Feb': path, ...}``. If several files fall in the
        same month, the last one in ``file_list`` wins.

    Raises
    ------
    ValueError
        If fewer than two dates can be extracted from a file name.
    AssertionError
        If the start and end dates of a file fall in different months.
    """
    # regex to retrieve the date range from a file name
    date_pat = re.compile(r'[AV]?(\d{7})')
    monthly_dict = {}
    for file in file_list:
        # Search the file name only: digit runs in parent directory names
        # must not be mistaken for dates.
        dates = date_pat.findall(file.name)
        if len(dates) < 2:
            raise ValueError(
                'expected start and end dates (YYYYDDD) in file name: '
                f'{file.as_posix()}'
            )
        # convert the date range to abbreviated month, day, year
        date_rng_str = get_string(dates)
        month = date_rng_str[:3]
        # Both dates must share the same abbreviated month. Raised explicitly
        # (instead of a bare `assert`) so the check survives `python -O`.
        if date_rng_str.count(month) != 2:
            raise AssertionError(
                f'date range spans more than one month ({date_rng_str}): '
                f'{file.as_posix()}'
            )
        # key the dictionary by abbreviated month name
        monthly_dict[month] = file
        if verbose:
            print(f' {date_rng_str} => {file.as_posix()}')
    return monthly_dict

In [5]:
# Root of the SOC 2018 data tree, located under the user's home directory
home = pathlib.Path.home()
data_main_dir = home.joinpath('DATA/SOC/SOC_2018')

# Aqua climatology (MC) directories, one per product
aqua_mc_dir = data_main_dir.joinpath('Aqua_MC')
aqua_mc_chl_dir = aqua_mc_dir.joinpath('CHLOR_A_2003_2017')
aqua_mc_sst_dir = aqua_mc_dir.joinpath('SST_2003_2017')
aqua_mc_bbp_dir = aqua_mc_dir.joinpath('BBP_443_GIOP_2003_2017')

# Aqua monthly (MO) directories, one per product
aqua_mo_dir = data_main_dir.joinpath('Aqua_MO')
aqua_mo_chl_dir = aqua_mo_dir.joinpath('chlor_a')
aqua_mo_sst_dir = aqua_mo_dir.joinpath('sst')
aqua_mo_bbp_dir = aqua_mo_dir.joinpath('bbp_443_giop/test_data')

In [6]:
# Fail early if any input directory is absent, and name every missing one
# in a single message instead of stopping at the first.
required_dirs = (
    aqua_mc_chl_dir, aqua_mc_sst_dir, aqua_mc_bbp_dir,
    aqua_mo_chl_dir, aqua_mo_sst_dir, aqua_mo_bbp_dir,
)
missing_dirs = [str(d) for d in required_dirs if not os.path.exists(d)]
assert not missing_dirs, f'missing data directories: {missing_dirs}'

<u>Build lists of paths</u>

In [11]:
# Aqua climatology files (names containing 'L3m'), sorted by path
aqua_mc_sst_list = sorted(aqua_mc_sst_dir.glob('*L3m*'))  # sst
aqua_mc_chl_list = sorted(aqua_mc_chl_dir.glob('*L3m*'))  # chl
aqua_mc_bbp_list = sorted(aqua_mc_bbp_dir.glob('*L3m*'))  # bbp

# Aqua monthly files (names containing 'MO'), sorted by path
aqua_mo_sst_list = sorted(aqua_mo_sst_dir.glob('*MO*'))  # sst
aqua_mo_chl_list = sorted(aqua_mo_chl_dir.glob('*MO*'))  # chl
aqua_mo_bbp_list = sorted(aqua_mo_bbp_dir.glob('*MO*'))  # bbp

In [12]:
# Inspect the sorted list of Aqua bbp monthly files
aqua_mo_bbp_list

[PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20180012018031.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20180322018059.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20180602018090.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20180912018120.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20181212018151.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20181522018181.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20181822018212.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20182132018243.L3m_MO_bbp_443_giop_9k

<u>Build dictionaries of NC files</u>

The `verbose` option is on by default for visual inspection.

In [13]:
# Aqua chl climatology: month abbreviation -> climatology file
a_mc_chl_dict = build_monthly_dict(file_list=aqua_mc_chl_list, verbose=True)

 Jan-01-2003 - Jan-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/CHLOR_A_2003_2017/A20030012017031.L3m_MC_chl_9km.nc
 Feb-01-2003 - Feb-28-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/CHLOR_A_2003_2017/A20030322017059.L3m_MC_chl_9km.nc
 Mar-01-2003 - Mar-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/CHLOR_A_2003_2017/A20030602017090.L3m_MC_chl_9km.nc
 Apr-01-2003 - Apr-30-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/CHLOR_A_2003_2017/A20030912017120.L3m_MC_chl_9km.nc
 May-01-2003 - May-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/CHLOR_A_2003_2017/A20031212017151.L3m_MC_chl_9km.nc
 Jun-01-2003 - Jun-30-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/CHLOR_A_2003_2017/A20031522017181.L3m_MC_chl_9km.nc
 Jul-01-2003 - Jul-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/CHLOR_A_2003_2017/A20031822017212.L3m_MC_chl_9km.nc
 Aug-01-2003 - Aug-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/CHLOR_A_2003_2017/A20032132017243.L3

In [14]:
# Aqua chl monthlies: month abbreviation -> monthly file
a_mo_chl_dict = build_monthly_dict(file_list=aqua_mo_chl_list, verbose=True)

 Jan-01-2018 - Jan-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/chlor_a/A20180012018031.L3m_MO_CHL_chlor_a_9km.nc
 Feb-01-2018 - Feb-28-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/chlor_a/A20180322018059.L3m_MO_CHL_chlor_a_9km.nc
 Mar-01-2018 - Mar-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/chlor_a/A20180602018090.L3m_MO_CHL_chlor_a_9km.nc
 Apr-01-2018 - Apr-30-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/chlor_a/A20180912018120.L3m_MO_CHL_chlor_a_9km.nc
 May-01-2018 - May-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/chlor_a/A20181212018151.L3m_MO_CHL_chlor_a_9km.nc
 Jun-01-2018 - Jun-30-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/chlor_a/A20181522018181.L3m_MO_CHL_chlor_a_9km.nc
 Jul-01-2018 - Jul-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/chlor_a/A20181822018212.L3m_MO_CHL_chlor_a_9km.nc
 Aug-01-2018 - Aug-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/chlor_a/A20182132018243.L3m_MO_CHL_chlor_a_9km.nc


In [15]:
# Aqua bbp climatology: month abbreviation -> climatology file
a_mc_bbp_dict = build_monthly_dict(file_list=aqua_mc_bbp_list, verbose=True)

 Jan-01-2003 - Jan-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/BBP_443_GIOP_2003_2017/A20030012017031.L3m_MC_bbp_443_giop_9km.nc
 Feb-01-2003 - Feb-28-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/BBP_443_GIOP_2003_2017/A20030322017059.L3m_MC_bbp_443_giop_9km.nc
 Mar-01-2003 - Mar-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/BBP_443_GIOP_2003_2017/A20030602017090.L3m_MC_bbp_443_giop_9km.nc
 Apr-01-2003 - Apr-30-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/BBP_443_GIOP_2003_2017/A20030912017120.L3m_MC_bbp_443_giop_9km.nc
 May-01-2003 - May-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/BBP_443_GIOP_2003_2017/A20031212017151.L3m_MC_bbp_443_giop_9km.nc
 Jun-01-2003 - Jun-30-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/BBP_443_GIOP_2003_2017/A20031522017181.L3m_MC_bbp_443_giop_9km.nc
 Jul-01-2003 - Jul-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/BBP_443_GIOP_2003_2017/A20031822017212.L3m_MC_bbp_443_giop_9km.nc
 Aug-01-2003 

In [16]:
# Display the Aqua bbp monthly file list again before building its dictionary
aqua_mo_bbp_list

[PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20180012018031.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20180322018059.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20180602018090.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20180912018120.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20181212018151.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20181522018181.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20181822018212.L3m_MO_bbp_443_giop_9km.nc'),
 PosixPath('/accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20182132018243.L3m_MO_bbp_443_giop_9k

In [17]:
# Aqua bbp monthlies: month abbreviation -> monthly file
# (shares its month keys with the bbp climatology dictionary)
a_mo_bbp_dict = build_monthly_dict(file_list=aqua_mo_bbp_list, verbose=True)

 Jan-01-2018 - Jan-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20180012018031.L3m_MO_bbp_443_giop_9km.nc
 Feb-01-2018 - Feb-28-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20180322018059.L3m_MO_bbp_443_giop_9km.nc
 Mar-01-2018 - Mar-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20180602018090.L3m_MO_bbp_443_giop_9km.nc
 Apr-01-2018 - Apr-30-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20180912018120.L3m_MO_bbp_443_giop_9km.nc
 May-01-2018 - May-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20181212018151.L3m_MO_bbp_443_giop_9km.nc
 Jun-01-2018 - Jun-30-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20181522018181.L3m_MO_bbp_443_giop_9km.nc
 Jul-01-2018 - Jul-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/bbp_443_giop/test_data/A20181822018212.L3m_MO_bbp_443_giop_9km.nc
 Aug-01-2018 

In [18]:
# Aqua SST climatology: month abbreviation -> climatology file
a_mc_sst_dict = build_monthly_dict(file_list=aqua_mc_sst_list, verbose=True)

 Jan-01-2003 - Jan-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/SST_2003_2017/A20030012017031.L3m_MC_sst_9km.nc
 Feb-01-2003 - Feb-28-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/SST_2003_2017/A20030322017059.L3m_MC_sst_9km.nc
 Mar-01-2003 - Mar-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/SST_2003_2017/A20030602017090.L3m_MC_sst_9km.nc
 Apr-01-2003 - Apr-30-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/SST_2003_2017/A20030912017120.L3m_MC_sst_9km.nc
 May-01-2003 - May-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/SST_2003_2017/A20031212017151.L3m_MC_sst_9km.nc
 Jun-01-2003 - Jun-30-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/SST_2003_2017/A20031522017181.L3m_MC_sst_9km.nc
 Jul-01-2003 - Jul-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/SST_2003_2017/A20031822017212.L3m_MC_sst_9km.nc
 Aug-01-2003 - Aug-31-2017 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MC/SST_2003_2017/A20032132017243.L3m_MC_sst_9km.nc
 Sep-01-2003 - S

In [19]:
# Aqua SST monthlies: month abbreviation -> monthly file
# (shares its month keys with the SST climatology dictionary)
a_mo_sst_dict = build_monthly_dict(file_list=aqua_mo_sst_list, verbose=True)

 Jan-01-2018 - Jan-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/sst/A20180012018031.L3m_MO_SST_sst_9km.nc
 Feb-01-2018 - Feb-28-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/sst/A20180322018059.L3m_MO_SST_sst_9km.nc
 Mar-01-2018 - Mar-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/sst/A20180602018090.L3m_MO_SST_sst_9km.nc
 Apr-01-2018 - Apr-30-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/sst/A20180912018120.L3m_MO_SST_sst_9km.nc
 May-01-2018 - May-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/sst/A20181212018151.L3m_MO_SST_sst_9km.nc
 Jun-01-2018 - Jun-30-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/sst/A20181522018181.L3m_MO_SST_sst_9km.nc
 Jul-01-2018 - Jul-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/sst/A20181822018212.L3m_MO_SST_sst_9km.nc
 Aug-01-2018 - Aug-31-2018 => /accounts/ekarakoy/DATA/SOC/SOC_2018/Aqua_MO/sst/A20182132018243.L3m_MO_SST_sst_9km.nc
 Sep-01-2018 - Sep-30-2018 => /accounts/ekarakoy/DATA/SOC/SOC_20

Use the code above to make a dictionary for both Aqua_MC and VIIRS_MO, both of which are keyed by month.

In [20]:
# Assemble the payload BEFORE opening the output file: mode 'wb' truncates
# any existing pickle, so a failure while building the dictionary (e.g. an
# undefined name after a partial re-run) must not wipe a previous result.
smi_dict = {
    'aqua_chl_mc': a_mc_chl_dict,
    'aqua_chl_mo': a_mo_chl_dict,
    'aqua_sst_mc': a_mc_sst_dict,
    'aqua_sst_mo': a_mo_sst_dict,
    'aqua_bbp_mc': a_mc_bbp_dict,
    'aqua_bbp_mo': a_mo_bbp_dict,
}
# NOTE: the stored values are the path objects produced by the glob calls,
# so the pickle carries paths rooted in this machine's home directory.
# The output path is relative to the notebook's working directory.
with open('../PklJar/smi_dicts_2018_9km_test_MC_2003_2017.pkl', 'wb') as fp:
    pickle.dump(smi_dict, fp, protocol=pickle.HIGHEST_PROTOCOL)