# Introduction

This notebook does not run in **binder**.

The OOI data system features an extension called Data Explorer. This sub-system is intended to 
facilitate data exploration and use. For more detail see notebook **Ocean 01 B**. This notebook 
cleans up Level 1+ NetCDF files obtained from Data Explorer. The results are resampled to a uniform 
one-minute ("1Min") sample spacing.


In [1]:
import os, sys, time, glob, warnings
from IPython.display import clear_output             # use inside loop with clear_output(wait = True) followed by print(i)
warnings.filterwarnings('ignore')                    # suppress every warning for the rest of the session
this_dir = os.getcwd()                               # current working directory of the running kernel
data_dir = this_dir + '/../../data'                  # large datasets reside outside the repository

from matplotlib import pyplot as plt
from matplotlib import colors as mplcolors
import numpy as np, pandas as pd, xarray as xr
from numpy import datetime64 as dt64, timedelta64 as td64

# convenience functions abbreviating 'datetime64' and so on
def doy(theDatetime):
    """Return the 1-based day of year (Jan 1 -> 1) for a datetime64-like value.

    The year is read from the first four characters of str(theDatetime); any
    fractional part of the elapsed day count is truncated.
    """
    year_start = dt64(str(theDatetime)[0:4] + '-01-01')
    elapsed_days = (theDatetime - year_start) / td64(1, 'D')
    return 1 + int(elapsed_days)
def dt64_from_doy(year, doy):
    """Return the datetime64 for a given year and 1-based day of year.

    Day 1 maps to January 1 of `year`; `doy` must be an integer.
    """
    january_first = dt64(str(year) + '-01-01')
    day_offset = td64(doy - 1, 'D')
    return january_first + day_offset
def day_of_month_to_string(d):
    """Return `d` as text, prefixed with '0' when it is not greater than 9."""
    text = str(d)
    if d > 9:
        return text
    return '0' + text

# Announce the major version of the Python interpreter backing this kernel
print('\nJupyter Notebook running Python {}'.format(sys.version_info.major))


Jupyter Notebook running Python 3


In [2]:
############################################################
# Data Explorer NetCDF standardizer: resample each file to one-minute spacing
############################################################
# Datasets extend over the full program from 2014 to download date Aug 2021.
# At first look they appear sampled at 1/minute but there are anomalies; 
#   so to have more confidence this code resamples them at "1Min".

def StandardizeNetCDFDataset(source_location, datafile):
    """Open a NetCDF file and return its contents resampled to one-minute spacing.

    The file must have a "row" dimension and a "time" variable: "time" is
    promoted to a coordinate and swapped in as the dimension, after which every
    column is averaged into "1Min" bins. Variable attributes and dataset
    attributes are carried over to the result.

    Parameters
    ----------
    source_location : str
        Directory holding the file (with or without a trailing separator).
    datafile : str
        File name inside `source_location`.

    Returns
    -------
    xarray.Dataset
        In-memory dataset indexed by the resampled "time" axis.
    """
    # The context manager releases the file handle once the in-memory result is
    # built; without it every call in a batch loop leaves a file open.
    with xr.open_dataset(os.path.join(source_location, datafile)) as raw:
        ds = raw.set_coords("time").swap_dims({"row": "time"})
        # Round-trip through pandas so the resampling produces a regular time index
        df = ds.to_dataframe().resample("1Min").mean()
        vals = [
            xr.DataArray(data=df[c], dims=['time'], coords={'time': df.index}, attrs=ds[c].attrs)
            for c in df.columns
        ]
        return xr.Dataset(dict(zip(df.columns, vals)), attrs=ds.attrs)

# Source (Data Explorer downloads) and destination (resampled) roots under $HOME
data_root = os.getenv("HOME") + '/data/data_ooi_data_explorer/'
out_root  = os.getenv("HOME") + '/data/data_explorer_1Min/'
# Sub-directory keys; each site/frame pair names one input and one output folder
sitekeys  = ['/axb/', '/oos/', '/osb/']
framekeys = ['/profiler/', '/platform/']
# Counts are not needed by the loop below; kept in case other cells use them
n_sites   = len(sitekeys)
n_frames  = len(framekeys)

for sitekey in sitekeys:
    for framekey in framekeys:
        # Paths keep their trailing separator because file names are appended directly
        this_data_path = data_root + sitekey + framekey
        this_out_path  = out_root + sitekey + framekey
        # Create the destination folder up front so to_netcdf() cannot fail on a fresh machine
        os.makedirs(this_out_path, exist_ok=True)
        # Select by true extension: entries without a dot no longer raise IndexError,
        # and names such as 'x.nc.bak' are no longer mistaken for NetCDF files.
        # Sorting makes the processing order (and the log below) deterministic.
        datafiles = sorted(
            name for name in os.listdir(this_data_path)
            if os.path.splitext(name)[1] == '.nc'
        )
        for datafile in datafiles:
            ds      = StandardizeNetCDFDataset(this_data_path, datafile)
            # splitext keeps the full stem even when the file name contains extra dots
            outfile = this_out_path + os.path.splitext(datafile)[0] + '_1Min.nc'
            ds.to_netcdf(outfile)
            print(datafile, '>', outfile, 'with size', ds.time.shape[0])


axb_profiler_backscatter.nc > /mnt/d//data/data_explorer_1Min//axb//profiler/axb_profiler_backscatter_1Min.nc with size 3608277
axb_profiler_cdom.nc > /mnt/d//data/data_explorer_1Min//axb//profiler/axb_profiler_cdom_1Min.nc with size 3608277
axb_profiler_chlora.nc > /mnt/d//data/data_explorer_1Min//axb//profiler/axb_profiler_chlora_1Min.nc with size 3608277
axb_profiler_density.nc > /mnt/d//data/data_explorer_1Min//axb//profiler/axb_profiler_density_1Min.nc with size 3608283
axb_profiler_doxygen.nc > /mnt/d//data/data_explorer_1Min//axb//profiler/axb_profiler_doxygen_1Min.nc with size 3608283
axb_profiler_nitrate.nc > /mnt/d//data/data_explorer_1Min//axb//profiler/axb_profiler_nitrate_1Min.nc with size 3607853
axb_profiler_par.nc > /mnt/d//data/data_explorer_1Min//axb//profiler/axb_profiler_par_1Min.nc with size 3598972
axb_profiler_pco2.nc > /mnt/d//data/data_explorer_1Min//axb//profiler/axb_profiler_pco2_1Min.nc with size 3607990
axb_profiler_ph.nc > /mnt/d//data/data_explorer_1Min//

osb_profiler_velup.nc > /mnt/d//data/data_explorer_1Min//osb//profiler/osb_profiler_velup_1Min.nc with size 3588508
osb_platform_backscatter.nc > /mnt/d//data/data_explorer_1Min//osb//platform/osb_platform_backscatter_1Min.nc with size 3594819
osb_platform_cdom.nc > /mnt/d//data/data_explorer_1Min//osb//platform/osb_platform_cdom_1Min.nc with size 3594819
osb_platform_chlora.nc > /mnt/d//data/data_explorer_1Min//osb//platform/osb_platform_chlora_1Min.nc with size 3594819
osb_platform_density.nc > /mnt/d//data/data_explorer_1Min//osb//platform/osb_platform_density_1Min.nc with size 3594340
osb_platform_doxygen.nc > /mnt/d//data/data_explorer_1Min//osb//platform/osb_platform_doxygen_1Min.nc with size 3594340
osb_platform_doxygen_seawater_CTD.nc > /mnt/d//data/data_explorer_1Min//osb//platform/osb_platform_doxygen_seawater_CTD_1Min.nc with size 3594340
osb_platform_doxygen_seawater_DO.nc > /mnt/d//data/data_explorer_1Min//osb//platform/osb_platform_doxygen_seawater_DO_1Min.nc with size 14