# Introduction

This notebook does not run in **binder**.

The OOI data system features an extension called Data Explorer. This sub-system is intended to 
facilitate data exploration and use. For more detail see notebook **Ocean 01 B**. This notebook 
cleans up Level 1+ NetCDF files obtained from Data Explorer. The results are cleanly sampled at 
one-minute ("1Min") sample spacing.


In [1]:
import os, sys, time, glob, warnings
from IPython.display import clear_output             # use inside loop with clear_output(wait = True) followed by print(i)
warnings.filterwarnings('ignore')
this_dir = os.getcwd()
data_dir = this_dir + '/../data'

from matplotlib import pyplot as plt
from matplotlib import colors as mplcolors
import numpy as np, pandas as pd, xarray as xr
from numpy import datetime64 as dt64, timedelta64 as td64

# convenience functions abbreviating 'datetime64' and so on
def doy(theDatetime): return 1 + int((theDatetime - dt64(str(theDatetime)[0:4] + '-01-01')) / td64(1, 'D'))
def dt64_from_doy(year, doy): return dt64(str(year) + '-01-01') + td64(doy-1, 'D')
def day_of_month_to_string(d): return str(d) if d > 9 else '0' + str(d)

print('\nJupyter Notebook running Python {}'.format(sys.version_info[0]))


Jupyter Notebook running Python 3


In [2]:
%%timeit

############################################################
# shallow profiler dive timestamp generator
############################################################
# Datasets extend over the full program from 2014 to download date Aug 2021.
# At first look they appear sampled at 1/minute but there are anomalies; 
#   so to have more confidence this code resamples them at "1Min".

def StandardizeNetCDFDataset(source_location, datafile):
    ds   = xr.open_dataset(source_location + datafile)
    ds   = ds.set_coords("time")
    ds   = ds.swap_dims({"row":"time"})
    df   = ds.to_dataframe().resample("1Min").mean()
    vals = [xr.DataArray(data=df[c], dims=['time'], coords={'time':df.index}, attrs=ds[c].attrs) for c in df.columns]
    return xr.Dataset(dict(zip(df.columns, vals)), attrs=ds.attrs)

drive_dir = '/mnt/d/'
data_root = drive_dir + '/data/data_ooi_data_explorer/'
out_root  = drive_dir + '/data/data_explorer_1Min/'
sitekeys  = ['/axb/', '/oos/', '/osb/']
framekeys = ['/profiler/', '/platform/']
n_sites   = len(sitekeys)
n_frames  = len(framekeys)

for i in range(n_sites):
    for j in range(n_frames):
        this_data_path = data_root + sitekeys[i] + framekeys[j]
        this_out_path  = out_root + sitekeys[i] + framekeys[j]
        possible_datafiles = os.listdir(this_data_path)
        datafiles = []
        for poss in possible_datafiles:
            if poss.split('.')[1] == 'nc': datafiles.append(poss)      
        for datafile in datafiles:
            ds      = StandardizeNetCDFDataset(this_data_path, datafile)
            outfile = this_out_path + datafile.split('.')[0] + '_1Min.nc'
            ds.to_netcdf(outfile)
            print(outfile, ds.time.shape[0])


OSError: [Errno 22] Invalid argument: '/mnt/d//data/data_ooi_data_explorer//axb//profiler/'