## Demo: Stage2 processing for mooring data

- Apply clock offsets
- Trim to deployment period

In [None]:
import os

import yaml
import scipy.io
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from datetime import datetime
import pandas as pd
from ctd_tools.writers import NetCdfWriter
from oceanarray import writers

# Full catalogue of mooring deployments, one row per group of related names.
moorlist = [
    "ds2_X_2012", "ds2_X_2017", "ds2_X_2018",
    "ds8_1_2012", "ds9_1_2012", "ds10_1_2012", "ds11_1_2012", "ds12_1_2012",
    "ds13_1_2012", "ds14_1_2012", "ds15_1_2012", "ds16_1_2012", "ds17_1_2012",
    "ds19_1_2012", "ds18_1_2012", "ds28_1_2017",
    "dsA_1_2018", "dsB_1_2018", "dsC_1_2018", "dsD_1_2018", "dsE_1_2018", "dsF_1_2018",
    "dsM1_1_2017", "dsM2_1_2017", "dsM3_1_2017", "dsM4_1_2017", "dsM5_1_2017",
]

# Override: restrict this run to a single mooring. Only this last assignment
# is in effect for the cells below; remove it to expose the full catalogue.
moorlist = ["dsE_1_2018"]

In [None]:
# Base directory of the data tree (note the trailing slash).
# Raw files live under <basedir>moor/raw/ and processed files under
# <basedir>moor/proc/.
basedir = '/Users/eddifying/Dropbox/data/ifmro_mixsed/ds_data_eleanor/'
output_path = f"{basedir}moor/proc/"



def read_yaml_time(data, key):
    """Return ``data[key]`` as a ``numpy.datetime64[ns]``, or NaT if unusable.

    Parameters
    ----------
    data : mapping
        Parsed YAML content; only ``data.get(key)`` is called on it.
    key : str
        Name of the entry that holds the timestamp.

    Returns
    -------
    numpy.datetime64
        Nanosecond-resolution timestamp. ``NaT`` is returned when the key is
        missing, when the value is ``None`` or a blank string, or when
        ``pandas.to_datetime`` cannot convert the value.
    """
    nat = np.datetime64("NaT", "ns")

    val = data.get(key)
    if val is None or (isinstance(val, str) and not val.strip()):
        return nat

    try:
        # to_datetime64() may come back in a coarser unit than nanoseconds
        # depending on the pandas version; cast so the documented
        # datetime64[ns] contract always holds.
        return pd.to_datetime(val).to_datetime64().astype("datetime64[ns]")
    except Exception:
        # Deliberately broad: any value that cannot be interpreted as a single
        # timestamp is treated as "not specified" rather than aborting the run.
        return nat

# Stage 2 for one mooring: read the mooring YAML, then for every instrument
# listed there load its *_raw.nc file, attach metadata from the YAML, apply
# the clock offset, trim to the deployment period and write a *_use.nc file.

# Find the mooring's processed directory & read the yaml specification
name1 = moorlist[0]
proc_dir = output_path + name1
moor_yaml = proc_dir + '/' + name1 + '.mooring.yaml'
with open(moor_yaml, 'r') as f:
    moor_yaml_data = yaml.safe_load(f)

# NOTE: nothing in this cell appends to `datasets`; the name is kept only so
# that it still exists after the cell has run.
datasets = []
deploy_time  = read_yaml_time(moor_yaml_data, "deployment_time")
recover_time = read_yaml_time(moor_yaml_data, "recovery_time")
print(f"deploy time is {deploy_time}")

# A NaT bound means "not specified": leave that side of the record untrimmed.
trim_start = None if np.isnat(deploy_time) else deploy_time
trim_end = None if np.isnat(recover_time) else recover_time

for i in moor_yaml_data['instruments']:
    stem = name1 + '_' + str(i['serial'])
    fname = stem + '_raw.nc'
    instr_dir = proc_dir + '/' + i['instrument']
    rawfile = instr_dir + '/' + fname

    if not os.path.exists(rawfile):
        # Instruments without a raw file are skipped, but say so.
        print(f"No raw file for {i['instrument']} {i['serial']}: {rawfile}")
        continue

    # Context manager so the raw file handle is released once the trimmed
    # copy has been written (or the instrument has been skipped).
    with xr.open_dataset(rawfile) as ds_raw:
        ds1 = ds_raw

        # Add metadata from the yaml, without overwriting what the file has
        if 'InstrDepth' not in ds1.variables and 'depth' in i:
            ds1['InstrDepth'] = i['depth']
        if 'instrument' not in ds1.variables and 'instrument' in i:
            ds1['instrument'] = i['instrument']
        if 'serial_number' not in ds1.variables and 'serial' in i:
            ds1['serial_number'] = i['serial']
        if 'timeS' in ds1.variables:
            ds1 = ds1.drop_vars('timeS')

        # Apply the clock offset. A missing or empty YAML entry counts as 0 s.
        # The shift uses whole seconds: int() truncates any fractional part.
        offset_seconds = i.get('clock_offset') or 0
        ds1['clock_offset'] = offset_seconds
        ds1['clock_offset'].attrs['units'] = 's'
        ds1['time'] = ds1['time'] + np.timedelta64(int(offset_seconds), 's')

        # Trim to the deployment period (after the clock correction).
        ds1 = ds1.sel(time=slice(trim_start, trim_end))

        if ds1['time'].size == 0:
            # min()/max() below would raise on an empty record, and an empty
            # *_use.nc file is of no use downstream.
            print(f"Skipping {fname}: no samples within the deployment period")
            continue

        start_time = ds1['time'].values.min()
        end_time = ds1['time'].values.max()
        print(f"Deploy time is {deploy_time}. Data starts at {start_time} and ends at {end_time}")
        #---------------------------------------------
        # Write the trimmed dataset next to the raw file as *_use.nc
        fname2 = stem + '_use.nc'
        fileout = instr_dir + '/' + fname2
        print(f"Saving to {fileout}")

        # Remove any previous output first so the writer starts from a clean file.
        if os.path.exists(fileout):
            os.remove(fileout)

        writer = NetCdfWriter(ds1)
        writer.write(
            fileout,
            optimize=True,
            # Derived variables are kept. Per the original note, True would drop
            # vars with attrs["derived"] == True (e.g., z) -- TODO confirm.
            drop_derived=False,
            uint8_vars=[
                "correlation_magnitude", "echo_intensity", "status", "percent_good",
                "bt_correlation", "bt_amplitude", "bt_percent_good",
            ],
            # Explicit list of variables to store as float32 (presumably; confirm
            # against NetCdfWriter.write).
            float32_vars=[
                "eastward_velocity", "northward_velocity", "upward_velocity",
                "temperature", "salinity", "pressure", "pressure_std", "depth", "bt_velocity",
            ],
            chunk_time=3600,  # 1-hour chunks if you have ~1 Hz ensembles; adjust as needed
            complevel=5,
            quantize=3,
        )





In [None]:
import pandas as pd

# Scratch check: how an ISO-8601 string from the YAML looks once pandas has
# converted it to a numpy datetime64 scalar.
val = '2018-08-12T19:53:00'
f = pd.to_datetime(val).asm8  # asm8 is the property form of to_datetime64()
print(f)

In [None]:
# Disabled scratch cell: switch the guard on to open one instrument's trimmed
# (*_use.nc) and raw (*_raw.nc) files side by side for a manual comparison.
# The file names are hard-coded and are looked up under the current proc_dir.
if False:
    instr = 'microcat'
    fname = 'dsC_1_2018_7516_use.nc'
    filein = '/'.join([proc_dir, instr, fname])
    ds2 = xr.open_dataset(filein)

    instr = 'microcat'
    fname = 'dsC_1_2018_7516_raw.nc'
    filein = '/'.join([proc_dir, instr, fname])
    ds1 = xr.open_dataset(filein)