## Demo: Stage1 processing for mooring data

Read the original raw files and convert them to netCDF, with little to no additional processing.

In [None]:
import os

import yaml
import scipy.io
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from datetime import datetime

from ctd_tools.readers import SbeCnvReader, RbrRskAutoReader, RbrMatlabReader, NortekAsciiReader, SbeAsciiReader, RbrAsciiReader, AdcpMatlabReader
from ctd_tools.writers import NetCdfWriter
from ctd_tools.plotters import TimeSeriesPlotter

# Mooring selection.  Each assignment below overrides the one before it, so
# only the LAST `moorlist` is in effect; comment out the later assignments to
# process a wider set.

# Widest list of mooring names
moorlist = [
    "ds2_X_2012", "ds2_X_2017", "ds2_X_2018",
    "ds8_1_2012", "ds9_1_2012", "ds10_1_2012", "ds11_1_2012", "ds12_1_2012",
    "ds13_1_2012", "ds14_1_2012", "ds15_1_2012", "ds16_1_2012", "ds17_1_2012",
    "ds19_1_2012", "ds18_1_2012", "ds28_1_2017",
    "dsA_1_2018", "dsB_1_2018", "dsC_1_2018",
    "dsD_1_2018", "dsE_1_2018", "dsF_1_2018",
    "dsM1_1_2017", "dsM2_1_2017", "dsM3_1_2017", "dsM4_1_2017", "dsM5_1_2017",
]

# Subset: dsA ... dsF from 2018
moorlist = [f"ds{letter}_1_2018" for letter in "ABCDEF"]

# Current selection: a single mooring
moorlist = ["dsE_1_2018"]

In [None]:
basedir = '/Users/eddifying/Dropbox/data/ifmro_mixsed/ds_data_eleanor/'


def _find_known_tag(fname, tags=("_000", "_001", "_002")):
    '''Return the last entry of `tags` that occurs in `fname`, or '' if none does.'''
    fname = str(fname)
    found = ''
    for tag in tags:
        if tag in fname:
            found = tag
    return found


def _make_reader(inst, indir, infile):
    '''
    Construct the reader that matches the instrument's YAML "file_type".

    Returns a tuple (reader, remove_vars, remove_coords).  `reader` is None
    when "file_type" is missing or not one of the recognised types.  The two
    lists name the extra variables / coordinates that the SBE readers add and
    that stage 1 removes again; they are empty for every other reader.
    '''
    sbe_vars = ['potential_temperature', 'julian_days_offset', 'density']
    sbe_coords = ['depth', 'latitude', 'longitude']

    file_type = inst.get('file_type')
    if file_type == 'sbe-cnv':
        return SbeCnvReader(infile), sbe_vars, sbe_coords
    if file_type == 'sbe-asc':
        return SbeAsciiReader(infile), sbe_vars, sbe_coords
    if file_type == 'nortek-aqd':
        # The Nortek ASCII reader also needs the header file named in the YAML
        header_path = indir + inst['instrument'] + '/' + inst['header']
        return NortekAsciiReader(infile, header_file_path=header_path), [], []

    # Readers that only take the data file path
    single_file_readers = {
        'rbr-rsk': RbrRskAutoReader,
        'rbr-matlab': RbrMatlabReader,
        'rbr-dat': RbrAsciiReader,
        'adcp-matlab': AdcpMatlabReader,
    }
    reader_cls = single_file_readers.get(file_type)
    if reader_cls is None:
        return None, [], []
    return reader_cls(infile), [], []


def _add_yaml_metadata(dataset, yaml_data, inst):
    '''
    Attach YAML metadata to `dataset` in place.

    Mooring-level entries become global attributes; instrument-level entries
    become dataset variables.  "name" and "waterdepth" must be present in the
    mooring YAML; every other entry falls back to a placeholder default.
    '''
    # Global attributes
    dataset.attrs['mooring_name'] = yaml_data['name']
    dataset.attrs['waterdepth'] = yaml_data['waterdepth']
    dataset.attrs['longitude'] = yaml_data.get('longitude', 0.0)
    dataset.attrs['latitude'] = yaml_data.get('latitude', 0.0)
    dataset.attrs['deployment_latitude'] = yaml_data.get('deployment_latitude', '00 00.000 N')
    dataset.attrs['deployment_longitude'] = yaml_data.get('deployment_longitude', '000 00.000 W')
    dataset.attrs['deployment_time'] = yaml_data.get('deployment_time', 'YYYY-mm-ddTHH:MM:ss')
    dataset.attrs['seabed_latitude'] = yaml_data.get('seabed_latitude', '00 00.000 N')
    dataset.attrs['seabed_longitude'] = yaml_data.get('seabed_longitude', '000 00.000 W')
    dataset.attrs['recovery_time'] = yaml_data.get('recovery_time', 'YYYY-mm-ddTHH:MM:ss')

    # Per-instrument variables
    dataset['serial_number'] = inst.get('serial', 0)
    dataset['InstrDepth'] = inst.get('depth', 0)
    dataset['instrument'] = inst.get('instrument', 'Unknown')
    # The clock offset is only recorded here.  Applying it to the time axis
    # shouldn't really happen until *_use.nc is created.
    dataset['clock_offset'] = inst.get('clock_offset', 0)
    dataset['clock_offset'].attrs['units'] = 's'

    # Instrument start/end default to the mooring deployment/recovery times
    dataset['start_time'] = inst.get('start_time', dataset.attrs['deployment_time'])
    dataset['end_time'] = inst.get('end_time', dataset.attrs['recovery_time'])


def stage1_mooring(mooring_name, basedir=None, output_path=None):
    '''
    Process a single mooring's data: read each raw file, write it as netCDF.

    Parameters
    ----------
    mooring_name : str
        Name of the mooring; used for the YAML, log and output file names.
    basedir : str
        Root data directory.  Required, and must end with a path separator
        because paths are built by plain string concatenation.
    output_path : str, optional
        Directory of this mooring's processed data.  Defaults to
        basedir + 'moor/proc/' + mooring_name.  A value given here is used
        as-is (mooring_name is NOT appended to it).

    Files
    -----
    YAML read from : output_path/<mooring_name>.mooring.yaml
    Raw data from  : basedir + <YAML "directory"> + <instrument>/<filename>
    Output written : output_path/<instrument>/<mooring_name>_<serial>_raw.nc
                     (ADCP matlab files keep a _000/_001/_002 tag from the
                     raw filename between the serial and "_raw")
    Log appended to: output_path/<mooring_name>_<YYYYmmddTHH>_stage1.mooring.log

    An output file that already exists is skipped, not overwritten.
    Instruments without a "filename" in the YAML, or with an unrecognised
    "file_type", are logged and skipped.

    Raises
    ------
    TypeError
        If basedir is not supplied.
    '''
    if basedir is None:
        # Fail early with a clear message instead of "NoneType + str" later on
        raise TypeError("stage1_mooring() requires 'basedir': the root data directory, ending with '/'")
    if output_path is None:
        output_path = basedir + 'moor/proc/' + mooring_name
    moor_yaml = output_path + '/' + mooring_name + '.mooring.yaml'

    # Set up log file (one per mooring per hour; opened in append mode)
    log_time = datetime.now().strftime('%Y%m%dT%H')
    log_file = os.path.join(output_path, f"{mooring_name}_{log_time}_stage1.mooring.log")

    def log_print(*args, **kwargs):
        # Echo to the screen and append the same message to the log file
        print(*args, **kwargs)
        with open(log_file, 'a') as lf:
            print(*args, **kwargs, file=lf)

    log_print(mooring_name)
    with open(moor_yaml, 'r') as f:
        yaml_data = yaml.safe_load(f)

    indir = basedir + yaml_data['directory']
    dir_file = yaml_data['directory']  # relative form, used in log messages only
    for inst in yaml_data['instruments']:
        if 'filename' not in inst:
            fname_for_log2 = dir_file + inst['instrument'] + '/' + str(inst['serial'])
            log_print(f"FILENAME MISSING: Skipping {inst['instrument']}:{fname_for_log2}.  YAML is missing 'filename'.")
            continue

        infile_for_log = dir_file + inst['instrument'] + '/' + inst['filename']
        infile = indir + inst['instrument'] + '/' + inst['filename']
        out_inst_dir = output_path + '/' + inst['instrument'] + '/'
        if not os.path.exists(out_inst_dir):
            os.makedirs(out_inst_dir)
            log_print(f"Created directory: {out_inst_dir}")

        # Pick the reader for this file type
        try:
            reader, remove_vars, remove_coords = _make_reader(inst, indir, infile)
        except AttributeError as e:
            log_print(f"EXCEPT: Error reading file {infile_for_log}: {e}")
            continue

        if reader is None:
            log_print(f"READER: No valid readers:  {infile_for_log}.")
            continue

        # Only ADCP matlab files carry the raw-file tag into the output name
        tag1 = _find_known_tag(inst['filename']) if inst.get('file_type') == 'adcp-matlab' else ''
        output_file = out_inst_dir + mooring_name + "_" + str(inst['serial']) + tag1 + '_raw.nc'
        outfile_for_log = output_file.replace(basedir, '')
        if os.path.exists(output_file):
            # Existing output is kept; delete it by hand to force reprocessing
            log_print(f"OUTFILE EXISTS: Skipping   {outfile_for_log}.")
            continue

        log_print(f"-->   Processing {inst['instrument']}: {infile_for_log}")
        log_print(f"Creating output file: {outfile_for_log}")

        # Read the data into an xarray dataset
        dataset = reader.get_data()
        for var in remove_vars:
            if var in dataset.variables:
                log_print(f"Removing variable: {var}")
                dataset = dataset.drop_vars(var)
        for coord in remove_coords:
            if coord in dataset.coords:
                log_print(f"Removing coordinate: {coord}")
                dataset = dataset.drop_vars(coord)

        # Get parameters from yaml
        _add_yaml_metadata(dataset, yaml_data, inst)

        writer = NetCdfWriter(dataset)
        writer.write(
            output_file,
            optimize=True,
            drop_derived=False,  # drops vars with attrs["derived"] == True (e.g., z)
            uint8_vars=[
                "correlation_magnitude", "echo_intensity", "status", "percent_good",
                "bt_correlation", "bt_amplitude", "bt_percent_good",
            ],
            float32_vars=[  # optional explicit list; float32=True already covers floats generically
                "eastward_velocity", "northward_velocity", "upward_velocity",
                "temperature", "salinity", "pressure", "pressure_std", "depth", "bt_velocity",
            ],
            chunk_time=3600,  # 1-hour chunks if you have ~1 Hz ensembles; adjust as needed
            complevel=5,
            quantize=3,
        )

In [None]:

# Run stage 1 for every mooring currently selected in `moorlist`.
basedir = '/Users/eddifying/Dropbox/data/ifmro_mixsed/ds_data_eleanor/'
# NOTE: output_path is not passed to stage1_mooring below, so each mooring is
# written under stage1_mooring's default location.
output_path = basedir + 'moor/proc/'

for name1 in moorlist:
    stage1_mooring(name1, basedir=basedir)

In [None]:
# Error noted from an earlier run:
#   FileNotFoundError: [Errno 2] No such file or directory: '/Users/eddifying/Dropbox/data/ifmro_mixsed/ds_data_eleanor/moor/raw/msm76_2018/adcp/DSF18_RDI_000_14971_001.mat'

# Quick look at one raw Aquadopp file using the time-series plotter.
# Relies on `basedir` from an earlier cell.
instrument = 'aquadopp'
fname = 'DSC18_477102.dat'

rawdir = f"{basedir}moor/raw/msm76_2018/"
data_dir = f"{rawdir}{instrument}"
filename = f"{data_dir}/{fname}"
# The header file sits next to the data file: same stem, '.hdr' extension
header_file = f"{data_dir}/{fname[0:-4]}.hdr"
print(header_file)
print(filename)

reader = NortekAsciiReader(filename, header_file_path=header_file)
dataset = reader.get_data()

plotter = TimeSeriesPlotter(dataset)
plotter.plot(parameter_name='east_velocity')

# Show the dataset summary as the cell output
dataset

In [None]:
# Quick look at one raw ADCP matlab file: read it and plot the pressure record.
basedir = '/Users/eddifying/Dropbox/data/ifmro_mixsed/ds_data_eleanor/'

instrument = 'adcp'
fname = 'DS0218_RDI_000_24289.mat'

rawdir = f"{basedir}moor/raw/msm76_2018/"
data_dir = f"{rawdir}{instrument}"
filename = f"{data_dir}/{fname}"
print(filename)

reader = AdcpMatlabReader(filename)
dataset = reader.get_data()

# Plain matplotlib is used here rather than TimeSeriesPlotter
# (alternative: TimeSeriesPlotter(dataset).plot(parameter_name='east_velocity'))
plt.plot(dataset['pressure'])

# Show the dataset summary as the cell output
dataset

In [None]:
# Compare two netCDF versions of the same Aquadopp record:
# ds1 is DSC18_905701.nc, ds2 is the stage-1 output dsC_1_2018_9057_raw.nc.
#
# `basedir` keeps the same value as in the other cells (data root, without
# 'moor/'); cells further down build their paths as basedir + 'moor/...'.
basedir = '/Users/eddifying/Dropbox/data/ifmro_mixsed/ds_data_eleanor/'
procdir = 'moor/proc/dsC_1_2018/'

instrument = 'aquadopp'
fname = 'DSC18_905701.nc'
fname2 = 'dsC_1_2018_9057_raw.nc'

ds1 = xr.open_dataset(basedir + procdir + instrument + '/' + fname)
ds2 = xr.open_dataset(basedir + procdir + instrument + '/' + fname2)

# Overlay the two records; the dotted red line is the stage-1 file
plt.plot(ds1.time, ds1.east_velocity, label='east_velocity')
plt.plot(ds2.time, ds2.east_velocity, ':r', label='east_velocity2')
plt.legend()  # the labels above are only shown once a legend is drawn

In [None]:
# Display the summary of `ds2` (opened in the comparison cell above) as the cell output.
ds2

In [None]:

# Load every netCDF file in one processed instrument directory, list its
# contents, and plot + save the temperature record where there is one.

# Defined here so the cell does not depend on which earlier cell last set it
basedir = '/Users/eddifying/Dropbox/data/ifmro_mixsed/ds_data_eleanor/'

# Example file names in this directory (not used by the loop below)
filenames = ['dsM4_1_2017_7513_raw.nc', 'dsM4_1_2017_7517_raw.nc']

proc_dir = basedir + 'moor/proc/dsM4_1_2017/microcat/'
for file in sorted(os.listdir(proc_dir)):  # sorted: stable processing order
    if not file.endswith('.nc'):
        continue
    file_path = os.path.join(proc_dir, file)
    print(f"Loading netCDF file: {file_path}")

    # The context manager closes each file before the next one is opened
    with xr.open_dataset(file_path) as dataset:
        print(list(dataset.data_vars))  # Print variable names in the dataset
        print(list(dataset.attrs))

        if 'temperature' not in dataset.data_vars:
            continue

        # Convert the scalar variables to plain strings once, for the labels
        mooring_label = dataset.attrs['mooring_name']
        instrument_label = str(dataset['instrument'].values)
        serial_label = str(dataset['serial_number'].values)

        fig, ax = plt.subplots(figsize=(8, 2))
        ax.plot(dataset['time'], dataset['temperature'], label='Temperature')
        ax.set_ylabel('Temperature (°C)')
        ax.set_title(f"{mooring_label}: {instrument_label} {serial_label}")
        ax.set_xlabel('Time')
        ax.legend()
        ax.grid()

        # Apply the layout BEFORE saving so the PNG matches what is shown
        fig.tight_layout()
        # The serial number is part of the file name; without it, instruments
        # of the same type on one mooring would overwrite each other's plot.
        fig.savefig(f"{mooring_label}_{instrument_label}_{serial_label}_T.png")
        plt.show()
