# 1 - Adjust base data

NetCDF data have already been combined and mapped to consistent depths. This notebook imports the data, truncates them to the desired depth range, checks for data gaps, and segments the dataset accordingly. Finally, the data are rotated according to site, small gaps are interpolated, and the series are filtered to obtain mean currents and residual flows. Output is written in NetCDF format for further processing.

## Imports

Import modules and data.

In [None]:
# import modules

import xarray as xr
import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.dates as pldates
import numpy as np
import scipy.signal as sig
import pandas as pd
# Activate the interactive 'notebook' matplotlib backend (IPython magic, only valid inside Jupyter).
# NOTE(review): the magic is issued twice - presumably a workaround for the backend not
# activating on the first call; confirm before simplifying.
for i in range(2):
    %matplotlib notebook

In [None]:
# import 'raw' data
#
# Opens the combined NetCDF dataset for the chosen instrument and sets:
#   ds    - the xarray dataset
#   name  - label for the data source
#   adcp2 - source instrument of the 1-min 'Shoulder' data (None for every other choice)
#   year  - year of the 1-min files to load ('Shoulder' only)

adcp = 'Shoulder'     # Slope, Axis75, Axis55, Shoulder
season = 'Annual'  # Annual, Winter, Spring, Summer, Fall

# Later cells test adcp2 for every value of adcp, so it must always exist. Resetting it here
# also prevents a stale value from an earlier 'Shoulder' run leaking into a non-Shoulder run.
adcp2 = None

if adcp == 'Slope':
    ds = xr.open_dataset(f'../Data/{adcp}All/{adcp}_all.nc')
    name = 'Slope'
elif adcp in ('Axis75', 'Axis55'):
    ds = xr.open_dataset(f'../Data/AxisAll/{adcp}/{adcp}_all.nc')
    name = 'Axis'   # for consistent naming between Axis 75 and 55 kHz
elif adcp == 'Shoulder':
    year = 2013
    adcp2 = 'Axis75'         # Slope, Axis75, Axis55
    if adcp2 == 'Slope':
        name = 'Slope (1-min)'   # for consistent naming
    elif adcp2 in ('Axis75', 'Axis55'):
        name = 'Axis (1-min)'
    else:
        raise ValueError(f"Unknown adcp2 '{adcp2}'; expected 'Slope', 'Axis75' or 'Axis55'.")
    # one file set per instrument and year; join='override' is passed through to xarray as before
    ds = xr.open_mfdataset(f'../Data/QA/1min/{adcp2}_{year}/*.nc',join='override')
else:
    # fail here with a clear message instead of a NameError on `ds` further down
    raise ValueError(f"Unknown adcp '{adcp}'; expected 'Slope', 'Axis75', 'Axis55' or 'Shoulder'.")

print(ds)

In [None]:
# plot dataset to check data quality

# fig,ax = plt.subplots(figsize=(13,5))

# if adcp2 == 'Axis75':
#     im = ax.pcolormesh(ds.time, -ds.depth, ds.u[0,0,:,:].T, rasterized=True, cmap='RdBu_r', vmin=-0.15, vmax=0.15, shading='auto')
# else:
#     im = ax.pcolormesh(ds.time, -ds.depth, ds.u.T, rasterized=True, cmap='RdBu_r', vmin=-0.15, vmax=0.15, shading='auto')
# cbar = fig.colorbar(im, ax=ax, fraction=0.05, pad=0.01, aspect=40, extend='both')
# cbar.set_label('Velocity [m/s]')
# ax.set_xlabel('Time')
# ax.set_ylabel('Depth [m]')
# ax.set_title('Velocity data')
# date_form = pldates.DateFormatter("%m")
# ax.xaxis.set_major_formatter(date_form)
# #ax.xaxis.set_major_locator(plt.MaxNLocator(6))

# plt.show()

## Depth

Truncate data to a specific depth interval to eliminate unreliable data, based on data quality checks regarding correlation and backscatter intensity.

In [None]:
# process to find nearby indices for desired depth values

def find_nearest(array, value):
    """Return the index of the entry in ``array`` that lies closest to ``value``.

    ``array`` is converted with ``np.asarray``. Closeness is the absolute difference,
    and ``argmin`` picks the first index when several entries are equally close.
    """
    offsets = np.abs(np.asarray(array) - value)
    nearest = offsets.argmin()
    return nearest

array = ds.depth         # input array to process

# Desired depth limits in metres. The 1-min 'Shoulder' data take their limits from the source
# instrument (adcp2); adcp2 is only consulted in that case, so other runs neither need it to be
# defined nor get their limits overridden by a value left over from an earlier run.
if adcp == 'Shoulder':
    if adcp2 == 'Slope':
        upval, lowval = 0, 400
    elif adcp2 == 'Axis75':
        upval, lowval = 580, 1000
    elif adcp2 == 'Axis55':
        upval, lowval = 10, 1000
    else:
        raise ValueError(f"Unknown adcp2 '{adcp2}'; expected 'Slope', 'Axis75' or 'Axis55'.")
elif adcp == 'Slope':
    upval, lowval = 0, 400        # upper / lower depth for data (upper slope)
elif adcp in ('Axis75', 'Axis55'):
    upval, lowval = 0, 1000       # upper / lower depth for data (axis)
else:
    raise ValueError(f"Unknown adcp '{adcp}'; expected 'Slope', 'Axis75', 'Axis55' or 'Shoulder'.")

upidx = find_nearest(array, upval)                 # index of upper depth cutoff
lowidx = find_nearest(array,lowval)                # index of lower depth cutoff

print("Index at upper depth cutoff:",upidx,"/ Value at upper depth cutoff:",-ds.depth.values[upidx],"metres")
print("Index at lower depth cutoff:",lowidx,"/ Value at lower depth cutoff:",-ds.depth.values[lowidx],"metres")

if adcp == 'Shoulder':
    # 1-min data are sliced from the lower-cutoff index up to the upper-cutoff index
    # NOTE(review): presumably the depth axis of these files runs in the opposite order - confirm
    depth = np.array(ds.depth[lowidx:upidx+1])         # remove unwanted depths
    dup_stamp = int(-depth[-1])                        # depth stamps for use in output filenames
    dlow_stamp = int(-depth[0])
else:
    depth = np.array(ds.depth[upidx:lowidx+1])         # remove unwanted depths
    dup_stamp = int(-depth[0])                         # depth stamps for use in output filenames
    dlow_stamp = int(-depth[-1])
print("Length of new depth array: ",len(depth),'/ Upper limit:',dup_stamp,'metres','/ Lower limit:',dlow_stamp,'metres')   # new depth interval

## Time

Find specific time range and format dates. If significant *consecutive* NaN values are present, then split time series into segments for analysis processing.

In [None]:
# set date range
#
# Finds the positions of the first and last samples inside [start_date, end_date) and sets:
#   start     - index of the first sample in range
#   end       - EXCLUSIVE stop index (one past the last sample in range), for use as [start:end]
#   time_test - the timestamps in range
#   t_stamp   - year string used in output filenames

year = 2013      # for 'Shoulder' data this should be the same year that was used to load the files

datestimes = pd.to_datetime(ds.time.values)                          # convert to datetime from datetime64
datestimes = pd.Series(datestimes)                                   # convert to pandas Series

start_date = dt.datetime(year,1,1)                                   # input start date in YYYY,MM,DD
end_date = dt.datetime(year+1,1,1)                                   # input end date in YYYY,MM,DD

on_or_after_start = datestimes[datestimes >= start_date]
before_end = datestimes[datestimes < end_date]
if on_or_after_start.empty or before_end.empty:
    raise ValueError(f'No data between {start_date} and {end_date}.')

start = on_or_after_start.index[0]                                   # first sample in range
# +1 turns the index of the last in-range sample into a slice stop; without it every
# [start:end] slice silently drops the final sample of the interval
end = before_end.index[-1] + 1
if end <= start:
    raise ValueError(f'No data between {start_date} and {end_date}.')

time_test = ds.time.values[start:end]                                # test desired interval
t_stamp = f'{datestimes.dt.year[start]}'                             # set year time stamp for output filenames
print("Desired time range:",np.min(time_test),np.max(time_test))     # print to check desired interval

In [None]:
# check time series for significant data gaps
#
# A single depth bin is scanned for runs of consecutive NaNs that are at least `limit` samples
# long. The series is cut at those runs, and the remaining stretches are stored as segments in
# u_seg / v_seg / t_seg. Shorter NaN runs stay inside the segments.

def _good_data_bounds(is_nan, min_gap):
    """Return flat [start0, stop0, start1, stop1, ...] bounds of the data between long gaps.

    is_nan  : 1-D boolean sequence, True where the sample is missing.
    min_gap : a run of at least this many consecutive True values counts as a gap.

    Each (start, stop) pair is a half-open index range usable as a slice. Empty ranges
    (e.g. when the series begins or ends with a gap) are left out.
    """
    is_nan = np.asarray(is_nan, dtype=bool)
    # pad with False so that runs touching either end of the series still produce both edges
    padded = np.concatenate(([False], is_nan, [False]))
    edges = np.diff(padded.astype(np.int8))
    run_starts = np.flatnonzero(edges == 1)        # index of the first NaN of every run
    run_stops = np.flatnonzero(edges == -1)        # one past the last NaN of every run
    long_gap = (run_stops - run_starts) >= min_gap
    seg_starts = np.concatenate(([0], run_stops[long_gap]))
    seg_stops = np.concatenate((run_starts[long_gap], [is_nan.size]))
    bounds = []
    for seg_start, seg_stop in zip(seg_starts, seg_stops):
        if seg_stop > seg_start:
            bounds.extend([int(seg_start), int(seg_stop)])
    return bounds

# source instrument of the 1-min data; None for the regular datasets, so adcp2 is never
# consulted (or required to exist) unless adcp is 'Shoulder'
shoulder_src = adcp2 if adcp == 'Shoulder' else None

if adcp == 'Slope' or shoulder_src == 'Slope':
    depth_test = ds.depth[lowidx-15]        # mid depth for NaN testing
elif shoulder_src == 'Axis55':
    depth_test = ds.depth[10]
else:
    depth_test = ds.depth[15]
print('Checked at depth',-depth_test.values,'m')

if shoulder_src == 'Axis75':
    u_test = np.array(ds.u[0,0,start:end,15])            # u data at this depth
    # hard-coded index range that is forced to count as missing
    # NOTE(review): presumably known-bad data in one particular year - confirm it should apply to all
    u_test[187000:194000] = np.nan
elif shoulder_src == 'Axis55' and year == 2017:
    u_test = np.array(ds.u[start:end,10])
elif shoulder_src == 'Axis55' and year == 2018:
    u_test = np.array(ds.u[0,0,start:end,10])
else:
    u_test = np.array(ds.u[start:end,lowidx-15])

if adcp == 'Shoulder':
    limit=1500                                         # minimum length (samples) of a significant gap
else:
    limit=100

# flat list of slice bounds [start0, stop0, start1, stop1, ...] of the good-data segments
nan_list = _good_data_bounds(np.isnan(u_test), limit)

for seg_start, seg_stop in zip(nan_list[0::2], nan_list[1::2]):
    if seg_start > 0:
        print('Good data resets at:',time_test[seg_start])
    if seg_stop < len(time_test):
        print('Good data until:',time_test[seg_stop-1])

if shoulder_src == 'Slope':
    u_test = np.array(ds.u[start:end,lowidx:upidx+1])      # total u for interval and depth, to chop
    v_test = np.array(ds.v[start:end,lowidx:upidx+1])      # total v for interval and depth, to chop
elif shoulder_src in ('Axis55', 'Axis75'):
    u_test = np.array(ds.u[0,0,start:end,lowidx:upidx+1])      # total u for interval and depth, to chop
    v_test = np.array(ds.v[0,0,start:end,lowidx:upidx+1])      # total v for interval and depth, to chop
else:
    # use the same depth range as `depth` (ds.depth[upidx:lowidx+1]) so columns and depth labels agree
    u_test = np.array(ds.u[start:end,upidx:lowidx+1])      # total u for interval and depth, to chop
    v_test = np.array(ds.v[start:end,upidx:lowidx+1])      # total v for interval and depth, to chop

u_seg = []                                             # list of u segment arrays
v_seg = []                                             # list of v segment arrays
t_seg = []                                             # list of segment time intervals

for seg_start, seg_stop in zip(nan_list[0::2], nan_list[1::2]):
    u_seg.append(np.array(u_test[seg_start:seg_stop,:]))      # arrays of good u data
    v_seg.append(np.array(v_test[seg_start:seg_stop,:]))      # arrays of good v data
    t_seg.append(np.array(time_test[seg_start:seg_stop]))     # and their time intervals

n_seg = len(u_seg)
print('Number of segments:',n_seg)
for i in range(n_seg):
    print(f'Length of segment {i}:',len(t_seg[i]))

In [None]:
# plot specified time intervals to check data quality

# fig,ax = plt.subplots(figsize=(13,5))
# for i in range(n_seg):
#     im = ax.pcolormesh(t_seg[i], -depth, u_seg[i].T, rasterized=True, cmap='RdBu_r', vmin=-0.15, vmax=0.15, shading='auto')
# cbar = fig.colorbar(im, ax=ax, fraction=0.05, pad=0.01, aspect=40, extend='both')
# ax.set_xlim(start_date,end_date)
# cbar.set_label('Velocity [m/s]')
# ax.set_xlabel('Time')
# ax.set_ylabel('Depth [m]')
# ax.set_title('Velocity data')
# plt.show()

## Adjust
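Upper Slope data are rotated by 30$^{\circ}$, a visual estimate of the along-slope angle. For Axis, the orientation follows the lower canyon topography; no rotation is applied in the code because the y-direction is already fairly along-canyon. Small gaps (NaN values) are then linearly interpolated, and the data are filtered with an 8th-order digital Butterworth filter: a 40 h low-pass for the standard datasets, to extract the mean currents and residual (tidal) flows, or a band-pass ($10^{-4}$ to $10^{-3}$ Hz) for the 1-min 'Shoulder' data.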

In [None]:
# process to rotate, interpolate, and filter raw data (no significant data gaps)
#
# For every segment: rotate the Slope velocities, linearly interpolate short NaN runs at each
# depth, then apply a zero-phase Butterworth filter. Results are collected per segment in
# uorig / vorig (rotated + interpolated) and ulp / vlp (filtered).

uorig,ulp,vorig,vlp = [],[],[],[]   # empty lists for adjusted data

# instrument the velocities come from; adcp2 is only consulted for the 1-min 'Shoulder' data
site = adcp2 if adcp == 'Shoulder' else adcp
if site not in ('Slope', 'Axis75', 'Axis55'):
    raise ValueError(f"Unknown data source '{site}'; expected 'Slope', 'Axis75' or 'Axis55'.")

# settings that do not depend on the segment are computed once, before the loop
if adcp == 'Shoulder':
    limit2 = 1500                            # max consecutive NaNs filled from each side
    fs1 = 1.667e-2                           # samples per SECOND (1-min data)
    order = 8
    btype1 = 'bandpass'
    fc = [1e-4,1e-3]                         # pass band in Hz
else:
    limit2 = 100
    fs1 = 1.111e-3                           # samples per SECOND for entire time series (15-min data)
    order = 8
    fc = 6.944e-6                            # 1/(40 h) in Hz
    btype1 = 'lowpass'

sos = sig.butter(N=order, Wn=fc,btype=btype1, fs=fs1, output='sos')   # digital butterworth filter
w, h = sig.sosfreqz(sos)                     # to plot filter response

theta_along_slope = np.radians(-30)          # rotation angle in radians, -30 degrees to rotate AXES not velocities
d = len(depth)                               # number of depth data points after removing unwanted depths

for i in range(n_seg):

    # rotate Upper Slope data (y is already fairly along-canyon for Axis)

    if site == 'Slope':
        u_old = u_seg[i]
        v_old = v_seg[i]
        u_rot = u_old*np.cos(theta_along_slope) - v_old*np.sin(theta_along_slope)               # u_new = Re(rotated vector)
        v_rot = u_old*np.sin(theta_along_slope) + v_old*np.cos(theta_along_slope)               # v_new = Im(rotated vector)
    else:
        u_rot = u_seg[i]
        v_rot = v_seg[i]

    t = len(t_seg[i])                        # number of time data points in this segment

    uorig_temp = np.empty([t,d])             # interpolated & rotated u data
    vorig_temp = np.empty([t,d])             # interpolated & rotated v data
    ulp_temp = np.empty([t,d])               # filtered u values
    vlp_temp = np.empty([t,d])               # filtered v values

    for j in range(d):                       # one depth at a time keeps peak memory low
        # fill small NaN gaps by linear interpolation
        uorig_temp[:,j] = pd.Series(u_rot[:,j]).interpolate(method="linear", limit = limit2, limit_direction='both')
        vorig_temp[:,j] = pd.Series(v_rot[:,j]).interpolate(method="linear", limit = limit2, limit_direction='both')

        # forward-backward (zero-phase) filtering; the input column is not modified
        ulp_temp[:,j] = sig.sosfiltfilt(sos, uorig_temp[:,j])
        vlp_temp[:,j] = sig.sosfiltfilt(sos, vorig_temp[:,j])

    uorig.append(uorig_temp)                  # append cleaned data to lists
    vorig.append(vorig_temp)
    ulp.append(ulp_temp)
    vlp.append(vlp_temp)
    
# if adcp == 'Shoulder' and adcp2 == 'Axis75' and year == 2013:
#     uorig[0][-5000:,:] = np.nan
#     vorig[0][-5000:,:] = np.nan
#     uorig[1][165000:171000,:] = np.nan
#     vorig[1][165000:171000,:] = np.nan
# if adcp == 'Shoulder' and adcp2 == 'Axis55' and year == 2017:
#     uorig[0][165000:171000,:] = np.nan
#     vorig[0][165000:171000,:] = np.nan

In [None]:
# plot filter response

#plt.semilogx(w*fs/(2*np.pi),abs(h))
#plt.semilogx(w/np.pi,abs(h))

In [None]:
# plot specified time intervals to check data quality

# fig,ax = plt.subplots(figsize=(13,5))
# for i in range(n_seg):
#     im = ax.pcolormesh(t_seg[i], -depth, uorig[i].T, rasterized=True, cmap='RdBu_r', vmin=-0.15, vmax=0.15, shading='auto')
# cbar = fig.colorbar(im, ax=ax, fraction=0.05, pad=0.01, aspect=40, extend='both')
# ax.set_xlim(start_date,end_date)
# cbar.set_label('Velocity [m/s]')
# ax.set_xlabel('Time')
# ax.set_ylabel('Depth [m]')
# ax.set_title('Velocity data')
# date_form = pldates.DateFormatter("%m/%d")
# ax.xaxis.set_major_formatter(date_form)
# if adcp2 == 'Axis75' and t_stamp == '2013':
#     ax.set_xlim(np.datetime64(f'{t_stamp}-01-01'),np.datetime64(f'{t_stamp}-12-30'))
#     #ax.set_xlim(None,None)
# else:
#     ax.set_xlim(None,None)
# plt.show()

## Save
Save key values and arrays to NetCDF format using xarray.

In [None]:
# save to .nc files
#
# Writes one NetCDF file per segment with the adjusted (uorig, vorig) and filtered (ulp, vlp)
# velocities on (t_seg, depth), plus the processing parameters as global attributes.

for i in range(n_seg):
    ds_out = xr.Dataset(
        data_vars=dict(
            uorig=(['t_seg','depth'], uorig[i]),    # adjusted data
            vorig=(['t_seg','depth'], vorig[i]),
            ulp=(['t_seg','depth'], ulp[i]),        # filtered data
            vlp=(['t_seg','depth'], vlp[i]),
        ),
        coords=dict(
            depth=depth,                 # depth values
            t_seg=t_seg[i],              # datetime values in segments
        ),
        attrs=dict(
            description=f'Adjusted data for {adcp} {t_stamp} segment {i} (from 0 to {n_seg - 1}).',
            adcp=adcp,                   # adcp
            upidx=upidx,                 # upper depth index
            lowidx=lowidx,               # lower depth index
            dup_stamp=dup_stamp,         # upper depth stamp
            dlow_stamp=dlow_stamp,       # lower depth stamp
            t_stamp=t_stamp,             # year stamp
            start_date=f'{start_date}',  # start date
            end_date=f'{end_date}',      # end date
            # taken from THIS segment; a variable left over from the processing loop would
            # hold the length of the last segment for every file
            t=len(t_seg[i]),             # length of time series
            d=len(depth),                # length of depth series
            n_seg=n_seg,                 # total number of segments
        ),
    )
    if adcp == 'Shoulder':
        out_path = f'../Data/data/adj/adj_{adcp2}_1min_{t_stamp}_{i}.nc'
    else:
        out_path = f'../Data/data/adj/adj_{adcp}_{t_stamp}_{i}.nc'
    ds_out.to_netcdf(out_path)