# IMPORT PACKAGES

In [1]:
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import vector_tools as vt
import khFunctions as khf
from datetime import timedelta
import re

# IMPORT AND CLEAN DATA

In [2]:
#Build datasets from the .dat, .vhd and .sen files in the format used by
#Wheeler and Giddings 2023, so that their functions can be applied directly
datfile1, vhdfile1, senfile1 = 'ADV/DEP204.dat', 'ADV/DEP204.vhd', 'ADV/DEP204.sen'
datfile2, vhdfile2, senfile2 = 'ADV/DEP205.dat', 'ADV/DEP205.vhd', 'ADV/DEP205.sen'

#Passed to vector_to_ds as `fs` (presumably the sampling frequency in Hz -- confirm)
fs = 32

#The same time window is applied to every time-like dimension of a deployment
trim_dims = ('time', 'time_sen', 'time_start')

#First deployment: raw dataset -> flagged dataset -> trimmed dataset
adv1 = vt.vector_to_ds(datfile1, vhdfile1, senfile1, fs)
adv1_flagged = vt.vectorFlag(adv1)

#Drop the bad times during adv deployment and recovery
window1 = slice('2022-08-02T11:00:00.000000000', '2022-08-11T11:00:00.000000000')
adv1_cleaned = adv1_flagged.sel({dim: window1 for dim in trim_dims})

#Second deployment: identical steps with its own trim window
adv2 = vt.vector_to_ds(datfile2, vhdfile2, senfile2, fs)
adv2_flagged = vt.vectorFlag(adv2)

window2 = slice('2022-08-15T09:00:00.000000000', '2022-08-30T11:00:00.000000000')
adv2_cleaned = adv2_flagged.sel({dim: window2 for dim in trim_dims})

Importing data
Creating timelines
Creating xarray dataset
Assigning dataset attributes
Flagging data
Flagging sensor data
Importing data
Creating timelines
Creating xarray dataset
Assigning dataset attributes
Flagging data
Flagging sensor data


In [102]:
#Add in metadata from .hdr file and project information

def _read_header_attrs(header_path, max_entries=36):
    """Parse `name   value` pairs from a .hdr file into a dict of attributes.

    Each line is stripped of its newline, has parentheses removed and
    'Gyro/Accel' shortened to 'Gyro' (presumably so the names are valid
    netCDF attribute names -- confirm), and is then split wherever two or
    more whitespace characters occur. Lines that do not split into at least
    two fields are ignored.

    Parameters
    ----------
    header_path : str
        Path of the .hdr file to read.
    max_entries : int
        Only the first `max_entries` parsed lines are kept (36 by default,
        the cut-off this notebook has always used).

    Returns
    -------
    dict
        Attribute name -> value. Values are stored as int when the text is a
        plain integer and as str otherwise.
    """
    entries = []
    with open(header_path) as f:
        for raw_line in f:
            line = raw_line.strip('\n')
            line = line.replace('(', '').replace(')', '')
            line = line.replace('Gyro/Accel', 'Gyro')
            fields = re.split(r'\s{2,}', line)
            #Keep relevant vars, drop empty spaces
            if len(fields) >= 2:
                entries.append(fields)

    attrs = {}
    for fields in entries[:max_entries]:
        #Only a failed integer parse falls back to text; any other error
        #is allowed to surface instead of being silently swallowed
        try:
            attrs[str(fields[0])] = int(fields[1])
        except ValueError:
            attrs[str(fields[0])] = str(fields[1])
    return attrs


def _with_metadata(cleaned, header_path, description):
    """Return a deep copy of `cleaned` carrying project and .hdr metadata.

    The input dataset is left untouched, so this cell can be re-run safely.
    """
    out = cleaned.copy(deep=True)
    out.attrs['description'] = description
    out.attrs['Creator'] = 'Logan Grady'
    out.attrs['Contact information'] = 'loganagrady@gmail.com'
    out.attrs.update(_read_header_attrs(header_path))
    return out


#First deployment. `ds` is the name the export cell writes to adv1_all.nc;
#`ds1` is kept as an alias because earlier versions of this cell defined it.
ds = _with_metadata(adv1_cleaned, 'ADV/DEP204.hdr',
                    'First deployment of ADV in Stillwater Cove')
ds1 = ds

#Second deployment. Previously this required un-commenting lines and
#re-running the cell by hand; both datasets are now built in one pass.
ds2 = _with_metadata(adv2_cleaned, 'ADV/DEP205.hdr',
                     'Second deployment of ADV in Stillwater Cove')

ds

In [9]:
#Write the cleaned, metadata-tagged dataset of each deployment to netCDF
for cleaned_ds, nc_path in ((ds, 'ADV/adv1_all.nc'), (ds2, 'ADV/adv2_all.nc')):
    cleaned_ds.to_netcdf(nc_path)

In [2]:
#Reload the cleaned datasets written by the export cell (deployment 1, then 2)
adv1, adv2 = (xr.open_dataset(nc_path)
              for nc_path in ('ADV/adv1_all.nc', 'ADV/adv2_all.nc'))

# DESPIKING DATA

In [None]:
#Despike both deployments with the expanding ellipsoid method from Wheeler & Giddings 2023

#Settings shared by both ProcessVec calls
badSections = [] #Left empty: bad sections were already removed during data import
reverse = False #Flag for reversing direction after rotation if needed

#Run the expanding threshold despiking algorithm and clean data with bad SNR/Correlation.
#'ProcessVec' from Wheeler & Giddings has been modified to work with this dataset,
#but the core algorithm remains unchanged.
adv1Despiked, adv2Despiked = (vt.ProcessVec(cleaned, badSections, reverse)
                              for cleaned in (adv1, adv2))

In [37]:
#Write the despiked dataset of each deployment to netCDF
for despiked_ds, nc_path in ((adv1Despiked, 'ADV/adv1_despiked.nc'),
                             (adv2Despiked, 'ADV/adv2_despiked.nc')):
    despiked_ds.to_netcdf(nc_path)

# Repairing data gaps
- Three methods
    - Full linear interpolation across all gaps
    - Partial interpolation over gaps <= 1s and filling longer gaps with the average of the removed data
    - Partial interpolation over gaps <= 1s and patching longer gaps

In [4]:
#Reload the despiked datasets from disk (deployment 1, then 2)
adv1Despiked, adv2Despiked = (xr.open_dataset(nc_path)
                              for nc_path in ('ADV/adv1_despiked.nc', 'ADV/adv2_despiked.nc'))

In [22]:
#Method 1: linear interpolation across every gap, applied to deployment 1 then 2
adv1Int, adv2Int = (vt.fullInterpVec(despiked)
                    for despiked in (adv1Despiked, adv2Despiked))

Linearly interpolating dataset
Evaluating ratio of nans leftover in the dataset
Linearly interpolating dataset
Evaluating ratio of nans leftover in the dataset


In [8]:
#Interpolate gaps <= 1s and patch the remaining gaps, for deployment 1 then 2
adv1Patch, adv2Patch = (vt.patchVec(despiked)
                        for despiked in (adv1Despiked, adv2Despiked))

Interpolating gaps <= 1s
Evaluating ratio of nans leftover in the dataset
Interpolating gaps <= 1s
Evaluating ratio of nans leftover in the dataset


In [None]:
#Interpolate gaps <= 1s and average the remaining gaps, for deployment 1 then 2
adv1IntAvg, adv2IntAvg = (vt.interpAvgVec(despiked)
                          for despiked in (adv1Despiked, adv2Despiked))

# Wavenumber integral Jlm
- Takes equation A13 from Gerbi et al. (2009) and applies it to the despiked and repaired datasets

In [None]:
#Wavenumber integral for every gap-repair method, deployment 1 then 2 in each pair

#Full linear interpolation
Jlm1Int, Jlm2Int = vt.JlmIntegral(adv1Int), vt.JlmIntegral(adv2Int)

#Short-gap interpolation + patching
Jlm1Patch, Jlm2Patch = vt.JlmIntegral(adv1Patch), vt.JlmIntegral(adv2Patch)

#Short-gap interpolation + averaging
Jlm1IntAvg, Jlm2IntAvg = vt.JlmIntegral(adv1IntAvg), vt.JlmIntegral(adv2IntAvg)

In [2]:
#Add Jlm to corresponding datasets: each gap-repaired dataset receives the
#'J_33' result computed from that same dataset, stored under the name 'J33'

#Deployment 1
adv1Int['J33'] = Jlm1Int['J_33']
adv1Patch['J33'] = Jlm1Patch['J_33']
adv1IntAvg['J33'] = Jlm1IntAvg['J_33']

#Deployment 2
#(previously adv2Int was given the deployment-1 integral and then overwritten
# with the Patch integral, while adv2Patch never received a J33 at all)
adv2Int['J33'] = Jlm2Int['J_33']
adv2Patch['J33'] = Jlm2Patch['J_33']
adv2IntAvg['J33'] = Jlm2IntAvg['J_33']

In [4]:
#Write the organized, cleaned, despiked, gap-repaired datasets (wavenumber
#integrals included) to netCDF, one file per deployment and repair method
final_outputs = (
    (adv1Int, 'ADV/adv1Int.nc'),
    (adv2Int, 'ADV/adv2Int.nc'),
    (adv1Patch, 'ADV/adv1Patch.nc'),
    (adv2Patch, 'ADV/adv2Patch.nc'),
    (adv1IntAvg, 'ADV/adv1IntAvg.nc'),
    (adv2IntAvg, 'ADV/adv2IntAvg.nc'),
)
for repaired_ds, nc_path in final_outputs:
    repaired_ds.to_netcdf(nc_path)

In [2]:
#Reload each gap-repaired dataset and attach its wavenumber integral from CSV

def _open_with_j33(nc_path, jlm_csv_path):
    """Open a netCDF dataset and add the CSV's J_33 column as variable 'J33'.

    The column is attached along the 'burst' dimension, so the CSV is
    expected to hold one row per burst of the dataset.
    """
    dataset = xr.open_dataset(nc_path)
    dataset['J33'] = (['burst'], pd.read_csv(jlm_csv_path).J_33)
    return dataset

#NOTE(review): these file names (adv1_Interp.nc, ...) differ from the ones the
#export cell writes (adv1Int.nc, ...) -- confirm which set is current.

#Import deployment 1 data
adv1Int = _open_with_j33('ADV/adv1_Interp.nc', 'ADV/Jlm1Int.csv')
adv1Patch = _open_with_j33('ADV/adv1_Patched.nc', 'ADV/Jlm1Patch.csv')
adv1IntAvg = _open_with_j33('ADV/adv1_IntAverage.nc', 'ADV/Jlm1IntAvg.csv')

#Import deployment 2 data
#(the Patch integral used to be written into adv2Int, overwriting its own J33
# and leaving adv2Patch without one)
adv2Int = _open_with_j33('ADV/adv2_Interp.nc', 'ADV/Jlm2Int.csv')
adv2Patch = _open_with_j33('ADV/adv2_Patched.nc', 'ADV/Jlm2Patch.csv')
adv2IntAvg = _open_with_j33('ADV/adv2_IntAverage.nc', 'ADV/Jlm2IntAvg.csv')

## If phase wrapping is present
- Convert velocities from ENU to beam by using the transformation matrix in .hdr files
    - The following code is based on a MATLAB script available on NORTEK's FAQ forums:
        https://support.nortekgroup.com/hc/en-us/articles/360029820971-How-is-a-coordinate-transformation-done-
        - Most relevant information is available in the .hdr file
        - You will need the transformation matrix, as well as heading, pitch, and roll data for each sample to make the conversions
    - Once velocities have been converted, calculate the ambiguous velocity V_amb
    - Run a patch over the entire BEAM velocity dataset
        - If phase wrap is negative: newvel = oldvel + 2*V_amb
        - If phase wrap is positive: newvel = oldvel - 2*V_amb
    - Convert patched velocities back to ENU for more user-friendly data

In [52]:
# Transformation matrix located in .hdr file as 'Transformation matrix'

T = np.array([[2.7249, -1.3770, -1.3503], #Convert matrix to multidimensional numpy array
   [-0.0161, 2.3442, -2.3308],
   [0.3472, 0.3455, 0.3389]])

# Heading, pitch and roll are the angles output in the data in degrees
# Convert to radians (heading is offset by 90 degrees first, as in the NORTEK script)
hh = np.pi*(adv1_flagged['Heading']-90)/180 # One value of hh, pp and rr per entry of the source variable
pp = np.pi * (adv1_flagged['Pitch']/180)
rr = np.pi * (adv1_flagged['Roll']/180)

# Number of velocity samples. len() of an xarray Dataset counts its data
# variables rather than its samples, so the size is taken from a velocity variable.
n_samples = adv1_flagged['Velocity_East(m/s)'].size

# Output arrays for the beam velocities, allocated once at full length.
# They start as NaN so any sample that is never converted is obvious
# instead of holding whatever np.empty happened to leave in memory.
beam1 = np.full(n_samples, np.nan)
beam2 = np.full(n_samples, np.nan)
beam3 = np.full(n_samples, np.nan)

In [None]:
# Calculate heading matrix and tilt matrix for each data point and convert ENU velocities to beam velocities

def _enu_to_beam(enu, heading_rad, pitch_rad, roll_rad, transform):
    """Convert a block of ENU velocities to beam velocities.

    For every sample the heading matrix H and tilt matrix P are built from the
    angles, the conversion matrix R = H.P.T is formed as a MATRIX product, and
    R.beam = enu is solved for beam (equivalent to inv(R).enu).

    Parameters
    ----------
    enu : array, shape (n, 3)
        East, North and Up velocity of each sample.
    heading_rad, pitch_rad, roll_rad : array, shape (n,)
        Angles in radians; heading already offset by 90 degrees.
    transform : array, shape (3, 3)
        Instrument transformation matrix from the .hdr file.

    Returns
    -------
    array, shape (n, 3)
        Beam 1, 2 and 3 velocity of each sample.
    """
    enu = np.asarray(enu, dtype=float)
    heading_rad = np.asarray(heading_rad, dtype=float)
    pitch_rad = np.asarray(pitch_rad, dtype=float)
    roll_rad = np.asarray(roll_rad, dtype=float)

    zero = np.zeros_like(heading_rad)
    one = np.ones_like(heading_rad)
    ch, sh = np.cos(heading_rad), np.sin(heading_rad)
    cp, sp = np.cos(pitch_rad), np.sin(pitch_rad)
    cr, sr = np.cos(roll_rad), np.sin(roll_rad)

    # Inner stacks build matrix rows, the outer stack assembles (n, 3, 3)
    H = np.stack([np.stack([ch, sh, zero], axis=-1),
                  np.stack([-sh, ch, zero], axis=-1),
                  np.stack([zero, zero, one], axis=-1)], axis=-2)

    P = np.stack([np.stack([cp, -sp*sr, -cr*sp], axis=-1),
                  np.stack([zero, cr, -sr], axis=-1),
                  np.stack([sp, sr*cp, cp*cr], axis=-1)], axis=-2)

    # `*` on NumPy arrays multiplies element by element; the conversion
    # matrix needs the matrix product, hence `@`
    R = H @ P @ np.asarray(transform, dtype=float)

    # One linear solve per sample instead of three explicit inverses
    return np.linalg.solve(R, enu[..., np.newaxis])[..., 0]


# Plain NumPy views of the inputs: indexing xarray objects one element at a
# time is very slow
east = np.asarray(adv1_flagged['Velocity_East(m/s)'])
north = np.asarray(adv1_flagged['Velocity_North(m/s)'])
up = np.asarray(adv1_flagged['Velocity_Up(m/s)'])
hh_all, pp_all, rr_all = np.asarray(hh), np.asarray(pp), np.asarray(rr)

# Number of samples comes from the velocity record; len() of an xarray
# Dataset would give the number of data variables instead
n_samples = east.shape[0]

# NOTE(review): the conversion pairs sample i of the velocities with sample i
# of the angles. If the angles are recorded on a different time base they
# must be resampled onto the velocity times first -- confirm.
if not (hh_all.shape[0] == pp_all.shape[0] == rr_all.shape[0] == n_samples):
    raise ValueError(
        'Heading/Pitch/Roll have %d/%d/%d samples but the velocities have %d; '
        'resample the angles onto the velocity time base before converting.'
        % (hh_all.shape[0], pp_all.shape[0], rr_all.shape[0], n_samples))

# Outputs are allocated here so this cell does not depend on the size chosen
# in an earlier cell
beam1 = np.full(n_samples, np.nan)
beam2 = np.full(n_samples, np.nan)
beam3 = np.full(n_samples, np.nan)

# Convert in blocks so the per-sample 3x3 matrices never fill memory at once
rows_per_block = 1000000
for start in range(0, n_samples, rows_per_block):
    print('Currently on row:', start) # Progress check every 1000000 rows
    stop = min(start + rows_per_block, n_samples)
    enu_block = np.column_stack([east[start:stop], north[start:stop], up[start:stop]])
    beam_block = _enu_to_beam(enu_block, hh_all[start:stop], pp_all[start:stop],
                              rr_all[start:stop], T)
    beam1[start:stop] = beam_block[:, 0]
    beam2[start:stop] = beam_block[:, 1]
    beam3[start:stop] = beam_block[:, 2]

### Calculate v_amb
 - v_amb = VR * 2
 - VR = c/(4 * f * tlag(s))
 - c = 1530 (speed of sound measured by instrument)
 - f = instrument frequency (6000kHz)
 - tlag (for vector) = 50/480000 (50 is from system 38 in .hdr file, which indicates nominal velocity of 1m/s)