## Download and clean up near-real-time Wave Glider data

* read in data
* sort time and remove redundant records
* add lat/lon to ADCP files
* add depth vector to ADCP files
* save cleaned up ADCP files

first cut by Tom, 10/18/2021  
Updated for IOP1, 10/9/2022

In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import cftime
import requests
import cartopy.crs as ccrs                   # import projections
import cartopy
import gsw
import functions  # requires functions.py from this directory

In [2]:
# %matplotlib inline
%matplotlib qt5
plt.rcParams['figure.figsize'] = (7,4)
plt.rcParams['figure.dpi'] = 200
plt.rcParams['savefig.dpi'] = 400
plt.close('all')

__figdir__ = '../plots/' 
savefig_args = {'bbox_inches':'tight', 'pad_inches':0.2}
plotfiletype='png'

In [3]:
# Region / contour-level settings; `zoom` switches between the two views
savefig = True
zoom = True

# Longitude limits are the same for both views; only latitude and levels differ
xmin, xmax = (-127, -121)
if zoom:
    ymin, ymax = (36.25, 38.5)
    levels = np.linspace(14,17,21)-2.5
else:
    ymin, ymax = (35, 41)
    levels = np.linspace(13,18,11)

    

Payload 2 Table 1 has met, ctd variables  
Payload 2 Table 2 has RDI variables

In [4]:
# List of WGs: names as they appear in the server file names ...
input_list = [
    'WHOI-ASL22',
    'WHOI-ASL32',
    'SV3-1043',
    'STOKES',
    'PLANCK',
    'PASCAL',
    'KELVIN',
    'CARSON',
]
# ... and the matching short names used for variables in this notebook (same order)
WG_list = [
    'WHOI22',
    'WHOI32',
    'WHOI43',
    'STOKES',
    'PLANCK',
    'PASCAL',
    'KELVIN',
    'CARSON',
]

# Remote file = url_prefix + <input name> + one of the postfixes below
url_prefix = 'http://smode.whoi.edu:8080/thredds/fileServer/IOP1_2022/waveglider/'
tab1_postfix = '_PLD2_TAB1.nc'
tab2_postfix = '_PLD2_TAB2.nc'
position_postfix = '_position.nc'

# Local directory that receives the downloaded files and the cleaned ADCP files
outpath = '../data/raw/WG_NRT/'

In [5]:
# For some reason, reading the files over the internet directly is not working well
# Download instead

def _download_file(url, outfile, timeout=60):
    '''
    Download url and write the response body to outfile.

    Input: url, address of the remote file
           outfile, local path to write
           timeout, seconds to wait for the server before giving up
    Output: none (raises requests.HTTPError if the server returns an error status)
    '''
    response = requests.get(url, timeout=timeout)
    # Without this check an HTTP error page would be saved as if it were a netCDF file
    response.raise_for_status()
    # create the file in write binary mode, because the data we get from net is in binary
    with open(outfile, "wb") as file:
        file.write(response.content)


if len(input_list) != len(WG_list):
    raise ValueError('input_list and WG_list must have the same length')

file_list1 = []  # local paths of the Payload 2 Table 1 files
file_list2 = []  # local paths of the Payload 2 Table 2 files
file_list3 = []  # local paths of the position files
n = 0            # number of WGs downloaded so far
for n, (WG, input_WG) in enumerate(zip(WG_list, input_list), start=1):
    outfile1 = outpath+input_WG+tab1_postfix
    outfile2 = outpath+input_WG+tab2_postfix
    outfile3 = outpath+input_WG+position_postfix
    url1 = url_prefix+input_WG+tab1_postfix
    url2 = url_prefix+input_WG+tab2_postfix
    url3 = url_prefix+input_WG+position_postfix
    # Read and save table 1, table 2 and position files
    for url, outfile in ((url1, outfile1), (url2, outfile2), (url3, outfile3)):
        _download_file(url, outfile)
    print(url3)
    file_list1.append(outfile1)
    file_list2.append(outfile2)
    file_list3.append(outfile3)


http://smode.whoi.edu:8080/thredds/fileServer/IOP1_2022/waveglider/WHOI-ASL22_position.nc
http://smode.whoi.edu:8080/thredds/fileServer/IOP1_2022/waveglider/WHOI-ASL32_position.nc
http://smode.whoi.edu:8080/thredds/fileServer/IOP1_2022/waveglider/SV3-1043_position.nc
http://smode.whoi.edu:8080/thredds/fileServer/IOP1_2022/waveglider/STOKES_position.nc
http://smode.whoi.edu:8080/thredds/fileServer/IOP1_2022/waveglider/PLANCK_position.nc
http://smode.whoi.edu:8080/thredds/fileServer/IOP1_2022/waveglider/PASCAL_position.nc
http://smode.whoi.edu:8080/thredds/fileServer/IOP1_2022/waveglider/KELVIN_position.nc
http://smode.whoi.edu:8080/thredds/fileServer/IOP1_2022/waveglider/CARSON_position.nc


In [6]:
# Show the local paths of the downloaded Payload 2 Table 2 files
file_list2

['../data/raw/WG_NRT/WHOI-ASL22_PLD2_TAB2.nc',
 '../data/raw/WG_NRT/WHOI-ASL32_PLD2_TAB2.nc',
 '../data/raw/WG_NRT/SV3-1043_PLD2_TAB2.nc',
 '../data/raw/WG_NRT/STOKES_PLD2_TAB2.nc',
 '../data/raw/WG_NRT/PLANCK_PLD2_TAB2.nc',
 '../data/raw/WG_NRT/PASCAL_PLD2_TAB2.nc',
 '../data/raw/WG_NRT/KELVIN_PLD2_TAB2.nc',
 '../data/raw/WG_NRT/CARSON_PLD2_TAB2.nc']

In [7]:
def fix_ds_time(ds):
    '''
    Keep one record per time stamp and put the records in time order.

    np.unique returns the sorted unique times together with the index of
    the first occurrence of each; selecting those indices along time
    removes repeated records and sorts what remains.

    Input: ds, xarray dataset
    Output: ds, xarray dataset
    '''
    _, first_idx = np.unique(ds.time, return_index=True)
    return ds.isel(time=first_idx, drop=True)

In [8]:
# Number of Wave Gliders handled by the download cell
n

8

In [9]:
# Read in files (Payload 2 Tables 1 and 2; position) from all WG
n=0
for WG in WG_list:
    input_WG=input_list[n]
    file1 = file_list1[n]
    file2 = file_list2[n]
    file3 = file_list3[n]
    varstr = 'met_'+WG
    ds_met_temp=xr.open_dataset(file1,decode_times=True)
    locals()[varstr]=fix_ds_time(ds_met_temp) #Drop nonunique values and sort time
    varstr = 'adcp_'+WG
    !ncrename -v z,z_matrix $file2 #renaming variable z to prevent dimension/variable name conflict in xarray, requires nco in linux
    ds_adcp_temp=xr.open_dataset(file2,decode_times=True)
    locals()[varstr]=fix_ds_time(ds_adcp_temp) #Drop nonunique values and sort time
    varstr = 'pos_'+WG
    ds_pos_temp=xr.open_dataset(file3,decode_times=True)
    locals()[varstr]=fix_ds_time(ds_pos_temp) #Drop nonunique values and sort time
    n=n+1
    print(file1)

../data/raw/WG_NRT/WHOI-ASL22_PLD2_TAB1.nc
../data/raw/WG_NRT/WHOI-ASL32_PLD2_TAB1.nc
../data/raw/WG_NRT/SV3-1043_PLD2_TAB1.nc
../data/raw/WG_NRT/STOKES_PLD2_TAB1.nc
../data/raw/WG_NRT/PLANCK_PLD2_TAB1.nc
../data/raw/WG_NRT/PASCAL_PLD2_TAB1.nc
../data/raw/WG_NRT/KELVIN_PLD2_TAB1.nc
../data/raw/WG_NRT/CARSON_PLD2_TAB1.nc


In [10]:
# Inspect the met dataset of the last WG processed (WG keeps its final loop value)
eval('met_'+WG)

In [11]:
# Write WHOI22 met record to file
# met_WHOI22.to_netcdf('../data/raw/WG_NRT/WHOI22_met.nc')


In [12]:
# Now we can access these in a loop using syntax like:
# eval('adcp_'+WG_list[7])

In [13]:
# Inspect the met dataset of the first WG in WG_list
eval('met_'+WG_list[0])

In [14]:
#Compute salinity and density (sigma0) from T and cond, and add them to each met dataset
p = 1  # pressure handed to the gsw routines (presumably dbar, near surface -- confirm)
for WG in WG_list:
    varstr = 'met_'+WG
    ds = globals()[varstr]
    # NOTE(review): the factor of 10 presumably converts S/m to the mS/cm gsw expects -- confirm units
    conductivity = 10*ds.uctd_cond_Avg
    ds['uctd_psu_Avg'] = gsw.conversions.SP_from_C(conductivity, ds.uctd_temp_Avg, p)
    SA = gsw.conversions.SA_from_SP(
        ds.uctd_psu_Avg,
        1,
        ds.longitude_1hz_Avg,
        ds.latitude_1hz_Avg,
    )
    CT = gsw.conversions.CT_from_t(SA, ds.uctd_temp_Avg, p)
    ds['uctd_sigma0_Avg'] = gsw.density.sigma0(SA, CT)
    globals()[varstr] = ds

OK, now let's look at RDI files (Table 2)

OK, the ADCP files have 15-minute records and the position files have 5-minute records, so interpolate the positions onto the ADCP times.  That should be easy with xarray's `interp` method, following:  
https://docs.xarray.dev/en/stable/user-guide/interpolation.html  

```
new_lon = -126.1
new_lat = 37.1
new_time = ds.time[-3]
dsi = ds.interp(time=new_time,latitude=new_lat, longitude=new_lon)
```

```
new_time = ds_adcp.time
ds_pos_i = ds_pos.interp(time=new_time)
```

In [15]:
# Interpolate each WG's position to ADCP time and add to ADCP file
for WG in WG_list:
    varstr = 'adcp_'+WG
    ds_adcp = globals()[varstr]
    ds_pos = globals()['pos_'+WG]
    # Position record evaluated at the ADCP time stamps
    ds_pos_i = ds_pos.interp(time=ds_adcp.time)
    for name in ('Longitude', 'Latitude'):
        ds_adcp[name] = ds_pos_i[name]
    globals()[varstr] = ds_adcp
    del ds_adcp

  imin = index.get_loc(minval, method="nearest")
  imax = index.get_loc(maxval, method="nearest")


OK, that's very cool!  I have all the files cleaned up and have added the lat/lon.  Let's save the cleaned up files for met and adcp.  First, add z for adcp files.

In [16]:
# Add a 1-D depth vector to each ADCP dataset and write it to outpath
for WG in WG_list:
    ds_adcp = globals()['adcp_'+WG]
    fout = outpath + 'adcp_'+WG + '.nc'
    # Positions along the second axis where row 1 of z_matrix is not NaN
    ind = np.flatnonzero(~np.isnan(ds_adcp.z_matrix[1][:]))
    # Take the z_matrix column at the first such position as the depth vector
    depth = ds_adcp.z_matrix[:,ind[0]]
    if WG=='PASCAL':
        # Special case because PASCAL has 600 kHz RDI
        depth = depth/2
    ds_adcp['depth'] = depth
    ds_adcp.to_netcdf(fout)



In [17]:
# List the variables currently defined in the notebook namespace
%whos

Variable           Type              Data/Info
----------------------------------------------
CT                 DataArray         <xarray.DataArray 'uctd_p<...>0 ... 2022-10-26T23:30:08
SA                 DataArray         <xarray.DataArray 'uctd_p<...>0 ... 2022-10-26T23:30:08
WG                 str               CARSON
WG_list            list              n=8
adcp_CARSON        Dataset           <xarray.Dataset>\nDimensi<...>1 (Homepage = http://n...
adcp_KELVIN        Dataset           <xarray.Dataset>\nDimensi<...>1 (Homepage = http://n...
adcp_PASCAL        Dataset           <xarray.Dataset>\nDimensi<...>1 (Homepage = http://n...
adcp_PLANCK        Dataset           <xarray.Dataset>\nDimensi<...>1 (Homepage = http://n...
adcp_STOKES        Dataset           <xarray.Dataset>\nDimensi<...>1 (Homepage = http://n...
adcp_WHOI22        Dataset           <xarray.Dataset>\nDimensi<...>1 (Homepage = http://n...
adcp_WHOI32        Dataset           <xarray.Dataset>\nDimensi<...>1 (Homepag

In [18]:
# Inspect ds_adcp, which still holds the last ADCP dataset from the save loop
ds_adcp

In [19]:
# Time vs. z section of current_north from CARSON's ADCP
vmin = -0.5  # colour limits for the velocity shading
vmax = 0.5
ds = adcp_CARSON

fig, ax = plt.subplots()
# Also makes 'turbo' the default colormap for any later figures
plt.set_cmap(cmap=plt.get_cmap('turbo'))
im = ax.pcolor(ds.time.values, ds.z_matrix, ds.current_north, vmin=vmin, vmax=vmax)
ax.set_ylim(-60, 0)
ax.set_ylabel('z_matrix')
ax.set_title(' Carson North vel')
# Without a colorbar the shading cannot be read as a velocity
fig.colorbar(im, ax=ax, label='current_north')
fig.autofmt_xdate()


In [20]:
# Last time stamp in the dataset plotted above (ds = adcp_CARSON)
ds.time[-1]