### L0 and L1 processing of the glider data

Removing spikes and outliers

In [1]:
import matplotlib.pyplot as plt

# Notebook-wide matplotlib defaults: figure size and inward-pointing ticks.
plt.rcParams.update({
    'figure.figsize': (18, 9),
    'ytick.direction': 'in',
    'xtick.direction': 'in',
})

# Font settings are kept in a named dict so they stay available as `font`.
font = dict(family='Arial', weight='ultralight', size=13)
plt.rc('font', **font)

# Tick lengths (major/minor) and the default line width.
plt.rcParams.update({
    'ytick.major.size': 6,
    'xtick.major.size': 6,
    'ytick.minor.size': 4,
    'xtick.minor.size': 4,
    'lines.linewidth': 2,
})

In [2]:
%pylab inline
import glidertools as gt
import process_seaglider_data_L1 as ps
import process_slocum_data_L1 as psl
import my_functions as my

Populating the interactive namespace from numpy and matplotlib


### Subantarctic Zone Seaglider

In [3]:
# Execute the SAZ reader script inside this notebook's namespace. The next
# statement uses `dat_saz`, which is presumably defined by this script
# (it is not created anywhere else in the notebook) - TODO confirm.
%run read_saz_glider_data.py

# Pass the loaded SAZ Seaglider data through the Level 1 routine of the local
# `process_seaglider_data_L1` module and keep the result as `dat_saz_L1`.
dat_saz_L1 = ps.process_seaglider_data_L1(dat_saz)


DIMENSION: ctd_data_point
{temperature_raw, latitude, salinity_raw, salinity_qc, temperature_qc, ctd_time, ctd_depth, longitude, ctd_pressure}


100%|██████████| 485/485 [00:14<00:00, 32.92it/s]


/Users/marcel/Google Drive/Projects/buoyancy_flux_paper/data/gliders/SG542_SAZ_2019/p5420082.nc was skipped


Cannot merge data - not enough time indexed DataFrames
Returning unmerged dataframes
Mean bin size = 5.00
Mean depth binned (50 m) vertical sampling frequency = 0.62
Mean bin size = 5.00
Mean depth binned (50 m) vertical sampling frequency = 0.62


In [4]:
# Density of the SAZ Level 1 product, computed by the local helper module.
dens = my.dens(
    dat_saz_L1.salt,
    dat_saz_L1.temp,
    dat_saz_L1.depth,
    dat_saz_L1.lat,
    dat_saz_L1.lon,
    dat_saz_L1.time,
)

# Mixed layer depth for three density thresholds (0.01, 0.03 and 0.10),
# evaluated in that order on the transposed density field.
# NOTE: `dens` and `mld_*` are re-used for the other gliders further down,
# so the cell that consumes them must run before those later cells.
mld_01, mld_03, mld_10 = (
    my.calc_mld(dens.T, dat_saz_L1.depth, den_lim=threshold)
    for threshold in (0.01, 0.03, 0.10)
)

MLD not calculated: profile 958. Setting to NaN
MLD not calculated: profile 959. Setting to NaN
MLD not calculated: profile 960. Setting to NaN
MLD not calculated: profile 961. Setting to NaN
MLD not calculated: profile 962. Setting to NaN
MLD not calculated: profile 963. Setting to NaN


Calculate secondary variables: density and mixed layer depth (MLD) are computed from the Level 1 salinity and temperature data using the `GliderTools` Python package.

Assign secondary variables to a new `xarray dataset`  

In [5]:
# Build the SAZ Level 2 dataset: a copy of Level 1 plus the derived fields.
# Density is stored on (depth, time); each MLD estimate is stored on time.
dat_saz_L2 = dat_saz_L1.copy().assign(
    density=(("depth", "time"), dens),
    mld_01=("time", mld_01),
    mld_03=("time", mld_03),
    mld_10=("time", mld_10),
)

Assigning the metadata for the SAZ Seaglider

In [6]:
from datetime import date

# Global attributes for the SAZ Seaglider Level 2 product.
#
# Fix: the time-coverage keys were misspelled 'time_coverate_start/_end';
# they are now written as 'time_coverage_start/_end', the ACDD attribute
# names, so standard tools can find them in the saved file.
#
# All values are stored as strings. The vertical limits are fixed values,
# not derived from the depth coordinate. The time coverage is taken from the
# first and last entries of `time`; the lat/lon limits come from min()/max().
dat_saz_L2.attrs.update({
    'instrument':              'Seaglider',
    'owner':                   'Southern Ocean Carbon and Climate Observatory',
    'contact':                 'sarahanne.n@gmail.com',
    'processing_date':         str(date.today()),
    'processing_level':        '2',
    'time_coverage_start':     str(dat_saz_L2.time[0].values),
    'time_coverage_end':       str(dat_saz_L2.time[-1].values),
    'geospatial_vertical_min': '0',
    'geospatial_vertical_max': '1000',
    'geospatial_lat_min':      str(dat_saz_L2.lat.min().values),
    'geospatial_lat_max':      str(dat_saz_L2.lat.max().values),
    'geospatial_lon_min':      str(dat_saz_L2.lon.min().values),
    'geospatial_lon_max':      str(dat_saz_L2.lon.max().values),
})

### Antarctic Polar Zone Slocum

In [7]:
# Execute the APZ reader script inside this notebook's namespace. Later cells
# use `dat_apz`, which is presumably defined by this script - TODO confirm.
%run read_apz_glider_data.py

In [8]:
# Re-declare position and density on the 'time' dimension.
# NOTE(review): this cell rebinds and modifies `dat_apz`, so it is not safe to
# run twice - the drop below will no longer find 'dim_0' on a second run.
dat_apz['latitude'] = ('time', dat_apz['latitude'])
dat_apz['longitude'] = ('time', dat_apz['longitude'])
dat_apz['density'] = ('time', dat_apz['density'])

# Remove the leftover 'dim_0' entry now that the variables live on 'time'.
dat_apz = dat_apz.drop(labels=['dim_0'])

# Depth from pressure and latitude; the absolute value makes it positive.
# `np` comes from `%pylab inline`; `gsw` is not imported in this notebook and
# presumably comes from the reader script's namespace - TODO confirm.
dat_apz['ctd_depth'] = (
    'time',
    np.abs(gsw.z_from_p(dat_apz['ctd_pressure'], dat_apz['latitude'])),
)

# Disabled: optional filter keeping only dives numbered above 84.
# i = dat_apz.dives>84
# dat_apz = dat_apz.where(i, drop=True)

In [9]:
# Pass the prepared APZ Slocum data through the Level 1 routine of the local
# `process_slocum_data_L1` module and keep the result as `dat_apz_L1`.
dat_apz_L1 = psl.process_slocum_data_L1(dat_apz)

Mean bin size = 5.00
Mean depth binned (50 m) vertical sampling frequency = 0.12
Mean bin size = 5.00
Mean depth binned (50 m) vertical sampling frequency = 0.12


Calculate secondary variables: density and mixed layer depth (MLD) are computed from the Level 1 salinity and temperature data using the `GliderTools` Python package.

In [10]:
# Density of the APZ Level 1 product, computed by the local helper module.
dens = my.dens(
    dat_apz_L1.salt,
    dat_apz_L1.temp,
    dat_apz_L1.depth,
    dat_apz_L1.lat,
    dat_apz_L1.lon,
    dat_apz_L1.time,
)

# Mixed layer depth for three density thresholds (0.01, 0.03 and 0.10),
# evaluated in that order on the transposed density field.
# NOTE: this overwrites the `dens` / `mld_*` values of the previous glider.
mld_01, mld_03, mld_10 = (
    my.calc_mld(dens.T, dat_apz_L1.depth, den_lim=threshold)
    for threshold in (0.01, 0.03, 0.10)
)

Assign secondary variables to a new `xarray dataset`  

In [11]:
# Build the APZ Level 2 dataset: a copy of Level 1 plus the derived fields.
# Density is stored on (depth, time); each MLD estimate is stored on time.
dat_apz_L2 = dat_apz_L1.copy().assign(
    density=(("depth", "time"), dens),
    mld_01=("time", mld_01),
    mld_03=("time", mld_03),
    mld_10=("time", mld_10),
)

In [12]:
from datetime import date

# Global attributes for the APZ Slocum Level 2 product.
#
# Fix: the time-coverage keys were misspelled 'time_coverate_start/_end';
# they are now written as 'time_coverage_start/_end', the ACDD attribute
# names, so standard tools can find them in the saved file.
#
# All values are stored as strings. The vertical limits are fixed values,
# not derived from the depth coordinate. The time coverage is taken from the
# first and last entries of `time`; the lat/lon limits come from min()/max().
dat_apz_L2.attrs.update({
    'instrument':              'Webb Teledyne Slocum',
    'owner':                   'Southern Ocean Carbon and Climate Observatory',
    'contact':                 'sarahanne.n@gmail.com',
    'processing_date':         str(date.today()),
    'processing_level':        '2',
    'time_coverage_start':     str(dat_apz_L2.time[0].values),
    'time_coverage_end':       str(dat_apz_L2.time[-1].values),
    'geospatial_vertical_min': '0',
    'geospatial_vertical_max': '1000',
    'geospatial_lat_min':      str(dat_apz_L2.lat.min().values),
    'geospatial_lat_max':      str(dat_apz_L2.lat.max().values),
    'geospatial_lon_min':      str(dat_apz_L2.lon.min().values),
    'geospatial_lon_max':      str(dat_apz_L2.lon.max().values),
})

### Marginal Ice Zone Seaglider

In [13]:
# Execute the MIZ reader script inside this notebook's namespace. Later cells
# use `dat_miz`, which is presumably defined by this script - TODO confirm.
%run read_miz_glider_data.py


DIMENSION: ctd_data_point
{temperature_raw, latitude, salinity_raw, salinity_qc, temperature_qc, ctd_time, ctd_depth, longitude, ctd_pressure}


100%|██████████| 497/497 [00:14<00:00, 35.23it/s]


/Users/marcel/Google Drive/Projects/buoyancy_flux_paper/data/gliders/SG643/p6430039.nc was skipped


Cannot merge data - not enough time indexed DataFrames
Returning unmerged dataframes


Run the Level 1 processing script

In [14]:
# Pass the loaded MIZ Seaglider data through the Level 1 routine of the local
# `process_seaglider_data_L1` module and keep the result as `dat_miz_L1`.
dat_miz_L1 = ps.process_seaglider_data_L1(dat_miz)

Mean bin size = 5.00
Mean depth binned (50 m) vertical sampling frequency = 0.56
Mean bin size = 5.00
Mean depth binned (50 m) vertical sampling frequency = 0.56


Calculate secondary variables: density and mixed layer depth (MLD) are computed from the Level 1 salinity and temperature data using the `GliderTools` Python package.

In [15]:
# Density of the MIZ Level 1 product, computed by the local helper module.
dens = my.dens(
    dat_miz_L1.salt,
    dat_miz_L1.temp,
    dat_miz_L1.depth,
    dat_miz_L1.lat,
    dat_miz_L1.lon,
    dat_miz_L1.time,
)

# Mixed layer depth for three density thresholds (0.01, 0.03 and 0.10),
# evaluated in that order on the transposed density field.
# NOTE: this overwrites the `dens` / `mld_*` values of the previous glider.
mld_01, mld_03, mld_10 = (
    my.calc_mld(dens.T, dat_miz_L1.depth, den_lim=threshold)
    for threshold in (0.01, 0.03, 0.10)
)

MLD not calculated: profile 347. Setting to NaN
MLD not calculated: profile 484. Setting to NaN
MLD not calculated: profile 347. Setting to NaN
MLD not calculated: profile 484. Setting to NaN
MLD not calculated: profile 347. Setting to NaN
MLD not calculated: profile 484. Setting to NaN


Assign secondary variables to a new `xarray dataset`  

In [16]:
# Build the MIZ Level 2 dataset: a copy of Level 1 plus the derived fields.
# Density is stored on (depth, time); each MLD estimate is stored on time.
dat_miz_L2 = dat_miz_L1.copy().assign(
    density=(("depth", "time"), dens),
    mld_01=("time", mld_01),
    mld_03=("time", mld_03),
    mld_10=("time", mld_10),
)

In [17]:
from datetime import date

# Global attributes for the MIZ Seaglider Level 2 product.
#
# Fix: the time-coverage keys were misspelled 'time_coverate_start/_end';
# they are now written as 'time_coverage_start/_end', the ACDD attribute
# names, so standard tools can find them in the saved file.
#
# All values are stored as strings. The vertical limits are fixed values,
# not derived from the depth coordinate. The time coverage is taken from the
# first and last entries of `time`; the lat/lon limits come from min()/max().
dat_miz_L2.attrs.update({
    'instrument':              'Hydroid Seaglider',
    'owner':                   'University of Gothenburg',
    'contact':                 'sebastiaan.swart@marine.gu.se',
    'processing_date':         str(date.today()),
    'processing_level':        '2',
    'time_coverage_start':     str(dat_miz_L2.time[0].values),
    'time_coverage_end':       str(dat_miz_L2.time[-1].values),
    'geospatial_vertical_min': '0',
    'geospatial_vertical_max': '1000',
    'geospatial_lat_min':      str(dat_miz_L2.lat.min().values),
    'geospatial_lat_max':      str(dat_miz_L2.lat.max().values),
    'geospatial_lon_min':      str(dat_miz_L2.lon.min().values),
    'geospatial_lon_max':      str(dat_miz_L2.lon.max().values),
})

Make temperature–salinity (T–S) plots to check for outliers in the data that the cleaning methods did not remove.

In [18]:
# Disabled diagnostic: flatten each Level 2 dataset by stacking its 'time' and
# 'depth' dimensions into a single dimension 'z'. Uncomment to enable.
# saz_stacked = dat_saz_L2.stack(z=('time', 'depth'))
# apz_stacked = dat_apz_L2.stack(z=('time', 'depth'))
# miz_stacked = dat_miz_L2.stack(z=('time', 'depth'))

In [19]:
# Disabled diagnostic: one salinity-vs-temperature scatter panel per glider,
# coloured by depth on a 0-1000 colour scale. It needs the `*_stacked`
# datasets from the (also disabled) stacking cell. Uncomment to enable.
# fig, ax = subplots(1, 3, figsize=[15, 5])

# ax[0].scatter(saz_stacked.salt, saz_stacked.temp, 0.1, c=saz_stacked.depth, vmin=0, vmax=1000)
# ax[1].scatter(apz_stacked.salt, apz_stacked.temp, 0.1, c=apz_stacked.depth, vmin=0, vmax=1000)
# ax[2].scatter(miz_stacked.salt, miz_stacked.temp, 0.1, c=miz_stacked.depth, vmin=0, vmax=1000)


Save all data as NetCDF

In [20]:
import os

# Directory that receives the NetCDF products. It defaults to the original
# location; set the GLIDER_DATA_DIR environment variable to write elsewhere
# without editing this cell (the absolute path used to be repeated nine times).
data_dir = os.environ.get(
    'GLIDER_DATA_DIR',
    '/Users/marcel/Google Drive/Projects/buoyancy_flux_paper/data',
)

# (file prefix, datasets ordered L0, L1, L2) for each glider.
# NOTE: the APZ "L0" dataset is `dat_apz` as modified earlier in this notebook
# (variables re-dimensioned, 'dim_0' dropped, 'ctd_depth' added), not the
# untouched output of the reader script.
glider_outputs = [
    ('sg542_saz',  (dat_saz, dat_saz_L1, dat_saz_L2)),
    ('slocum_apz', (dat_apz, dat_apz_L1, dat_apz_L2)),
    ('sg643_miz',  (dat_miz, dat_miz_L1, dat_miz_L2)),
]

# Write <prefix>_L<level>.nc for every glider and processing level, in the
# same order as before (SAZ, APZ, MIZ; L0, L1, L2 within each).
for file_prefix, datasets_by_level in glider_outputs:
    for level_number, level_ds in enumerate(datasets_by_level):
        out_name = '{}_L{}.nc'.format(file_prefix, level_number)
        level_ds.to_netcdf(os.path.join(data_dir, out_name))

In [21]:
# Quick look at the spacing between consecutive entries of the APZ Level 2
# `time` coordinate: times are cast to minute resolution before differencing,
# so the displayed values are in minutes.
np.diff(dat_apz_L2.time.values.astype('datetime64[m]'))

array([ 216,   45,  233,   27,  231,   16,  230,  105,  250,   15,  258,
         14,  371,   17,  254,  324,  213,   50,  256,  658,  198,   21,
        193,  238,   55,   17,  100,   23,   97,  125,   98,   26,   97,
         27,   42,  309,   81,   35,   62,   26,   55, 1386,  121,   15,
        134,   24,  132,   30,  129,   15,  259,   21,  261,   23,  263,
        141,  237,   20,  231,   18,  247,   18,  237,   27,  270,   25,
        275,   16,  269,   23,  265,   20,  259,   26,  134,   18,  269,
         18,  274,   21,  269,   18,  264,   18,  270,   17,  266,   18,
        270,   23,  271,   54,  271,   57,  278,   16,  266,   47,  266,
         18,  270,   18,  266,   17,  269,   16,  268,   17,  266,   21,
        268,   18,  261,   18,  257,   17,  276,   17,  262,   19,  258,
         17,  262,   24,  265,   17,  267,   17,  286,   18,  270,   17,
        271,   15,  264,   20,  262,   24,  267,   18,  263,   29,  268,
         17,  268,   19,  280,   15,  273,   29,  2