In [1]:
%matplotlib ipympl
import pandas as pd
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from scipy.signal import welch, periodogram
import os
import matplotlib.dates as mdates
import utide

from physoce import tseries as ts
from tseries import lombscargle
from scipy.stats import chi2

### Load data

#### Rover

In [2]:
# Load data
rover_csv_file = 'data/Rover_II_Current_Mag_Hourly_Avg_pad_2018.csv'
df_all = pd.read_csv(rover_csv_file,parse_dates=[7])

# Datetime index
df_all = df_all.set_index('Date_time_R')

# Create u and v components with units m/s
df_all['u'] = df_all['Easting (cm/sec)']/100
df_all['v'] = df_all['Northing (cm/sec)']/100

# mask erroneous points 
# abrupt shift near end of deployment 
bi = ((df_all.index > np.datetime64('2015-06-18T12:00')) &
    (df_all.index < np.datetime64('2015-06-20T00:00')))

df_all['u'][bi] = np.nan
df_all['v'][bi] = np.nan

# select time range
t1 = np.datetime64('2014-10-11T00:00:00')
t2 = np.datetime64('2018-10-17T00:00:00')
df_all = df_all.loc[(df_all.index >= t1) & (df_all.index <= t2)]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [3]:
# tidal analysis
time = mdates.date2num(df_all.index)

coef = utide.solve(time, np.array(df_all['u']), 
         np.array(df_all['v']),
         lat=35+8.4585/60,
         method='ols')

solve: matrix prep ... solution ... diagnostics ... done.


In [4]:
# tide = utide.reconstruct(time,coef)

In [5]:
# df_all['u_tide'] = tide.u
# df_all['v_tide'] = tide.v
# df_all['u_detide'] = df_all['u'] - df_all['u_tide']
# df_all['v_detide'] = df_all['v'] - df_all['v_tide']

### Resample to hourly values

#### Define date range

In [6]:
# t1 = np.datetime64('2017-11-12T00:00:00')
# t2 = np.datetime64('2018-10-17T00:00:00')
# dff = dfr.loc[(dfr.index >= t1) & (dfr.index <= t2)].interpolate(limit=2)

In [7]:
# resample and interpolate
df_all_r = df_all.resample('1H').mean()

# Calculate tidal currents at hourly points
time_r = mdates.date2num(df_all_r.index)
tide_r = utide.reconstruct(time_r,coef)
df_all_r['u_tide'] = tide_r.u
df_all_r['v_tide'] = tide_r.v
df_all_r['u_detide'] = df_all_r['u'] - df_all_r['u_tide']
df_all_r['v_detide'] = df_all_r['v'] - df_all_r['v_tide']

df_all_int = df_all_r.interpolate(limit=int(24*3.5))

df_all_int['u'] = df_all_int['u_detide'] + df_all_int['u_tide']
df_all_int['v'] = df_all_int['v_detide'] + df_all_int['v_tide']

df_all_int['u_f'] = ts.pl64(df_all_int['u_detide'])
df_all_int['v_f'] = ts.pl64(df_all_int['v_detide'])

df_all_int['u_super'] = df_all_int['u'] - df_all_int['u_f'] - df_all_int['u_tide']
df_all_int['v_super'] = df_all_int['v'] - df_all_int['v_f'] - df_all_int['v_tide']

prep/calcs ... done.


  return x[reverse].conj()
  in1zpadded[sc] = in1.copy()


### Subset with no gaps
November 2015-October 2018

In [8]:
# select time range
t1s = np.datetime64('2015-11-11T13:00:00')
t2s = np.datetime64('2018-10-14T07:00:00')
df_sub = df_all_int.loc[(df_all_int.index >= t1s) & (df_all_int.index <= t2s)]

### Initial plots

### Full velocity

In [9]:
plt.figure(figsize=(7.5,4))
plt.plot(df_all.index,df_all['u'])

FigureCanvasNbAgg()

[<matplotlib.lines.Line2D at 0x1c1f4533c8>]

In [10]:
plt.figure(figsize=(7.5,4))
plt.plot(df_all_int.index,df_all_int['u_super'])
plt.plot(df_all_int.index,df_all_int['u_f'])

FigureCanvasNbAgg()

[<matplotlib.lines.Line2D at 0x1c20c86d30>]

In [11]:
plt.figure(figsize=(7.5,4))
plt.plot(df_all_int.index,df_all_int['u_super'])
plt.plot(df_all_int.index,df_all_int['u_tide'])

FigureCanvasNbAgg()

[<matplotlib.lines.Line2D at 0x1c1f3ef438>]

In [12]:
plt.figure(figsize=(7.5,4))
plt.plot(df_all_int.index,df_all_int['u_super'])
plt.plot(df_all_int.index,df_all_int['v_super'])

FigureCanvasNbAgg()

[<matplotlib.lines.Line2D at 0x109f3ab00>]

In [13]:
plt.figure(figsize=(7.5,4))
plt.plot(df_sub.index,df_sub['u_f'])

FigureCanvasNbAgg()

[<matplotlib.lines.Line2D at 0x109f45dd8>]

### Spectral analysis

In [14]:
u = np.array(df_sub['u'])
v = np.array(df_sub['v'])

N = len(u)
fu,Su = welch(u,fs=24.,nperseg=N/3,window='hanning')
fv,Sv = welch(v,fs=24.,nperseg=N/3,window='hanning')

In [20]:
# fmax = 1/2
# fi, = np.where(fu <= fmax)

plt.figure()
plt.loglog(fu,Su,'k-')
plt.loglog(fv,Sv,'-',color='gray')
xl = plt.xlim()
plt.xlim(xl[0],np.max(fu))

# confidence intervals
dof = 6
edof = (8/3)*dof
lower = edof/chi2.ppf(0.975,edof)
upper = edof/chi2.ppf(0.025,edof)
fplot = 5
fac = 0.0008
plt.plot(np.array([fplot,fplot]),fac*np.array([lower,upper]),'k-')
plt.text(fplot+0.5,fac*np.mean([lower,upper]),'95%',verticalalignment='center')

plt.legend(['u','v'])
plt.xlabel('frequency [cpd]')
plt.ylabel('[m$^2$ s$^{-2}$ cpd$^{-1}$]')
plt.title('Benthic Rover\nNov 2015-Oct 2018')

plt.savefig('figures_paper/rover_spectrum.png')
plt.savefig('figures_paper/rover_spectrum.pdf')

FigureCanvasNbAgg()

### Lomb-Scargle periodogram - 2014 to 2018

In [16]:
gi, = np.where(np.isfinite(df_all['u']+df_all['v']))
ua = np.array(df_all['u'][gi])
va = np.array(df_all['v'][gi])

#rotary spectrum
fua,ftua,Sua = lombscargle(time[gi],ua,ofac=1,window='hanning',scaling='density')
fva,ftva,Sva = lombscargle(time[gi],va,ofac=1,window='hanning',scaling='density')

In [17]:
# average frequency bands
navg = 2 # number of frequency bands to average
nfreq = int(np.floor(len(fua)/navg))  # number of freqencies in averaged spectrum

fwt = 1/navg
fm = np.zeros(nfreq)
Sum = np.zeros(nfreq)
Svm = np.zeros(nfreq)
for k in range(navg):
    fm = fm + fwt*fua[1:(nfreq*navg)][::navg]
    Sum = Sum + fwt*Sua[1:(nfreq*navg)][::navg]
    Svm = Svm + fwt*Sva[1:(nfreq*navg)][::navg]

In [18]:
Sum

array([3.49997163e-04, 2.22530111e-03, 1.92575183e-03, ...,
       4.81409249e-06, 1.48315243e-07, 2.90089730e-07])

In [19]:
plt.figure()
plt.loglog(fua,Sua)
plt.loglog(fva,Sva,'--')
plt.xlabel('frequency [cpd]')
plt.ylabel('[m$^2$ s$^{-2}$ cpd$^{-1}$]')
plt.title('Lomb-Scargle periodogram [Rover]')

FigureCanvasNbAgg()

Text(0.5, 1.0, 'Lomb-Scargle periodogram [Rover]')

In [95]:
df_all_1M = df_all_int.rolling(24*30,min_periods=24*30,center=True).mean()
df_all_2M = df_all_int.rolling(24*60,min_periods=24*60,center=True).mean()
df_all_3M = df_all_int.rolling(24*90,min_periods=24*90,center=True).mean()

In [239]:
df_all_3M_mon = df_all_3M.resample('1M').last()
df_all_2M_mon = df_all_2M.resample('1M').last()
df_all_1M_mon = df_all_1M.resample('1M').last()

In [97]:
df_all_1Mstd = df_all_int.rolling(24*30,min_periods=24*30,center=True).std()
df_all_2Mstd = df_all_int.rolling(24*60,min_periods=24*60,center=True).std()
df_all_3Mstd = df_all_int.rolling(24*90,min_periods=24*90,center=True).std()

In [98]:
df_all_3Mstd_mon = df_all_3Mstd.resample('1M').last()
df_all_2Mstd_mon = df_all_2Mstd.resample('1M').last()
df_all_1Mstd_mon = df_all_1Mstd.resample('1M').last()

In [243]:
plt.figure(figsize=(6.5,6.5))
plt.subplot(311)
plt.plot(df_all_2M_mon.index.month,df_all_2M_mon['u_f'],'ko')
plt.plot(df_all_2M_mon.index.month,df_all_2M_mon['v_f'],'^',color='gray')
plt.ylim([-0.01,0.01])
plt.ylabel('[m/s]')
xl = plt.xlim()
yl = plt.ylim()
plt.text(xl[0]+0.005*np.diff(xl),yl[1]-0.1*np.diff(yl),'a)')
plt.gca().set_xticks(np.arange(1,13))
plt.gca().set_xticklabels([])
plt.legend(['$u$','$v$'],loc=(0.6,0.7))
plt.title('60-day mean $u$, $v$ velocity components')

plt.subplot(312)
plt.plot(df_all_2Mstd_mon.index.month,df_all_2Mstd_mon['u_f'],'ko')
plt.ylim([0,0.02])
plt.ylabel('[m/s]')
xl = plt.xlim()
yl = plt.ylim()
plt.text(xl[0]+0.005*np.diff(xl),yl[1]-0.1*np.diff(yl),'b)')
plt.gca().set_xticks(np.arange(1,13))
plt.gca().set_xticklabels([])
plt.title('60-day standard deviation - $u_{lp}$')

plt.subplot(313)
plt.plot(df_all_2Mstd_mon.index.month,df_all_2Mstd_mon['u_super'],'ko')
plt.ylim([0,0.02])
plt.ylabel('[m/s]')
xl = plt.xlim()
yl = plt.ylim()
plt.text(xl[0]+0.005*np.diff(xl),yl[1]-0.1*np.diff(yl),'c)')
plt.gca().set_xticks(np.arange(1,13))
plt.gca().set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
plt.title('60-day standard deviation - $u_{hp}$')
plt.tight_layout()

plt.savefig('figures_paper/monthly_stats.png',dpi=600)
plt.savefig('figures_paper/monthly_stats.pdf')



FigureCanvasNbAgg()

In [189]:
from statsmodels.stats.multicomp import (pairwise_tukeyhsd,
                                         MultiComparison)

In [190]:
fi = np.isfinite(df_all_2Mstd_mon['u_f'])

In [204]:
MultiComp = MultiComparison(df_all_2Mstd_mon['u_f'][fi],
                            df_all_2Mstd_mon.index.month[fi])

In [238]:
df_all_2Mstd_mon

Unnamed: 0_level_0,Unnamed: 0,Current_mag,Easting (cm/sec),Northing (cm/sec),Minutes,Seconds,u,v,u_tide,v_tide,u_detide,v_detide,u_f,v_f,u_super,v_super
Date_time_R,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2014-10-31,,,,,,,,,,,,,,,,
2014-11-30,415.871014,0.934629,0.730026,1.621102,7.479623,11.140831,0.007381,0.01646,0.008566,0.011741,0.008031,0.00981,0.003081,0.003533,0.007347,0.009074
2014-12-31,415.837064,0.964297,0.757248,1.676798,7.592627,10.946328,0.007609,0.017007,0.008031,0.012201,0.007857,0.01041,0.003228,0.003408,0.007104,0.00975
2015-01-31,415.837176,1.034222,0.796061,1.806495,7.672934,10.932526,0.008018,0.018328,0.007637,0.012308,0.007833,0.011684,0.003345,0.003609,0.007024,0.010993
2015-02-28,415.420054,0.988327,0.85465,1.585933,7.502846,11.163207,0.008632,0.016113,0.007622,0.01224,0.008775,0.011542,0.005046,0.00551,0.007111,0.010014
2015-03-31,415.67927,0.987474,0.930683,1.500098,7.685077,10.961499,0.009403,0.015267,0.007908,0.012247,0.010468,0.01119,0.00724,0.006563,0.007518,0.008932
2015-04-30,415.831987,0.959486,0.928179,1.687715,7.840095,10.86061,0.009388,0.01714,0.008166,0.012366,0.009931,0.01176,0.006959,0.007373,0.007018,0.009053
2015-05-31,415.782746,2.56159,1.928168,2.236844,7.966458,10.519992,0.009169,0.016714,0.008227,0.012461,0.008872,0.010665,0.00572,0.005826,0.006641,0.009063
2015-06-30,,,,,,,,,0.008182,0.012375,,,,,,
2015-07-31,,,,,,,,,0.008018,0.012055,,,,,,


In [210]:
df_all_2Mstd_mon.index.month[fi]

Int64Index([11, 12,  1,  2,  3,  4,  5, 12,  1,  2,  3,  4,  5,  6,  7,  8,  9,
            10, 11, 12,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,  1,  2,
             3,  4,  5,  6,  7,  8,  9],
           dtype='int64', name='Date_time_R')

In [209]:
print(MultiComp.tukeyhsd(0.2).summary())

Multiple Comparison of Means - Tukey HSD,FWER=0.20
group1 group2 meandiff  lower  upper  reject
--------------------------------------------
  1      2    -0.0002  -0.0077 0.0074 False 
  1      3     0.0009  -0.0067 0.0084 False 
  1      4     0.0014  -0.0061 0.009  False 
  1      5     0.0029  -0.0047 0.0104 False 
  1      6     0.0065  -0.0017 0.0146 False 
  1      7     0.0068  -0.0014 0.015  False 
  1      8     0.0053  -0.0028 0.0135 False 
  1      9     0.0043  -0.0038 0.0125 False 
  1      10    0.0038  -0.0055 0.013  False 
  1      11    0.0021   -0.006 0.0103 False 
  1      12    0.0022  -0.0053 0.0098 False 
  2      3     0.001   -0.0065 0.0086 False 
  2      4     0.0016   -0.006 0.0092 False 
  2      5     0.0031  -0.0045 0.0106 False 
  2      6     0.0066  -0.0015 0.0148 False 
  2      7     0.007   -0.0012 0.0152 False 
  2      8     0.0055  -0.0027 0.0137 False 
  2      9     0.0045  -0.0037 0.0127 False 
  2      10    0.0039  -0.0053 0.0132 False 
  2 

In [None]:
MultiComp_std = MultiComparison(df_all_2Mstd_mon['u_f'][fi],
                            df_all_2Mstd_mon.index.month[fi])

In [212]:
plt.figure(figsize=(7,8))
plt.subplot(311)
plt.plot(df_all_2M.index,df_all_2M['u_f'],'k-')
plt.plot(df_all_2M.index,df_all_2M['v_f'],'--',color='gray')
plt.title('60-day running mean')
plt.ylim([-0.011,0.011])
plt.ylabel('[m/s]')
plt.gca().set_xticklabels([])
yl = plt.ylim()
xl = plt.xlim()
plt.text(xl[0]+0.01*np.diff(xl),yl[1]-0.1*np.diff(yl),'a)')
plt.legend(['$u$','$v$'])

plt.subplot(312)
plt.plot(df_all_2Mstd.index,df_all_2Mstd['u_f'],'k-')
plt.plot(df_all_2Mstd.index,df_all_2Mstd['u_super'],'-',color='darkgray',lw=2)
plt.plot(df_all_2Mstd.index,df_all_2Mstd['u_tide'],'-',color='lightgray')
plt.gca().set_xticklabels([])
yl = plt.ylim()
plt.ylim([0,yl[1]])
plt.ylabel('[m/s]')
plt.title('60-day running standard deviation - $u$')
yl = plt.ylim()
xl = plt.xlim()
plt.text(xl[0]+0.01*np.diff(xl),yl[1]-0.1*np.diff(yl),'b)')
plt.legend(['$u_{lp}$','$u_{tide}$','$u_{hp}$'])

plt.subplot(313)
plt.plot(df_all_2Mstd.index,df_all_2Mstd['v_f'],'k-')
plt.plot(df_all_2Mstd.index,df_all_2Mstd['v_super'],'-',color='darkgray',lw=2)
plt.plot(df_all_2Mstd.index,df_all_2Mstd['v_tide'],'-',color='lightgray')
plt.xticks(rotation=30)
plt.ylim([0,yl[1]])
plt.title('60-day running standard deviation - $v$')
plt.ylabel('[m/s]')
plt.tight_layout()
yl = plt.ylim()
xl = plt.xlim()
plt.text(xl[0]+0.01*np.diff(xl),yl[1]-0.1*np.diff(yl),'c)')

plt.savefig('figures_paper/runnning_stats.png',dpi=600)
plt.savefig('figures_paper/runnning_stats.pdf')



FigureCanvasNbAgg()

In [100]:
plt.figure()
plt.plot(df_all_1M.index,df_all_1M['u_f'])
plt.plot(df_all_2M.index,df_all_2M['u_f'])
plt.plot(df_all_3M.index,df_all_3M['u_f'])
plt.legend(['1M','2M','3M'])
plt.title('rolling means')



FigureCanvasNbAgg()

Text(0.5, 1.0, 'rolling means')

In [99]:
plt.figure()
plt.plot(df_all_3Mstd_mon.index,df_all_3Mstd_mon['u_f'])
plt.plot(df_all_3Mstd_mon.index,df_all_3Mstd_mon['u_super'])
plt.plot(df_all_3Mstd_mon.index,df_all_3Mstd_mon['u_tide'])



FigureCanvasNbAgg()

[<matplotlib.lines.Line2D at 0x1c571bc2e8>]

In [82]:
plt.figure()
plt.plot(df_all_2Mstd_mon.index,df_all_2Mstd_mon['u_f'])
plt.plot(df_all_2Mstd_mon.index,df_all_2Mstd_mon['u_super'])
plt.plot(df_all_2Mstd_mon.index,df_all_2Mstd_mon['u_tide'])



FigureCanvasNbAgg()

[<matplotlib.lines.Line2D at 0x1c52018ba8>]

In [64]:
plt.figure()
plt.plot(df_all_3M.index,df_all_3M['u_f'])
plt.plot(df_all_3M_mon.index,df_all_3M_mon['u_f'])
plt.plot(df_all_2M_mon.index,df_all_2M_mon['u_f'])



FigureCanvasNbAgg()

[<matplotlib.lines.Line2D at 0x1c4650aa20>]

In [71]:
plt.figure()
plt.plot(df_all_3M_mon['u_f'],df_all_2M_mon['u_f'],'.')



FigureCanvasNbAgg()

[<matplotlib.lines.Line2D at 0x1c4a83d9e8>]

In [42]:
plt.figure()
plt.plot(df_all_1M.index,df_all_1M['u_f'])
plt.plot(df_all_2M.index,df_all_2M['u_f'])
plt.plot(df_all_3M.index,df_all_3M['u_f'])
plt.legend(['1M','2M','3M'])
plt.title('rolling means')

FigureCanvasNbAgg()

Text(0.5, 1.0, 'rolling means')