# Fit harmonics to filtered $A_n$ data

In [1]:
import pymc3 as pm
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from datetime import datetime
import h5py

from soda.dataio.conversion import readotps

from scipy.optimize import fmin_powell, fmin, fmin_cg, fmin_ncg

from theano import shared
from theano import tensor as tt

from soda.utils.timeseries import timeseries, skill, rmse
from soda.utils.uspectra import uspectra, getTideFreq
from soda.utils.othertime import SecondsSince, TimeVector
from mycurrents import oceanmooring as om
from soda.utils.harmonic_analysis import harmonic_fit_array
from soda.utils.mysignal import power_spectra

from tqdm import tqdm

import matplotlib as mpl

In [2]:
%matplotlib notebook

In [3]:
mpl.rcParams['font.size'] = 14
mpl.rcParams['axes.labelsize'] = 'large'

In [4]:
#ncfile = '/home/suntans/Share/ARCHub/DATA/FIELD/browse-basin-kissme/Data/NetCDF/KISSME_Fitted_Buoyancy_wout_motion.nc'
ncfile = '/home/suntans/Share/ARCHub/DATA/FIELD/ShellCrux/KP150_Fitted_Buoyancy_wout_motion.nc'

ds1 = xr.open_dataset(ncfile,group='KP150_phs1')
ds2 = xr.open_dataset(ncfile,group='KP150_phs2')
ds1

<xarray.Dataset>
Dimensions:   (freqs: 3, modes: 4, time: 264601, timeslow: 728, z: 100)
Coordinates:
  * modes     (modes) int64 0 1 2 3
  * time      (time) datetime64[ns] 2016-04-30 ... 2016-10-30T18:00:00
  * z         (z) float64 0.0 -2.525 -5.051 -7.576 ... -244.9 -247.5 -250.0
  * timeslow  (timeslow) datetime64[ns] 2016-05-01 ... 2016-10-29T18:00:00
  * freqs     (freqs) float64 0.0001405 0.000281 0.0004216
Data variables:
    Atide     (time, modes) float64 ...
    phi       (timeslow, modes, z) float64 ...
    rhofit    (time, z) float64 ...
    r10       (timeslow, modes) float64 ...
    rhobar    (timeslow, z) float64 ...
    amp       (timeslow, modes, freqs) float64 ...
    phs       (timeslow, modes, freqs) float64 ...
    A_n       (time, modes) float64 ...
    N2        (timeslow, z) float64 ...
    cn        (timeslow, modes) float64 ...
Attributes:
    Y:            -13.75895
    ncfile:       /home/suntans/Share/ARCHub/DATA/FIELD/ShellCrux/KP150_Gridd...
    group: 

In [5]:
# Filter each time series and concatenate
A1 = om.OceanMooring(ds1.time.values, ds1['A_n'][:,0],0.0)
A2 = om.OceanMooring(ds2.time.values, ds2['A_n'][:,0],0.0)

A1f = om.OceanMooring(A1.t, A1.filt((34*3600, 3*3600), btype='band'), 0.0)
A2f = om.OceanMooring(A2.t, A2.filt((34*3600, 3*3600), btype='band'), 0.0)

A_n = A1f.concat(A2f)
A_n_1h = xr.DataArray(A_n.y[::60], dims=('time'), coords={'time':A_n.t[::60]})


# A_n_1h.loc['2016-09-15':'2016-10-31']=np.nan

#A_n_1h = A_n_1h.sel(time=slice('2016-11-02','2017-05-02'))

  b = a[a_slice]


In [6]:
plt.figure()
A_n_1h.plot(lw=0.2)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f3007cea9e8>]

In [9]:
def sine_model(beta_s, ff, t):
    n = len(ff)

    result = beta_s[0]*np.ones_like(t)
    for ii in range(0,n):
        result += beta_s[2*ii+1]*np.cos(ff[ii] * t) + beta_s[2*ii+2]*np.sin(ff[ii]*t)

    return result

def sine_model_pm(beta_s, ff, t):
    n = len(ff)

    result = beta_s[0] + beta_s[1]*t
    for ii in range(0,n):
        result += beta_s[2*ii+2]*pm.math.cos(ff[ii] * t) + beta_s[2*ii+3]*pm.math.sin(ff[ii]*t)

    return result

def sine_model_notrend_pm(beta_s, ff, t):
    n = len(ff)
    
    result = beta_s[0]+0*t
    for ii in range(0,n):
        result += beta_s[2*ii+1]*pm.math.cos(ff[ii] * t) + beta_s[2*ii+2]*pm.math.sin(ff[ii]*t)

    return result
    
def cosine_model_pm(beta_s, ff, t):
    n = len(ff)
    
    result = beta_s[0]+0*t
    for ii in range(0,n):
        result += beta_s[2*ii+1]*pm.math.cos(ff[ii] * t - beta_s[2*ii+2])

    return result

def sine_model_notrend(beta_s, ff, t):
    n = len(ff)
    
    result = beta_s[0] + 0*t
    for ii in range(0,n):
        result += beta_s[2*ii+1]*np.cos(ff[ii] * t) + beta_s[2*ii+2]*np.sin(ff[ii]*t)

    return result    

def sine_model_envelope(beta_s, ff, t):
    n = len(ff)
    
    #result = t*0
    
    #result = np.zeros(t.shape)
    result = beta_s[0] + 0*t

    for ii in range(0,n):
        result += beta_s[2*ii+1]*np.cos(ff[ii] * t) + beta_s[2*ii+2]*np.sin(ff[ii]*t)
        
    # Compute the imaginary part by adding a 90 degree phase shift
    #result_i = t*0
    result_i = np.zeros(t.shape) # Imaginary mean component is zero

    for ii in range(0,n):
        result_i += beta_s[2*ii+1]*np.cos(ff[ii] * t + np.pi/2) \
            + beta_s[2*ii+2]*np.sin(ff[ii]*t + np.pi/2)
    
    return np.sqrt(result*result + result_i*result_i)

# def sine_model_envelope_pm(beta_s, ff, t):
#     n = len(ff)
    
#     #result = t*0
    
#     result = tt.zeros(t.shape)

#     for ii in range(0,n):
#         result += beta_s[2*ii]*pm.math.cos(ff[ii] * t) + beta_s[2*ii+1]*pm.math.sin(ff[ii]*t)
        
#     # Compute the imaginary part by adding a 90 degree phase shift
#     #result_i = t*0
#     result_i = tt.zeros(t.shape)

#     for ii in range(0,n):
#         result_i += beta_s[2*ii]*pm.math.cos(ff[ii] * t + np.pi/2) \
#             + beta_s[2*ii+1]*pm.math.sin(ff[ii]*t + np.pi/2)
    
#     return pm.math.sqrt(result*result + result_i*result_i)

In [10]:
# Create a function with similar functionality as harmonic_fit in soda
def harmonic_fit_mcmc(time, X, frq, mask=None, axis=0, basetime=None,         **kwargs):
    """
    Harmonic fitting using Bayesian inference
    """
    tday = 86400.
    # Convert the time to days
    dtime = SecondsSince(time, basetime=basetime )
    
    
    # Generate initvalues from least-squares fitting
    ts = timeseries(time, X)
    amp, phs, _, _, h_lsq, err = ts.tidefit(frqnames=names)
    
    initvals = {'beta_mean':0.}
    initvals = {'sigma':1.}

    ii=-1
    for aa, pp in zip(amp,phs):
        ii+=1
        initvals.update({'beta_amp_%d'%ii:aa})
        initvals.update({'beta_phs_%d'%ii:pp})
    print(initvals)

    
    dtime /= tday
    
    # Convert the frequencies to radians / day
    omega = [ff*tday for ff in frq]
    #omega = frq
    
    # Number of parameters
    n_params = 2*len(omega) + 1
    
    print('Number of Parametrs: %d\n'%n_params, omega)

    with pm.Model() as my_model:
        ###
        # Create priors for each of our variables
        BoundNormal = pm.Bound(pm.Normal, lower=0.0)

        # Mean
        beta_mean = pm.Normal('beta_mean', mu=0, sd=1)
        # Trend
        #beta_linear = pm.Normal('beta_linear', mu=0, sd=1.)

        #beta_s = [beta_mean, beta_linear]
        beta_s=[beta_mean]

        # Harmonics
        for n in range(0,2*len(omega),2):
            beta_s.append(pm.Normal('beta_%d_re'%(n//2), mu=1., sd = 5.))
            beta_s.append(pm.Normal('beta_%d_im'%(n//2), mu=1., sd = 5.))
        #for n in range(0,len(omega)):
        #    beta_s.append(BoundNormal('beta_amp_%d'%n,mu=1., sd=10.))
        #    #beta_s.append(pm.Uniform('beta_amp_%d'%n,lower=0, upper=30))
        #    beta_s.append(pm.Uniform('beta_phs_%d'%n, lower=-np.pi, upper=np.pi))

        ###
        # Generate the likelihood function using the deterministic variable as the mean
        #mu_x = cosine_model_pm(beta_s, omega, dtime)
        #mu_x = pm.Deterministic('mu_x', cosine_model_pm(beta_s, omega, dtime))
        mu_x = sine_model_notrend_pm(beta_s, omega, dtime)

        
        #sigma = BoundNormal('sigma', mu=1.,sd=0.25)
        sigma = pm.HalfNormal('sigma',5.)
        
        #sigma = 1.0
        X_obs = pm.Normal('X_obs', mu=mu_x, sd=sigma, observed=X)
        #X_obs = pm.Normal('X_obs', mu=mu_x, sd=1, observed=X)
        
        mp = pm.find_MAP()
        print(mp)
        # Inference step...
        #step = pm.Metropolis()
        #step = pm.NUTS()
        step = None
        start = None
        trace = pm.sample(500, tune=1000, start = start, step=step, cores=2,
                         )#nuts_kwargs=dict(target_accept=0.95, max_treedepth=16, k=0.5))
    
    
    # Return the trace and the parameter stats
    return trace,  my_model, omega, dtime, h_lsq

In [20]:
# tidecons = ['M2','S2','N2','K2','K1','O1','P1','Q1','MA2','MB2']
tidecons = ['M2','S2',] # Non-station harmonics
# tidecons = ['M2','S2',]
tidecons_2 = ['N2','K1','O1'] # Stationary harmonics

frq,names = getTideFreq(tidecons)
frq2,names = getTideFreq(tidecons_2)


# Add on long-term modulations
tdaysec = 86400
fA = 2*np.pi/(365*tdaysec)
# fSA = 2*np.pi/(182*tdaysec)
# fTA = 2*np.pi/(120*tdaysec)
# f90 = 2*np.pi/(90*tdaysec)

# longperiods = [0, fA, fSA, fTA]
#longperiods = [0, 2*np.pi/(365*tdaysec), 2*np.pi/(182*tdaysec), 2*np.pi/(120*tdaysec)]
# longperiods = [0, 2*np.pi/(182*tdaysec),]
#longperiods = [0,  2*np.pi/(120*tdaysec), 2*np.pi/(30*tdaysec)]
# longperiods = [-fTA, -fSA, -fA, 0, fA, fSA, fTA]
# longperiods = [0, fA, fSA]
longperiods = [-3*fA, -2*fA, -1*fA, 0, 1*fA, 2*fA, 3*fA]

# longperiods = [-4*fA, -3*fA, -2*fA, 0, 2*fA, 3*fA, 4*fA]

sitename = 'M2S2nonstat_N2K1O1'
#sitename = 'M2S2_lowfreq_90d'
# outputh5 = '../inputs/a0_samples_harmonicfit_M2S2lowfreq_ATASA_12month.h5'
outputh5 = '../inputs/a0_samples_harmonicfit_{}_12month.h5'.format(sitename)
#outputh5 = '../inputs/a0_samples_harmonicfit_M2S2K1O1lowfreq_12month.h5'

frq_lt = []
for ff in frq:
    for ll in longperiods:
        frq_lt.append(ff+ll)
        
# Append the stationary harmonics
for ff in frq2:
    frq_lt.append(ff)

# outputh5 = '../inputs/a0_samples_harmonicfit_12month.h5'
# tidecons = ['M2','S2','N2','K2','K1','O1','P1','Q1']
# # tidecons = ['M2','S2','N2','K2','K1','O1','P1','Q1','MA2','MB2']

# frq_lt,names = getTideFreq(tidecons)

basetime = datetime(2016,1,1)

# X_sd = A_n_1h.y.std()
# X_mu = A_n_1h.y.mean()

X_sd = 1
X_mu = 0

idx = ~np.isnan(A_n_1h.values)
X = A_n_1h.values[idx] - X_mu
X /= X_sd
trace, my_model, omega, dtime, h_lsq = harmonic_fit_mcmc(A_n_1h.time.values[idx], X, frq_lt, basetime=basetime)
pm.summary(trace)

  b = np.linalg.lstsq(A,y)


{'sigma': 1.0, 'beta_amp_0': 2.2949812984546543, 'beta_phs_0': -3.0183229917531835, 'beta_amp_1': 1.227340421594148, 'beta_phs_1': 1.9807649381740402, 'beta_amp_2': 0.6965528421832167, 'beta_phs_2': -1.866996005845608}
Number of Parametrs: 35
 [12.08919055778369, 12.10640476410473, 12.123618970425769, 12.14083317674681, 12.15804738306785, 12.175261589388889, 12.192475795709928, 12.514727995396052, 12.531942201717092, 12.549156408038133, 12.566370614359172, 12.583584820680212, 12.600799027001253, 12.618013233322293, 11.912806035637455, 6.300388082120014, 5.8404450946267925]


INFO (theano.gof.compilelock): Refreshing lock /home/suntans/.theano/compiledir_Linux-4.15--generic-x86_64-with-debian-buster-sid-x86_64-3.6.6-64/lock_dir/lock
logp = -27,385, ||grad|| = 0.30745: 100%|██████████| 33/33 [00:00<00:00, 59.08it/s]  
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...


{'beta_mean': array(-0.00532361), 'beta_0_re': array(-1.44451165), 'beta_0_im': array(2.01719065), 'beta_1_re': array(2.55166599), 'beta_1_im': array(-0.44926914), 'beta_2_re': array(1.15009132), 'beta_2_im': array(-1.21075615), 'beta_3_re': array(2.55478556), 'beta_3_im': array(10.91256993), 'beta_4_re': array(1.9215533), 'beta_4_im': array(0.15701817), 'beta_5_re': array(2.63201369), 'beta_5_im': array(-0.54161948), 'beta_6_re': array(-0.295499), 'beta_6_im': array(-3.49280095), 'beta_7_re': array(-0.00652395), 'beta_7_im': array(1.19156281), 'beta_8_re': array(0.0654346), 'beta_8_im': array(-1.07874519), 'beta_9_re': array(0.73836634), 'beta_9_im': array(-0.45155423), 'beta_10_re': array(4.95394839), 'beta_10_im': array(1.67162058), 'beta_11_re': array(0.79316849), 'beta_11_im': array(1.41045142), 'beta_12_re': array(-0.39358235), 'beta_12_im': array(-2.24297043), 'beta_13_re': array(-1.31936687), 'beta_13_im': array(-0.88998693), 'beta_14_re': array(-2.03893768), 'beta_14_im': arra

Multiprocess sampling (2 chains in 2 jobs)
NUTS: [sigma, beta_16_im, beta_16_re, beta_15_im, beta_15_re, beta_14_im, beta_14_re, beta_13_im, beta_13_re, beta_12_im, beta_12_re, beta_11_im, beta_11_re, beta_10_im, beta_10_re, beta_9_im, beta_9_re, beta_8_im, beta_8_re, beta_7_im, beta_7_re, beta_6_im, beta_6_re, beta_5_im, beta_5_re, beta_4_im, beta_4_re, beta_3_im, beta_3_re, beta_2_im, beta_2_re, beta_1_im, beta_1_re, beta_0_im, beta_0_re, beta_mean]
Sampling 2 chains: 100%|██████████| 3000/3000 [05:11<00:00,  9.62draws/s]


Unnamed: 0,mean,sd,mc_error,hpd_2.5,hpd_97.5,n_eff,Rhat
beta_mean,-0.004585,0.055697,0.001109,-0.123758,0.096532,2889.212743,0.999361
beta_0_re,-1.445979,0.077093,0.00152,-1.59486,-1.300502,3111.000292,0.999336
beta_0_im,2.017261,0.07648,0.00163,1.849623,2.15605,2440.620697,1.000209
beta_1_re,2.553125,0.079035,0.001626,2.403972,2.709869,2500.692482,0.999052
beta_1_im,-0.450997,0.078813,0.001535,-0.608494,-0.304893,2365.78173,1.001072
beta_2_re,1.150472,0.077624,0.001698,1.005356,1.295216,2789.268887,0.999
beta_2_im,-1.213598,0.073366,0.001664,-1.353019,-1.054713,2755.386281,0.999003
beta_3_re,2.553258,0.075472,0.001785,2.411491,2.703306,1900.090072,0.999042
beta_3_im,10.913807,0.080299,0.001774,10.77069,11.073539,2170.794568,1.000524
beta_4_re,1.920884,0.076675,0.00147,1.783174,2.074555,2983.268933,0.999069


In [33]:
# Fixed prediction time
rhotime = pd.date_range('2016-05-01','2017-05-02',freq='D').values
rhotime.shape

(367,)

In [22]:
#tnew = A_n_1h.time.values[idx]
tnew = rhotime
print(basetime)
tsecnew = SecondsSince(tnew,basetime=basetime)
tdaynew = tsecnew/86400.
nt = tnew.shape[0]

# Extract the amplitude envelope from the trace
nsamples = len(trace)
a0_pred = np.zeros((nsamples,nt))
for nn in tqdm(range(nsamples)):
    mytrace = trace[nn]
    betas =[mytrace['beta_mean']]
    for ii in range(len(omega)):
        betas.append(mytrace['beta_{}_re'.format(ii)])
        betas.append(mytrace['beta_{}_im'.format(ii)])

    env = sine_model_envelope(betas, omega, tdaynew)
    # Add the noise on
    env += np.random.normal(scale=mytrace['sigma']*0.5,size=(nt,))
    a0_pred[nn,:] = env

# plt.figure()    
# plt.plot(tnew, np.median(a0_pred,axis=0),'b--')
# plt.fill_between(tnew, np.percentile(a0_pred,0.5,axis=0),\
#                  np.percentile(a0_pred,99.5,axis=0),color='b',alpha=0.5)
# plt.fill_between(tnew, np.min(a0_pred,axis=0),\
#                  np.max(a0_pred,axis=0),color='b',alpha=0.3)



 17%|█▋        | 83/500 [00:00<00:00, 828.09it/s]

2016-01-01 00:00:00


100%|██████████| 500/500 [00:00<00:00, 853.21it/s]


In [23]:
plt.figure(figsize=(8,6))

plt.plot(tnew, np.median(a0_pred,axis=0),'b--')
plt.fill_between(tnew, np.percentile(a0_pred,2.5,axis=0),\
                 np.percentile(a0_pred,97.5,axis=0),color='b',alpha=0.5)

A_n_1h.plot(color='r',ls='--',lw=0.5)
# plt.fill_between(tnew, np.min(a0_pred,axis=0),\
#                  np.max(a0_pred,axis=0),color='b',alpha=0.3)
plt.grid(b=True)

<IPython.core.display.Javascript object>

In [24]:
f = h5py.File(outputh5,'w')
g = f.create_group('data')
g.create_dataset('a0-all-times-samples',data=a0_pred.T)
g.create_dataset('time',data=tdaynew)
g.create_dataset('dtime64',data=tnew.view(int))
f.close()
print(outputh5)

../inputs/a0_samples_harmonicfit_M2S2nonstat_N2K1O1_12month.h5


# Plot the power spectrum with the individual freqencies plus posterior distributions

In [25]:
def power_spectra_w(tsec, u_r, window=np.hanning, K=3, power=2., axis=-1):
    """
    Calculates the power spectral density from a real valued quanity
    
    """
    
    M = tsec.shape[0]
    dt = tsec[1]-tsec[0]
    M_2 = int(np.floor(M/2))
      
    
    h_tk = window(M)
    # Weight the time-series and perform the fft
    u_r_t = u_r[...,:]*h_tk
    S_k = np.fft.fft(u_r_t, axis=axis)
    S = dt *np.abs(S_k)**power
    #S = np.mean(S_k,axis=-2)
        
    omega = np.fft.fftfreq(int(M),d=dt/(2*np.pi))
    
    #domega = 2*np.pi/(M*dt)
    domega = 1/(M*dt)
    
    # Extract the positive and negative frequencies
    omega_ccw = omega[0:M_2]
    #omega_cw = omega[M_2::] # negative frequencies
    S_ccw = S[...,0:M_2]
    #S_cw = S[...,M_2::]

    return omega_ccw,S_ccw,domega

In [26]:
# Interpolate onto a regular time grid
#dt = 3600
dt = 60
timeinterp = pd.date_range(A_n_1h.time.values[0],A_n_1h.time.values[-1],freq='{}s'.format(dt))
A_n_1h_i = A_n.to_xray().interp({'time':timeinterp})
#A_n_1h_i = A_n_1h.interp({'time':timeinterp})


tsec = timeinterp.view(int)*1e-9

f, S, df = power_spectra(tsec, A_n_1h_i.values, K=1 )
#f, S3, df = power_spectra(tsec, A_n_1h_i.values, K=3)

dt=60
fcpd = f*86400/(2*np.pi)

plt.figure()
plt.semilogy(fcpd,np.sqrt(S/dt)/dt)
#plt.xlim(0.5,4.5)
plt.xlim(1.8,2.2)

#plt.ylim(1e3,1e9)
plt.ylabel('Amplitude Spectrum [m$^2$ s$^{-1}$]')
plt.xlabel('Frequency [cpd]')

<IPython.core.display.Javascript object>

Text(0.5, 0, 'Frequency [cpd]')

In [31]:
# Calculate the power spectrum for the inferred harmonic parameters
psd = []
amp = []
for ii in range(len(omega)):
    A = trace['beta_{}_re'.format(ii)] + 1j*trace['beta_{}_im'.format(ii)]
    psd.append(np.abs( A*np.conj(A))*86400*dt)
    amp.append(np.abs(A))
    
psd = np.array(psd)
amp = np.array(amp)
psd_mu = np.median(psd,axis=1)
psd_low = np.percentile(psd,2.5,axis=1)
psd_high = np.percentile(psd,97.5,axis=1)

amp_mu = np.median(amp,axis=1)
amp_low = np.percentile(amp,2.5,axis=1)
amp_high = np.percentile(amp,97.5,axis=1)



omega_cpd = np.array([ff/(2*np.pi) for ff in omega])
plt.figure()
plt.semilogy(fcpd,S,'0.5',lw=0.5)
#plt.semilogy(fcpd,S3,'k',lw=0.74)

plt.errorbar(omega_cpd, psd_mu, yerr=[psd_low, psd_high], c='b', fmt='o')
plt.xlim(0.85,2.1)
#plt.xlim(1.85,2.05)
plt.ylim(1e3,2e9)
plt.ylabel('Amplitude Spectrum [m$^2$ s$^{-1}$]')
plt.xlabel('Frequency [cpd]')

plt.tight_layout()

plt.savefig('../FIGURES/a0_spectra_{}.png'.format(sitename),dpi=150)
plt.savefig('../FIGURES/a0_spectra_{}.pdf'.format(sitename),dpi=150)


plt.figure()
plt.semilogy(fcpd,S,'0.5',lw=0.5)
#plt.semilogy(fcpd,S3,'k',lw=0.74)

plt.errorbar(omega_cpd, psd_mu, yerr=[psd_low, psd_high], c='b', fmt='o')
# plt.xlim(0.85,2.1)
plt.xlim(1.85,2.05)
plt.ylim(1e3,2e9)
plt.ylabel('Amplitude Spectrum [m$^2$ s$^{-1}$]')
plt.xlabel('Frequency [cpd]')

plt.tight_layout()

plt.savefig('../FIGURES/a0_spectra_SD_{}.png'.format(sitename),dpi=150)
plt.savefig('../FIGURES/a0_spectra_SD_{}.pdf'.format(sitename),dpi=150)

# plt.figure()
# plt.plot(fcpd,2*np.sqrt(S/dt)/(dt*2*np.pi),'0.5',lw=0.5)
# #plt.plot(fcpd,2*np.sqrt(S3/dt)/(dt*2*np.pi),'k',lw=0.74)

# plt.errorbar(omega_cpd, amp_mu, yerr=[amp_low, amp_high], c='b', fmt='o')
# # plt.xlim(0.85,2.1)
# plt.xlim(1.85,2.05)
# plt.ylim(1e-2,25)
# plt.ylabel('Amplitude [m]')
# plt.xlabel('Frequency [cpd]')

# plt.tight_layout()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Short fit as a demonstration for SWOT proposal

In [14]:
tidecons = ['M2','S2','N2','K2','K1',]



frq,names = getTideFreq(tidecons)

basetime = datetime(2016,1,1)


A_n_sel = A_n_1h.sel(time=slice('2017-02-01','2017-05-01'))
idx = ~np.isnan(A_n_sel.values)
X = A_n_sel.values[idx] 
trace, my_model, omega, dtime, h_lsq = harmonic_fit_mcmc(A_n_sel.time.values[idx], X, frq, basetime=basetime)
pm.summary(trace)

  b = np.linalg.lstsq(A,y)


{'sigma': 1.0, 'beta_amp_0': 12.061028110539207, 'beta_phs_0': -1.4699380139811349, 'beta_amp_1': 8.068883702592315, 'beta_phs_1': 0.41991900274006266, 'beta_amp_2': 2.2370887629520566, 'beta_phs_2': 2.837108660793149, 'beta_amp_3': 0.5752917510654437, 'beta_phs_3': 3.0949123931286606, 'beta_amp_4': 1.0750971102152895, 'beta_phs_4': 2.228228109047282}
Number of Parametrs: 11
 [12.14083317674681, 12.566370614359172, 11.912806035637455, 12.600776206127932, 6.300388082120014]


logp = -6,459.7, ||grad|| = 1.8953: 100%|██████████| 35/35 [00:00<00:00, 994.44it/s]   
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...


{'beta_mean': array(0.01567946), 'beta_0_re': array(3.89700872), 'beta_0_im': array(11.40480765), 'beta_1_re': array(7.35956404), 'beta_1_im': array(3.28745778), 'beta_2_re': array(-2.22805008), 'beta_2_im': array(0.17455056), 'beta_3_re': array(-0.5792043), 'beta_3_im': array(0.02712078), 'beta_4_re': array(-0.65663523), 'beta_4_im': array(0.84928992), 'sigma_log__': array(1.556619), 'sigma': array(4.74275884)}


Multiprocess sampling (2 chains in 2 jobs)
NUTS: [sigma, beta_4_im, beta_4_re, beta_3_im, beta_3_re, beta_2_im, beta_2_re, beta_1_im, beta_1_re, beta_0_im, beta_0_re, beta_mean]
Sampling 2 chains: 100%|██████████| 3000/3000 [00:11<00:00, 261.39draws/s]


Unnamed: 0,mean,sd,mc_error,hpd_2.5,hpd_97.5,n_eff,Rhat
beta_mean,0.016546,0.105503,0.002408,-0.174492,0.227444,1803.099734,0.999252
beta_0_re,3.896638,0.139293,0.00337,3.629264,4.16744,1535.772229,0.999527
beta_0_im,11.405905,0.150556,0.003204,11.119333,11.688922,2427.718193,0.999
beta_1_re,7.358901,0.191569,0.004581,7.01498,7.760096,1968.525895,0.99912
beta_1_im,3.272891,0.191556,0.005353,2.875702,3.62082,1189.54649,1.001547
beta_2_re,-2.228943,0.142285,0.003289,-2.500589,-1.951261,2021.60953,0.999264
beta_2_im,0.175072,0.139701,0.003659,-0.092391,0.431233,1507.151246,1.00014
beta_3_re,-0.579875,0.195561,0.004609,-0.956113,-0.20208,1636.223118,0.9992
beta_3_im,0.021804,0.194242,0.004866,-0.339933,0.411961,1528.923304,0.999348
beta_4_re,-0.658878,0.145359,0.003703,-0.96477,-0.376745,1904.200508,0.999058


In [11]:
tnew = A_n_sel.time.values[idx]
# Use the built-in function for prediction
nsamples = 500
ppc = pm.sample_posterior_predictive(trace, model=my_model, samples=nsamples)

a0_pred = ppc['X_obs']

100%|██████████| 500/500 [00:00<00:00, 533.92it/s]


In [32]:
plt.figure(figsize=(9,5))

#(A_n_sel/1000).plot(color='r',ls='-',lw=1)
plt.plot(dtime, A_n_sel.values/1000,'r')


plt.plot(dtime, np.median(a0_pred,axis=0)/1000,'k--')
#plt.plot(tnew, h_lsq,'k--')

plt.fill_between(dtime, np.percentile(a0_pred,5,axis=0)/1000,\
                 np.percentile(a0_pred,95,axis=0)/1000,color='b',alpha=0.5)

#plt.xlim(datetime(2017,3,25),datetime(2017,4,14))
# plt.fill_between(tnew, np.min(a0_pred,axis=0),\
#                  np.max(a0_pred,axis=0),color='b',alpha=0.3)
plt.xlabel('Time [d]')
plt.xlim(450,470)
plt.grid(b=True)
plt.ylabel('$h_{iw}$ [m]')
plt.legend(('Observed','Least-squares fit','90 % credible inteval'))
#plt.tight_layout()
plt.savefig('../FIGURES/Bayesian_SSH_example.png')

<IPython.core.display.Javascript object>

NameError: name 'A_n_sel' is not defined

# Build a hierarchical harmonic model

 - First level is the short term (15 d) harmonic fits (M2, S2, K1 ,O1)
 - Second level is the long-term variation of those harmonics

In [73]:
def harmonic_beta(aa, Aa, Ba, omega, tdays):
    nomega = len(omega)
    nt = tdays.shape[0]
    amp = tt.ones_like(tdays) * aa[...,None]
    for ii in range(nomega):
        amp += Aa[ii]*pm.math.cos(omega[ii]*tdays) + Ba[ii]*pm.math.sin(omega[ii]*tdays)
    
    return amp

def harmonic_beta_time(aa, Aa, Ba, omega, tdays, timeidx):
    nomega = len(omega)
    amp = tt.ones_like(tdays[timeidx]) * aa
    for ii in range(nomega):
        amp += Aa[timeidx,ii]*pm.math.cos(omega[ii]*tdays[timeidx]) +\
            Ba[timeidx,ii]*pm.math.sin(omega[ii]*tdays[timeidx])
    
    return amp

def harmonic_fit_mcmc_bhm(time,timeidx,timelow, X, frq, frqlow, basetime=None):

    """
    Harmonic fitting using Bayesian inference
    """
    tday = 86400.
    # Convert the time to days
    dtime = SecondsSince(time, basetime=basetime )
    dtime_low = SecondsSince(timelow, basetime=basetime )

    
    dtime /= tday
    dtime_low /= tday
    
    dtime = shared(dtime)
    dtime_low = shared(dtime_low)
    timeidx = shared(timeidx)
    
    # Convert the frequencies to radians / day
    omega = [ff*tday for ff in frq]
    omega_low = [ff*tday for ff in frqlow]

    #omega = frq
    
    # Number of parameters
    nomega = len(omega) 
    nomega_low = len(omega_low)
    nt = timelow.shape[0]
    
    print(nomega, nomega_low, nt)


    with pm.Model() as rho_model:

        # Harmonic decomposition on the real and imaginary components
        aa_l_i = pm.Normal('aa_l_i', mu=0, sd=5, shape=nomega_low)
        Aa_l_i = pm.Normal('Aa_l_i', mu=0, sd=5,shape=nomega_low)
        Ba_l_i = pm.Normal('Ba_l_i', mu=0, sd=5,shape=nomega_low)
        
        aa_l_r = pm.Normal('aa_l_r', mu=0, sd=5, shape=nomega_low)
        Aa_l_r = pm.Normal('Aa_l_r', mu=0, sd=5,shape=nomega_low)
        Ba_l_r = pm.Normal('Ba_l_r', mu=0, sd=5,shape=nomega_low)
        
        # This gives the amplitude of the low-frequency amplitudes
        sigma_beta = pm.HalfNormal('sigma_beta', sd=1.0, shape=nt)
        sigma_beta_a = pm.HalfNormal('sigma_beta_a', sd=1.0, shape=(nomega,nt))
        sigma_beta_b = pm.HalfNormal('sigma_beta_b', sd=1.0, shape=(nomega,nt))
        
        
        aa = pm.Normal('aa', mu=0, sd=sigma_beta, shape=nt)
        Aa = pm.Normal('Aa',mu=harmonic_beta(aa_l_i, Aa_l_i, Ba_l_i, omega_low, dtime_low),\
                    sd=sigma_beta_a, shape=(nomega,nt))
        Ba = pm.Normal('Ba',mu=harmonic_beta(aa_l_r, Aa_l_r, Ba_l_r, omega_low, dtime_low),\
                    sd=sigma_beta_b, shape=(nomega,nt))
        
        mu_x = harmonic_beta_time(aa, Aa, Ba, omega, dtime, timeidx)

        sigma_curve = pm.HalfNormal('sigma_curve', sd=2.0 )

        y_ = pm.Normal('y', mu=mu_x, sd=sigma_curve, observed=X)
        
        ###
        # Inference step

        trace = pm.sample(500, tune=1500, step=pm.NUTS(), cores=ncores, chains=nchains)

    return trace, rho_model

In [142]:
Xsel = A_n_1h.sel(time=slice('2016-05-02','2017-05-02'))
#Xsel.loc['2016-09-15':'2016-10-15']=np.nan
idx = ~np.isnan(Xsel.values)



X = Xsel.values[idx]
tnew = Xsel.time.values[idx]

# Break the time series up into chunks
trange = pd.date_range(tnew[0],tnew[-1],freq='15D').values
tmid = trange[0:-1] + 0.5*(trange[1:]-trange[0:-1])

tindex = np.zeros(tnew.shape, np.int)
ii=0
for t1,t2 in zip(trange[0:-1], trange[1:]):
    idx = (tnew>=t1) & (tnew<=t2)
    #print(t1,t2,sum(idx))

    tindex[idx] = ii
    ii+=1


In [143]:
def harmonic_pred(aa, Aa, Ba, omega, tdays):
    nomega = len(omega)
    nt = tdays.shape[0]
    amp = np.ones_like(tdays) * aa
    for ii in range(nomega):
        amp += Aa[...,ii]*np.cos(omega[ii]*tdays) + Ba[...,ii]*np.sin(omega[ii]*tdays)
    
    return amp


tidecons = ['M2','S2','N2','K1','O1',]
frq,names = getTideFreq(tidecons)
#frqlow = [2*np.pi/(365*tdaysec), 2*np.pi/(180*tdaysec), 2*np.pi/(120*tdaysec)]
frqlow = [2*np.pi/(365*tdaysec), 2*np.pi/(180*tdaysec)]


# Go through and do the fitting using least-squares
nfrq = len(frq)
nfrqlow = len(frqlow)

tsec = SecondsSince(tnew)
tseclow = SecondsSince(tmid)

nt = tsec.shape[0]
ntlow = tseclow.shape[0]

# 1) Fit the tide harmonics to each 15 d block
aa = np.zeros((ntlow,))
Aa = np.zeros((ntlow, nfrq))
Ba = np.zeros((ntlow, nfrq))

for ii in range(ntlow):
    idx = tindex == ii
    Y = harmonic_fit_array(X[idx], tsec[idx], frq, axis=0)
    aa[ii] = Y[0]
    Aa[ii,:] = Y[1::2]
    Ba[ii,:] = Y[2::2]


# 2) Fit the low-frequency harmonics to these harmonics
aa_l_r = np.zeros((nfrq,))
Aa_l_r = np.zeros((nfrqlow,nfrq))
Ba_l_r = np.zeros((nfrqlow,nfrq))

aa_l_i = np.zeros((nfrq,))
Aa_l_i = np.zeros((nfrqlow,nfrq))
Ba_l_i = np.zeros((nfrqlow,nfrq))

#Y = harmonic_fit_array(aa, tseclow, frqlow, axis=0)
#aa_l_r[:] = Y[1::2]
#aa_l_i[:] = Y[2::2]

Y = harmonic_fit_array(Aa, tseclow, frqlow, axis=0)
aa_l_r[:] = Y[0,:]
Aa_l_r[:] = Y[1::2,:]
Aa_l_i[:] = Y[2::2,:]

Y = harmonic_fit_array(Ba, tseclow, frqlow, axis=0)
aa_l_i[:] = Y[0,:]
Ba_l_r[:] = Y[1::2,:]
Ba_l_i[:] = Y[2::2,:]

## Prediction
# 3) Build the tidal harmonics as a time-series
Aa_pred = np.zeros((nt,nfrq))
Ba_pred = np.zeros((nt,nfrq))

for ii in range(nfrq):
    Aa_pred[:,ii] = harmonic_pred(aa_l_r[ii], Aa_l_r[:,ii], Ba_l_r[:,ii], frqlow, tsec)
    Ba_pred[:,ii] = harmonic_pred(aa_l_i[ii], Aa_l_i[:,ii], Ba_l_i[:,ii], frqlow, tsec)


# 4) Predict the raw amplitude
a0 = harmonic_pred(0, Aa_pred, Ba_pred, frq, tsec)

plt.figure()
plt.plot(tnew,a0,lw=0.2)
Xsel.plot(lw=0.2)
#plt.plot(Ba_pred[:,1])

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f905bc6c668>]

In [55]:
tdaysec = 86400
frqlow= [ 2*np.pi/(365*tdaysec), 2*np.pi/(182*tdaysec), 2*np.pi/(120*tdaysec)]

tidecons = ['M2','S2','N2','K1','O1',]
frq,names = getTideFreq(tidecons)

trace,  my_model = harmonic_fit_mcmc_bhm(tnew,tindex,tmid, X, frq, frqlow, basetime=basetime)
pm.summary(trace)

5 3 12


ValueError: operands could not be broadcast together with shapes (5,12) (3,12) 