# Test spydell method

In [23]:
import os
from glob import glob

import numpy as np
import pandas as pd
import xarray as xr

%matplotlib inline
import matplotlib.pyplot as plt
import hvplot.pandas  # noqa
import holoviews as hv

#
import pynsitu as pyn
from pynsitu.maps import crs

from lib import raw_dir, root_dir, images_dir, KEYS, color, columns

In [24]:
key = KEYS[0]
df = pd.read_csv(glob(os.path.join(raw_dir, 'L1_' + key+'*'))[0], parse_dates = ['time'], dtype={"id":str}).set_index('id')
df_ = df.loc['0-4388553'].sort_values('time').rename(columns = {'velocity_east':'u', 'velocity_north':'v'})

In [25]:
df_

Unnamed: 0_level_0,time,GpsQuality,lat,lon,BatteryStatus,GPS Confidence,GPS Fail Counter,deployment,platform,x,y,dt,u,v,velocity,acceleration_east,acceleration_north,acceleration
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0-4388553,2023-04-23 21:55:10,3,40.868990,4.196691,GOOD,0,0,d0,carthe_cnr_00,-304389.826561,306083.175744,,0.283761,0.267734,0.390130,,,
0-4388553,2023-04-23 22:04:58,3,40.870127,4.198408,GOOD,0,0,d0,carthe_cnr_00,-304240.032180,306203.639071,588.0,0.227157,0.187953,0.294833,-0.000062,-0.000104,0.000121
0-4388553,2023-04-23 22:14:59,3,40.870996,4.199889,GOOD,0,0,d0,carthe_cnr_00,-304111.332710,306295.145480,601.0,0.210857,0.144870,0.255828,-0.000020,-0.000064,0.000067
0-4388553,2023-04-23 22:24:59,3,40.871694,4.201412,GOOD,0,0,d0,carthe_cnr_00,-303979.865485,306367.531446,600.0,0.203439,0.111238,0.231865,-0.000016,-0.000039,0.000042
0-4388553,2023-04-23 22:34:59,3,40.872198,4.202785,GOOD,0,0,d0,carthe_cnr_00,-303861.911056,306418.886055,600.0,0.191425,0.098282,0.215181,-0.000025,-0.000002,0.000025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0-4388553,2023-06-16 01:28:05,3,37.328903,9.643378,LOW,0,0,d0,carthe_cnr_00,162743.045786,-91373.366247,599.0,0.232862,0.139794,0.271601,-0.000023,-0.000005,0.000024
0-4388553,2023-06-16 01:38:07,3,37.329687,9.644902,LOW,0,0,d0,carthe_cnr_00,162876.388403,-91283.702572,602.0,0.219724,0.131849,0.256248,-0.000011,-0.000022,0.000025
0-4388553,2023-06-16 01:48:17,3,37.330341,9.646382,LOW,0,0,d0,carthe_cnr_00,163006.111934,-91208.540258,610.0,0.220211,0.112827,0.247432,-0.000001,-0.000033,0.000033
0-4388553,2023-06-16 01:58:08,3,37.330910,9.647884,LOW,0,0,d0,carthe_cnr_00,163137.967533,-91142.770224,591.0,0.218052,0.092480,0.236853,0.000012,-0.000016,0.000020


In [26]:

import warnings
def spydell_smooth(df,
                       t_target,
                       acc_cut = 1e-3,
                       nb_pt_mean=5,
                       import_columns=['id'],
                       geo=True,
                       acc=True, 
                      ):
    """ 
    Smooth and interpolated a trajectory with the method described in Spydell et al. 2021.
    Parameters:
    -----------
            df :  dataframe with raw trajectory, must contain 'time', 'x', 'y', 'velocity_east', 'velocity_north'
            t_target: `pandas.core.indexes.datetimes.DatetimeIndex` or str
                Output time series, as typically given by pd.date_range or the delta time of the output time series as str
                In this case, t_target is then recomputed taking start-end the start end of the input trajectory and the given delta time 
            nb_pt_mean : odd int,
                number of points of wich is applied the box mean
            acc_cut : float, 
                acceleration spike cut value
            import_columns : list of str,
                list of df constant columns we want to import (ex: id, platform)
            geo: boolean,
                optional if geo obj with projection
            acc: boolean,
                optional compute acceleration
    Return : interpolated dataframe with x, y, u, v, ax-ay computed from xy, au-av computed from u-v, +norms, id, platform with index time
    """
    
    #index = time
    if df.index.name!='time':
        if df.index.name == None:
            df = df.set_index('time')
        else : 
            df =df.reset_index().set_index('time')
    print(df.index.name)  
    
    # assert x, y in dataframe
    if 'x' not in df or 'y' not in df :
        assert False, "positions must be labelled as 'x' and 'y'"
    if 'u' not in df or 'v' not in df :
        assert False, "velocities must be labelled as 'u' and 'v'"
        
    # store projection to align with dataframes produced
    if geo :
        proj_ref = df.geo.projection_reference
    
    #t_target
    if isinstance(t_target, str) :
        t_target = pd.date_range(df.index.min(), df.index.max(), freq = t_target)
    
    #xarray for easy interpolation
    ds = df.to_xarray()[['x', 'y', 'u', 'v']]
    
    # fill little gap 
    
    # 3) linearly interpolate velocities
    ds = ds.interp(time=t_target, method='linear')
    
    reg_dt =t_target[1] - t_target[0] 
    print(reg_dt)
    # 4) integrate velocities and find constant
    #ms_x, ms_y = (df.x**2).mean(), (df.y**2).mean()
    x_cum = ds.u.cumsum('time')*reg_dt/pd.Timedelta('1s')
    y_cum = ds.v.cumsum('time')*reg_dt/pd.Timedelta('1s')
    
    #def msx_difference(x_0) :
        #return abs(ms_x-((x_0+x_cum)**2).mean())
    #def msy_difference(y_0) :
        #return abs(ms_y-((y_0+y_cum)**2).mean())
    from scipy.optimize import minimize
    def msx_difference(x_0) :
        return ((ds.x - x_0-x_cum)**2).mean()
    def msy_difference(y_0) :
        return ((ds.y - y_0-y_cum)**2).mean()
    from scipy.optimize import minimize
    x_0 = minimize(msx_difference, ds.x[0]).x
    y_0 = minimize(msy_difference, ds.y[0]).x

    ds['x'] = x_0+x_cum
    ds['y'] = y_0+y_cum

    # 5) remove spike and interpolate
    ds['ax'] = ds.u.differentiate('time', datetime_unit='s')
    ds['ay'] = ds.v.differentiate('time', datetime_unit='s')
    x = ds.where(ds.ax<acc_cut).x
    y = ds.where(ds.ay<acc_cut).y
    print(f"nb of spike removed { np.isnan(x).sum('time').values} over {ds.dims['time']}")
    ds['x'] = x.interpolate_na('time')
    ds['y'] = y.interpolate_na('time')
    
    
    # 6) Box mean on nb_pt_mean
    if nb_pt_mean%2==0:
        warnings.warn( 'nb_pt_mean should be odd, set to np_pt_window+1')
        nb_pt_mean +=1
    if nb_pt_mean ==0:
        assert False, 'np_pt_window=0'
        
    n = nb_pt_mean//2
    ds0 = 0
    ds1= ds
    for i in np.arange(-n,n+1):

        ds0 += ds1.shift({'time':i})
        ds1 = ds
    ds0 = ds0/nb_pt_mean
    
    # test box mean
    assert ds0.isel(time=n) == ds.isel(time=slice(0, nb_pt_mean)).mean(), 'pb with mean over n points'
    
    ds0 = ds0.drop(['ax', 'ay'])
    
    # Build full dataframe
    df_out = ds0.to_dataframe()
    
    
    #import columns/info ex: id or time
    if import_columns :
        for column in import_columns :
             df_out[column] = df[column][0]  
          
    # update lon/lat
    if geo:
        df_out['lon'] = df.lon.mean()
        df_out['lat'] = df.lat.mean()
        # first reset reference from df
        df_out.geo.set_projection_reference(proj_ref)  # inplace
        df_out.geo.compute_lonlat()  # inplace
        
    # recompute acceleration
    if acc:
        if geo :
            df_out.geo.compute_accelerations(
                from_ = ("xy", "x", "y"),
                names = ("ax", "ay", "axy"),
                centered_velocity=True,
                time='index',
                fill_startend=False,
                inplace=True,
            )
            df_out.geo.compute_accelerations(
                from_ = ("velocities", "u", "v"),
                names =("au", "av", "auv"),
                centered_velocity=True,
                time='index',
                fill_startend=False,
                inplace=True,
            ) 
            # should still recompute for non-geo datasets
        else:
            compute_accelerations(
                df_out,
                from_ = ("xy", "x", "y"),
                names = ("ax", "ay", "axy"),
                centered_velocity=True,
                time='index',
                fill_startend=False,
                inplace=True,
                keep_dt=False
            )
            compute_accelerations(
                df_out,
                from_ = ("velocities", "u", "v"),
                names =("au", "av", "auv"),
                centered_velocity=True,
                time='index',
                fill_startend=False,
                inplace=True,
                keep_dt=False
            )  
              
    return df_out

In [27]:
dfi = spydell_smooth(df_, t_target = '5min', import_columns =['id', 'platform'], geo=True, acc=True)
dfi

time
0 days 00:05:00
nb of spike removed 0 over 15315


Unnamed: 0_level_0,x,y,u,v,id,platform,lon,lat,ax,ay,axy,au,av,auv
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-04-23 21:55:10,,,,,0-4388553,carthe_cnr_00,,,,,,,,
2023-04-23 22:00:10,,,,,0-4388553,carthe_cnr_00,,,,,,,,
2023-04-23 22:05:10,-304408.803516,174649.871517,0.238978,0.198339,0-4388553,carthe_cnr_00,4.263441,39.671485,,,,,,
2023-04-23 22:10:10,-304341.715066,174700.955437,0.223628,0.170280,0-4388553,carthe_cnr_00,4.264198,39.671968,-0.000034,-0.000077,0.000085,-0.000043,-0.000085,0.000096
2023-04-23 22:15:10,-304277.726393,174745.077646,0.213296,0.147074,0-4388553,carthe_cnr_00,4.264924,39.672387,-0.000020,-0.000055,0.000058,-0.000027,-0.000066,0.000072
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-16 01:45:10,163533.582669,-220103.348030,0.220394,0.117680,0-4388553,carthe_cnr_00,9.628762,36.153835,-0.000001,-0.000028,0.000028,-0.000004,-0.000027,0.000027
2023-06-16 01:50:10,163599.580966,-220070.525256,0.219994,0.109409,0-4388553,carthe_cnr_00,9.629502,36.154119,0.000007,-0.000023,0.000024,0.000003,-0.000025,0.000025
2023-06-16 01:55:10,163666.206698,-220039.761702,0.222086,0.102545,0-4388553,carthe_cnr_00,9.630249,36.154385,,,,,,
2023-06-16 02:00:10,,,,,0-4388553,carthe_cnr_00,,,,,,,,


In [28]:
hvplot = (
    (dfi.x.hvplot(label ='interpolated') 
     * df_.set_index('time').x.hvplot(label = 'raw')
    )
    +(
        dfi.lon.hvplot(label ='interpolated') 
      * df_.set_index('time').lon.hvplot(label = 'raw')
     )
 #* df_spydell_30.x.hvplot(label ='spydell_30min')
)#+(df_spydell_5.u.hvplot(label ='spydell_5min') 
 #* df_.set_index('time').velocity_east.hvplot(label = 'raw')
 #* df_spydell_30.velocity_east.hvplot(label ='spydell_30min')
#)#+(df_spydell_5.acceleration_east.hvplot(label ='spydell_5min') 
 #* df_.set_index('time').acceleration_east.hvplot(label = 'raw')
 #* df_spydell_30.acceleration_east.hvplot(label ='spydell_30min')
#)

layout = hv.Layout(hvplot).cols(1)
layout

In [29]:
hvplot = (dfi.x.hvplot(label ='interpolated') 
 * df_.set_index('time').x.hvplot(label = 'raw')
)+(dfi.u.hvplot(label ='interpolated') 
 * df_.set_index('time').velocity_east.hvplot(label = 'raw')
)+(dfi.ax.hvplot(label ='interpolated') 
 * df_.set_index('time').acceleration_east.hvplot(label = 'raw')
)

layout = hv.Layout(hvplot).cols(1)
layout

AttributeError: 'DataFrame' object has no attribute 'velocity_east'