# Test spydell method

In [24]:
import os
from glob import glob

import numpy as np
import pandas as pd
import xarray as xr

%matplotlib inline
import matplotlib.pyplot as plt
import hvplot.pandas  # noqa
import holoviews as hv

#
import pynsitu as pyn
from pynsitu.maps import crs

from lib import raw_dir, root_dir, images_dir, KEYS, color, columns

In [35]:
key = KEYS[0]
df = pd.read_csv(glob(os.path.join(raw_dir, 'L1_' + key+'*'))[0], parse_dates = ['time'], dtype={"id":str}).set_index('id')
df_ = df.loc['0-4388553'].sort_values('time')

In [26]:
import warnings
def spydell_smooth(df,
                       t_target,
                       acc_cut = 1e-3,
                       nb_pt_mean=5,
                       import_columns=['id'],
                       geo=True,
                       acc=True, 
                      ):
    """ 
    Smooth and interpolated a trajectory with the method described in Spydell et al. 2021.
    Parameters:
    -----------
            df :  dataframe with raw trajectory, must contain 'time', 'x', 'y', 'velocity_east', 'velocity_north'
            t_target: `pandas.core.indexes.datetimes.DatetimeIndex` or str
                Output time series, as typically given by pd.date_range or the delta time of the output time series as str
                In this case, t_target is then recomputed taking start-end the start end of the input trajectory and the given delta time 
            nb_pt_mean : odd int,
                number of points of wich is applied the box mean
            acc_cut : float, 
                acceleration spike cut value
            import_columns : list of str,
                list of df constant columns we want to import (ex: id, platform)
            geo: boolean,
                optional if geo obj with projection
            acc: boolean,
                optional compute acceleration
    Return : interpolated dataframe with x, y, u, v, ax-ay computed from xy, au-av computed from u-v, +norms, id, platform with index time
    """
    
    #index = time
    if df.index.name!='time':
        if df.index.name == None:
            df = df.set_index('time')
        else : 
            df =df.reset_index().set_index('time')
    print(df.index.name)  
    
    # assert x, y in dataframe
    if 'x' not in df or 'y' not in df :
        assert False, "positions must be labelled as 'x' and 'y'"
    if 'velocity_east' not in df or 'velocity_north' not in df :
        assert False, "velocities must be labelled as 'velocity_east' and 'velocity_n'"
        
    # store projection to align with dataframes produced
    if geo :
        proj_ref = df.geo.projection_reference
    
    #t_target
    if isinstance(t_target, str) :
        t_target = pd.date_range(df.index.min(), df.index.max(), freq = t_target)
    
    #xarray for easy interpolation
    ds = df.to_xarray()[['velocity_east', 'velocity_north']]
    
    # 3) linearly interpolate velocities
    ds = ds.interp(time=t_target, method='linear')
    
    reg_dt =t_target[1] - t_target[0] 
    print(reg_dt)
    # 4) integrate velocities and find constant
    ms_x, ms_y = (df.x**2).mean(), (df.y**2).mean()
    x_cum = ds.velocity_east.cumsum('time')*reg_dt/pd.Timedelta('1s')
    y_cum = ds.velocity_north.cumsum('time')*reg_dt/pd.Timedelta('1s')
    
    
    def msx_difference(x_0) :
        return abs(ms_x-((x_0+x_cum)**2).mean())
    def msy_difference(y_0) :
        return abs(ms_y-((y_0+y_cum)**2).mean())
    from scipy.optimize import minimize
    x_0 = minimize(msx_difference, df_.x[0]).x
    y_0 = minimize(msy_difference, df_.y[0]).x

    ds['x'] = x_0+x_cum
    ds['y'] = y_0+y_cum

    # 5) remove spike and interpolate
    ds['ax'] = ds.velocity_east.differentiate('time', datetime_unit='s')
    ds['ay'] = ds.velocity_north.differentiate('time', datetime_unit='s')
    x = ds.where(ds.ax<acc_cut).x
    y = ds.where(ds.ay<acc_cut).y
    print(f"nb of spike removed { np.isnan(x).sum('time').values} over {ds.dims['time']}")
    ds['x'] = x.interpolate_na('time')
    ds['y'] = y.interpolate_na('time')
    
    
    # 6) Box mean on nb_pt_mean
    if nb_pt_mean%2==0:
        warnings.warn( 'nb_pt_mean should be odd, set to np_pt_window+1')
        nb_pt_mean +=1
    if nb_pt_mean ==0:
        assert False, 'np_pt_window=0'
        
    n = nb_pt_mean//2
    ds0 = 0
    ds1= ds
    for i in np.arange(-n,n+1):

        ds0 += ds1.shift({'time':i})
        ds1 = ds
    ds0 = ds0/nb_pt_mean
    
    # test box mean
    assert ds0.isel(time=n) == ds.isel(time=slice(0, nb_pt_mean)).mean(), 'pb with mean over n points'
    
    ds0 = ds0.drop(['ax', 'ay']).rename({'velocity_east':'u', 'velocity_north':'v'})
    
    # Build full dataframe
    df_out = ds0.to_dataframe()
    
    
    #import columns/info ex: id or time
    if import_columns :
        for column in import_columns :
             df_out[column] = df[column][0]  
          
    # update lon/lat
    if geo:
        df_out['lon'] = df.lon.mean()
        df_out['lat'] = df.lat.mean()
        # first reset reference from df
        df_out.geo.set_projection_reference(proj_ref)  # inplace
        df_out.geo.compute_lonlat()  # inplace
        
    # recompute acceleration
    if acc:
        if geo :
            df_out.geo.compute_accelerations(
                from_ = ("xy", "x", "y"),
                names = ("ax", "ay", "axy"),
                centered_velocity=True,
                time='index',
                fill_startend=False,
                inplace=True,
            )
            df_out.geo.compute_accelerations(
                from_ = ("velocities", "u", "v"),
                names =("au", "av", "auv"),
                centered_velocity=True,
                time='index',
                fill_startend=False,
                inplace=True,
            ) 
            # should still recompute for non-geo datasets
        else:
            pyn.geo.compute_accelerations(
                df_out,
                from_ = ("xy", "x", "y"),
                names = ("ax", "ay", "axy"),
                centered_velocity=True,
                time='index',
                fill_startend=False,
                inplace=True,
                keep_dt=False
            )
            pyn.geo.compute_accelerations(
                df_out,
                from_ = ("velocities", "u", "v"),
                names =("au", "av", "auv"),
                centered_velocity=True,
                time='index',
                fill_startend=False,
                inplace=True,
                keep_dt=False
            )  
              
    return df_out

In [38]:
dfi = spydell_smooth(df_.drop(columns = 'x'), t_target = '5min', import_columns =['id', 'platform'], geo=True, acc=True)
dfi

time


AssertionError: position must be labelled as 'x' and 'y'

In [22]:
hvplot = (
    (dfi.x.hvplot(label ='interpolated') 
     * df_.set_index('time').x.hvplot(label = 'raw')
    )
    +(
        dfi.lon.hvplot(label ='interpolated') 
      * df_.set_index('time').lon.hvplot(label = 'raw')
     )
 #* df_spydell_30.x.hvplot(label ='spydell_30min')
)#+(df_spydell_5.u.hvplot(label ='spydell_5min') 
 #* df_.set_index('time').velocity_east.hvplot(label = 'raw')
 #* df_spydell_30.velocity_east.hvplot(label ='spydell_30min')
#)#+(df_spydell_5.acceleration_east.hvplot(label ='spydell_5min') 
 #* df_.set_index('time').acceleration_east.hvplot(label = 'raw')
 #* df_spydell_30.acceleration_east.hvplot(label ='spydell_30min')
#)

layout = hv.Layout(hvplot).cols(1)
layout

In [23]:
hvplot = (dfi.x.hvplot(label ='interpolated') 
 * df_.set_index('time').x.hvplot(label = 'raw')
)+(dfi.u.hvplot(label ='interpolated') 
 * df_.set_index('time').velocity_east.hvplot(label = 'raw')
)+(dfi.ax.hvplot(label ='interpolated') 
 * df_.set_index('time').acceleration_east.hvplot(label = 'raw')
)

layout = hv.Layout(hvplot).cols(1)
layout