# Using EcoFOCIpy to apply a 35hr Lanzcos Filter to 2D data (xarray)

- input:
    - example uses erddap but other versions may use netcdf file
- output is currently only csv files for each depth (exported from a pandas dataframe (so you could just continue analysis)

TODO:
- output to xarray modified netcdf file

In [1]:
import xarray as xa
import pandas as pd
import numpy as np
from erddapy import ERDDAP

import EcoFOCIpy.math.lanzcos as lanzcos 
import EcoFOCIpy.plots.TimeSeriesStickPlot as TimeseriesStickPlot

In [2]:
dataset = 'ADCP_Mooring_16ckp2a_final'

dataset_id = dataset.split('_')[-2]

ecofoci_server_url='http://ecofoci-field.pmel.noaa.gov:8080/erddap'

e = ERDDAP(server=ecofoci_server_url,
           protocol="griddap",)

e.dataset_id = dataset

ds = e.to_xarray()

In [3]:
ds

In [4]:

plotfigs = False
savedata = True
domath = True

for dd in range(ds.depth.size-1,-1,-1):
    udata, vdata = None,None
    depth_curr = ds.isel(depth=dd)
    print(f'depth: {depth_curr}, {dd}')
    ### filter
    try:
        udata = depth_curr.u_curr_comp[0,0,:].to_pandas()
        vdata = depth_curr.v_curr_comp[0,0,:].to_pandas()
    except:
        udata = depth_curr.ucomp_current[:,0,0].to_pandas()
        vdata = depth_curr.vcomp_current[:,0,0].to_pandas()
        
    data = pd.concat([udata, vdata], axis=1)
    data.columns = ['udata','vdata']

    data['udata'][data.udata > 1e34]=np.nan
    data['vdata'][data.vdata > 1e34]=np.nan
    data = data.resample('1H').mean()

    ## missing data fill limited by number of hours to linearly interpolate
    data['tempu'] = data['udata'].interpolate(method='time',limit=6)
    data['tempv'] = data['vdata'].interpolate(method='time',limit=6)

    # get index of missing data that wasn't filled
    # the remaining missing data will continue to be missing after analysis
    # this data and the edges of the timeseries need to have the effects of the filter window accounted for

    missing_index_u = np.isnan(data.tempu)
    missing_index_v = np.isnan(data.tempv)
    data['tempu'] = data['udata'].interpolate(method='time')
    data['tempv'] = data['vdata'].interpolate(method='time')

    #filter data with rudimentary matlab script
    data['ufdata'] = lanzcos.lanzcos(data.tempu.values,1,35)+data['udata'].mean()
    data['vfdata'] = lanzcos.lanzcos(data.tempv.values,1,35)+data['vdata'].mean()
    data['ufdata'][missing_index_u] = np.nan
    data['vfdata'][missing_index_v] = np.nan    

    #extra calculations
    if domath:
        print(dataset_id)
        print(data.describe()[['ufdata','vfdata']])

    #plot
    if plotfigs:
        try:
            p1 = TimeseriesStickPlot.Timeseries1dStickPlot()
            plt1, fig1 = p1.plot(timedata=data.index, 
                                 udata=data.ufdata.values, 
                                 vdata=data.vfdata.values,
                                 rotate=0)
            plt1.xlabel(dataset_id+str(int(ds.depth[dd].values)).zfill(2)+'_f35')
            fig1.savefig(dataset_id+str(int(ds.depth[dd].values)).zfill(2)+'_f35'+'.png',dpi=600)


            #subsample
            datasub=data.resample('D').median()
            p1 = TimeseriesStickPlot.Timeseries1dStickPlot()
            plt1, fig1 = p1.plot(timedata=datasub.index, 
                                 udata=datasub.ufdata.values, 
                                 vdata=datasub.vfdata.values,
                                 linescale=10,
                                 rotate=0)
            plt1.xlabel(dataset_id+str(int(ds.depth[dd].values)).zfill(2)+'_f35_daily')
            fig1.savefig(dataset_id+str(int(ds.depth[dd].values)).zfill(2)+'_f35_daily'+'.png',dpi=600)

            p1 = TimeseriesStickPlot.Timeseries1dStickPlot()
            plt1, fig1 = p1.plot(timedata=data.index, 
                                 udata=data.udata.values, 
                                 vdata=data.vdata.values,
                                 rotate=0)
            plt1.xlabel(dataset_id+str(int(ds.depth[dd].values)).zfill(2)+'_nofilter')
            fig1.savefig(dataset_id+str(int(ds.depth[dd].values)).zfill(2)+'_nofilter'+'.png',dpi=600)
        except:
            pass
    #save data
    if savedata:
        data[['ufdata','vfdata']].to_csv(dataset_id+'_'+str(int(ds.depth[dd].values)).zfill(2)+'_f35'+'.csv')

depth: <xarray.Dataset>
Dimensions:        (time: 7866, latitude: 1, longitude: 1)
Coordinates:
  * time           (time) datetime64[ns] 2016-09-14T23:00:00 ... 2017-08-08T1...
    depth          float64 4.0
  * latitude       (latitude) float32 71.23
  * longitude      (longitude) float64 195.8
Data variables:
    vcomp_current  (time, latitude, longitude) float32 -10.86 -4.105 ... -18.06
    ucomp_current  (time, latitude, longitude) float32 -2.802 3.661 ... 11.66
    w_1204         (time, latitude, longitude) float32 0.5 1.6 0.2 ... 0.6 0.1
    Werr_1201      (time, latitude, longitude) float32 -0.2 3.3 -3.2 ... 0.3 0.8
Attributes: (12/37)
    cdm_data_type:                 Grid
    COMPOSITE:                     1
    Conventions:                   COARDS, CF-1.6, ACDD-1.3
    CREATION_DATE:                 June 05, 2020 16:18 UTC
    creation_date:                 April 16, 2020 21:06 UTC
    DATA_CMNT:                     1774.VEL
    ...                            ...
    time_c

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['ufdata'][missing_index_u] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['vfdata'][missing_index_v] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['ufdata'][missing_index_u] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['vfdata'][missing_index_v] = np.nan


depth: <xarray.Dataset>
Dimensions:        (time: 7866, latitude: 1, longitude: 1)
Coordinates:
  * time           (time) datetime64[ns] 2016-09-14T23:00:00 ... 2017-08-08T1...
    depth          float64 8.0
  * latitude       (latitude) float32 71.23
  * longitude      (longitude) float64 195.8
Data variables:
    vcomp_current  (time, latitude, longitude) float32 -2.153 0.1573 ... -3.721
    ucomp_current  (time, latitude, longitude) float32 -2.474 -0.2743 ... 9.349
    w_1204         (time, latitude, longitude) float32 -0.2 -0.1 ... -0.1 -0.3
    Werr_1201      (time, latitude, longitude) float32 0.4 1.0 -1.8 ... 0.6 0.1
Attributes: (12/37)
    cdm_data_type:                 Grid
    COMPOSITE:                     1
    Conventions:                   COARDS, CF-1.6, ACDD-1.3
    CREATION_DATE:                 June 05, 2020 16:18 UTC
    creation_date:                 April 16, 2020 21:06 UTC
    DATA_CMNT:                     1774.VEL
    ...                            ...
    time_

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['ufdata'][missing_index_u] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['vfdata'][missing_index_v] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['ufdata'][missing_index_u] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['vfdata'][missing_index_v] = np.nan


16ckp2a
            ufdata       vfdata
count  7866.000000  7866.000000
mean      7.575592    -0.770982
std       9.273029     6.664277
min     -28.479328   -23.111249
25%       2.837713    -4.795739
50%       7.399619    -0.523169
75%      12.553143     2.788882
max      39.498178    32.432168
depth: <xarray.Dataset>
Dimensions:        (time: 7866, latitude: 1, longitude: 1)
Coordinates:
  * time           (time) datetime64[ns] 2016-09-14T23:00:00 ... 2017-08-08T1...
    depth          float64 16.0
  * latitude       (latitude) float32 71.23
  * longitude      (longitude) float64 195.8
Data variables:
    vcomp_current  (time, latitude, longitude) float32 -2.88 0.1977 ... 0.05662
    ucomp_current  (time, latitude, longitude) float32 -1.907 -0.9803 ... 13.89
    w_1204         (time, latitude, longitude) float32 -0.1 0.0 ... -0.3 -0.4
    Werr_1201      (time, latitude, longitude) float32 -0.4 0.1 ... -0.7 0.5
Attributes: (12/37)
    cdm_data_type:                 Grid
    COMPOSITE: 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['ufdata'][missing_index_u] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['vfdata'][missing_index_v] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['ufdata'][missing_index_u] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['vfdata'][missing_index_v] = np.nan
A va

16ckp2a
            ufdata       vfdata
count  7866.000000  7866.000000
mean      7.763288    -1.351365
std       8.508879     6.205602
min     -26.062785   -21.988386
25%       3.143978    -5.026156
50%       7.689334    -1.084846
75%      13.071648     2.151028
max      35.895582    29.761929
depth: <xarray.Dataset>
Dimensions:        (time: 7866, latitude: 1, longitude: 1)
Coordinates:
  * time           (time) datetime64[ns] 2016-09-14T23:00:00 ... 2017-08-08T1...
    depth          float64 28.0
  * latitude       (latitude) float32 71.23
  * longitude      (longitude) float64 195.8
Data variables:
    vcomp_current  (time, latitude, longitude) float32 -1.282 -1.266 ... 7.306
    ucomp_current  (time, latitude, longitude) float32 4.842 6.784 ... 18.41
    w_1204         (time, latitude, longitude) float32 -0.1 -0.1 ... -0.2 -0.1
    Werr_1201      (time, latitude, longitude) float32 -0.5 0.0 0.5 ... 0.3 -0.3
Attributes: (12/37)
    cdm_data_type:                 Grid
    COMPOSITE: