# L2
- Apply LOWESS to all

In [33]:
import os
from glob import glob

import numpy as np
import pandas as pd
import xarray as xr

%matplotlib inline
import matplotlib.pyplot as plt
import hvplot.pandas  # noqa


#
import pynsitu as pyn
from pynsitu.maps import crs

import dask.dataframe as dd
from lib import raw_dir, root_dir, KEYS

import datetime

In [34]:
# drifters
# Campaign name; it is also the stem of the campaign YAML file.
campaign = "drifters_CSWOT_BIOSWOT_UWA"
yaml = f"{campaign}.yaml"

# Build the campaign object from <root_dir>/<campaign>.yaml.
cp = pyn.Campaign(os.path.join(root_dir, yaml))
# Last expression of the cell: displays the dataset keys imported from lib.
KEYS

['carthe_cnr',
 'carthe_lops',
 'code_ogs',
 'svp_ogs',
 'svp_scripps',
 'svp_shom',
 'svp_bcg',
 'spotter_lops',
 'carthe_uwa',
 'melodi_eodyn']

In [119]:
# CHOOSE HERE
# `key` selects which entry of KEYS is processed (KEYS[6] is 'svp_bcg' in the
# listing displayed by the previous cell).
# `t_target` is the target sampling: it is passed to the smoothing routine,
# stored in the 'sampling' global attribute and used as output file suffix.
key = KEYS[6]
t_target = '30min'

In [120]:
# Locate the L1 csv file of the selected dataset; the first match is used.
l1_csv_pattern = os.path.join(raw_dir, "L1_" + key + "*" + ".csv")
l1_csv_files = glob(l1_csv_pattern)
if not l1_csv_files:
    # Fail with an explicit message instead of an IndexError on `[0]`.
    raise FileNotFoundError(f"No L1 csv file matches {l1_csv_pattern}")
path = l1_csv_files[0]

# Drifter ids are read as strings so they are never parsed as numbers.
df = pd.read_csv(path, parse_dates=["time"], dtype={"id": "string"}).set_index("id")
# The L1 netCDF file is looked up under the same name, with a '.nc' extension.
ds_L1 = xr.open_dataset(path.replace('.csv', '.nc'))
ids = sorted(df.index.unique())
# df = df.repartition(8).reset_index().persist()


# Campaign entries whose name contains `key`, mapped to their serial number,
# plus the reverse mapping (serial number -> campaign entry name).
ids_map = {p: cp[p]["serial_number"] for p in cp if key in p}
ids_yaml = sorted(set(ids_map.values()))
ids_imap = {v: k for k, v in ids_map.items()}

print("drifter id's in data file:")
print(ids)
print("drifter id's in yaml file:")

print(ids_yaml)

# Every drifter declared in the yaml file must be present in the data file.
flag = set(ids).issuperset(ids_yaml)
if flag:
    print("Data file and campaign yaml file agree upon drifter ids")
else:
    # Explicit raise: unlike `assert False`, this is not stripped under `-O`.
    raise AssertionError(
        "Data file and campaign yaml file do not agree upon drifter ids"
    )

FileNotFoundError: [Errno 2] No such file or directory: b'/Users/mdemol/DATA_DRIFTERS/drifters/raw/L1_svp_bcg_20230509_070000.nc'

# Attributes
- description
- smoothing_method_description (git link?)
- smoothing_method_param_dict

## changing
- sampling
- generation_date


## inherited from L1
- raw_download_max_date
- drifter_type
- laboratory
- campaign
- campaign doi
- contact


In [None]:
# Global attributes of the L2 product. Start from a copy of the L1 attributes
# (so the L1 dataset object itself is left untouched), then add or override
# the L2-specific entries.
attrs_ds = dict(ds_L1.attrs)
attrs_ds["description"] = (
    "L2 product - applied smoothing method on L1-datasets. "
    "Trajectories are cut into segments, removing all gaps of more than 4 hours, "
    "and the LOWESS method is then applied on these segments. "
    "To obtain a regularly sampled product, gaps are then filled with linear "
    "interpolation, and flagged with a 'gap_mask' = 1 value "
    "('gap_mask' = 0 otherwise)."
)
attrs_ds["generation_date"] = str(datetime.datetime.now())
attrs_ds['smoothing_method_description'] = 'LOWESS method (polynomial=linear, 3 iterations, Elipot et al.2016) + low pass filter with cutoff frequency of 13 cpd and 20 days pad, see https://github.com/apatlpo/pynsitu/tree/9eea25a9cb1ea112f9882fc3252de21e4290a47d/pynsitu'
# The parameter dictionary is stored as its string representation.
attrs_ds['smoothing_method_param_dict'] = f'{pyn.drifters.param_lowess}'
attrs_ds['sampling'] = t_target


def set_usual_attrs(ds, attrs_ds):
    """Attach global and per-variable metadata to an L2 dataset, in place.

    Parameters
    ----------
    ds : dataset-like
        Object exposing an ``attrs`` mapping and, as attributes, the variables
        id, time, lat, lon, platform, x, y, X, u, v, U, ax, ay, Axy, au, av,
        Auv, lonc, latc, gap_mask and gaps, each with its own ``attrs`` mapping.
    attrs_ds : mapping
        Global attributes merged into ``ds.attrs``.

    Returns
    -------
    None
        ``ds`` is modified in place.

    Raises
    ------
    AttributeError
        If one of the variables listed above is missing from ``ds``.
    """
    # Accelerations are in metres per second squared (m.s^-2, not m.s^2).
    accel_units = r"$m.s^{-2}$"

    # NOTE(review): CF conventions name this attribute `long_name`; the
    # `longname` key is kept so existing consumers of the files keep working.
    variable_attrs = {
        "id": dict(longname="id", description="drifter id"),
        "time": dict(longname="Time"),
        "lat": dict(longname="Latitude", units="°"),
        "lon": dict(longname="Longitude", units="°"),
        "platform": dict(
            longname="Platform", description="second way of identification"
        ),
        "x": dict(
            longname="Zonal position",
            description="Zonal position in the local frame",
            units="m",
        ),
        "y": dict(
            longname="Meridional position",
            description="Meridional position in the local frame",
            units="m",
        ),
        "X": dict(longname="Position norm", units="m"),
        "u": dict(
            longname="Zonal velocity",
            description="Zonal velocity computed via centered differentiation from x",
            units="m/s",
        ),
        "v": dict(
            longname="Meridional velocity",
            description="Meridional velocity computed via centered differentiation from y",
            units="m/s",
        ),
        "U": dict(longname="Velocity norm", units="m/s"),
        "ax": dict(
            longname="Zonal acceleration",
            description="Zonal acceleration computed via centered differentiation from x",
            units=accel_units,
        ),
        "ay": dict(
            longname="Meridional acceleration",
            description="Meridional acceleration computed via centered differentiation from y",
            units=accel_units,
        ),
        "Axy": dict(
            longname="Acceleration norm",
            description=r"\sqrt(ax^2+ay^2)",
            units=accel_units,
        ),
        "au": dict(
            longname="Zonal acceleration",
            description="Zonal acceleration computed via centered differentiation from u",
            units=accel_units,
        ),
        "av": dict(
            longname="Meridional acceleration",
            description="Meridional acceleration computed via centered differentiation from v",
            units=accel_units,
        ),
        "Auv": dict(
            longname="Acceleration norm",
            description=r"\sqrt(au^2+av^2)",
            units=accel_units,
        ),
        "lonc": dict(
            longname="Longitude of reference",
            description="Longitude used as reference for the local frame projection",
        ),
        "latc": dict(
            longname="Latitude of reference",
            description="Latitude used as reference for the local frame projection",
        ),
        "gap_mask": dict(
            longname="Gap mask",
            description=(
                "Flagged for gaps bigger than 4 hours in raw data : 1 if time value "
                "is in a longer than 4 hours time gaps in raw data, 0 otherwise. "
                "CAUTION : Variables in these gaps were linearly interpolated to "
                "have an regular sampling"
            ),
        ),
        "gaps": dict(
            longname="Gaps",
            description="distance to the nearest raw time",
        ),
    }

    # Global attributes first, then one update per variable.
    ds.attrs.update(attrs_ds)
    for name, attrs in variable_attrs.items():
        getattr(ds, name).attrs.update(attrs)

In [None]:
# Run the pynsitu smoothing on the L1 dataframe with the target sampling.
# NOTE(review): presumably processes every drifter id in `df` — confirm in
# pynsitu.drifters.optimize_smooth_all.
dfs = pyn.drifters.optimize_smooth_all(df,t_target)
# Convert the result to an xarray Dataset indexed by (id, time).
dss = dfs.reset_index().set_index(["id", "time"]).to_xarray()
# One platform value per drifter: keep the first one found for each id.
dss['platform']=dfs.set_index('id').platform.groupby('id').first().to_xarray()
# Collapse the projection reference coordinates along time (mean over 'time').
dss['lonc'] = dss['lonc'].mean('time')
dss['latc'] = dss['latc'].mean('time')
# Attach global and per-variable metadata in place, then display the dataset.
set_usual_attrs(dss, attrs_ds)
dss

In [None]:
# Derive the L2 file name from the L1 csv path ('L1_' -> 'L2_', '.csv' ->
# '_<t_target>.nc') and write the dataset to netCDF.
# NOTE(review): str.replace acts on the whole path, so a directory name
# containing 'L1_' or '.csv' would be rewritten as well.
path = path.replace('L1_', 'L2_').replace('.csv','_'+t_target + '.nc')
dss.to_netcdf(path)

In [106]:
# Display the L2 dataset for a visual check.
dss