# CARTHE - clean data: drifters

To do:

- (tmp) analysis with drifters: 
    - [ ] maps of trajectories
    - [ ] trajectories in relative frame of reference
    - [ ] relative dispersion
    - [ ] produce movies of trajectories
    - [ ] produce movies of relative evolution


In [None]:
import os
from glob import glob

import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
import hvplot.pandas  # noqa

#
import pynsitu as pin
from pynsitu.maps import crs

In [None]:
# Show the current working directory.
# `os.getcwd()` is used instead of the bare `pwd` automagic so the cell also
# works when automagic is disabled or when the notebook is run as a script.
os.getcwd()


In [None]:
# --- directory holding the raw drifter files ---
# (alternative location kept for reference)
# raw_dir = '/home/datawork-lops-osi/aponte/cswot/drifters/raw'
raw_dir = "/Users/mdemol/DATA_DRIFTERS/drifters/raw"

# --- directory holding the campaign yaml file ---
# (alternative location kept for reference)
# root_dir = '/home1/datahome/mdemol/PhD/insitu_trajectories'
root_dir = "/Users/mdemol/code/PhD/insitu_drifters_trajectories"

# --- campaign description ---
campaign = "drifters_CSWOT_BIOSWOT"
yaml = campaign + ".yaml"
cp = pin.Campaign(os.path.join(root_dir, yaml))

# list every deployment known to the campaign object
for label, deployment, platform, sensor, meta in cp.get_all_deployments():
    print(label, platform, sensor, deployment)

In [None]:
def _read_carthe_file(pattern, data_dir):
    """Load the CARTHE csv file of `data_dir` whose name matches `pattern`.

    Columns are renamed (DeviceName -> id, DeviceDateTime -> time,
    Latitude -> lat, Longitude -> lon), the CommId and DataId columns are
    dropped and the time column is converted to datetimes.

    Raises
    ------
    FileNotFoundError
        If no file in `data_dir` matches `pattern`.
    """
    # sort so that the selected file is deterministic if several match
    matches = sorted(glob(os.path.join(data_dir, pattern)))
    if not matches:
        raise FileNotFoundError(
            f"No CARTHE file matching '{pattern}' found in {data_dir}"
        )
    frame = (
        pd.read_csv(matches[0], sep=",")
        .rename(
            columns={
                "DeviceName": "id",
                "DeviceDateTime": "time",
                "Latitude": "lat",
                "Longitude": "lon",
            }
        )
        .drop(["CommId", "DataId"], axis=1)
    )
    frame["time"] = pd.to_datetime(frame["time"])
    return frame


def read_raw_carthe(data_dir=None):
    """Read the two raw CARTHE drifter files.

    Parameters
    ----------
    data_dir : str, optional
        Directory containing the raw files. Defaults to the notebook-level
        `raw_dir` variable.

    Returns
    -------
    dfcb, dfcc : pandas.DataFrame
        Positions read from the file matching ``carthe_*cnr*`` and from the
        file matching ``carthe_*lops*`` respectively, each with columns
        id, time, lat, lon (plus any other column present in the file).

    Raises
    ------
    FileNotFoundError
        If one of the two files cannot be found.
    """
    if data_dir is None:
        data_dir = raw_dir
    dfcb = _read_carthe_file("carthe_*cnr*", data_dir)
    dfcc = _read_carthe_file("carthe_*lops*", data_dir)
    return dfcb, dfcc

# read_raw_carthe() returns (cnr frame, lops frame); work on the lops one here
# (use index 0 instead of 1 to switch to the cnr file)
df = read_raw_carthe()[1].set_index("id")


________
# LOPS

In [None]:
# unique drifter identifiers present in the data file, in sorted order
ids = sorted(df.index.unique())
ids

Check that the drifter ids found in the data file match those declared in the campaign yaml file.

In [None]:
# filter out earlier deployments: keep positions from the campaign start onwards
df = df.loc[df["time"] >= cp["start"]]

# filter out taos deployments: keep drifters whose latitude never reaches 45.
# The per-drifter maximum is computed with a single groupby so that drifters
# entirely removed by the time filter above are simply dropped (looking them
# up with df.loc[i] would raise a KeyError).
_lat_max = df.groupby(df.index)["lat"].max()
_keep = set(_lat_max.index[_lat_max < 45])
ids = [i for i in ids if i in _keep]
df = df.loc[ids]

# print basic information: first and last position time of each drifter
_span = df.groupby(df.index)["time"].agg(["min", "max"])
for i in ids:
    print(i, _span.loc[i, "min"], _span.loc[i, "max"])

In [None]:
# extract deployment order: earliest record of each drifter,
# listed chronologically
(
    df.groupby(df.index)
    .apply(lambda grp: grp.sort_values("time").iloc[0])
    .sort_values("time")
)

In [None]:
# platform label -> drifter serial number, for the "carthe_lops" platforms
# declared in the campaign yaml file
ids_map = {p: cp[p]["serial_number"] for p in cp if "carthe_lops" in p}
ids_yaml = sorted(set(ids_map.values()))
# reverse lookup: serial number -> platform label
ids_imap = {v: k for k, v in ids_map.items()}

print("drifter id's in data file:")
print(ids)
print("drifter id's in yaml file:")
print(ids_yaml)

# every id declared in the yaml file must be present in the data file
# (the data file is allowed to contain additional ids)
ids_missing = sorted(set(ids_yaml) - set(ids))
flag = not ids_missing
if not flag:
    # raised explicitly: an `assert` statement would be skipped under `python -O`
    raise AssertionError(
        "Data file and campaign yaml file do not agree upon drifter ids"
        f" (missing from data file: {ids_missing})"
    )
print("Data file and campaign yaml file agree upon drifter ids")


### compute and show typical time intervals

This is to decide on a reasonable target timeline.
A 1-minute sampling interval seems reasonable after interpolation.

In [None]:
# Time interval (in minutes) between consecutive positions of each drifter.
# The computation is done on a positional (unique) index: times are sorted,
# differenced per drifter, then put back in the original row order, so each
# value lands on its own row whatever the row ordering of `df`.
_time = df["time"].reset_index(drop=True)
_ids = pd.Series(df.index.to_numpy(), index=_time.index)
df["dt"] = (
    _time.sort_values()
    .groupby(_ids)  # grouping Series is aligned on the positional index
    .diff()
    .sort_index()  # back to the original row order
    .div(pd.Timedelta("1min"))
    .to_numpy()
)

# drop small time intervals
# df = df.loc[df.dt>3]

fig, ax = plt.subplots(1, 1)
df["dt"].plot.hist(bins=np.arange(0, 20, 0.5), ax=ax)
ax.set_yscale("log")
ax.grid()
ax.set_title("Time interval between positions")
ax.set_xlabel("minutes")

# fraction of intervals within 1 minute of 5 minutes; the first position of
# each drifter has no interval (NaN) and is left out of the count
_dt_valid = df["dt"].dropna()
_frac = (np.abs(_dt_valid - 5) < 1).mean()
print(f"Percentage of time invervals between 4 and 6 minutes: {_frac*100:.0f}%")

---
## manually adjust deployment times first

This step is useful to infer deployment times a posteriori for each drifter individually.

In [None]:
# platform label and deployment label to inspect
p = "carthe_lops_03"
d = "d0"

# other candidates:
# p, d = "drifter7", "d0"
# p, d = "drifter8", "d0"
# p, d = "drifter9", "d0"

# serial number associated with the selected platform
_id = ids_map[p]
print(f"Drifter id: {_id}")

# positions of that single drifter, indexed by time in chronological order
_df = df.loc[_id]
_df = _df.set_index("time")
_df = _df.sort_index()

# add velocities in place, then show the interactive plot together with the
# deployment currently declared for this platform in the campaign object
_df.geo.compute_velocities(inplace=True)
_df.geo.plot_bokeh(deployments=cp[p][d], velocity=True, acceleration=True)

In [None]:
# map of the selected drifter track, points coloured by the "velocity" column
# (colour limits 0 to 1); the first returned object is shown as cell output
phv, coords = _df.geo.plot_on_map(
    s=10,
    c="velocity",
    clim=(0, 1),
    cmap="magma",
)
phv