# Computation of velocity and acceleration 

- [ ] velocity
- [ ] acceleration


In [1]:
import os
from glob import glob

import numpy as np
import pandas as pd
import xarray as xr
import dask.dataframe as dd
import dask.array as da

# import cartopy.crs as ccrs
# import cartopy.feature as cfeature
# import geopandas as gpd
from shapely.geometry import Polygon

%matplotlib inline
from matplotlib import pyplot as plt


import drifters.utils as ut
import pynsitu as pin

from dask.delayed import delayed  # important to read parquet correctly

In [2]:
from dask.distributed import Client

if True:
    from dask_jobqueue import PBSCluster
    from dask.distributed import Client

    cluster = PBSCluster(cores=4, processes=4, walltime="04:00:00")
    w = cluster.scale(jobs=3)
else:
    from dask.distributed import LocalCluster

    cluster = LocalCluster()

client = Client(cluster)
client



0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: http://10.148.1.106:8787/status,

0,1
Dashboard: http://10.148.1.106:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.148.1.106:52309,Workers: 0
Dashboard: http://10.148.1.106:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [4]:
# load data

col_sel = [
    "time",
    "id",
    "lon",
    "lat",
    "ve",
    "vn",
    "typebuoy",
    "gap",
    "deploy_date",
    "deploy_lat",
    "deploy_lon",
    "end_date",
    "end_lat",
    "end_lon",
    "drogue_lost_date",
    "typedeath",
    "lon360",
    "err_lat",
    "err_lon",
    "err_ve",
    "err_vn",
]

bandwidth = 0.1

df_argos = ut.load_gdp("argos", suffix=f"_filtered_b{bandwidth}")
df_argos = (
    df_argos.where(df_argos != -1.0e34)
    .reset_index()
    .rename(columns=dict(index="id"))[col_sel]
    .set_index("time")
    .persist()
)

df_gps = ut.load_gdp("gps", suffix=f"_filtered_b{bandwidth}")
df_gps = (
    (df_gps.where(df_gps != -1.0e34).reset_index().rename(columns=dict(index="id")))[
        col_sel
    ]
    .set_index("time")
    .persist()
)  # CAUTION : replace missing value -1.e+34 by Nan

### Duplicates

In [5]:
df1 = df_gps.get_partition(1)
df2 = df1.drop_duplicates(keep="first")
print(len(df1), len(df2))

266483 256902


## Computation : 
Derivation of velocities and acceleration :
- ve, vn already computed via LOWESS METHOD
- vex, vey centered derivation using projection of lon, lat
- aex, any double centered derivation using projection of lon, lat
- ae, an centered derivation of ve, vn

In [6]:
import warnings


def c_vel_acc(df):
    # INDEX TIME ?
    if not df.index.name == "time":
        warnings.warn("Are you sure time is the index ? ", UserWarning)
    # SORTED TIME ?
    if not df.index.is_monotonic_increasing:
        warnings.warn("time sorting dataframe", UserWarning)
        df.sort_index()

    _geo = pin.geo.GeoAccessor(df)
    _geo.compute_velocities(
        centered=True,
        names=(
            "vex",
            "vny",
            "vxy",
        ),
        inplace=True,
    )
    print(df.columns)
    _geo.compute_accelerations(
        names=("ae", "an", "aen"),
        from_=("velocities", "ve", "vn"),
        centered_velocity=True,
        inplace=True,
    )
    print(df.columns)
    _geo.compute_accelerations(
        names=("aex", "any", "axy"), from_=("lonlat", "lon", "lat"), inplace=True
    )
    print(df.columns)
    _geo.compute_velocities(
        distance="diff",
        centered=True,
        names=("vex_diff", "vny_diff", "vxy_diff"),
        inplace=True,
    )  # keep_dt=True,)
    print(df.columns)
    return _geo._obj

def c_vel_acc(df):
    df.geo.compute_velocities( centered=True, names=('vex', 'vny','vxy'), inplace=True)
    print(df.columns)
    df.geo.compute_accelerations(names=('ae','an','aen'), from_ = ('velocities','ve','vn'), centered_velocity=True, inplace=True)
    print(df.columns)
    df.geo.compute_accelerations( names=('aex','any', 'axy'), from_ = ('lonlat','lon','lat'), inplace=True )
    print(df.columns)
    df.geo.compute_velocities( distance='diff', centered=True, names=('vex_diff', 'vny_diff','vxy_diff'), inplace=True)
    print(df.columns)
    return df

In [7]:
df_argos.get_partition(0).head()

Unnamed: 0_level_0,id,lon,lat,ve,vn,typebuoy,gap,deploy_date,deploy_lat,deploy_lon,end_date,end_lat,end_lon,drogue_lost_date,typedeath,lon360,err_lat,err_lon,err_ve,err_vn
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1987-10-02 13:00:00,8707978,-137.744492,46.454159,0.3154,-0.009,b'SVP ',6048.0,1987-10-02,46.45,-137.75,1988-01-12,46.24,-133.02,1988-01-12,3.0,222.255508,0.00947,0.00345,0.0298,0.1484
1987-10-02 14:00:00,8707978,-137.728226,46.459862,0.3094,-0.09,b'SVP ',6048.0,1987-10-02,46.45,-137.75,1988-01-12,46.24,-133.02,1988-01-12,3.0,222.271774,0.00663,0.00236,0.0274,0.1503
1987-10-02 15:00:00,8707978,-137.714447,46.456089,0.3058,-0.0997,b'SVP ',9936.0,1987-10-02,46.45,-137.75,1988-01-12,46.24,-133.02,1988-01-12,3.0,222.285553,0.00388,0.00183,0.0237,0.0803
1987-10-02 16:00:00,8707978,-137.700195,46.452862,0.3052,-0.1041,b'SVP ',9936.0,1987-10-02,46.45,-137.75,1988-01-12,46.24,-133.02,1988-01-12,3.0,222.299805,0.00596,0.0018,0.0238,0.0703
1987-10-02 17:00:00,8707978,-137.685577,46.449841,0.2805,-0.1314,b'SVP ',6048.0,1987-10-02,46.45,-137.75,1988-01-12,46.24,-133.02,1988-01-12,3.0,222.314423,0.0053,0.02986,0.3121,0.0919


In [12]:
df = df_argos.get_partition(0).groupby("id").get_group(8707978).compute()
meta = c_vel_acc(df)
df_gps_out = df_gps.groupby("id").apply(c_vel_acc, meta=meta).reset_index().persist()
df_argos_out = (
    df_argos.groupby("id").apply(c_vel_acc, meta=meta).reset_index().persist()
)

Index(['id', 'lon', 'lat', 've', 'vn', 'typebuoy', 'gap', 'deploy_date',
       'deploy_lat', 'deploy_lon', 'end_date', 'end_lat', 'end_lon',
       'drogue_lost_date', 'typedeath', 'lon360', 'err_lat', 'err_lon',
       'err_ve', 'err_vn', 'x', 'y', 'vex', 'vny', 'vxy'],
      dtype='object')
Index(['id', 'lon', 'lat', 've', 'vn', 'typebuoy', 'gap', 'deploy_date',
       'deploy_lat', 'deploy_lon', 'end_date', 'end_lat', 'end_lon',
       'drogue_lost_date', 'typedeath', 'lon360', 'err_lat', 'err_lon',
       'err_ve', 'err_vn', 'x', 'y', 'vex', 'vny', 'vxy', 'ae', 'an', 'aen'],
      dtype='object')
Index(['id', 'lon', 'lat', 've', 'vn', 'typebuoy', 'gap', 'deploy_date',
       'deploy_lat', 'deploy_lon', 'end_date', 'end_lat', 'end_lon',
       'drogue_lost_date', 'typedeath', 'lon360', 'err_lat', 'err_lon',
       'err_ve', 'err_vn', 'x', 'y', 'vex', 'vny', 'vxy', 'ae', 'an', 'aen',
       'aex', 'any', 'axy'],
      dtype='object')
Index(['id', 'lon', 'lat', 've', 'vn', 'typebuoy'



# Store in parquet
https://github.com/dask/dask/issues/8650

In [13]:
root_dir = "/home1/datawork/mdemol/GDP"
parquet_argos = os.path.join(root_dir, "argos_av_time.parquet")
parquet_gps = os.path.join(root_dir, "gps_av_time.parquet")

In [14]:
df_load_gps = df_gps_out.set_index("time").repartition(partition_size="100MB").persist()
df_load_argos = (
    df_argos_out.set_index("time").repartition(partition_size="100MB").persist()
)

In [15]:
df_load_gps.to_parquet(
    parquet_gps, engine="pyarrow"
)  # alternative: fastparquet pyarrow
df_load_argos.to_parquet(parquet_argos, engine="pyarrow")  # alternative: fastparquet

In [16]:
df_load_gps

Unnamed: 0_level_0,id,lon,lat,ve,vn,typebuoy,gap,deploy_date,deploy_lat,deploy_lon,end_date,end_lat,end_lon,drogue_lost_date,typedeath,lon360,err_lat,err_lon,err_ve,err_vn,x,y,vex,vny,vxy,ae,an,aen,aex,any,axy,vex_diff,vny_diff,vxy_diff
npartitions=175,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1
,int64,float32,float32,float32,float32,object,float64,datetime64[ns],float64,float64,datetime64[ns],float64,float64,datetime64[ns],float64,float32,float32,float32,float32,float32,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [17]:
# df_load_gps.ven.compute()

# Open test

In [18]:
df_argos_reload = dd.read_parquet(parquet_argos)  # .persist()

In [20]:
df_argos_reload.ve.compute()

time
1987-10-02 13:00:00    0.3154
1987-10-02 14:00:00    0.3094
1987-10-02 15:00:00    0.3058
1987-10-02 16:00:00    0.3052
1987-10-02 17:00:00    0.2805
                        ...  
2020-06-30 19:00:00    0.0498
2020-06-30 20:00:00    0.0512
2020-06-30 21:00:00   -0.2276
2020-06-30 22:00:00   -0.2425
2020-06-30 23:00:00   -0.0584
Name: ve, Length: 125205847, dtype: float32

In [15]:
df_gps_reload.columns

Index(['id', 'lon', 'lat', 've', 'vn', 'typebuoy', 'gap', 'deploy_date',
       'deploy_lat', 'deploy_lon', 'end_date', 'end_lat', 'end_lon',
       'drogue_lost_date', 'typedeath', 'lon360', 'err_lat', 'err_lon',
       'err_ve', 'err_vn', 'x', 'y', 'vex', 'vny', 'vxy', 'ae', 'an', 'aen',
       'aex', 'any', 'axy', 'dt', 'vex_diff', 'vny_diff', 'vxy_diff'],
      dtype='object')

In [22]:
cluster.close()