# Comparison of velocity and acceleration computations

In [2]:
import os
from glob import glob

import numpy as np
import pandas as pd
import xarray as xr
import dask.dataframe as dd
import dask.array as da

#import cartopy.crs as ccrs
#import cartopy.feature as cfeature
#import geopandas as gpd
from shapely.geometry import Polygon

%matplotlib inline
from matplotlib import pyplot as plt


import drifters.utils as ut
import pynsitu as pin

from dask.delayed import delayed

import GDP_lib as gdp

from GDP_lib import root_dir

In [3]:
from dask.distributed import Client

# Switch between a PBS batch cluster and a local cluster (handy for small tests).
USE_PBS_CLUSTER = True

if USE_PBS_CLUSTER:
    from dask_jobqueue import PBSCluster

    cluster = PBSCluster(cores=3, processes=3, walltime='04:00:00')
    # Note kept from earlier runs: 2 jobs were not enough for the
    # lon, lat, year binning (a default PBSCluster scaled to 3 jobs was used then).
    w = cluster.scale(jobs=8)
else:
    from dask.distributed import LocalCluster

    cluster = LocalCluster()

# Connect this notebook session to the cluster; the bare name renders the client summary.
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: http://10.148.0.84:8787/status,

0,1
Dashboard: http://10.148.0.84:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.148.0.84:54831,Workers: 0
Dashboard: http://10.148.0.84:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [4]:
# Paths of the two parquet datasets, resolved under root_dir (imported from GDP_lib).
parquet_argos = os.path.join(root_dir, "argos_av_time.parquet")
parquet_gps = os.path.join(root_dir, "gps_av_time.parquet")

In [5]:
# Read both datasets as dask dataframes and persist them on the cluster so that
# the computations below reuse the loaded partitions instead of re-reading the files.
df_gps = dd.read_parquet(parquet_gps).persist()
df_argos = dd.read_parquet(parquet_argos).persist()

__________
## Global mean/variance/std

In [78]:
def print_mean_var(df):
    """Print the mean, variance and standard deviation of every column of ``df``.

    The three statistics are evaluated once for the whole frame (three
    computations in total) rather than once per column and per statistic,
    which avoids launching ``3 * n_columns`` separate dask computations.

    Parameters
    ----------
    df : dask.dataframe.DataFrame or pandas.DataFrame
        Frame restricted to the numeric columns to summarise. Lazy (dask)
        reductions are computed; eager (pandas) results are used as they are.

    Returns
    -------
    None
        One line per column is written to standard output, formatted as
        ``<column>: mean = ..., variance = ..., std = ...``.
    """
    def _materialize(result):
        # Lazy dask reductions expose .compute(); pandas results are already concrete.
        return result.compute() if hasattr(result, "compute") else result

    means = _materialize(df.mean())
    variances = _materialize(df.var())
    stds = _materialize(df.std())

    for col in df.columns:
        print(col + f': mean = {means[col]}, variance = {variances[col]}, std = {stds[col]}')

In [79]:
# Global mean/variance/std of the selected columns of df_gps
# (presumably v* are velocities and a* accelerations — confirm against the dataset definition).
print_mean_var(df_gps[['ve', 'vn', 'ae', 'an', 'vex', 'vny', 'aex', 'any']])

ve: mean = 0.011794044249571098, variance = 0.0628432184457779, std = 0.2506854832172394
vn: mean = 0.0033113528339724953, variance = 0.04800022020936012, std = 0.21908952295780182
ae: mean = -2.767218635963441e-10, variance = 1.4280341147867293e-10, std = 1.1950038137122113e-05
an: mean = -7.194039849104883e-10, variance = 1.4384357997304564e-10, std = 1.1993480728005763e-05
vex: mean = 0.011957733667706385, variance = 0.06443895856264234, std = 0.25384829832528394
vny: mean = -0.0010015627356488742, variance = 0.12362395520161872, std = 0.35160198406951393
aex: mean = -8.944412796126405e-10, variance = 6.241906007793556e-10, std = 2.4983806771173916e-05
any: mean = -2.009514858902962e-09, variance = 5.235646728768863e-10, std = 2.288153563196505e-05


In [80]:
# Same global statistics for df_argos, over the same set of columns as for df_gps.
print_mean_var(df_argos[['ve', 'vn', 'ae', 'an', 'vex', 'vny', 'aex', 'any']])

ve: mean = 0.005583182291101385, variance = 0.07471064478158951, std = 0.27333247661590576
vn: mean = 0.005280918545608172, variance = 0.0525890477001667, std = 0.22932301461696625
ae: mean = -5.596267706297923e-10, variance = 3.04483495199027e-10, std = 1.7449455441331887e-05
an: mean = 2.750317536421524e-11, variance = 2.323882180063487e-10, std = 1.5244284765325945e-05
vex: mean = 0.005567501588995124, variance = 0.07575240500319723, std = 0.2752315479795098
vny: mean = 0.005090686674079184, variance = 0.1401732260550757, std = 0.3743971501695435
aex: mean = -2.493054724798962e-09, variance = 2.4987916769100803e-09, std = 4.998791530870317e-05
any: mean = -4.6236329216281857e-10, variance = 1.5814457297611844e-09, std = 3.97673953102436e-05


______________
## Trajectories

In [None]:
# Extract the rows with id 22192 as a time-indexed pandas frame.
# NOTE(review): only partition 0 is searched, so this assumes id 22192 is stored there.
grp_id = (
    df_gps
    .get_partition(0)
    .groupby('id')
    .get_group(22192)
    .set_index('time')
    .compute()
)

In [None]:
# Overlay the four velocity columns of the selected trajectory against time.
for column in ('ve', 'vn', 'vex', 'vny'):
    grp_id[column].plot(label=column)
plt.legend()

In [None]:
# Overlay the four acceleration columns of the selected trajectory against time.
for column in ('ae', 'an', 'aex', 'any'):
    grp_id[column].plot(label=column)
plt.legend()

______________
## Dependence on x,y

In [None]:
def deriv_dep2_x(df, step=500):
    """Bin-average the velocity/acceleration differences as a function of ``x``.

    The columns ``ae-ax``, ``an-ay``, ``ve-ux`` and ``vn-uy`` and their ratios
    to ``ae``, ``an``, ``ve`` and ``vn`` are averaged inside bins of width
    ``step`` of the ``x`` column.

    Parameters
    ----------
    df : dask.dataframe.DataFrame
        Must provide the columns ``x``, ``ae``, ``an``, ``ve``, ``vn``,
        ``ae-ax``, ``an-ay``, ``ve-ux`` and ``vn-uy``. The frame passed by the
        caller is left untouched.
    step : float, optional
        Bin width, expressed in the units of ``x`` (default 500).

    Returns
    -------
    xarray.Dataset
        One ``mean_<column>`` variable per averaged column, indexed by the
        mid-point ``x`` of each bin.

    Raises
    ------
    ValueError
        If ``step`` is not positive, or if the range of ``x`` is not finite
        (e.g. empty frame).

    Notes
    -----
    The ratios are not protected against zero denominators: a sample with
    e.g. ``ae == 0`` produces an infinite or NaN ratio that propagates to the
    mean of its bin.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    differences = ['ae-ax', 'an-ay', 've-ux', 'vn-uy']
    # ratio column name -> (numerator column, denominator column)
    ratios = {
        'ae-ax/ae': ('ae-ax', 'ae'),
        'an-ay/an': ('an-ay', 'an'),
        've-ux/ve': ('ve-ux', 've'),
        'vn-uy/vn': ('vn-uy', 'vn'),
    }
    columns = differences + list(ratios)

    # Item assignment on a dask dataframe rebinds the object in place; work on
    # a shallow copy so that no helper column leaks into the caller's frame.
    df = df.copy()
    for name, (numerator, denominator) in ratios.items():
        df[name] = df[numerator] / df[denominator]

    xmin = float(df["x"].min().compute())
    xmax = float(df["x"].max().compute())
    if not (np.isfinite(xmin) and np.isfinite(xmax)):
        raise ValueError("cannot bin along 'x': its range is not finite")

    # Build edges that span [xmin, xmax] entirely: np.arange(xmin, xmax, step)
    # stops before xmax, which left the largest values outside every bin.
    n_bins = max(int(np.ceil((xmax - xmin) / step)), 1)
    xbins = xmin + step * np.arange(n_bins + 1)
    xbins[-1] = max(xbins[-1], xmax)  # guard against floating-point shortfall

    # include_lowest=True keeps the samples equal to xmin, which the default
    # left-open first interval would discard.
    df['xcut'] = df["x"].map_partitions(pd.cut, xbins, include_lowest=True)

    mean = (df[columns]
            .groupby(df['xcut'])
            .mean()
            .compute()
            .sort_index()
            .add_prefix('mean_')
            )

    # Replace the interval labels by the bin mid-points, used as the x coordinate.
    mean.index = pd.Index([interval.mid for interval in mean.index], name='x')

    return mean.to_xarray()

In [None]:
def deriv_dep2_y(df, step=500):
    """Bin-average the velocity/acceleration differences as a function of ``y``.

    The columns ``ae-ax``, ``an-ay``, ``ve-ux`` and ``vn-uy`` and their ratios
    to ``ae``, ``an``, ``ve`` and ``vn`` are averaged inside bins of width
    ``step`` of the ``y`` column.

    Parameters
    ----------
    df : dask.dataframe.DataFrame
        Must provide the columns ``y``, ``ae``, ``an``, ``ve``, ``vn``,
        ``ae-ax``, ``an-ay``, ``ve-ux`` and ``vn-uy``. The frame passed by the
        caller is left untouched.
    step : float, optional
        Bin width, expressed in the units of ``y`` (default 500).

    Returns
    -------
    xarray.Dataset
        One ``mean_<column>`` variable per averaged column, indexed by the
        mid-point ``y`` of each bin.

    Raises
    ------
    ValueError
        If ``step`` is not positive, or if the range of ``y`` is not finite
        (e.g. empty frame).

    Notes
    -----
    The ratios are not protected against zero denominators: a sample with
    e.g. ``ae == 0`` produces an infinite or NaN ratio that propagates to the
    mean of its bin.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    differences = ['ae-ax', 'an-ay', 've-ux', 'vn-uy']
    # ratio column name -> (numerator column, denominator column)
    ratios = {
        'ae-ax/ae': ('ae-ax', 'ae'),
        'an-ay/an': ('an-ay', 'an'),
        've-ux/ve': ('ve-ux', 've'),
        'vn-uy/vn': ('vn-uy', 'vn'),
    }
    columns = differences + list(ratios)

    # Item assignment on a dask dataframe rebinds the object in place; work on
    # a shallow copy so that no helper column leaks into the caller's frame.
    df = df.copy()
    for name, (numerator, denominator) in ratios.items():
        df[name] = df[numerator] / df[denominator]

    ymin = float(df["y"].min().compute())
    ymax = float(df["y"].max().compute())
    if not (np.isfinite(ymin) and np.isfinite(ymax)):
        raise ValueError("cannot bin along 'y': its range is not finite")

    # Build edges that span [ymin, ymax] entirely: np.arange(ymin, ymax, step)
    # stops before ymax, which left the largest values outside every bin.
    n_bins = max(int(np.ceil((ymax - ymin) / step)), 1)
    ybins = ymin + step * np.arange(n_bins + 1)
    ybins[-1] = max(ybins[-1], ymax)  # guard against floating-point shortfall

    # include_lowest=True keeps the samples equal to ymin, which the default
    # left-open first interval would discard.
    df['ycut'] = df["y"].map_partitions(pd.cut, ybins, include_lowest=True)

    mean = (df[columns]
            .groupby(df['ycut'])
            .mean()
            .compute()
            .sort_index()
            .add_prefix('mean_')
            )

    # Replace the interval labels by the bin mid-points, used as the y coordinate.
    mean.index = pd.Index([interval.mid for interval in mean.index], name='y')

    return mean.to_xarray()

In [None]:
# Bin-averaged differences for the GPS dataset, once as a function of x and once of y.
ds_x_gps = deriv_dep2_x(df_gps)
ds_y_gps = deriv_dep2_y(df_gps)

In [None]:
# Magnitude of the binned mean acceleration-difference ratios along x.
for var_name in ('mean_ae-ax/ae', 'mean_an-ay/an'):
    abs(ds_x_gps[var_name]).plot(label=var_name)
plt.legend()

In [None]:
# Magnitude of the binned mean acceleration differences along x.
for var_name in ('mean_ae-ax', 'mean_an-ay'):
    abs(ds_x_gps[var_name]).plot(label=var_name)
plt.legend()

In [None]:
# Magnitude of the binned mean acceleration-difference ratios along y.
for var_name in ('mean_ae-ax/ae', 'mean_an-ay/an'):
    abs(ds_y_gps[var_name]).plot(label=var_name)
plt.legend()

In [None]:
# Magnitude of the binned mean acceleration differences along y.
for var_name in ('mean_ae-ax', 'mean_an-ay'):
    abs(ds_y_gps[var_name]).plot(label=var_name)
plt.legend()

In [None]:
# Magnitude of the binned mean velocity-difference ratios along x.
for var_name in ('mean_ve-ux/ve', 'mean_vn-uy/vn'):
    abs(ds_x_gps[var_name]).plot(label=var_name)
plt.legend()

In [None]:
# Magnitude of the binned mean velocity-difference ratios along y.
for var_name in ('mean_ve-ux/ve', 'mean_vn-uy/vn'):
    abs(ds_y_gps[var_name]).plot(label=var_name)
plt.legend()

In [None]:
# Magnitude of the binned mean velocity differences along y.
for var_name in ('mean_ve-ux', 'mean_vn-uy'):
    abs(ds_y_gps[var_name]).plot(label=var_name)
plt.legend()

## Dependence on the gap