# GDP spectral analysis

- [ ] velocity
- [ ] acceleration

Derivation of velocities and accelerations:
- ve, vn: already computed via the LOWESS method
- vx, vy (columns `vex`, `vny`): centered differentiation of the projected lon, lat positions
- acc_x, acc_y (columns `aex`, `any`): double centered differentiation of the projected lon, lat positions
- ae, an: centered differentiation of ve, vn


In [1]:
import os

import numpy as np
import pandas as pd
import xarray as xr
import dask.dataframe as dd

#import cartopy.crs as ccrs
#import cartopy.feature as cfeature
#import geopandas as gpd
#from shapely.geometry import Polygon

%matplotlib inline
from matplotlib import pyplot as plt

import drifters.utils as ut
import pynsitu as pin



In [2]:
from dask.distributed import Client

# Switch between a PBS batch cluster (True) and a cluster on the local machine (False).
USE_PBS_CLUSTER = True

if USE_PBS_CLUSTER:
    from dask_jobqueue import PBSCluster

    # Layouts tried previously: cores=56/processes=28 and cores=7/processes=7.
    cluster = PBSCluster(cores=3, processes=3, walltime='04:00:00')
    # Number of PBS jobs requested; earlier runs noted 5 for gps, 8 for argos.
    w = cluster.scale(jobs=8)
else:
    from dask.distributed import LocalCluster

    cluster = LocalCluster()

# Connect this notebook to the scheduler; the last expression displays the client summary.
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: http://10.148.0.40:8787/status,

0,1
Dashboard: http://10.148.0.40:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.148.0.40:48513,Workers: 0
Dashboard: http://10.148.0.40:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# Choose key 

In [3]:
# Dataset selector, used to build the input and output file names ('gps' or 'argos').
key = 'argos'

# Load

In [4]:
# Read the table that already holds the computed velocities and accelerations.
root_dir = "/home1/datawork/mdemol/GDP"
parquet = os.path.join(root_dir, key + "_av_time.parquet")

# Only these columns are kept; everything else in the file is dropped.
col = [
    "id", 'time', "lon", "lat",
    "ve", "vn", 'ae', 'an',
    'vex', 'vny', 'aex', 'any',
]

# reset_index() turns the stored index into a regular column before selecting.
ddf_full = dd.read_parquet(parquet).reset_index()
df = ddf_full[col].persist()

In [5]:
# Display the lazy dask dataframe layout (column dtypes, number of partitions).
df

Unnamed: 0_level_0,id,time,lon,lat,ve,vn,ae,an,vex,vny,aex,any
npartitions=508,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
,int64,datetime64[ns],float32,float32,float32,float32,float64,float64,float64,float64,float64,float64
,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...


# Generate Spectra

In [6]:
# Keep the timestamps under "date"; "time" is re-created below as a float.
# The guard makes the cell safe to re-run: without it, a second run would
# rename the float "time" column too and leave two "date" columns.
if "date" not in df.columns:
    df = df.rename(columns={'time': 'date'})

# add time in hours, counted from t_ref
# Timedelta(hours=1) equals Timedelta('1H') but avoids the deprecated 'H' alias.
time_unit = pd.Timedelta(hours=1)
t_ref = pd.Timestamp(2000, 1, 1)
df["time"] = (df["date"] - t_ref) / time_unit

In [7]:
T_str = '60D'  # window length (days)
dt = '1H'  # sampling

window = pd.Timedelta(T_str)
N = int(window / pd.Timedelta(dt))  # output size: number of samples in one window
T = window / time_unit  # must be in the same units as the "time" column

# Every variant carries the position columns plus one pair of components.
columns = ['lon', 'lat']
columns0 = [*columns, 've', 'vn']
columns1 = [*columns, 'ae', 'an']
columns2 = [*columns, 'vex', 'vny']
columns3 = [*columns, 'aex', 'any']

# label -> column list, in the same order as `labels`
labels = ['ven', 'aen', 'vxy', 'axy']
Columns = dict(zip(labels, (columns0, columns1, columns2, columns3)))


In [8]:
def process_uv(lon, lat, u, v, N, dt, **kwargs):
    """Build the complex series ``u + 1j*v`` and hand it to ``pin.tseries.get_spectrum``.

    Parameters
    ----------
    lon, lat :
        Positions associated with the series. Only ``lon`` is inspected:
        when it is ``None``, ``None`` is passed on in place of the complex
        series. ``lat`` is accepted but not used here.
    u, v :
        The two components; they are combined as ``u + 1j*v``.
        NOTE(review): expected to be regularly sampled series -- confirm
        against the caller.
    N : int
        Forwarded as the second positional argument of ``get_spectrum``
        (length of the spectrum).
    dt :
        Time sampling, forwarded unchanged as ``dt=``.
    **kwargs :
        Forwarded unchanged to ``pin.tseries.get_spectrum``.

    Returns
    -------
    Whatever ``pin.tseries.get_spectrum`` returns.
    """
    uv = None if lon is None else u + 1j * v
    return pin.tseries.get_spectrum(uv, N, dt=dt, **kwargs)

In [9]:
# Peek at the first rows: timestamps now live in "date", "time" holds float hours.
df.head()

Unnamed: 0,id,date,lon,lat,ve,vn,ae,an,vex,vny,aex,any,time
0,8707978,1987-10-02 13:00:00,-137.744492,46.454159,0.3154,-0.009,-1.333335e-06,-1.3e-05,0.320565,-0.055649,-1.5e-05,-8.1e-05,-107363.0
1,8707978,1987-10-02 14:00:00,-137.728226,46.459862,0.3094,-0.09,-1.333335e-06,-1.3e-05,0.320565,-0.055649,-1.5e-05,-8.1e-05,-107362.0
2,8707978,1987-10-02 15:00:00,-137.714447,46.456089,0.3058,-0.0997,-5.833308e-07,-2e-06,0.299095,-0.313299,3e-06,5e-06,-107361.0
3,8707978,1987-10-02 16:00:00,-137.700195,46.452862,0.3052,-0.1041,-3.513888e-06,-4e-06,0.308067,-0.298328,2e-06,2e-06,-107360.0
4,8707978,1987-10-02 17:00:00,-137.685577,46.449841,0.2805,-0.1314,-4.02778e-06,-1.2e-05,0.286443,-0.401779,-1.4e-05,-5.9e-05,-107359.0


### For (ve, vn), (ae, an), (vx, vy) and (acc_x, acc_y)

In [10]:
# Run the windowed processing on one drifter first: the concrete result is
# passed as `meta` to the dask groupby-apply below.
group = tuple(df.get_partition(0)['id'].loc[0].values.compute())[0]
dfg = df.groupby("id").get_group(group).compute()
# debugging aid: dfg.set_index("time").plot(x="lon", y="lat")

window_kwargs = dict(id_label='id', dt=dt, geo=True)
out = pin.drifters.time_window_processing(
    dfg, process_uv, columns0, T, N, **window_kwargs
)

# One dask dataframe of windowed spectra per variable pair.
Df_chunked = {}
for l, pair_columns in Columns.items():
    per_drifter = df.groupby("id")
    df_chunked = per_drifter.apply(
        pin.drifters.time_window_processing,
        process_uv,
        pair_columns,
        T,
        N,
        meta=out,
        **window_kwargs,
    ).persist()

    # recompute date: the index is scaled by time_unit and offset from t_ref
    df_chunked["date"] = t_ref + df_chunked.index * time_unit
    # rename x/y
    df_chunked = df_chunked.rename(columns=dict(x="lon", y="lat"))

    Df_chunked[l] = df_chunked
    

In [11]:
# Inspect the last rows of the 'aen' spectra: lon, lat, id, one column per frequency, date.
Df_chunked['aen'].tail()

Unnamed: 0,lon,lat,id,0.0,0.016666666666666666,0.03333333333333333,0.05,0.06666666666666667,0.08333333333333333,0.1,...,-0.15,-0.13333333333333333,-0.11666666666666667,-0.1,-0.08333333333333333,-0.06666666666666667,-0.05,-0.03333333333333333,-0.016666666666666666,date
31662.0,163.196994,-43.397268,2134178.0,2.633586e-15,4.920969e-14,4.105733e-14,4.507258e-13,4.103772e-13,6.60612e-13,1.272366e-12,...,2.661579e-12,1.659756e-15,1.382293e-13,1.639739e-13,2.813248e-13,6.734116e-13,5.679699e-13,1.362287e-13,8.004302e-14,2003-08-12 06:00:00
32382.0,164.227657,-43.512773,2134178.0,3.534635e-14,1.925849e-14,5.310236e-13,3.896556e-13,3.491136e-14,1.020677e-13,1.18113e-12,...,2.866753e-12,1.785844e-12,9.208276e-13,2.022257e-13,3.035232e-14,7.162978e-14,5.038342e-14,7.481193e-14,4.07149e-14,2003-09-11 06:00:00
33102.0,165.774268,-42.970209,2134178.0,1.671043e-14,3.92464e-14,6.55465e-13,1.072192e-12,1.187901e-13,7.343901e-13,1.82575e-13,...,1.761219e-12,7.264808e-13,1.949031e-12,2.688347e-12,1.505727e-12,7.64421e-13,4.296256e-13,2.22363e-13,6.262603e-14,2003-10-11 06:00:00
33822.0,167.471498,-42.773006,2134178.0,4.430023e-14,4.267319e-14,4.348557e-13,3.969733e-13,5.12256e-13,1.704473e-12,2.223015e-12,...,3.362423e-13,3.01816e-13,1.044373e-13,4.372598e-14,4.117025e-13,2.151684e-13,4.098078e-14,1.879096e-14,4.18204e-14,2003-11-10 06:00:00
34542.0,167.743541,-43.462793,2134178.0,2.599718e-15,2.163957e-14,5.643174e-16,3.131383e-14,1.851255e-13,2.564317e-13,9.991017e-14,...,3.250549e-13,1.614989e-13,8.914635e-13,1.717389e-12,9.10175e-13,9.512946e-13,2.323654e-12,1.878491e-12,3.74217e-13,2003-12-10 06:00:00


In [12]:
# store
#ut.store_diag()
#df_chunked = df_chunked.repartition(partition_size="100MB")
#df_chunked.to_parquet(..., mode="w")

---

# Bin geographically

https://github.com/apatlpo/mit_equinox/blob/master/parcels/spectra_binned.ipynb

In [13]:
# Edges of the regular lon/lat grid used for geographic binning.
dl = 2.  # bin width in degrees
# CAUTION: dl is added to the upper bound so that the last edge (+180 / +90)
# is part of the array; otherwise the last binning interval would be missing.
lon_bins, lat_bins = (
    np.arange(-half_span, half_span + dl, dl) for half_span in (180., 90)
)

### For (ve, vn), (ae, an), (vx, vy) and (acc_x, acc_y)

In [14]:
# Average the windowed spectra inside each lon/lat bin and convert every
# result into an xarray Dataset indexed by (lon_bins, lat_bins, frequency).
DF = Df_chunked  # alias: the dask dataframes held in Df_chunked are modified in place
DF_geo = {}
Ds = {}
for l in DF:
    # wrap longitudes into [-180, 180) so that they fall within lon_bins
    DF[l]['lon'] = (DF[l]['lon'] + 180) % 360 - 180
    # NOTE(review): pd.cut intervals are right-closed by default, so a value
    # exactly equal to the lowest edge gets no bin -- confirm this is acceptable.
    DF[l]['lon_cut'] = DF[l]['lon'].map_partitions(pd.cut, bins=lon_bins)
    DF[l]['lat_cut'] = DF[l]['lat'].map_partitions(pd.cut, bins=lat_bins)

    # Exclude the datetime 'date' column explicitly rather than relying on
    # groupby().mean() discarding it: if it survived the average it would be
    # melted into 'frequency' and break the float conversion below.
    averaged_columns = [c for c in DF[l].columns if c != 'date']
    DF_geo[l] = (DF[l][averaged_columns]
                 .groupby(['lon_cut', 'lat_cut'])
                 .mean()
                 .compute()
                )

    # Converting pandas dataframe in xarray dataset:
    # replace the interval labels with the bin mid-points
    index = pd.MultiIndex.from_arrays([DF_geo[l].index.map(lambda v: v[0].mid),
                                       DF_geo[l].index.map(lambda v: v[1].mid)
                                      ],
                                      names=('lon_cut', 'lat_cut'),
                                     )
    # long format: one row per (lon bin, lat bin, frequency)
    ds = (pd.melt(DF_geo[l].set_index(index)
                  .drop(columns=['id', 'lon', 'lat'])
                  .reset_index(), id_vars=['lon_cut', 'lat_cut'],
                  var_name='frequency',
                 )
          .rename(columns={'lon_cut': 'lon_bins', 'lat_cut': 'lat_bins', 'value': 'E_'+l})
         ).to_xarray().set_index(index=['lon_bins', 'lat_bins', 'frequency']).unstack()
    # the frequency labels come from column names; make them numeric and ordered
    ds['frequency'] = ds['frequency'].astype(float)
    ds = ds.sortby('frequency')
    #ds = ds.chunk({'frequency': 100})
    Ds[l] = ds
    print(l)

ven
aen
vxy
axy


In [15]:
# Gather the per-variable datasets into a single one.
ds = xr.merge(list(Ds.values()))

# set attrs: descriptive metadata for every spectrum and for the frequency axis
variable_attrs = {
    'E_ven': {'long_name':"Power density spectra v = ve + jvn", 'units':r'$m^2/s^2/cpd$'},
    'E_vxy': {'long_name':"Power density spectra v = vx + jvy", 'units':r'$m^2/s^2/cpd$'},
    'E_axy': {'long_name':"Power density spectra a = ax + jay", 'units':r'$m^2/s^4/cpd$'},
    'E_aen': {'long_name':"Power density spectra a = ae + jan", 'units':r'$m^2/s^4/cpd$'},
    'frequency': {'long_name':"frequency", 'units':'cpd'},
}
for var_name, var_attrs in variable_attrs.items():
    ds[var_name].attrs = var_attrs

In [16]:
# Velocity spectra multiplied by the squared angular frequency.
# frequency is in cpd, so 2*pi/3600/24 converts it to rad/s.
omega = ds['frequency']*2*np.pi/3600/24
for pair in ('ven', 'vxy'):
    ds['E_' + pair + 'w2'] = ds['E_' + pair]*omega**2

In [16]:
# Display the merged dataset of binned spectra.
ds

_________
# Store spectra

In [17]:
# Output store: <root_dir>/<key>_spectra.zarr
root_dir = "/home1/datawork/mdemol/GDP"
store_name = key + "_spectra.zarr"
zarr = os.path.join(root_dir, store_name)

In [18]:
# Write the spectra; mode="w" overwrites an existing store at that path.
ds.to_zarr(zarr, mode="w")

<xarray.backends.zarr.ZarrStore at 0x2aab1383f190>

In [19]:
# Read the store back as a check that it was written correctly, and keep it loaded on the cluster.
ds_reload = xr.open_zarr(zarr).persist()
ds_reload

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [20]:
# Close the client before the cluster: otherwise the client keeps trying to
# reconnect to the stopped scheduler and logs
# "Failed to reconnect to scheduler ... closing client".
client.close()
cluster.close()

distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
