# Geographically binned spectra


In [1]:
import os

import numpy as np
import pandas as pd
import xarray as xr
import dask.dataframe as dd

#import cartopy.crs as ccrs
#import cartopy.feature as cfeature
#import geopandas as gpd
#from shapely.geometry import Polygon

%matplotlib inline
from matplotlib import pyplot as plt

import drifters.utils as ut
import pynsitu as pin
from GDP_lib import root_dir



In [2]:
from dask.distributed import Client

# Toggle between a PBS-backed cluster (HPC) and a local cluster (laptop / debugging)
USE_PBS_CLUSTER = True

if USE_PBS_CLUSTER:
    # imported here so that dask_jobqueue is only required when actually used
    from dask_jobqueue import PBSCluster
    # other layouts tried previously: cores=56/processes=28, cores=7/processes=7
    cluster = PBSCluster(cores=3, processes=3, walltime='04:00:00')
    # number of PBS jobs requested (earlier note: 5 for gps, 8 for argos)
    w = cluster.scale(jobs=8)
else:
    from dask.distributed import LocalCluster
    cluster = LocalCluster()

client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: http://10.148.0.25:8787/status,

0,1
Dashboard: http://10.148.0.25:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.148.0.25:38761,Workers: 0
Dashboard: http://10.148.0.25:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# Choose key 

In [4]:
key = 'gps' # 'gps' or 'argos'

# Load

In [5]:
# Input dataset: trajectories with velocities and accelerations already computed
parquet = os.path.join(root_dir, key + "_av_time.parquet")

# Only the variables needed for the spectral analysis are kept
col = [
    "id", 'time', "lon", "lat",
    "ve", "vn", 'ae', 'an',
    'vex', 'vny', 'aex', 'any',
    'vex_diff', 'vny_diff',
]
df = dd.read_parquet(parquet).reset_index()[col].persist()

In [6]:
df

Unnamed: 0_level_0,id,time,lon,lat,ve,vn,ae,an,vex,vny,aex,any,vex_diff,vny_diff
npartitions=171,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
,int64,datetime64[ns],float32,float32,float32,float32,float64,float64,float64,float64,float64,float64,float64,float64
,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


_________________
# Generate Spectra

In [7]:
# The timestamp column `time` is renamed to `date` so that `time` can hold a float
# time axis (see below). The guard keeps the cell idempotent: without it, running
# the cell a second time would rename the float `time` column to `date` as well and
# leave the frame with two `date` columns.
if 'date' not in df.columns:
    df = df.rename(columns={'time': 'date'})
df['ven2'] = df.ve**2 + df.vn**2

# add time in hours elapsed since t_ref
time_unit = pd.Timedelta(hours=1)  # same value as '1H', without the deprecated alias
t_ref = pd.Timestamp(2000,1,1)
df["time"] = (df["date"] - t_ref) / time_unit

In [8]:
T_str = '60D'  # length of one spectral window
dt = '1H'      # sampling interval

# number of samples in one window (= output size)
N = int(pd.Timedelta(T_str)/pd.Timedelta(dt))
# window length expressed in the same unit as the `time` column
T = pd.Timedelta(T_str)/time_unit

# every processed set of columns starts with the position, followed by the pair
# of components that is combined into a complex series
columns = ['lon', 'lat']
columns0 = columns + ['ve', 'vn']
columns1 = columns + ['ae', 'an']
columns2 = columns + ['vex', 'vny']
columns3 = columns + ['aex', 'any']
columns4 = columns + ['vex_diff', 'vny_diff']

# label of each spectrum -> columns it is computed from
labels = ['ven', 'aen', 'vxy', 'axy', 'vxydiff']
Columns = dict(zip(labels, [columns0, columns1, columns2, columns3, columns4]))


In [9]:
def process_uv(lon, lat, u, v, N, dt, **kwargs):
    """Spectrum of the complex series ``u + 1j*v``.

    Thin wrapper around ``pin.tseries.get_spectrum``.

    Parameters
    ----------
    lon, lat:
        Positions. ``lat`` is not used; ``lon`` is only tested against ``None``:
        when ``lon is None``, ``None`` is handed to ``get_spectrum`` instead of
        a series.
    u, v:
        The two components combined as ``u + 1j*v``.
    N: int
        Length of the spectrum, forwarded to ``get_spectrum``.
    dt:
        Time sampling, forwarded unchanged as the ``dt`` keyword
        (this notebook passes the string '1H').
    **kwargs:
        Forwarded to ``pin.tseries.get_spectrum``.

    Returns
    -------
    Whatever ``pin.tseries.get_spectrum`` returns.
    """
    uv = None if lon is None else u + 1j*v
    return pin.tseries.get_spectrum(uv, N, dt=dt, **kwargs)

In [10]:
df.head()

Unnamed: 0,id,date,lon,lat,ve,vn,ae,an,vex,vny,aex,any,vex_diff,vny_diff,ven2,time
0,22192,2001-07-01 02:00:00,-94.795609,5.53791,0.646,-0.29,-2.777825e-07,-3.472219e-07,0.649771,-0.761679,1.698938e-06,-3.157259e-06,0.649776,-0.761667,0.501416,13130.0
1,22192,2001-07-01 03:00:00,-94.774597,5.52852,0.6466,-0.2901,-2.777825e-07,-3.472219e-07,0.649771,-0.761679,1.698938e-06,-3.157259e-06,0.649776,-0.761667,0.50225,13131.0
2,22192,2001-07-01 04:00:00,-94.753387,5.51876,0.644,-0.2925,-1.361113e-06,-1.180553e-06,0.656827,-0.78517,2.221023e-06,-5.036993e-06,0.656843,-0.785128,0.500292,13132.0
3,22192,2001-07-01 05:00:00,-94.731918,5.50841,0.6368,-0.2986,-2.000001e-06,-1.694444e-06,0.660713,-0.807147,-6.206238e-08,8.546555e-08,0.660741,-0.807076,0.494676,13133.0
4,22192,2001-07-01 06:00:00,-94.710457,5.49807,0.6296,-0.3047,-2.000001e-06,-1.694444e-06,0.660724,-0.80672,6.839744e-08,4.095455e-09,0.660764,-0.806619,0.489238,13134.0


### Compute spectra

In [11]:
# Run the processing once on a single drifter: the result is handed to the
# distributed groupby-apply below as `meta` (template of the output).
group = tuple(df.get_partition(0)['id'].loc[0].values.compute())[0]
dfg = df.groupby("id").get_group(group).compute()
out = pin.drifters.time_window_processing(
    dfg, process_uv, columns0, T, N, id_label='id', dt=dt, geo=True,
)

# Same processing for every set of columns, applied drifter by drifter
Df_chunked = {}
for label, cols in Columns.items():
    df_chunked = df.groupby("id").apply(
        pin.drifters.time_window_processing,
        process_uv,
        cols,
        T,
        N,
        id_label='id',
        dt=dt,
        geo=True,
        meta=out,
    ).persist()
    # recompute the date from the index (counted in `time_unit` since `t_ref`)
    df_chunked["date"] = t_ref + df_chunked.index*time_unit
    # rename x/y to lon/lat
    df_chunked = df_chunked.rename(columns={"x": "lon", "y": "lat"})
    Df_chunked[label] = df_chunked
    

In [11]:
Df_chunked['aen'].tail()

Unnamed: 0,lon,lat,id,0.0,0.016666666666666666,0.03333333333333333,0.05,0.06666666666666667,0.08333333333333333,0.1,...,-0.15,-0.13333333333333333,-0.11666666666666667,-0.1,-0.08333333333333333,-0.06666666666666667,-0.05,-0.03333333333333333,-0.016666666666666666,date
147487.0,-179.871005,-11.975244,145703.0,2.380645e-14,5.010007e-14,3.642163e-13,1.323019e-12,4.313937e-13,5.058799e-13,2.024908e-12,...,2.763542e-12,9.807117e-14,6.640284e-13,1.1967e-12,5.647899e-14,1.474547e-12,1.123894e-12,6.803424e-13,2.767707e-13,2016-10-28 07:00:00
148207.0,178.648856,-13.726922,145703.0,1.292187e-13,4.166144e-15,6.296172e-14,1.024656e-12,1.379822e-12,2.686008e-13,4.126079e-13,...,6.658919e-12,1.555552e-12,2.078073e-12,1.321306e-12,1.278577e-12,4.611099e-12,4.150278e-12,1.558539e-12,2.223496e-12,2016-11-27 07:00:00
148927.0,176.900652,-14.278134,145703.0,4.096352e-13,4.178954e-13,1.221878e-13,3.0804e-12,6.939296e-12,1.745103e-12,8.575256e-15,...,6.096989e-13,3.07162e-12,4.078496e-12,1.689709e-12,2.632645e-12,2.15233e-11,2.230381e-11,4.551371e-12,7.471569e-14,2016-12-27 07:00:00
149647.0,173.92428,-14.550017,145703.0,5.697027e-13,2.316566e-12,8.451948e-12,6.72722e-12,7.412221e-13,4.158147e-12,3.128578e-12,...,9.953363e-13,4.089224e-13,7.445718e-13,1.038335e-13,2.876857e-12,4.749523e-12,2.584859e-12,7.823834e-13,1.290914e-13,2017-01-26 07:00:00
150367.0,171.219387,-15.276158,145703.0,3.232104e-13,2.405287e-12,3.689545e-12,6.004295e-12,2.683437e-12,3.836132e-12,1.324006e-12,...,4.361785e-13,2.924997e-13,2.480702e-12,1.805236e-12,2.940023e-12,2.416977e-12,1.765523e-13,3.840724e-13,1.217311e-13,2017-02-25 07:00:00


## Store spectra

In [12]:
DF = Df_chunked
# Frequency columns are labelled with floats; they are converted to strings before
# storage (presumably because parquet needs string column names — confirm).
DFs = {
    label: frame.rename(
        columns={c: str(c) for c in list(frame.columns) if isinstance(c, float)}
    )
    for label, frame in DF.items()
}

In [13]:
# NOTE(review): this overrides the `root_dir` imported from GDP_lib at the top of
# the notebook with a hard-coded absolute path — confirm this is intended.
root_dir = "/home1/datawork/mdemol/GDP"

def store_spectra(Df, root_dir = root_dir):
    """Write each spectra dataframe of ``Df`` to its own parquet dataset.

    Parameters
    ----------
    Df: dict
        Maps a variable label (e.g. 'ven') to a dask dataframe of spectra.
    root_dir: str
        Output directory; files are named ``<key>_<label>_spectra.parquet``,
        where ``key`` is the notebook-level dataset key.

    The labels are taken from ``Df`` itself rather than from the global
    ``Columns``, so the function works for any dictionary it is given.
    The label is printed once the corresponding dataset has been written.
    """
    for var_key, spectra in Df.items():
        parquet = os.path.join(root_dir, key + '_' + var_key +"_spectra.parquet")
        # even out partition sizes before writing
        df = spectra.repartition(partition_size="100MB").persist()
        df.to_parquet(parquet, engine='pyarrow')
        print(var_key)
    
store_spectra(DFs)

ven
aen
vxy
axy
vxydiff


---

# Geographically binned

https://github.com/apatlpo/mit_equinox/blob/master/parcels/spectra_binned.ipynb

In [14]:
# Reload the stored spectra. Dedicated names are used for the path and the frame so
# that the notebook-level `df` (trajectories) and `parquet` are not overwritten.
DF={}
labels = ['ven', 'aen', 'vxy', 'axy', 'vxydiff']
# columns that are not frequencies; all other column names are converted to float
non_frequency_columns = ['lon', 'lat', 'id', 'date']
for l in labels:
    spectra_path = os.path.join(root_dir, key + '_' + l +"_spectra.parquet")
    spectra = dd.read_parquet(spectra_path)
    DF[l] = spectra.rename(
        columns={c: float(c) for c in list(spectra.columns) if c not in non_frequency_columns}
    )

In [15]:
DF['aen'].head()

Unnamed: 0,lon,lat,id,0.0,0.016666666666666666,0.03333333333333333,0.05,0.06666666666666667,0.08333333333333333,0.1,...,-0.15,-0.13333333333333333,-0.11666666666666667,-0.1,-0.08333333333333333,-0.06666666666666667,-0.05,-0.03333333333333333,-0.016666666666666666,date
116969.0,-40.333228,21.868049,114956.0,3.696571e-13,4.400866e-13,2.76857e-13,2.304005e-13,1.988067e-13,2.385747e-13,1.818402e-12,...,4.266598e-12,1.707232e-12,1.233788e-13,3.543694e-13,3.725933e-14,1.680991e-12,2.397301e-12,3.262385e-12,8.947643e-13,2013-05-05 17:00:00
117689.0,-42.771159,21.321839,114956.0,8.438138e-16,2.429e-14,3.085316e-14,5.980328e-15,4.742883e-14,6.631397e-13,4.479189e-13,...,1.572166e-12,1.832819e-12,1.863201e-12,3.808806e-12,1.62655e-12,1.225777e-12,4.568962e-12,1.375236e-12,3.423859e-13,2013-06-04 17:00:00
118409.0,-43.805268,21.78466,114956.0,6.024656e-14,6.812545e-13,6.670413e-13,3.736421e-13,1.575517e-13,1.474057e-14,2.905624e-13,...,2.057499e-13,3.893149e-13,7.954812e-13,1.050209e-12,4.607444e-13,1.64055e-12,1.77384e-12,9.53101e-13,4.041392e-13,2013-07-04 17:00:00
119129.0,-44.566782,23.591216,114956.0,1.145533e-14,2.564396e-14,8.810119e-13,2.021174e-13,4.913435e-13,9.323369e-13,7.658083e-14,...,5.252909e-14,7.28582e-15,8.874921e-15,8.401535e-14,9.139186e-14,8.093673e-14,6.278955e-14,5.500635e-14,9.01075e-14,2013-08-03 17:00:00
119849.0,-45.869155,25.494035,114956.0,3.168237e-14,1.600959e-13,1.018675e-12,8.761843e-13,6.656875e-13,3.485784e-13,2.077295e-13,...,2.068052e-14,7.394532e-14,9.609076e-14,2.865897e-14,8.454405e-14,3.452706e-14,8.114799e-14,1.739098e-13,1.795902e-14,2013-09-02 17:00:00


In [16]:
# Geographic binning: edges of the longitude/latitude bins, each `dl` wide
dl = 2
# CAUTION: `dl` is added to the upper bound because np.arange excludes its stop
# value; without it the last edge (and hence the last bin interval) would be missing.
lon_bins = np.arange(-180.,180.+dl, dl)
lat_bins = np.arange(-90, 90+dl, dl)


### Computing

In [17]:

def _bin_midpoints(interval_index):
    """Return a (lon_cut, lat_cut) MultiIndex made of the midpoint of each bin interval."""
    return pd.MultiIndex.from_arrays([interval_index.map(lambda v: v[0].mid),
                                      interval_index.map(lambda v: v[1].mid)
                                     ],
                                     names=('lon_cut', 'lat_cut'),
                                    )

DF_geo = {}
Ds = {}
for l in DF :
    # wrap longitudes to [-180, 180) and attach the lon/lat bin of each segment
    DF[l]['lon'] = (DF[l]['lon']+180)%360 - 180
    # NOTE(review): pd.cut uses right-closed bins and does not include the lowest
    # edge by default, so a value exactly equal to -180 (lon) or -90 (lat) gets no
    # bin and is left out of the averages — confirm this is acceptable.
    DF[l]['lon_cut'] = DF[l]['lon'].map_partitions(pd.cut, bins=lon_bins)
    DF[l]['lat_cut'] = DF[l]['lat'].map_partitions(pd.cut, bins=lat_bins)
    # Mean spectrum over all segments in each (lon, lat) bin.
    # `date` is dropped explicitly before averaging: depending on the pandas/dask
    # version a datetime column is silently discarded, kept, or makes mean() fail;
    # if kept it would be melted below as a bogus "frequency".
    DF_geo[l] = (DF[l].drop(columns=['date'])
                 .groupby(['lon_cut', 'lat_cut']).mean().compute())

    # Converting the pandas dataframe into an xarray dataset:
    # bins are labelled by their midpoint, frequencies become a dimension
    index = _bin_midpoints(DF_geo[l].index)
    ds = (pd.melt(DF_geo[l].set_index(index)
                  .drop(columns=['id', 'lon','lat'])
                  .reset_index(), id_vars=['lon_cut', 'lat_cut',],
                  var_name='frequency',
                 )
          .rename(columns={'lon_cut': 'lon_bins', 'lat_cut': 'lat_bins', 'value': 'E_'+l})
         ).to_xarray().set_index(index=['lon_bins', 'lat_bins','frequency']).unstack()
    ds['frequency'] = ds['frequency'].astype(float)
    ds = ds.sortby('frequency')
    Ds[l]=ds
    print(l)

# Number of segments per bin, taken from the last variable processed (as before,
# but named explicitly instead of relying on the leaked loop variable). The
# midpoint index is built from the count result itself rather than reused from
# the loop, so it cannot get out of step with the rows it labels.
count_label = list(DF)[-1]
counts = DF[count_label].groupby(['lon_cut', 'lat_cut']).size().compute()
dsc = (counts.to_frame('nb_geobins').set_index(_bin_midpoints(counts.index)).reset_index()
             .to_xarray().rename({'lon_cut': 'lon_bins', 'lat_cut': 'lat_bins'})
             .set_index(index=['lon_bins', 'lat_bins']).unstack())

ven
aen
vxy
axy
vxydiff


In [18]:
# One dataset holding every binned spectrum plus the counts per bin
ds = xr.merge(list(Ds.values())+[dsc])

# metadata of each variable
variable_attrs = {
    'E_ven': {'long_name':"Power density spectra v = ve + jvn", 'units':r'$m^2/s^2/cpd$', 'description': 'LOWESS method'},
    'E_vxy': {'long_name':"Power density spectra v = vx + jvy", 'units':r'$m^2/s^2/cpd$', 'description': 'geoid method'},
    'E_vxydiff': {'long_name':"Power density spectra v = vx + jvy", 'units':r'$m^2/s^2/cpd$', 'description': 'finite differentiation method'},
    'E_axy': {'long_name':"Power density spectra a = ax + jay", 'units':r'$m^2/s^4/cpd$', 'description': 'from position'},
    'E_aen': {'long_name':"Power density spectra a = ae + jan", 'units':r'$m^2/s^4/cpd$','description': 'from ve,vn'},
    'frequency': {'long_name':"frequency", 'units':'cpd'},
    'nb_geobins': {'long_name':"nb_geobins", 'description':'number of values per bins'},
}
for name, attrs in variable_attrs.items():
    ds[name].attrs = attrs

In [19]:
# velocity spectra multiplied by the squared angular frequency
# (frequency in cpd -> rad/s: 2*pi / (3600*24))
omega = ds['frequency']*2*np.pi/3600/24
for name in ('E_ven', 'E_vxy'):
    ds[name + 'w2'] = ds[name]*omega**2

In [24]:
# Metadata of the omega^2-weighted spectra.
# - raw strings: "\o" is not a valid escape sequence in a normal string literal
# - descriptions follow the parent spectra: E_ven is built from ve,vn and E_vxy from
#   the positions (they were swapped before)
ds.E_venw2.attrs={'long_name':r"E_ven $  \omega^2$", 'units':r'$m^2/s^4/cpd$', 'description': 'from ve,vn'}
ds.E_vxyw2.attrs={'long_name':r"E_vxy $  \omega^2$", 'units':r'$m^2/s^4/cpd$','description': 'from position'}

In [26]:
ds


## Store geo spectra

In [27]:
# Output store: one zarr archive per dataset key and bin size
root_dir = "/home1/datawork/mdemol/GDP"
zarr = os.path.join(root_dir, f"{key}_geospectra_{int(dl)}.zarr")

In [28]:
ds.to_zarr(zarr, mode="w") 

<xarray.backends.zarr.ZarrStore at 0x2aab0c802eb0>

In [29]:
ds_reload = xr.open_zarr(zarr).persist()
ds_reload

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,126.56 kiB,126.56 kiB
Shape,"(180, 90)","(180, 90)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 126.56 kiB 126.56 kiB Shape (180, 90) (180, 90) Dask graph 1 chunks in 1 graph layer Data type int64 numpy.ndarray",90  180,

Unnamed: 0,Array,Chunk
Bytes,126.56 kiB,126.56 kiB
Shape,"(180, 90)","(180, 90)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int64 numpy.ndarray,int64 numpy.ndarray


In [31]:
# Release the resources: disconnect the client first, then shut the cluster down
client.close()
cluster.close()