# Compute and store geographically binned spectra
 - average the spectra computed in `parquet_time_window_spectra.ipynb` over 2° bins, using their mean positions
 - store the result in a single zarr store

In [1]:
import os

import numpy as np
import pandas as pd
import xarray as xr
import dask.dataframe as dd

# import cartopy.crs as ccrs
# import cartopy.feature as cfeature
# import geopandas as gpd
# from shapely.geometry import Polygon

%matplotlib inline
from matplotlib import pyplot as plt

import drifters.utils as ut
import pynsitu as pin
from GDP_lib import root_dir

In [2]:
# Start the dask cluster that runs every computation of this notebook.
from dask.distributed import Client

# Set to False to run on a LocalCluster instead of submitting PBS jobs.
use_pbs = True

if use_pbs:
    from dask_jobqueue import PBSCluster

    # Resources requested per PBS job (4 h walltime).
    # Settings tried previously: cores=56/processes=28 and cores=7/processes=7.
    cluster = PBSCluster(cores=5, processes=5, walltime="04:00:00")
    # Number of PBS jobs to submit (note kept from an earlier version:
    # "5 for gps, 8 for argos"). Workers only show up once the queued jobs
    # start, so the summary displayed below may still report 0 workers.
    cluster.scale(jobs=5)
else:
    from dask.distributed import LocalCluster

    cluster = LocalCluster()

client = Client(cluster)
client



0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: http://10.148.0.111:8787/status,

0,1
Dashboard: http://10.148.0.111:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.148.0.111:43882,Workers: 0
Dashboard: http://10.148.0.111:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# Choose key 

In [3]:
# Dataset selection:
#   key  -> "gps" or "argos"
#   key2 -> file-name suffix of the spectra, "_corrected_90_9999" or "_corrected"
key, key2 = "argos", "_corrected_90_9999"

---

# Geographically binned

https://github.com/apatlpo/mit_equinox/blob/master/parcels/spectra_binned.ipynb

In [14]:
# Labels of the spectra to load; each one has its own parquet file.
labels = [
    "ven",
    "aen",
    "vxy",
    "axy",
    "vxydiff",
    "xy",
    "err_lonlat",
    "err_lonlatm",
    "err_vevn",
    "avxy",
]
# Columns that are not spectral frequencies and must keep their names.
non_frequency_columns = ["lon", "lat", "id", "date"]

# label -> lazy dask dataframe of spectra (one row per spectrum).
DF = {}
for label in labels:
    path = os.path.join(root_dir, f"{key}_{label}_spectra{key2}.parquet")
    frame = dd.read_parquet(path)
    # Relabel every frequency column with its float value
    # (presumably stored as strings in the parquet files).
    as_float = {
        name: float(name)
        for name in frame.columns
        if name not in non_frequency_columns
    }
    DF[label] = frame.rename(columns=as_float)

In [15]:
# Preview the first rows of the "xy" spectra table (one column per frequency).
DF["xy"].head()

Unnamed: 0,lon,lat,id,0.0,0.016666666666666666,0.03333333333333333,0.05,0.06666666666666667,0.08333333333333333,0.1,...,-0.15,-0.13333333333333333,-0.11666666666666667,-0.1,-0.08333333333333333,-0.06666666666666667,-0.05,-0.03333333333333333,-0.016666666666666666,date
112252.0,-37.563995,24.345945,36727.0,123347000000.0,1288541000.0,19909330000.0,3316851000.0,497391600.0,5862846.0,173784300.0,...,4959079.0,29906040.0,15315450.0,285695200.0,481766100.0,152029900.0,1363517000.0,1591061000.0,113528600000.0,2012-10-21 04:00:00
112972.0,-38.001958,23.565256,36727.0,640010400000.0,79791280000.0,2728473000.0,2661587000.0,194117900.0,102949800.0,295810800.0,...,13242460.0,6670999.0,62122260.0,127850900.0,39873070.0,338793300.0,1657978000.0,3204981000.0,371603100000.0,2012-11-20 04:00:00
113692.0,-38.948332,23.100361,36727.0,1220302000000.0,180992500000.0,3904875000.0,373227500.0,924242900.0,554717300.0,423160000.0,...,14194130.0,110542400.0,142926900.0,36321180.0,172829700.0,2616386000.0,28184470.0,18695360000.0,423162700000.0,2012-12-20 04:00:00
114412.0,-39.941199,22.911843,36727.0,3209069000000.0,699951600000.0,5306800000.0,2793859000.0,136630000.0,31536920.0,10901360.0,...,17395810.0,80353690.0,23957320.0,195499000.0,438238900.0,1314140000.0,1297292000.0,32500420000.0,1278537000000.0,2013-01-19 04:00:00
115132.0,-40.420775,22.69194,36727.0,4260704000000.0,1268102000000.0,4581270000.0,87368340.0,22558590.0,6970933.0,4997699.0,...,2776335.0,6822774.0,42352960.0,23027540.0,9128547.0,131030200.0,688953200.0,2023347000.0,955309000000.0,2013-02-18 04:00:00


In [15]:
# Edges of the geographical bins.
dl = 2  # bin width
# CAUTION: np.arange leaves out its stop value, so the upper bound is pushed
# by dl to keep the last edge (+180 / +90) and hence the last binning interval.
lon_bins = np.arange(-180.0, 180.0 + dl, dl)
lat_bins = np.arange(-90, 90 + dl, dl)

### Computing

In [16]:
def _bin_centers(interval_index):
    """Return a (lon_cut, lat_cut) MultiIndex holding the mid-point of each 2D bin.

    `interval_index` is the index obtained by grouping on
    ["lon_cut", "lat_cut"]: every entry is a
    (longitude interval, latitude interval) pair.
    """
    return pd.MultiIndex.from_arrays(
        [
            interval_index.map(lambda v: v[0].mid),
            interval_index.map(lambda v: v[1].mid),
        ],
        names=("lon_cut", "lat_cut"),
    )


DF_geo = {}  # label -> bin-averaged spectra (pandas dataframe)
Ds = {}  # label -> same spectra as an xarray dataset (lon_bins, lat_bins, frequency)
for l in DF:
    # Wrap longitudes into [-180, 180) so that they fall within lon_bins.
    DF[l]["lon"] = (DF[l]["lon"] + 180) % 360 - 180
    # NOTE: pd.cut builds right-closed intervals and does not include the lowest
    # edge by default, so a value lying exactly on the first edge gets no bin.
    DF[l]["lon_cut"] = DF[l]["lon"].map_partitions(pd.cut, bins=lon_bins)
    DF[l]["lat_cut"] = DF[l]["lat"].map_partitions(pd.cut, bins=lat_bins)

    # MEAN SPECTRUM OVER ALL SEGMENTS IN EACH (LON, LAT) BIN.
    # `date` is removed explicitly before averaging: relying on the reduction
    # to silently discard it is pandas-version dependent, and if it survived
    # it would be melted into the "frequency" axis below.
    DF_geo[l] = (
        DF[l]
        .drop(columns=["date"], errors="ignore")
        .groupby(["lon_cut", "lat_cut"])
        .mean()
        .compute()
    )

    # Converting pandas dataframe in xarray dataset:
    # bins are labelled by their centers, then the frequency columns are
    # melted into a long table and unstacked into a 3D cube.
    index = _bin_centers(DF_geo[l].index)
    ds = (
        (
            pd.melt(
                DF_geo[l]
                .set_index(index)
                .drop(columns=["id", "lon", "lat"])
                .reset_index(),
                id_vars=[
                    "lon_cut",
                    "lat_cut",
                ],
                var_name="frequency",
            ).rename(
                columns={
                    "lon_cut": "lon_bins",
                    "lat_cut": "lat_bins",
                    "value": "E_" + l,
                }
            )
        )
        .to_xarray()
        .set_index(index=["lon_bins", "lat_bins", "frequency"])
        .unstack()
    )
    ds["frequency"] = ds["frequency"].astype(float)
    ds = ds.sortby("frequency")
    Ds[l] = ds
    print(l)

# Number of spectra per bin.
# Counts are taken from the last label only (as before, but now selected
# explicitly instead of through the leaked loop variable); this presumably
# assumes that all labels share the same set of segments -- TODO confirm.
count_label = list(DF)[-1]
counts = (
    DF[count_label]
    .groupby(["lon_cut", "lat_cut"])
    .size()
    .compute()
    .to_frame("nb_geobins")
)
dsc = (
    counts
    # Bin centers are derived from the counts' own index, so the labelling
    # cannot get out of step with the row order of this groupby result.
    .set_index(_bin_centers(counts.index))
    .reset_index()
    .to_xarray()
    .rename({"lon_cut": "lon_bins", "lat_cut": "lat_bins"})
    .set_index(index=["lon_bins", "lat_bins"])
    .unstack()
)

ven
aen
vxy
axy
vxydiff
xy
err_lonlat
err_lonlatm
err_vevn
avxy


In [17]:
# Gather all binned spectra and the per-bin counts in a single dataset.
ds = xr.merge([*Ds.values(), dsc])

# Metadata attached to each variable / coordinate, keyed by name.
attrs_by_name = {
    "E_xy": {
        "long_name": "Power density spectra X = x + jy",
        "units": r"$m^2/cpd$",
        "description": "LOWESS method",
    },
    "E_ven": {
        "long_name": "Power density spectra v = ve + jvn",
        "units": r"$m^2/s^2/cpd$",
        "description": "LOWESS method",
    },
    "E_vxy": {
        "long_name": "Power density spectra v = vx + jvy",
        "units": r"$m^2/s^2/cpd$",
        "description": "geoid method",
    },
    "E_vxydiff": {
        "long_name": "Power density spectra v = vx + jvy",
        "units": r"$m^2/s^2/cpd$",
        "description": "finite differentiation method",
    },
    "E_axy": {
        "long_name": "Power density spectra a = ax + jay",
        "units": r"$m^2/s^4/cpd$",
        "description": "from position",
    },
    "E_aen": {
        "long_name": "Power density spectra a = ae + jan",
        "units": r"$m^2/s^4/cpd$",
        "description": "from ve,vn",
    },
    "frequency": {"long_name": "frequency", "units": "cpd"},
    "nb_geobins": {
        "long_name": "nb_geobins",
        "description": "number of T days time window spectra per bins",
    },
}
for name, attrs in attrs_by_name.items():
    ds[name].attrs = attrs

In [18]:
# Angular frequency: the frequency coordinate scaled by 2*pi / (3600 * 24),
# i.e. cycles per day turned into rad/s.
omega = ds["frequency"] * 2 * np.pi / 3600 / 24

# Velocity spectra multiplied by omega^2.
for suffix in ("ven", "vxy"):
    ds[f"E_{suffix}w2"] = ds[f"E_{suffix}"] * omega**2

In [19]:
# Metadata of the omega^2-scaled velocity spectra.
# Raw strings are used so that the LaTeX "\omega" is not parsed as an
# (invalid) escape sequence; the stored text is unchanged.
# The descriptions follow the convention used for E_axy / E_aen:
# "xy" quantities derive from position, "en" quantities from ve,vn
# (the two descriptions were previously swapped).
ds.E_venw2.attrs = {
    "long_name": r"E_ven $  \omega^2$",
    "units": r"$m^2/s^4/cpd$",
    "description": "from ve,vn",
}
ds.E_vxyw2.attrs = {
    "long_name": r"E_vxy $  \omega^2$",
    "units": r"$m^2/s^4/cpd$",
    "description": "from position",
}

In [19]:
# Display the merged dataset, including the derived omega^2 variables.
ds


## Store geo spectra

In [20]:
# NOTE: this rebinds `root_dir`, replacing the value imported from GDP_lib.
root_dir = "/home1/datawork/mdemol/GDP"
# The store name encodes the dataset key, the bin width and the file suffix.
store_name = f"{key}_geospectra_{int(dl)}{key2}.zarr"
zarr = os.path.join(root_dir, store_name)

In [21]:
# Show the full path of the output zarr store.
zarr

'/home1/datawork/mdemol/GDP/argos_geospectra_2_corrected_90_9999.zarr'

In [22]:
# Write the dataset to the zarr store; mode="w" overwrites an existing store at this path.
ds.to_zarr(zarr, mode="w")

<xarray.backends.zarr.ZarrStore at 0x2aab2ae27820>

In [23]:
# Read the store back as a sanity check and keep its data loaded on the cluster.
ds_reload = xr.open_zarr(zarr)
ds_reload = ds_reload.persist()
ds_reload

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 177.98 MiB 1.45 MiB Shape (180, 90, 1440) (23, 23, 360) Dask graph 128 chunks in 1 graph layer Data type float64 numpy.ndarray",1440  90  180,

Unnamed: 0,Array,Chunk
Bytes,177.98 MiB,1.45 MiB
Shape,"(180, 90, 1440)","(23, 23, 360)"
Dask graph,128 chunks in 1 graph layer,128 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,126.56 kiB,126.56 kiB
Shape,"(180, 90)","(180, 90)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 126.56 kiB 126.56 kiB Shape (180, 90) (180, 90) Dask graph 1 chunks in 1 graph layer Data type int64 numpy.ndarray",90  180,

Unnamed: 0,Array,Chunk
Bytes,126.56 kiB,126.56 kiB
Shape,"(180, 90)","(180, 90)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int64 numpy.ndarray,int64 numpy.ndarray


In [25]:
# Release resources: disconnect the client first, then shut the cluster down.
client.close()
cluster.close()