In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import dask.dataframe as dd

%matplotlib inline
from matplotlib import pyplot as plt

import drifters.utils as ut
import pynsitu as pin
from sstats import signals as sg
from sstats import sigp as sigp
from sstats import tseries as ts

In [2]:
from dask.distributed import Client

if False:
    from dask_jobqueue import PBSCluster

    cluster = PBSCluster()
    w = cluster.scale(jobs=1)  # 2 not enough for lon, lat, year, binning
else:
    from dask.distributed import LocalCluster

    cluster = LocalCluster()

client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 8
Total threads: 56,Total memory: 100.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:47079,Workers: 8
Dashboard: http://127.0.0.1:8787/status,Total threads: 56
Started: Just now,Total memory: 100.00 GiB

0,1
Comm: tcp://127.0.0.1:53077,Total threads: 7
Dashboard: http://127.0.0.1:35325/status,Memory: 12.50 GiB
Nanny: tcp://127.0.0.1:39400,
Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-ey2_y32q,Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-ey2_y32q

0,1
Comm: tcp://127.0.0.1:45941,Total threads: 7
Dashboard: http://127.0.0.1:57934/status,Memory: 12.50 GiB
Nanny: tcp://127.0.0.1:60779,
Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-al8cv27w,Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-al8cv27w

0,1
Comm: tcp://127.0.0.1:51950,Total threads: 7
Dashboard: http://127.0.0.1:35770/status,Memory: 12.50 GiB
Nanny: tcp://127.0.0.1:49458,
Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-lzuwvov9,Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-lzuwvov9

0,1
Comm: tcp://127.0.0.1:52826,Total threads: 7
Dashboard: http://127.0.0.1:49475/status,Memory: 12.50 GiB
Nanny: tcp://127.0.0.1:60608,
Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-p2_ggvm9,Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-p2_ggvm9

0,1
Comm: tcp://127.0.0.1:59827,Total threads: 7
Dashboard: http://127.0.0.1:40401/status,Memory: 12.50 GiB
Nanny: tcp://127.0.0.1:56463,
Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-fduft2kr,Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-fduft2kr

0,1
Comm: tcp://127.0.0.1:33918,Total threads: 7
Dashboard: http://127.0.0.1:40591/status,Memory: 12.50 GiB
Nanny: tcp://127.0.0.1:52976,
Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-o5y2330k,Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-o5y2330k

0,1
Comm: tcp://127.0.0.1:57444,Total threads: 7
Dashboard: http://127.0.0.1:49087/status,Memory: 12.50 GiB
Nanny: tcp://127.0.0.1:59955,
Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-to4l90e5,Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-to4l90e5

0,1
Comm: tcp://127.0.0.1:56483,Total threads: 7
Dashboard: http://127.0.0.1:59265/status,Memory: 12.50 GiB
Nanny: tcp://127.0.0.1:38284,
Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-xhv7ugqq,Local directory: /dev/shm/pbs.3917512.datarmor0/dask-worker-space/worker-xhv7ugqq


In [3]:
def white_noise_time_series(t, noise_std, lon_ref=0.0, lat_ref=45.0, add_to="lonlat"):
    draw = 2  # x, y
    da = ts.normal(time=t, draws=draw) * noise_std
    distance = "geoid"

    if add_to == "lonlat":
        nlon = da.isel(draw=0).rename("nlon").drop("draw")
        nlat = da.isel(draw=1).rename("nlat").drop("draw")
        nlon[0] = 0  # centering
        nlat[0] = 0
        lon = (nlon / np.cos(np.pi / 180 * lat_ref) + lon_ref).rename("lon")
        lat = (nlat + lat_ref).rename("lat")
        ds = xr.merge([nlon, nlat, lon, lat])

    if add_to != "lonlat":
        lon = xr.ones_like(da.isel(draw=0).drop("draw")).rename("lon") * lon_ref
        lat = xr.ones_like(da.isel(draw=0).drop("draw")).rename("lat") * lat_ref
        ds = xr.merge([lon, lat])

    ds["noise_std"] = noise_std
    ds["id"] = 0
    # ds.attrs = {"description": f"white noise with std={noise_std} on {add_to}"}
    df = ds.to_dataframe()

    # add x, y , velocity and acceleration noise
    # INDEX TIME ?
    if not df.index.name == "time":
        warnings.warn("Are you sure time is the index ? ", UserWarning)
    # SORTED TIME ?
    if not df.index.is_monotonic_increasing:
        warnings.warn("time sorting dataframe", UserWarning)
        df.sort_index()

    _geo = pin.geo.GeoAccessor(df)

    if add_to == "xy":
        nx = da.isel(draw=0).drop("draw")
        ny = da.isel(draw=1).drop("draw")
        nx[0] = 0  # centering
        ny[0] = 0
        _geo._obj["x"] = nx
        _geo._obj["y"] = ny
        distance = ""

    if add_to == "v":
        vx = da.isel(draw=0).rename("vx")
        vy = da.isel(draw=1).rename("vy")
        _geo._obj["vx"] = vx
        _geo._obj["vy"] = vy
        _geo._obj["x"] = vx * 0
        _geo._obj["y"] = vy * 0

    if add_to != "v":
        _geo.compute_velocities(
            centered=True,
            names=(
                "vx",
                "vy",
                "vxy",
            ),
            distance=distance,
            inplace=True,
        )
        _geo.compute_velocities(
            centered=False,
            names=(
                "vx_unc",
                "vy_unc",
                "vxy_unc",
            ),
            distance=distance,
            inplace=True,
        )
    if add_to == "lonlat":
        _geo.compute_accelerations(
            names=("ax", "ay", "axy"),
            from_=("lon", "lon", "lat"),
            inplace=True,
        )
    if add_to == "xy":
        _geo.compute_accelerations(
            names=("ax", "ay", "axy"),
            from_=("xy", "x", "y"),
            inplace=True,
        )
    if add_to == "v":
        _geo.compute_accelerations(
            names=("ax", "ay", "axy"),
            from_=("velocities", "vx", "vy"),
            centered_velocity=True,
            inplace=True,
        )
    _geo._obj.attrs = {"description": f"white noise with std={noise_std} on {add_to}"}
    return _geo._obj


def process_uv(lon, lat, u, v, N, dt, **kwargs):
    """Wraps spectral calculation: add complex velocity
    Assumes the time series is regularly sampled

    Parameters:
    -----------
        u, v: pd.Series
            zonal, meridional index by time (in days)
        N: int,
            length of the spectrum
        dt: float
            Time sampling in days
        **kwargs:
            passed to mit_equinox.drifters.get_spectrum
    """
    if lon is None:
        uv = None
    else:
        uv = u + 1j * v
    return pin.tseries.get_spectrum(uv, N, dt=dt, **kwargs)


def noise_traj(
    noise_std=5e-4,
    T="60D",
    dt="1H",
    t_ref=pd.Timestamp(2000, 1, 1),
    t_size=1e6,
    lon_ref=45.0,
    lat_ref=60.0,
    add_to="lonlat",
):
    """
    Generate times series and spectra for a virtually still drifter with only noise on position
        noise_std : float
                    std of the noise in °
        T : str or Timedelta
                    lenght of the time window for spectra
        dt : str
                    time series time delta
        t_ref :     Time delta
                    time reference
        t_size :    int
                    lenght of time index
        lon_ref:    float
                    longitude of the virtual drifter
        lat_ref:    float
                    latitude of the virtual drifter
    """
    if type(T) == str:
        T = pd.Timedelta(T)
    time_unit = pd.Timedelta(dt)
    t = pd.date_range(t_ref, periods=t_size, freq=time_unit)

    df = white_noise_time_series(t, noise_std, lon_ref, lat_ref, add_to)
    attrs = df.attrs["description"]
    df = df.reset_index().rename(columns={"time": "date"})
    # add time in hours
    df["time"] = (df["date"] - t_ref) / time_unit
    df = dd.from_pandas(df, npartitions=2)

    N = int(T / time_unit)  # output size
    T = T / time_unit  # must be in the same units than time

    columns = [
        "lon",
        "lat",
    ]
    Columns = {
        "n": columns + ["x", "y"],
        # "v_n": columns + ["vx", "vy"],
        # "a_n": columns + ["ax", "ay"],
    }

    # pin.drifters.
    group = tuple(df["id"].loc[0].values.compute())[0]
    dfg = df.groupby("id").get_group(group).compute()

    Df_chunked = {}
    for l in Columns:
        df_chunked = pin.drifters.time_window_processing(
            dfg,
            process_uv,
            Columns[l],
            T,
            N,
            id_label="id",
            dt=dt,
            geo=True,
        )
        # rename x/y
        # df_chunked = df_chunked.rename(columns=dict(x="lon", y="lat"))

        Df_chunked[l] = df_chunked.drop(columns=["id", "x", "y"])

    D = []
    for l in Df_chunked:
        d = Df_chunked[l].mean(axis=0)
        d = (
            d.reindex(d.index.astype("float"))
            .to_xarray()
            .rename({"index": "frequency"})
            .rename(l)
            .sortby("frequency")
        )
        D.append(d)
    ds = xr.merge(D)
    ds.frequency.attrs = {"long_name": "frequency", "units": "cpd"}
    ds.attrs = {
        "lon": lon_ref,
        "lat": lat_ref,
        "noise_std": noise_std,
        "description": attrs,
    }
    return df, ds

In [4]:
T = "60D"
dt = "1H"
noise_std = 5e-4
t_ref = pd.Timestamp(2000, 1, 1)
t_size = 1e6
time_unit = pd.Timedelta(dt)
t = pd.date_range(t_ref, periods=t_size, freq=time_unit)
df = white_noise_time_series(t, noise_std=100.0, add_to="xy")



In [5]:
df

Unnamed: 0_level_0,lon,lat,noise_std,id,x,y,vx,vy,vxy,vx_unc,vy_unc,vxy_unc,ax,ay,axy
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2000-01-01 00:00:00,0.0,45.0,100.0,0,0.000000,0.000000,0.057049,-0.103559,0.118233,0.013899,-0.033045,0.035849,-0.000012,1.958726e-05,0.000023
2000-01-01 01:00:00,0.0,45.0,100.0,0,50.036692,-118.961708,-0.007676,0.002212,0.007988,0.013899,-0.033045,0.035849,-0.000012,1.958726e-05,0.000023
2000-01-01 02:00:00,0.0,45.0,100.0,0,-55.264808,15.927534,-0.025127,0.008067,0.026390,-0.029250,0.037469,0.047535,0.000002,-1.633470e-05,0.000016
2000-01-01 03:00:00,0.0,45.0,100.0,0,-130.875689,-60.880995,-0.011843,-0.000175,0.011844,-0.021003,-0.021336,0.029939,0.000005,1.175597e-05,0.000013
2000-01-01 04:00:00,0.0,45.0,100.0,0,-140.533864,14.667841,0.010911,-0.014114,0.017840,-0.002683,0.020986,0.021157,0.000008,-1.949977e-05,0.000021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2114-01-29 11:00:00,0.0,45.0,100.0,0,27.659454,-71.214635,0.001550,-0.029721,0.029761,0.005791,-0.030153,0.030704,-0.000002,2.401649e-07,0.000002
2114-01-29 12:00:00,0.0,45.0,100.0,0,17.970609,-176.652972,0.012775,0.008160,0.015159,-0.002691,-0.029288,0.029412,0.000009,2.080477e-05,0.000023
2114-01-29 13:00:00,0.0,45.0,100.0,0,119.641101,-12.461499,-0.006836,0.042488,0.043035,0.028242,0.045609,0.053645,-0.000019,-1.733648e-06,0.000020
2114-01-29 14:00:00,0.0,45.0,100.0,0,-31.252155,129.261895,-0.023873,0.002994,0.024060,-0.041915,0.039368,0.057504,0.000010,-2.020780e-05,0.000023


In [6]:
df, ds = noise_traj(noise_std=40, add_to="xy")



In [7]:
df.var().compute()

lon          0.000000e+00
lat          0.000000e+00
noise_std    0.000000e+00
id           0.000000e+00
x            1.601156e+03
y            1.603430e+03
vx           6.179064e-05
vy           6.182614e-05
vxy          2.650403e-05
vx_unc       2.470306e-04
vy_unc       2.475903e-04
vxy_unc      1.062339e-04
ax           5.717465e-11
ay           5.733480e-11
axy          2.456694e-11
time         8.333342e+10
dtype: float64

In [8]:
df.mean().compute()

  meta = self._meta_nonempty.mean(


lon          4.500000e+01
lat          6.000000e+01
noise_std    4.000000e+01
id           0.000000e+00
x           -5.328655e-02
y            1.617213e-02
vx          -5.045908e-08
vy           2.155503e-08
vxy          9.854575e-03
vx_unc      -9.685807e-09
vy_unc       8.181899e-09
vxy_unc      1.970753e-02
ax           1.493728e-11
ay          -5.082907e-12
axy          9.483798e-06
time         4.999995e+05
dtype: float64

# Parseval

In [9]:
(df.x.var() + df.y.var()).compute()  # Caution: spectrum x+iy -> integral = varx + vary

3204.5857377171897

In [10]:
ds.n.integrate("frequency")

In [11]:
cluster.close()

distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client


In [3]:
# import scipy.fft as fft
import scipy.signal as sg

In [29]:
n = 1024
x = np.random.randn(n)

In [34]:
np.mean(x**2)

0.9999242617681767

In [32]:
# xhat = fft.fft(x*sg.hann(n))
xhat = fft.fft(x)

In [33]:
np.sum(abs(xhat) ** 2) / n**2

0.9999242617681767

In [8]:
np.mean(sg.hann(n) ** 2)

0.37463378906250006

In [10]:
3 / 8

0.375

In [16]:
?fft.fft

[0;31mSignature:[0m     
[0mfft[0m[0;34m.[0m[0mfft[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mx[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maxis[0m[0;34m=[0m[0;34m-[0m[0;36m1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnorm[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0moverwrite_x[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mworkers[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mplan[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mCall signature:[0m [0mfft[0m[0;34m.[0m[0mfft[0m[0;34m([0m[0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mType:[0m           _Function
[0;31mString form:[0m    <uarray multimethod 'fft'>
[0;31mFile:[

In [35]:
?sg.periodogram

[0;31mSignature:[0m
[0msg[0m[0;34m.[0m[0mperiodogram[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mx[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mfs[0m[0;34m=[0m[0;36m1.0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mwindow[0m[0;34m=[0m[0;34m'boxcar'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnfft[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdetrend[0m[0;34m=[0m[0;34m'constant'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mreturn_onesided[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mscaling[0m[0;34m=[0m[0;34m'density'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maxis[0m[0;34m=[0m[0;34m-[0m[0;36m1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Estimate power spectral density using a periodogram.

Parameters
----------
x : array_like
    Time series of measurement values
fs : float, optional
    Sampling frequency of the `x` time series. Defaults t

In [37]:
0.014 * 24

0.0336

In [38]:
1 / 24

0.041666666666666664

In [40]:
3.521 - 3.564

-0.04300000000000015

In [44]:
raw_file = "/home/datawork-lops-osi/equinox/gdp/2018_2019/p2018_p2019.dat"

In [43]:
pd.read_csv(raw_file, delimiter=)

Unnamed: 0,id mm dd yy drogue temp volt. sensor 4 sensor 5 sensor 6
0,122573 1 1.008 2018 58.00 ...
1,122573 1 1.010 2018 58.00 ...
2,122573 1 1.043 2018 13.00 ...
3,122573 1 1.389 2018 56.00 ...
4,122573 1 1.402 2018 23.00 ...
...,...
25948583,147120 12 30.574 2019 1.00 ...
25948584,147120 12 30.615 2019 1.00 ...
25948585,147120 12 30.657 2019 1.00 ...
25948586,147120 12 30.719 2019 1.00 ...


In [41]:
?pd.read_csv

[0;31mSignature:[0m
[0mpd[0m[0;34m.[0m[0mread_csv[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mfilepath_or_buffer[0m[0;34m:[0m [0;34m'FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str]'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msep[0m[0;34m:[0m [0;34m'str | None | lib.NoDefault'[0m [0;34m=[0m [0;34m<[0m[0mno_default[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdelimiter[0m[0;34m:[0m [0;34m'str | None | lib.NoDefault'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mheader[0m[0;34m:[0m [0;34m"int | Sequence[int] | None | Literal['infer']"[0m [0;34m=[0m [0;34m'infer'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnames[0m[0;34m:[0m [0;34m'Sequence[Hashable] | None | lib.NoDefault'[0m [0;34m=[0m [0;34m<[0m[0mno_default[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mindex_col[0m[0;34m:[0m [0;34m'IndexLabel | Literal[False] | None'[0m [0