# Test irregular_time_sampling

In [1]:
import os
from glob import glob
import pandas as pd
import numpy as np
import xarray as xr
from sstats import tseries as ts
%matplotlib inline
from matplotlib import pyplot as plt

import random
import warnings

import lib as lib



In [16]:
def cyclic_selection(array, istart, replicate=1):
    cyclic_array = np.concatenate((array[istart:], array[0:istart]))
    if replicate ==1 : 
        return cyclic_array
    else : 
        return np.concatenate([cyclic_array]*replicate)

def cyclic_selection_len(array, istart,n):
    narray = len(array)
    if n ==None : 
        n=narray
    if n > narray : 
        warnings.warn('dt dasaset to small, will contain duplicated values')
    if istart + n < narray : 
        return array[istart:istart+n]
    else : 
        return np.concatenate((array[istart:], array[0:n-narray+istart]))

def time_from_dt_array(tstart, tend, dt):
    time_length = np.sum(dt)
    replicate = (tend-tstart)//time_length+1
    if replicate > 1 : 
        warnings.warn('dt dasaset to small, will contain duplicated values of dt')
    istart = int(30)#random.randrange(len(dt))
    dt_ = cyclic_selection(dt, istart, replicate)

    time = xr.DataArray(tstart.values + np.cumsum(dt))
    time = time.where(time<tend, drop=True)
    return time  

def irregular_time_sampling(ds,t,dt=1/24,offset_type='uniform', file=None, inplace=False):
    
    if not inplace : 
        ds=ds.copy()
        
    if offset_type == 'random_uniform':
        offset = (ts.uniform(t, low=-dt/2, high=dt/2)*pd.Timedelta("1D")).data
        ds["time_off"] = (ds.time.dims, ds.time.data + offset)
        
    if offset_type == 'carthe':
        path_dt = '/Users/mdemol/code/PhD/filtering/example_dt_list/'
        file = path_dt + '/carthe_dt.csv'
        DT = (pd.read_csv(path_dt + 'carthe_dt.csv')['dt']*pd.Timedelta("1s")).values
        ds["time_off"] = time_from_dt_array(ds.time.min(), ds.time.max(), DT)

    if offset_type == 'file':
        try :
            dt = (pd.read_csv(file)['dt']*pd.Timedelta("1s")).values
        except : 
            assert False, 'Please give file argument'
        try : 
            offset = dt[0:int(np.ceil(t[0]/t[1]))]
        except :
            assert False, 'Need more dt in csv files'
        
    ds["time_off"] = ds["time_off"].where(ds.time_off>ds.time[0], other=ds.time[0])
    ds["time_off"] = ds["time_off"].where(ds.time_off<ds.time[-1], other=ds.time[-1])
    time_off = ds["time_off"].values
    ds_off = ds.interp(time=time_off)[['x', 'y', 'time_days']]
    ds_off["time_uniform"] = xr.DataArray(data=ds.time.data,dims=["time_uniform"])
    if not inplace : 
        return ds_off

In [17]:
## PARAMETERS

# timeline: 100 days with 10 min sampling
dt = 1/24
t = (100, dt)
# number of random draws
N = 10 

# use a common decorrelation timescale, no rationale
#T = [5,10,20,40]
T = 10

# velocity amplitudes
U_low = 0.3
U_ni = 0.2
U_2 = 0.05
U_1 = 0.02
tau_eta = 0.1 # short timescale
n_layers = 5 # number of layers

ds_true = lib.synthetic_traj(t, N , T, tau_eta, n_layers, U_low, U_ni, U_2, U_1)
ds_obs = irregular_time_sampling(ds_true, t, dt=1/24, offset_type='carthe', file=None, inplace=False)

ds_obs

In [4]:
## PARAMETERS

# timeline: 100 days with 10 min sampling
dt = 1/24
t = (100, dt)
# number of random draws
N = 10 

# use a common decorrelation timescale, no rationale
#T = [5,10,20,40]
T = 10

# velocity amplitudes
U_low = 0.3
U_ni = 0.2
U_2 = 0.05
U_1 = 0.02
tau_eta = 0.1 # short timescale
n_layers = 5 # number of layers

In [5]:
# Generate ds_true and ds_obs

ds_true = lib.synthetic_traj(t, N , T, U_low, U_ni, U_2, U_1, tau_eta, n_layers)

# Simulated observed trajectories
#position_noise = 100. # m
#ds_obs = irregular_time_sampling(ds_true,t,dt=1/24,offset_type='carthe')


In [22]:
def cyclic_selection(array, istart, replicate=1):
    cyclic_array = np.concatenate((array[istart:], array[0:istart]))
    if replicate ==1 : 
        return cyclic_array
    else : 
        return np.concatenate([cyclic_array]*replicate)
cyclic_selection(DT, 30, replicate=1)

array([ 600000000000,  300000000000,  301000000000, ...,  263000000000,
        589000000000, 1488000000000], dtype='timedelta64[ns]')

In [24]:
def time_from_dt_array(tstart, tend, dt):
    time_length = np.sum(dt)
    replicate = (tend-tstart)//time_length+1
    print(replicate)
    istart = int(30)#random.randrange(len(dt))
    dt_ = cyclic_selection(dt, istart, replicate)

    time = xr.DataArray(tstart.values + np.cumsum(dt))
    time = time.where(time<tend, drop=True)
    return time


<xarray.DataArray 'time' ()>
array(1, dtype=int64)
