In [4]:
import sys
sys.path.append('../../src/estimate_vorticity_from_data/')

from scipy.signal import butter, lfilter, freqz,filtfilt
from scipy import signal
from haversine import haversine
import scipy.io as sio
import numpy as np
import pandas as pd
from datetime import datetime,timedelta
import gsw
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(font_scale=1.3,context='poster',style='whitegrid')

In [5]:
def butter_lowpass(cutoff, fs, order=5):
    '''create butterworth filter for drifter trajectories'''
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    return b, a

def butter_lowpass_filter(data, cutoff, fs, order=5):
    '''run butterworth filter forward and backward to remove edge-effects'''
    b, a = butter_lowpass(cutoff, fs, order=order)
    y = filtfilt(b, a, data)
    return y

def datenum2datetime(matlab_datenum):
    """Convert Matlab datenum to python datetime object
        at this point, only size-1 skalar can be converted.
    """
    return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum%1) - timedelta(days = 366)

In [6]:
ddata = sio.loadmat('./asiri_RR1513_data.mat',squeeze_me=True,struct_as_record=False)['data']

In [7]:
lon=[];lat=[];ts=[];ids=[];uv=[];sal=[];sst=[]
for i,data in enumerate(ddata):
    lon.append(data.lon)
    lat.append(data.lat)
    sal.append(data.salinity)
    sst.append(data.sst)
    ts.append(data.ts)
    uv.append(data.uv)
    ids.append(np.ones_like(data.ts)*i)

In [8]:
dat=pd.DataFrame()
for i,data in enumerate(ddata):
    temp=pd.DataFrame({'sst':sst[i],'sal':sal[i],'lat':lat[i],'lon':lon[i],'uv':uv[i],'particle':i,'time':ts[i]})
    temp['time']=temp['time'].apply(datenum2datetime)
    temp.set_index('time',inplace=True)
    dat = pd.concat( [dat,temp] )

In [9]:
dat = dat.drop(dat[ ((dat.particle == 7) & (dat.index > '2015-11-20')) ].index)

In [10]:
p1 = dat[dat['particle']==1]
ff=gsw.f( np.nanmin( p1['lat'] ) )
Tinertial=  2*np.pi/ff # seconds
order = 6
#cutoff = 1/(1.2*Tinertial)  # desired cutoff frequency of the filter, Hz
cutoff = 1/(3*60*60)  # desired cutoff frequency of the filter, Hz
# 3h in seconds
fs = 1/(3600)

new=pd.DataFrame()
for i in range(45):
    p = dat[dat['particle']==i]
    p.drop(p[~np.isfinite(p.lat)].index,inplace=True)
    p.drop(p[~np.isfinite(p.lon)].index,inplace=True)
    #p.sort_index(ascending=False,inplace=True)
    temp = pd.DataFrame(index=p.index)
    #temp['uv_filt'] = butter_lowpass_filter(p.uv, cutoff, fs, order)
    temp['lat_filt'] = butter_lowpass_filter(p.lat, cutoff, fs, order)
    temp['lon_filt'] = butter_lowpass_filter(p.lon, cutoff, fs, order)
    temp['particle'] = i
    temp['uv'] = p['uv']
    temp['lon'] = p['lon']
    temp['lat'] = p['lat']
    temp['sst'] = p['sst']
    temp['sal'] = p['sal']
    new= pd.concat([new,temp])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
  b = a[a_slice]


In [15]:
def latlon2uv_dir(lon,lat,direction):
    ''' forward/backward differences
    
        convert time series of lon and lat into time series of u and v
        using: haversine
        
        lat,lon: pandas columns,degrees
        u,v: vectores, m/s
    '''
    dt = 60*60 #now in second
    lon2 = lon.reindex(index=np.roll(lon.index,-1)).values
    lat2 = lat.reindex(index=np.roll(lat.index,-1)).values
    lon=lon.values
    lat=lat.values
    if direction=='back':
        lon=np.flipud(lon)
        lon2=np.flipud(lon2)
        lat=np.flipud(lat)
        lat2=np.flipud(lat2)
        
    dr = np.array( [haversine([lon[i],lat[i]],[lon2[i],lat2[i]]) for i in range(len(lon))] )

    xx=np.sin(np.deg2rad(lon2-lon))*np.cos(np.deg2rad(lat2))
    yy=np.cos(np.deg2rad(lat))*np.sin(np.deg2rad(lat2))-np.sin(np.deg2rad(lat))*np.cos(np.deg2rad(lat2))*np.cos(np.deg2rad(lon-lon))

    gamma=np.arctan2(yy,xx)
    
    if direction=='back':
        dr = np.flipud(dr)
        gamma = np.flipud(gamma)
    
    c=1000;
    u=c*dr/dt*np.cos(gamma)
    v=c*dr/dt*np.sin(gamma)
    return u,v

def latlon2uv(lon,lat):
    '''apply both forward and backward differences to get centered difference'''
    u1,v1 = latlon2uv_dir(lon,lat,'for')
    u2,v2 = latlon2uv_dir(lon,lat,'back')
    
    # correct the edges!
    u1[-1]=u2[0]
    v1[-1]=v2[0]
    u2[-1]=u1[0]
    v2[-1]=v1[0]
    
    uv = 0.5*(u1+u2) + 1j*0.5*(v1+v2)
    return uv

In [16]:
total=pd.DataFrame()
for i in range(45):
    #print(i)
    temp=new[new.particle==i]
    lon=temp.lon_filt
    lat=temp.lat_filt
    uv=latlon2uv(lon,lat)
    temp['uv_filt']=uv
    total = pd.concat([total,temp])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [17]:
def downsample_trajectories(df):
    alldata=pd.DataFrame([]) # empty dataframe
    for i in range(len(df.particle.unique())): 
        # loop over drifters
        temp_df = df[df.particle==i] 
        tempp = temp_df.resample('1h').bfill(limit=1).interpolate('pchip') # if resampling has gaps, fill with last entry!
        # reindexing not necessary
        tempp.loc[:,'time']=tempp.index
        tempp.set_index('particle',inplace=True)
        tempp.loc[:,'particle']=tempp.index
        # concat multiple drifters
        alldata = pd.concat( [alldata,tempp] )
    return alldata

In [18]:
alldata = downsample_trajectories(total)

In [19]:
alldata['particle'] = np.round(alldata.particle).astype(np.int32)

In [20]:
alldata.set_index('particle',inplace=True)

In [21]:
def mem_usage(pandas_obj):
    if isinstance(pandas_obj,pd.DataFrame):
        usage_b = pandas_obj.memory_usage(deep=True).sum()
    else: # we assume if not a df it's a series
        usage_b = pandas_obj.memory_usage(deep=True)
    usage_mb = usage_b / 1024 ** 2 # convert bytes to megabytes
    return "{:03.2f} MB".format(usage_mb)

In [22]:
# optimize storage data type if necessary (already adjusted in least square script)    
df_float = alldata.select_dtypes(include=['float'])
converted_float = df_float.apply(pd.to_numeric,downcast='float')
optimized_df = alldata.copy()
optimized_df[converted_float.columns] = converted_float

In [23]:
print( len( optimized_df.index.unique() ) )
optimized_df.drop(optimized_df[~np.isfinite(optimized_df.lat)].index,inplace=True)
optimized_df.to_pickle('posveldata_3h.pkl')

45


In [24]:
ls -la -h

total 75744
drwxr-xr-x@ 11 sebastian  staff   352B Dec 13 01:04 [34m.[m[m/
drwxr-xr-x@  6 sebastian  staff   192B Sep  6 14:16 [34m..[m[m/
drwxr-xr-x@  3 sebastian  staff    96B Dec 13 00:58 [34m.ipynb_checkpoints[m[m/
-rw-r--r--@  1 sebastian  staff    16M Oct 24  2017 asiri_RR1513_data.mat
-rw-r--r--@  1 sebastian  staff   5.5M Dec  6  2016 data_hourly_120616.mat
-rw-r--r--@  1 sebastian  staff   3.7M Oct 26  2017 posdata.pkl
-rw-r--r--@  1 sebastian  staff   5.1M Nov  9  2017 posveldata.pkl
-rw-r--r--@  1 sebastian  staff   7.6M Dec 13 01:04 posveldata_3h.pkl
-rw-r--r--@  1 sebastian  staff   8.8M Mar 27  2018 posveldata_all.pkl
-rw-r--r--@  1 sebastian  staff   7.2M Dec  2  2017 posveldata_filt.pkl
-rw-r--r--@  1 sebastian  staff    14K Dec 13 01:04 read_filter_bin_pandas_data.ipynb


In [25]:
pwd

'/Users/sebastian/Dropbox (MIT)/jgr_deformation/data/drifter'

In [2]:
!jupyter nbconvert --to script *.ipynb

[NbConvertApp] Converting notebook read_filter_bin_pandas_data.ipynb to script
[NbConvertApp] Writing 6401 bytes to read_filter_bin_pandas_data.py
