In [None]:
%matplotlib inline

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import cartopy
import cartopy.crs as ccrs
from cycler import cycler

In [None]:
# Location of the input text file, relative to the notebook.
INPUT_PATH = 'in/'
INPUT_FILE = 'ships_direction_and_speed_icoads_201101_201111.txt'
INPUT = INPUT_PATH + INPUT_FILE

# Settings handed to np.genfromtxt: which file columns to read, the field
# name given to each, and the dtype each one is parsed as. The three lists
# are positional, so entry i of each describes the same column.
skip_header = 1
usecols = [0, 3, 4, 5, 6, 7]
names = ['timeseries_index', 'date', 'time', 'lat', 'lon', 'obs']
dtype = [
    np.int32,       # timeseries_index
    (np.str_, 8),   # date, kept as text (parsed later with '%Y%m%d')
    (np.str_, 6),   # time, kept as text (parsed later with '%H%M%S')
    np.float32,     # lat
    np.float32,     # lon
    np.float32,     # obs
]

In [None]:
# Parse the selected columns of the input file into a structured array and
# wrap it in a DataFrame (one column per entry of `names`).
raw_data = pd.DataFrame(
    np.genfromtxt(
        INPUT,
        usecols=usecols,
        dtype=dtype,
        names=names,
        skip_header=skip_header,
    )
)

In [None]:
def create_timestamp(data):
    """Replace the text ``date`` and ``time`` columns with one ``timestamp`` column.

    The input frame is left untouched; a new frame is returned. This makes the
    function safe to call on a filtered slice of another frame and safe to
    re-run on the same input.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain string columns ``date`` (``YYYYMMDD``) and ``time``
        (``HHMMSS``, possibly shorter when leading zeros are missing).

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` without ``date`` and ``time`` and with a trailing
        datetime column ``timestamp``.

    Raises
    ------
    ValueError
        Propagated from ``pd.to_datetime`` when a row does not match
        ``'%Y%m%d%H%M%S'``.
    """
    # Left-pad the time with zeros and keep the last six characters so that
    # the '%H%M%S' part of the format always sees a full HHMMSS string.
    padded_time = ('000000' + data['time']).str[-6:]
    timestamp = pd.to_datetime(data['date'] + padded_time, format='%Y%m%d%H%M%S')

    # drop() without inplace and assign() both return new frames, so the
    # caller's frame is never modified.
    return data.drop(['date', 'time'], axis=1).assign(timestamp=timestamp)

In [None]:
def df_dot_product(u, v):
    """Dot product of each (u, v) row vector with the vector in the next row.

    Row ``i`` of the result is ``u[i]*u[i+1] + v[i]*v[i+1]``. The last row has
    no successor, so ``shift(-1)`` leaves it as NaN.
    """
    next_u = u.shift(-1)
    next_v = v.shift(-1)
    return u * next_u + v * next_v

In [None]:
def compute_deltas(data):
    """Add per-row step vectors and the turning angle between consecutive steps.

    Columns written to ``data`` (in place, in this order):

    * ``delta_lat``, ``delta_lon`` -- next row minus this row.
    * ``delta_lat_norm``, ``delta_lon_norm`` -- the step divided by its
      Euclidean length (NaN when the step has zero length).
    * ``angle`` -- angle in degrees between this row's unit step and the next
      row's unit step.

    Steps are taken between consecutive rows in whatever order ``data`` has;
    nothing here restricts a step to rows of the same ship or track.

    Returns the same ``data`` object.
    """
    # Step from each row to the one that follows it.
    data['delta_lat'] = data['lat'].shift(-1) - data['lat']
    data['delta_lon'] = data['lon'].shift(-1) - data['lon']

    # Scale each step to unit length.
    step_length = np.sqrt(data['delta_lat']**2 + data['delta_lon']**2)
    data['delta_lat_norm'] = data['delta_lat'] / step_length
    data['delta_lon_norm'] = data['delta_lon'] / step_length

    # For unit vectors the dot product is the cosine of the angle between
    # them; clip guards arccos against values just outside [-1, 1].
    cos_turn = df_dot_product(data['delta_lat_norm'], data['delta_lon_norm'])
    data['angle'] = np.degrees(np.arccos(np.clip(cos_turn, -1.0, 1.0)))

    return data

In [None]:
# Keep observations with longitude in [-90, 0], build timestamps, average
# duplicate (series, timestamp) positions, then derive step deltas and angles.
atlantic = raw_data[raw_data['lon'].between(-90, 0)]
atlantic = create_timestamp(atlantic)
atlantic = atlantic.groupby(['timeseries_index', 'timestamp'])[['lon', 'lat']].mean()
atlantic = compute_deltas(atlantic)
atlantic = atlantic.reset_index()
atlantic.head(10)

In [None]:
def get_tracks(data, thresh):
    """Label every row with an integer ``track`` id that changes at large time gaps.

    A gap is a forward time difference to the next row that is strictly
    greater than ``thresh``. The id is a running count of gap boundaries: it
    goes up by one on the last row before a gap and by one more on the first
    row after it. The row just before a gap therefore gets an id of its own,
    and the rows after the gap share the next id.

    The first row has no predecessor and is treated as "no gap before". This
    gives it a real integer id instead of NaN, so it is not silently dropped by
    a later ``groupby('track')`` and the column keeps an integer dtype.

    The comparison is one-sided: for a positive ``thresh``, a timestamp that
    jumps backwards never opens a new track.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a datetime column ``timestamp``. Modified in place.
    thresh : timedelta-like
        Minimum gap (exclusive), e.g. ``np.timedelta64(3, 'D')``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with the ``track`` column added.
    """
    timestamps = data['timestamp']
    # True on a row when the time to the NEXT row exceeds thresh
    # (the last row compares against NaT, which yields False).
    gap_after = (timestamps.shift(-1) - timestamps) > thresh
    # True on a row when the time from the PREVIOUS row exceeds thresh.
    gap_before = gap_after.shift(1, fill_value=False)
    # Convert to int before adding: '+' on boolean Series is a logical OR.
    boundaries = gap_after.astype(int) + gap_before.astype(int)
    data['track'] = boundaries.cumsum()
    return data

In [None]:
def get_subtracks(data, thresh):
    """Label every row with an integer ``subtrack`` id that changes at position jumps.

    A jump is a row whose ``delta_lat`` or ``delta_lon`` has an absolute value
    strictly greater than ``thresh`` (NaN deltas do not count as jumps). The id
    is a running count of jump boundaries: it goes up by one on the row that
    carries the jump and by one more on the row after it.

    The first row has no predecessor and is treated as "no jump before". This
    gives it a real integer id instead of NaN, so it is not silently dropped by
    a later ``groupby`` on ``subtrack`` and the column keeps an integer dtype.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain numeric columns ``delta_lat`` and ``delta_lon``.
        Modified in place.
    thresh : number
        Maximum allowed absolute delta, in the units of the delta columns.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with the ``subtrack`` column added.
    """
    # True on a row whose step to the next row is larger than thresh.
    jump_after = (data['delta_lat'].abs() > thresh) | (data['delta_lon'].abs() > thresh)
    # True on the row that immediately follows such a step.
    jump_before = jump_after.shift(1, fill_value=False)
    # Convert to int before adding: '+' on boolean Series is a logical OR.
    boundaries = jump_after.astype(int) + jump_before.astype(int)
    data['subtrack'] = boundaries.cumsum()
    return data

In [None]:
def track_span(x):
    """Diagonal length of the lon/lat bounding box of the rows in ``x``."""
    lon, lat = x['lon'], x['lat']
    lon_extent = lon.max() - lon.min()
    lat_extent = lat.max() - lat.min()
    return np.sqrt(lon_extent**2 + lat_extent**2)

In [None]:
def track_density(x):
    """Number of rows in ``x`` per unit of its lon/lat bounding-box diagonal."""
    lon, lat = x['lon'], x['lat']
    lon_extent = lon.max() - lon.min()
    lat_extent = lat.max() - lat.min()
    diagonal = np.sqrt(lon_extent**2 + lat_extent**2)
    return len(x)/diagonal

In [None]:
def drop_short_tracks(data, thresh):
    """Keep only the tracks that have more than ``thresh`` non-null timestamps."""
    def long_enough(track):
        return track['timestamp'].count() > thresh

    return data.groupby('track').filter(long_enough)

In [None]:
def drop_vertical_tracks(data, thresh):
    """Keep only the tracks whose longitude range (max - min) exceeds ``thresh``."""
    def wide_enough(track):
        lon = track['lon']
        return (lon.max() - lon.min()) > thresh

    return data.groupby('track').filter(wide_enough)

In [None]:
def drop_sparse_tracks(data, thresh):
    """Keep only the tracks whose ``track_density`` exceeds ``thresh``."""
    def dense_enough(track):
        return track_density(track) > thresh

    return data.groupby('track').filter(dense_enough)

In [None]:
def drop_outsiders(data, thresh):
    """Drop rows whose step to the next row is larger than ``thresh``.

    A row is removed when the absolute value of ``delta_lat`` or of
    ``delta_lon`` is strictly greater than ``thresh``. Rows with NaN deltas
    are kept, because a NaN comparison is False.
    """
    lat_jump = data['delta_lat'].abs() > thresh
    lon_jump = data['delta_lon'].abs() > thresh
    # Negate the "greater than" tests rather than testing "<=", so NaN
    # deltas still count as rows to keep.
    return data[~lat_jump & ~lon_jump]

In [None]:
def create_canvas():
    """Create a 60x20 figure holding one PlateCarree map axes with land drawn in grey.

    Returns
    -------
    (fig, ax)
        The new figure and its map axes.
    """
    land_edge = [0.2, 0.2, 0.2]
    land_fill = [0.5, 0.5, 0.5]

    fig = plt.figure(figsize=[60,20])
    ax = plt.axes(projection=ccrs.PlateCarree())
    # A high zorder draws the land feature above artists with a lower zorder.
    ax.add_feature(cartopy.feature.LAND, zorder=100, edgecolor=land_edge, facecolor=land_fill)
    return fig, ax

In [None]:
def filt(data, gap=np.timedelta64(3, 'D'), jump=2, min_points=1000,
         min_lon_span=60, min_density=20):
    """Label tracks and subtracks, then keep only long, wide, dense tracks.

    Each stage receives the output of the previous one. Feeding every stage
    the unfiltered ``data`` would discard the results of all filters except
    the last.

    Stages, in order:

    1. ``get_tracks`` -- adds the ``track`` column, splitting at time gaps
       greater than ``gap``.
    2. ``get_subtracks`` -- adds the ``subtrack`` column, splitting where a
       lat or lon step exceeds ``jump``.
    3. ``drop_short_tracks`` -- keeps tracks with more than ``min_points``
       timestamps.
    4. ``drop_vertical_tracks`` -- keeps tracks whose longitude range exceeds
       ``min_lon_span``.
    5. ``drop_sparse_tracks`` -- keeps tracks whose ``track_density`` exceeds
       ``min_density``.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``timestamp``, ``lon``, ``lat``, ``delta_lat`` and
        ``delta_lon``. The two labelling stages add their columns to ``data``
        in place.
    gap, jump, min_points, min_lon_span, min_density
        Thresholds for the stages above. The defaults are the values that
        were previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        The rows of ``data`` that survive all three filters.
    """
    _data = get_tracks(data, gap)
    _data = get_subtracks(_data, jump)
    _data = drop_short_tracks(_data, min_points)
    _data = drop_vertical_tracks(_data, min_lon_span)
    _data = drop_sparse_tracks(_data, min_density)
    return _data

In [None]:
# Raw data: all Atlantic rows drawn on a single map.
data = atlantic
fig, ax = create_canvas()
data.plot(x='lon', y='lat', ax=ax)
ax.set_global()

In [None]:
# Filtered data: one plot call per (track, subtrack) group, all on the same map.
fig, ax = create_canvas()
_data = filt(atlantic)
segments = _data.groupby(['track', 'subtrack'])
segments.plot(x='lon', y='lat', ax=ax)
ax.set_global()

#cmap='jet'
#cm = plt.get_cmap(cmap)
#colors = cm(np.arange(0, atlantic['track'].max()))
#plt.rc('axes', prop_cycle=(cycler('color', colors)))
#ax.set_prop_cycle(cycler('color', colors))
#grouped = atlantic.groupby(['timeseries_index','timestamp']).mean()
#for track, color in zip(tracks, colors):
    #plt.plot(track['lon'], track['lat'], color=color, marker='o')