# Extracting locations

In [None]:
%matplotlib inline

In [None]:
import urllib
import os
import numpy as np
import pandas as pd
from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon, MultiPoint
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from geopy.distance import great_circle

import sys
sys.path.append("..")
import movingpandas as mpd

import warnings
warnings.simplefilter("ignore")

In [None]:
# Read the AIS demo records, parse the 'Timestamp' strings
# (day/month/year hour:minute:second) and use the parsed values as the
# index, named 't'.
df = read_file('data/demodata_ais.gpkg')
df = df.assign(
    t=pd.to_datetime(df['Timestamp'], format='%d/%m/%Y %H:%M:%S')
).set_index('t')

In [None]:
# Keep only the records whose speed over ground (SOG) is greater than zero,
# reporting the row count before and after the filter.
print("Original size: {} rows".format(df.shape[0]))
df = df.loc[df['SOG'] > 0]
print("Reduced to {} rows after removing 0 speed records".format(df.shape[0]))

In [None]:
MIN_LENGTH = 100  # meters
# Group the records into one trajectory per 'MMSI' value. The minimum length
# is passed by keyword: the position of `min_length` in the constructor is not
# the same across MovingPandas releases, so a positional third argument may
# bind to a different parameter.
traj_collection = mpd.TrajectoryCollection(df, 'MMSI', min_length=MIN_LENGTH)
print("Finished creating {} trajectories".format(len(traj_collection)))

In [None]:
# Reuse the MIN_LENGTH constant instead of repeating the literal so the two
# values cannot drift apart.
traj_collection.min_length = MIN_LENGTH
# Split each vessel track wherever consecutive observations are more than
# five minutes apart.
# NOTE(review): newer MovingPandas releases appear to expose this as
# ObservationGapSplitter rather than a collection method - confirm against
# the installed version.
trips = traj_collection.split_by_observation_gap(timedelta(minutes=5))
print("Extracted {} individual trips from {} continuous vessel tracks".format(len(trips), len(traj_collection)))

In [None]:
# Plot the extracted trips with hvplot; as the last expression of the cell,
# the returned plot object is what the notebook displays.
trips.hvplot()

## OD Matrix

In [None]:
# Kilometres per radian of arc (6371.0088 is presumably the Earth's mean
# radius in km - TODO confirm the source of the value).
kms_per_radian = 6371.0088
# 0.1 km converted to radians. A multiple of this value is passed as the
# clustering radius `eps` to extract_od_matrix, whose DBSCAN step works on
# coordinates converted to radians.
EPSILON = 0.1 / kms_per_radian

In [None]:
def make_od_line(row, od_clusters):
    """Build the line connecting the first and last cluster of an OD entry.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row['od']``, an indexable sequence of cluster labels.
        Only its first and last elements are used, so a one-element sequence
        yields a line whose two vertices coincide.
    od_clusters : DataFrame-like
        Indexed by cluster label; each row exposes a ``geometry`` attribute.

    Returns
    -------
    LineString
        Line from the geometry of the first cluster to that of the last.
    """
    cluster_ids = row['od']
    start_geom = od_clusters.loc[cluster_ids[0]].geometry
    end_geom = od_clusters.loc[cluster_ids[-1]].geometry
    return LineString([start_geom, end_geom])

def get_centermost_point(cluster):
    """Return the member of ``cluster`` that lies closest to its centroid.

    Parameters
    ----------
    cluster : sequence of coordinate pairs
        As built by ``dbscan_cluster_ods``, each entry is a ``[lat, lon]``
        pair.

    Returns
    -------
    Point
        The selected member with its two coordinates swapped, i.e. element 1
        becomes ``x`` and element 0 becomes ``y``.
    """
    center = MultiPoint(cluster).centroid
    centroid = (center.x, center.y)

    def meters_to_centroid(point):
        # Great-circle distance between a member and the centroid, in meters.
        return great_circle(point, centroid).m

    nearest = tuple(min(cluster, key=meters_to_centroid))
    return Point(nearest[1], nearest[0])

def extract_od_gdf(trips):
    """Collect the start and end locations of all trips in a single frame.

    Parameters
    ----------
    trips : object
        Must provide ``get_start_locations`` and ``get_end_locations``; both
        are called with the column list ``['SOG', 'ShipType']``.

    Returns
    -------
    DataFrame-like
        Start locations followed by end locations, with three added columns:
        ``type`` (``'0'`` for start rows, ``'1'`` for end rows), ``lat``
        (geometry ``y``) and ``lon`` (geometry ``x``).
    """
    origins = trips.get_start_locations(['SOG', 'ShipType'])
    origins['type'] = '0'
    destinations = trips.get_end_locations(['SOG', 'ShipType'])
    destinations['type'] = '1'
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
    # frames the same way and keeps their original index values.
    od = pd.concat([origins, destinations])
    od['lat'] = od.geometry.y
    od['lon'] = od.geometry.x
    return od

def dbscan_cluster_ods(od_gdf, eps):
    """Cluster the origin/destination points with DBSCAN.

    Parameters
    ----------
    od_gdf : DataFrame-like
        Must contain the columns ``lat`` and ``lon``.
    eps : float
        Neighbourhood radius handed to DBSCAN. The coordinates are converted
        with ``np.radians`` and compared with the haversine metric, so the
        radius is interpreted in radians.

    Returns
    -------
    cluster_labels : array
        One cluster label per input row.
    clusters : pandas.Series
        Entry ``n`` holds the ``[lat, lon]`` rows that received label ``n``.
    """
    # DataFrame.as_matrix was removed in pandas 1.0; selecting the columns
    # and calling to_numpy() gives the same 2-D array.
    matrix = od_gdf[['lat', 'lon']].to_numpy()
    db = DBSCAN(eps=eps, min_samples=1, algorithm='ball_tree', metric='haversine').fit(np.radians(matrix))
    cluster_labels = db.labels_
    # With min_samples=1 every point is a core point, so no -1 noise label is
    # produced and the labels run from 0 to num_clusters - 1, which the
    # range() below relies on.
    num_clusters = len(set(cluster_labels))
    clusters = pd.Series([matrix[cluster_labels == n] for n in range(num_clusters)])
    return cluster_labels, clusters

def extract_od_clusters(od_gdf, eps):
    """Cluster the OD points and summarise each cluster in one row.

    Side effect: writes the cluster label of every row into
    ``od_gdf['cluster']`` in place.

    Parameters
    ----------
    od_gdf : DataFrame-like
        Must contain ``ShipType`` and ``SOG`` plus whatever
        ``dbscan_cluster_ods`` needs.
    eps : float
        Clustering radius, forwarded to ``dbscan_cluster_ods``.

    Returns
    -------
    pandas.DataFrame
        Indexed by cluster label and sorted by descending ``n``, with the
        columns ``types`` (unique ship types), ``n`` (number of points),
        ``symbol_size`` (``n * 10``), ``sog`` (mean SOG) and ``geometry``
        (centermost point of the cluster).
    """
    labels, members = dbscan_cluster_ods(od_gdf, eps)
    od_gdf['cluster'] = labels

    per_cluster = pd.DataFrame(od_gdf).groupby(['cluster'])
    summary = per_cluster['ShipType'].unique().to_frame(name='types')
    summary['n'] = per_cluster.size()
    # Marker size is only used when plotting the clusters.
    summary['symbol_size'] = summary['n']*10
    summary['sog'] = per_cluster['SOG'].mean()
    # `members` is indexed 0..k-1, which lines up with the cluster labels.
    summary['geometry'] = members.map(get_centermost_point)

    populated = summary[summary['n']>0]
    return populated.sort_values(by='n', ascending=False)
    
def extract_od_matrix(trips, eps, directed=True):
    """Count trips between pairs of origin/destination clusters.

    Parameters
    ----------
    trips : object
        Trip collection accepted by ``extract_od_gdf``.
    eps : float
        Clustering radius, forwarded to ``extract_od_clusters``.
    directed : bool, default True
        If True, the cluster sequence of a trip is kept in origin ->
        destination order, so (a, b) and (b, a) are counted separately.
        If False, each sequence is sorted first so both directions are
        counted together.

    Returns
    -------
    od_matrix : pandas.DataFrame
        Indexed by cluster tuple, with the columns ``n`` (number of trips),
        ``od`` (copy of the index) and ``geometry`` (line between the first
        and last cluster of the tuple).
    matrix_nodes : pandas.DataFrame
        Cluster summary returned by ``extract_od_clusters``.
    """
    od_gdf = extract_od_gdf(trips)
    # This call also adds the 'cluster' column to od_gdf, which is read below.
    matrix_nodes = extract_od_clusters(od_gdf, eps)

    # Sorting by 'type' puts origin rows ('0') before destination rows ('1').
    # Groupby preserves the order of rows within each group, and unique()
    # preserves input order according to
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.unique.html
    ordered = pd.DataFrame(od_gdf).sort_values(['type'])
    per_trip = ordered.groupby(['traj_id'])['cluster'].unique().to_frame(name='clusters')

    cluster_seqs = per_trip['clusters']
    if not directed:
        # Direction is irrelevant: normalise the order of each sequence.
        cluster_seqs = cluster_seqs.apply(sorted)
    # Tuples are hashable and can therefore serve as group keys.
    flow_keys = cluster_seqs.apply(tuple)

    od_matrix = per_trip.groupby(flow_keys).count().rename({'clusters':'n'}, axis=1)
    od_matrix['od'] = od_matrix.index
    od_matrix['geometry'] = od_matrix.apply(
        lambda flow: make_od_line(row=flow, od_clusters=matrix_nodes),
        axis=1,
    )
    return od_matrix, matrix_nodes
    
# Build the directed OD matrix, using twice EPSILON as the clustering radius.
od_matrix, matrix_nodes = extract_od_matrix(trips, EPSILON*2, directed=True)

# Flow lines (line width driven by the trip count 'n') overlaid with the
# cluster nodes (coloured by 'sog', sized by 'symbol_size').
flow_layer = GeoDataFrame(od_matrix).hvplot(
    title='OD flows', geo=True, tiles='OSM', hover_cols=['n'], line_width='n', alpha=0.5
)
node_layer = GeoDataFrame(matrix_nodes).hvplot(
    c='sog', size='symbol_size', hover_cols=['cluster', 'n'], geo=True, cmap='RdYlGn'
)
# The overlay is the last expression of the cell, so the notebook displays it.
flow_layer * node_layer