In [None]:
# Input: GeoPackage of AIS records, read with gpd.read_file in the loading cell.
ais_gpkg = '../../data/gpkg/mssis-ais-records.gpkg'
# Output: timestamped trajectory GeoJSON, written with json.dump further down.
traj_out_geojson = '../../data/processed/timestamped-trajectory.geojson'
# Output path for a GeoPackage export; not referenced by any visible cell.
traj_out_gpkg = '../../data/gpkg/timestamped-trajectory.gpkg'

In [None]:
import seaconex

import numpy as np
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import json
import random

from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
import matplotlib.pyplot as plt


import warnings
warnings.simplefilter("ignore")

In [None]:
mpd.__version__

In [None]:
# Show every column when displaying wide frames. The fully qualified option
# name is used because the bare "max_columns" pattern also matches
# "styler.render.max_columns" on newer pandas and raises OptionError.
pd.set_option("display.max_columns", None)

## Loading the AIS .gpkg data

In [None]:
%%time

# Read the AIS records from the GeoPackage and keep their CRS for later use
# when building GeoDataFrames.
gdf_ais = gpd.read_file(ais_gpkg)
wgs84 = gdf_ais.crs  # name assumes the file is stored in WGS84 — TODO confirm

print("Finished reading {}".format(len(gdf_ais)))

In [None]:
gdf_ais.head()

In [None]:
gdf_ais.info()

In [None]:
gdf_ais.plot()

In [None]:
# Count AIS records for each combination of mssis_eez_country_name, mssis_ao
# and wpi_port_name, showing the most frequent combinations first.
(
    gdf_ais
    .groupby(['mssis_eez_country_name', 'mssis_ao', 'wpi_port_name'])
    .size()
    .reset_index()
    .rename(columns={0: 'ais_count'})
    .sort_values('ais_count', ascending=False)
)

In [None]:
# Parse the AIS timestamp strings and use them as the frame's index ('t').
ais_timestamps = pd.to_datetime(gdf_ais['ais_time'], format='%Y-%m-%d %H:%M:%S')
gdf_ais['t'] = ais_timestamps
gdf_ais = gdf_ais.set_index('t')

In [None]:
gdf_ais.info()

In [None]:
gdf_ais['ais_sog'].hist(bins=100, figsize=(15,3))

In [None]:
gdf_ais.info()

In [None]:
# NOTE(review): no filter on ais_sog is applied in this cell or in any cell
# above it, so the count printed here is the unfiltered row count despite the
# message — confirm whether a "speed > 5" filtering step was dropped.
print("Reduced to {} rows after removing speed records > 5".format(len(gdf_ais)))
gdf_ais['ais_sog'].hist(bins=100, figsize=(15,3))

In [None]:
# Keep only records with a positive speed over ground, reporting the row count
# before and after, then plot the remaining speed distribution.
rows_before = len(gdf_ais)
print("Original size: {} rows".format(rows_before))

is_moving = gdf_ais['ais_sog'] > 0
gdf_ais = gdf_ais[is_moving]

rows_after = len(gdf_ais)
print("Reduced to {} rows after removing 0 speed records".format(rows_after))
gdf_ais['ais_sog'].hist(bins=100, figsize=(15, 3))

In [None]:
wgs84 = gdf_ais.crs

In [None]:
%%time

# Build a TrajectoryCollection from the AIS records, grouping by vessel_mmsi
# and passing MIN_LENGTH as the collection's min_length.
MIN_LENGTH = 5 # meters
traj_collection = mpd.TrajectoryCollection(gdf_ais, 'vessel_mmsi', min_length=MIN_LENGTH)
print("Finished creating {} trajectories".format(len(traj_collection)))

In [None]:
%%time
traj_collection = mpd.MinTimeDeltaGeneralizer(traj_collection).generalize(tolerance=timedelta(minutes=1))

In [None]:
# Give every vessel (MMSI) its own random hex colour for plotting.
# Colour generation adapted from
# https://stackoverflow.com/questions/28999287/generate-random-colors-rgb
mmsi_list = gdf_ais['vessel_mmsi'].unique().tolist()

# One "#RRGGBB" string per vessel, six random hex digits each.
mmsi_colors = [
    "#" + ''.join(random.choice('0123456789ABCDEF') for _digit in range(6))
    for _vessel in range(len(mmsi_list))
]

mmsi_to_color = dict(zip(mmsi_list, mmsi_colors))

In [None]:
%%time
traj_collection.plot(column='vessel_mmsi', column_to_color=mmsi_to_color, linewidth=1, capstyle='round')

In [None]:
%%time
traj_collection.hvplot(title='vessel_mmsi', line_width=2)

In [None]:
%%time

gdf_ais[gdf_ais['wpi_port_name']!= ""]['wpi_port_name'].value_counts().plot(kind='bar', figsize=(15,3))
# gdf_ais.loc[gdf_ais['wpi_port_name'].str.len() > -1].value_counts().plot(kind='bar', figsize=(15,3))

In [None]:
%%time
gdf_ais['vessel_name'].value_counts().plot(kind='bar', figsize=(15,3))

In [None]:
%%time
gdf_ais['mssis_eez_country_name'].value_counts().plot(kind='bar', figsize=(15,3))

In [None]:
%%time
traj_collection.hvplot(
#     title='Trajectory {}'.format(str(independent_pursuit.id)), 
    height=300, 
    line_width=5.0, 
    c='ais_sog', 
    cmap='Dark2'
) 

In [None]:
# Compute the speed column on every trajectory. The method must actually be
# called: a bare `traj.add_speed` attribute reference does nothing.
for traj in traj_collection:
    # overwrite=True keeps this cell re-runnable once the column already exists.
    traj.add_speed(overwrite=True)

In [None]:
trips = mpd.ObservationGapSplitter(traj_collection).split(gap=timedelta(days=1))
# print("Extracted {} individual trips from {} continuous vessel tracks".format(len(independent_pursuit_trips), len(independent_pursuit)))

In [None]:
len(trips)

In [None]:
trips.hvplot(title='trips', line_width=2)

In [None]:
gdf_ais.crs

In [None]:
traj_collection.trajectories[0].df

In [None]:
def traj_to_timestamped_geojson(trajectory_collection):
    """Convert trajectories into timestamped two-point GeoJSON LineString features.

    Each pair of consecutive rows in a trajectory's frame becomes one feature
    whose geometry runs from the previous point to the current one and whose
    properties carry the start/end values of time, speed and heading.

    Parameters
    ----------
    trajectory_collection
        Object exposing a ``trajectories`` iterable. Every item must have a
        ``df`` frame whose index values provide ``isoformat()`` and which has
        the columns ``geometry`` (objects with an ``xy`` attribute),
        ``ais_sog``, ``ais_heading``, ``vessel_mmsi``, ``vessel_name`` and
        ``carrier``.

    Returns
    -------
    list of dict
        GeoJSON ``Feature`` dictionaries; empty when no trajectory has at
        least two rows.
    """
    features = []

    # Use the collection that was passed in, not a notebook-level global.
    for trajectory in trajectory_collection.trajectories:

        df = trajectory.df.copy()
        # Shift each column by one row so every row also sees its predecessor.
        df["previous_geometry"] = df["geometry"].shift()
        df["time"] = df.index
        df["previous_time"] = df["time"].shift()
        df["previous_ais_sog"] = df["ais_sog"].shift()
        df["previous_ais_heading"] = df["ais_heading"].shift()

        # The first row has no predecessor, so segments start at the second row.
        for _, row in df.iloc[1:].iterrows():
            coordinates = [
                [
                    row["previous_geometry"].xy[0][0],
                    row["previous_geometry"].xy[1][0]
                ],
                [
                    row["geometry"].xy[0][0],
                    row["geometry"].xy[1][0]
                ]
            ]
            times = [row["previous_time"].isoformat(), row["time"].isoformat()]
            sogs = [row["previous_ais_sog"], row["ais_sog"]]
            headings = [row["previous_ais_heading"], row["ais_heading"]]
            features.append(
                {
                    "type": "Feature",
                    "geometry": {
                        "type": "LineString",
                        "coordinates": coordinates,
                    },
                    "properties": {
                        "times": times,
                        "ais_sog": sogs,
                        "headings": headings,
                        "vessel_mmsi": row["vessel_mmsi"],
                        "vessel_name": row["vessel_name"],
                        "carrier": row["carrier"]
                    },
                }
            )
    return features

In [None]:
# Wrap the per-segment features in a FeatureCollection whose "crs" member
# names EPSG:4326 (the EPSG:3857 alternative is kept commented out).
geojson = {
  "type": "FeatureCollection",
    "crs": { 
        "type": "name", 
        "properties": { 
#             "name": "urn:ogc:def:crs:EPSG::3857" 
            "name": "urn:ogc:def:crs:EPSG::4326"
        } 
    },
  "features": traj_to_timestamped_geojson(traj_collection)
}

In [None]:
# for f in features:
#     geojson = {
#       "type": "FeatureCollection",
#       "features": features
#     }
    
#     out.append(geojson)

In [None]:
# with open(out_path + '.json', 'w') as json_file:
#     json.dump(geojson, json_file, indent=2)

In [None]:
with open(traj_out_geojson, 'w') as json_file:
    json.dump(geojson, json_file)

In [None]:
# gpd.read_file(traj_out_geojson)

In [None]:
# NOTE(review): this repeats the 1-minute MinTimeDeltaGeneralizer step already
# applied to traj_collection earlier in the notebook — confirm it is still needed.
traj_collection = mpd.MinTimeDeltaGeneralizer(traj_collection).generalize(tolerance=timedelta(minutes=1))

## Plotting trajectories

Let's give the most common ship types distinct colors. The remaining ones will be just grey:

In [None]:
# Colour trajectories by their vessel_type value using a fixed mapping for
# the 'Container' and 'ConRo' types.
shiptype_to_color = {'Container': 'blue', 'ConRo': 'red'}
traj_collection.plot(column='vessel_type', column_to_color=shiptype_to_color, linewidth=1, capstyle='round')

In [None]:
traj_collection.hvplot(title='All', line_width=2)

## Identifying trip origins and destinations

Since AIS records with a speed over ground (SOG) value of zero have been removed from the dataset, we can use movingpandas' `ObservationGapSplitter` to split the continuous observations into individual trips:

In [None]:
# Split the tracks using an observation gap of half a day. This replaces the
# `trips` collection computed earlier in the notebook with a 1-day gap.
trips = mpd.ObservationGapSplitter(traj_collection).split(gap=timedelta(days=0.5))
print("Extracted {} individual trips from {} continuous vessel tracks".format(len(trips), len(traj_collection)))

Let's plot the resulting trips!

In [None]:
trips.hvplot(title='Trips, 12 hr observation gap', line_width=2)

Compared to plotting the original continuous observations, this visualization is much cleaner because there are no artifacts at the border of the area of interest. 

Next, let's get the trip origins:

In [None]:
# Take the start location of every trip and plot them on an OSM basemap.
origins = trips.get_start_locations()
# NOTE(review): the title says "by ship type" but the points are coloured by
# vessel_name — confirm which column is intended.
origins.hvplot(title='Trip origins by ship type', c='vessel_name', geo=True, tiles='OSM')

In [None]:
origins.hvplot(title='Origins by speed', c='ais_sog', geo=True, tiles='OSM')

## Clustering origins

To run this section, you need to have the scikit-learn and geopy packages installed.

In [None]:
from sklearn.cluster import DBSCAN
from geopy.distance import great_circle
from shapely.geometry import MultiPoint

In [None]:
# Re-derive the trip start points and expose their coordinates as plain
# columns, then collect them as an array of (lat, lon) rows for clustering.
origins = trips.get_start_locations()
origin_geometry = origins.geometry
origins['lat'] = origin_geometry.y
origins['lon'] = origin_geometry.x

coordinate_columns = ['lat', 'lon']
matrix = origins[coordinate_columns].values

In [None]:
# Kilometres per radian, used to convert a distance in km into radians.
kms_per_radian = 6371.0088
# DBSCAN neighbourhood radius: 0.1 km expressed in radians, matching the
# radian coordinates passed to the haversine metric in the clustering cell.
epsilon = 0.1 / kms_per_radian

In [None]:
# Cluster the origins with DBSCAN using the haversine metric on coordinates
# converted to radians; min_samples=1 lets a single point form a cluster.
coords_in_radians = np.radians(matrix)
dbscan_model = DBSCAN(
    eps=epsilon,
    min_samples=1,
    algorithm='ball_tree',
    metric='haversine',
)
db = dbscan_model.fit(coords_in_radians)

cluster_labels = db.labels_
num_clusters = len(set(cluster_labels))

# One array of (lat, lon) rows per cluster label 0..num_clusters-1.
members_per_cluster = [matrix[cluster_labels == label] for label in range(num_clusters)]
clusters = pd.Series(members_per_cluster)
print('Number of clusters: {}'.format(num_clusters))

In [None]:
origins['cluster'] = cluster_labels

In [None]:
def get_centermost_point(cluster):
    """Return the cluster member nearest to the cluster centroid as a Point.

    The centroid of all members is computed first; the member with the
    smallest great-circle distance (in metres) to it is selected. The result
    is built as ``Point(member[1], member[0])``, i.e. with the member's two
    coordinates swapped.
    """
    center = MultiPoint(cluster).centroid
    centroid = (center.x, center.y)

    def metres_to_centroid(candidate):
        return great_circle(candidate, centroid).m

    nearest = tuple(min(cluster, key=metres_to_centroid))
    return Point(nearest[1], nearest[0])
centermost_points = clusters.map(get_centermost_point)

In [None]:
origins.hvplot(title='Clustered origins', c='cluster', geo=True, tiles='OSM', cmap='glasbey_dark')

In [None]:
# Summarise every origin cluster: distinct vessel types, number of origins,
# mean speed over ground and the centermost point as geometry.
origins_by_cluster = pd.DataFrame(origins).groupby(['cluster'])
summary = origins_by_cluster['vessel_type'].unique().to_frame(name='types')
summary['n'] = origins_by_cluster.size()
summary['symbol_size'] = summary['n']*10 # for visualization purposes
# Mean ais_sog per cluster is stored under the column name 'sog'.
summary['sog'] = origins_by_cluster['ais_sog'].mean()
summary['geometry'] = centermost_points
# Keep only clusters with more than one origin, largest first.
summary = summary[summary['n']>1].sort_values(by='n', ascending=False)
summary.head()

In [None]:
# NOTE(review): 117 is a hard-coded cluster label; labels come from the DBSCAN
# run above, so confirm this id exists (and is the intended one) for the
# current data.
cluster_of_interest_id = 117

# Plot only the origins that belong to the selected cluster.
origins[origins['cluster']==cluster_of_interest_id].hvplot(
    title='Cluster {}'.format(cluster_of_interest_id), 
    c='vessel_type', 
    geo=True, 
    tiles='OSM', 
    height=500
)

In [None]:
# Overlay the cluster summary points on the trips, coloured by mean speed.
# The summary frame stores the per-cluster mean speed in the 'sog' column
# (it has no 'ais_sog' column), so that is the column to colour by.
(
    trips.hvplot(
        title='Origin clusters by speed',
        color='gray',
        line_width=1
    ) *
    gpd.GeoDataFrame(
        summary,
        crs=wgs84
    ).hvplot(
        c='sog',
        size='symbol_size',
        geo=True,
        cmap='RdYlGn'
    )
)