In [None]:
# --- Inputs ------------------------------------------------------------------
# Vessel reference table and the raw MSSIS AIS vessel-history export (both CSV).
vessel_file = '../data/raw/vessels.csv'
ais_file = '../data/raw/volpe-bermuda-mssis-vessel-history.csv'
# NGA World Port Index GeoPackage, read into gdf_wpi further down.
wpi_gpkg = '../data/gpkg/nga-wpi.gpkg'

# --- Outputs -----------------------------------------------------------------
# Each result set has a GeoPackage path and a GeoJSON path; both are handed to
# seaconex.gdf_to_geo_file at the end of the notebook.
ais_gpkg = '../data/gpkg/mssis-ais-records.gpkg'
ais_geojson = '../data/processed/mssis-ais-records.geojson'
ais_bad_pts_gpkg = '../data/gpkg/mssis-ais-records-bad-pts.gpkg'
ais_bad_pts_geojson = '../data/processed/mssis-ais-records-bad-pts.geojson'

In [None]:
import seaconex

import numpy as np
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import matplotlib.pyplot as plt

from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
from shapely.geometry import Point

import pycountry
import json
import fiona
import warnings
warnings.simplefilter("ignore")

In [None]:
mpd.__version__

## If running this notebook for the first time, uncomment the line below to download and convert the NGA WPI data

In [None]:
# seaconex.get_all_nga_wpi()

In [None]:
%%time

df_ais = pd.read_csv(
    ais_file, 
    sep=','
).drop(
    columns=[
        'SourceNum', 
        'RxStnId', 
        'SourceNum'
    ]
).rename(
    columns = {
        'MMSI': 'vessel_mmsi', 
        'TimeOfFix': 'ais_time', 
        'SOG': 'ais_sog', 
        'Heading': 'ais_heading', 
        'PortIndex': 'mssis_wpi', 
        'EEZ': 'mssis_eez_country_code', 
        'AO': 'mssis_ao',
        'Latitude': 'latitude',
        'Longitude': 'longitude'
    }
)

print("Finished reading {}".format(len(df_ais)))

In [None]:
df_ais.mssis_eez_country_code.unique().tolist()

UN/LOCODE, the United Nations Code for Trade and Transport Locations, assigns the code 'XZ' to international waters.

https://unece.org/trade/cefact/unlocode-code-list-country-and-territory

In [None]:
# Records with no EEZ code are labelled 'XZ' (international waters).
# The filled column is assigned back explicitly: calling fillna(inplace=True)
# on a column selection is chained assignment, which pandas deprecates and
# which leaves df_ais unchanged when copy-on-write is enabled.
df_ais['mssis_eez_country_code'] = df_ais['mssis_eez_country_code'].fillna('XZ')

In [None]:
# https://stackoverflow.com/questions/53923433/how-to-get-country-name-from-country-abbreviation-in-python-with-mix-of-alpha-2
def country_name(x):
    """Translate a country code into a human-readable name.

    Parameters
    ----------
    x : str
        'XZ' (treated as international waters), a two-letter code looked up
        as ISO alpha-2, or a three-letter code looked up as ISO alpha-3.

    Returns
    -------
    str
        'International Waters' for 'XZ', the pycountry name for a code that
        pycountry knows, and 'Invalid Code' for anything else (non-string
        input such as NaN/None, wrong length, or an unknown code).
    """
    invalid = 'Invalid Code'

    # NaN / None / numbers have no usable length; reject them up front.
    if not isinstance(x, str):
        return invalid
    if x == 'XZ':
        return 'International Waters'

    if len(x) == 2:
        lookup = {'alpha_2': x}
    elif len(x) == 3:
        lookup = {'alpha_3': x}
    else:
        return invalid

    try:
        country = pycountry.countries.get(**lookup)
    except LookupError:
        # NOTE(review): some pycountry releases appear to raise KeyError for
        # unknown codes rather than returning None -- handled either way.
        return invalid
    return invalid if country is None else country.name

In [None]:
df_ais['mssis_eez_country_name'] = df_ais['mssis_eez_country_code'].apply(country_name)

In [None]:
df_ais['ais_time'] = pd.to_datetime(df_ais['ais_time'], unit='s')

In [None]:
df_ais.head()

In [None]:
%time

df_vessel = pd.read_csv(
    vessel_file, 
    sep=','
).rename(
    columns={
        'carrier_id_fk':'carrier'
    }
).fillna(0)

print("Finished reading {}".format(len(df_vessel)))

In [None]:
cols_roro_cap = ['vessel_capacity_vehicle_units', 'vessel_stern_ramp_capacity_tons']

df_vessel[cols_roro_cap] = df_vessel[cols_roro_cap].astype('int')

In [None]:
df_vessel.head()

In [None]:
%time

cols_vessel = [
#     'vessel_imo',
    'vessel_name',
    'vessel_mmsi',
    'vessel_build_year',
    'vessel_gross_tonnage',
    'vessel_type',
    'vessel_flag_country',
    'vessel_capacity_teu',
    'vessel_capacity_vehicle_units',
    'vessel_stern_ramp_capacity_tons',
    'carrier'
]

df_ais = pd.merge(
    left=df_ais,
    right=df_vessel[cols_vessel],
    how='left',
    on='vessel_mmsi',
).fillna("")

print("Finished merging {}, {}".format(len(df_ais), len(df_vessel)))

In [None]:
geometry = gpd.points_from_xy(df_ais.longitude, df_ais.latitude)

In [None]:
df_ais.drop(
    columns=['longitude', 'latitude'], 
    inplace=True
)

In [None]:
%time

gdf_ais = gpd.GeoDataFrame(
    df_ais, 
    geometry=geometry, 
    crs="EPSG:4326"
)
print("Finished reading {}".format(len(gdf_ais)))

In [None]:
gdf_ais.crs

In [None]:
gdf_ais.info()

In [None]:
%%time

gdf_wpi = gpd.read_file(wpi_gpkg)
wgs84 = gdf_wpi.crs

print("Finished reading {}".format(len(gdf_wpi)))

In [None]:
gdf_wpi.head()

In [None]:
gdf_wpi.info()

In [None]:
gdf_wpi['INDEX_NO'] = gdf_wpi['INDEX_NO'].astype('int')

In [None]:
# Only the WPI index number (join key) and the port name are needed.
cols_wpi = ['INDEX_NO', 'PORT_NAME']

# Look up the port name for each record's MSSIS port index. Records without a
# matching port keep an empty string; the join key from the WPI side is
# dropped once the merge is done.
gdf_ais = (
    pd.merge(
        left=gdf_ais,
        right=gdf_wpi[cols_wpi],
        how='left',
        left_on='mssis_wpi',
        right_on='INDEX_NO',
    )
    .rename(columns={'PORT_NAME': 'wpi_port_name'})
    .fillna("")
    .drop(columns=['INDEX_NO'])
)

In [None]:
gdf_ais.info()

In [None]:
gdf_ais

In [None]:
# Records whose EEZ code is 'ATA' or 'MAR' are set aside as bad points.
gdf_very_bad_pts = gdf_ais.loc[
    gdf_ais['mssis_eez_country_code'].isin(['ATA', 'MAR'])
]

In [None]:
gdf_very_bad_pts

In [None]:
seaconex.gdf_to_geo_file(
    gdf_very_bad_pts,
    ais_bad_pts_geojson,
    ais_bad_pts_gpkg
)

In [None]:
# Keep only records whose EEZ code is neither 'ATA' nor 'MAR', i.e. the exact
# complement of the bad-point selection. The previous condition
# (code != 'ATA') | (code != 'MAR') is true for every row, so nothing was
# actually removed.
gdf_ais = gdf_ais.loc[
    ~gdf_ais['mssis_eez_country_code'].isin(['ATA', 'MAR'])
]

In [None]:
seaconex.gdf_to_geo_file(
    gdf_ais,
    ais_geojson,
    ais_gpkg
)