# Pre-Process MSSIS AIS

Pre-processing of the Bermuda Case Study MSSIS vessel history dataset provided by Volpe, converting it from CSV to GeoJSON and GPKG.

In [None]:
# Relative path to the raw MSSIS vessel-history CSV that this notebook loads.
file = '../data/raw/volpe-bermuda-mssis-vessel-history.csv'

In [None]:
import os

# Directory prefix for every export written by this notebook. File names are
# appended with plain string concatenation, so the trailing slash is required.
out_path = '../data/interim/'

# Create the directory up front so the exports at the end of the notebook do
# not fail on a fresh checkout after all the processing has already run.
os.makedirs(out_path, exist_ok=True)

## volpe-bermuda-mssis-vessel-history.csv

In [None]:
import pandas as pd
import geopandas
import json
import fiona

from shapely.geometry import Point

In [None]:
!head '-n 5' $file

In [None]:
# Parse the raw CSV into a DataFrame; read_csv splits on commas by default.
df = pd.read_csv(file)

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Column names, dtypes and non-null counts of the raw frame.
df.info()

In [None]:
# Summary statistics of the raw frame, used as a quick sanity check of value ranges.
df.describe()

In [None]:
# Per-column flag: True where the column contains at least one missing value.
df.isna().any()

In [None]:
# Distinct EEZ values present in the data (including any missing marker).
df['EEZ'].unique()

UN/LOCODE, the United Nations Code for Trade and Transport Locations, assigns 'XZ' for international waters.

https://unece.org/trade/cefact/unlocode-code-list-country-and-territory

In [None]:
# Label records that have no EEZ with 'XZ', the UN/LOCODE code for
# international waters.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on the attribute-accessed Series: under pandas
# Copy-on-Write the chained in-place form only changes a temporary object and
# leaves `df` with its NaNs.
df['EEZ'] = df['EEZ'].fillna('XZ')

In [None]:
# Distinct PortIndex values present in the data.
df['PortIndex'].unique()

In [None]:
# Distinct AO values present in the data.
df['AO'].unique()

In [None]:
# Distinct SourceNum values, inspected before the column is dropped.
df['SourceNum'].unique()

In [None]:
# Distinct RxStnId values, inspected before the column is dropped.
df['RxStnId'].unique()

In [None]:
# Discard the SourceNum and RxStnId columns.
df = df.drop(columns=['SourceNum', 'RxStnId'])

In [None]:
# Interpret TimeOfFix as a count of seconds since the Unix epoch and replace
# it with proper datetimes.
# NOTE(review): the result is timezone-naive; presumably UTC - confirm.
df = df.assign(TimeOfFix=pd.to_datetime(df['TimeOfFix'], unit='s'))

In [None]:
# Check the converted TimeOfFix values and the remaining columns.
df.head()

In [None]:
# Store the two identifier columns as unsigned 32-bit integers.
df = df.astype({'MMSI': 'uint32', 'PortIndex': 'uint32'})

In [None]:
# Store AO as an unsigned 8-bit integer.
df['AO'] = df['AO'].astype('uint8')

In [None]:
# Verify the dtypes after the datetime conversion and integer casts.
df.info()

In [None]:
# Build one point geometry per record, with longitude as x and latitude as y.
geometry = geopandas.points_from_xy(x=df['Longitude'], y=df['Latitude'])

In [None]:
# The coordinates now live in `geometry`, so the raw columns are removed.
df = df.drop(columns=['Longitude', 'Latitude'])

In [None]:
# Map the raw column names onto the prefixed names used in the exports.
column_names = {
    'MMSI': 'vessel_mmsi',
    'TimeOfFix': 'ais_time',
    'SOG': 'ais_sog',
    'Heading': 'ais_heading',
    'PortIndex': 'mssis_wpi',
    'EEZ': 'mssis_eez',
    'AO': 'mssis_ao',
}
df = df.rename(columns=column_names)

In [None]:
# Confirm the renamed columns and their dtypes.
df.info()

In [None]:
# Combine the attribute table with the point geometries, declaring the
# coordinates as EPSG:4326.
gdf = geopandas.GeoDataFrame(
    df,
    geometry=geometry,
    crs="EPSG:4326",
)

In [None]:
# Preview the first rows of the GeoDataFrame, including the geometry column.
gdf.head()

In [None]:
# Rename the active geometry column to 'ais_geometry'.
# rename_geometry renames the column and keeps it registered as the active
# geometry in one step, replacing the manual rename + set_geometry pair.
gdf = gdf.rename_geometry('ais_geometry')

In [None]:
# Confirm the final column names and dtypes of the GeoDataFrame.
gdf.info()

In [None]:
# Display the coordinate reference system attached to the GeoDataFrame.
gdf.crs

In [None]:
# Preview the first rows of the GeoDataFrame as it will be exported.
gdf.head()

In [None]:
# Count the AIS records for every (mssis_wpi, mssis_eez, mssis_ao)
# combination and keep the result as a list of plain dicts, one per combination.
location_groups = gdf.groupby(['mssis_wpi', 'mssis_eez', 'mssis_ao'])
location_counts = location_groups.size().reset_index(name='ais_count')
mssis_ais_records_location_count = location_counts.to_dict(orient='records')

In [None]:
# Number of distinct (mssis_wpi, mssis_eez, mssis_ao) combinations found.
len(mssis_ais_records_location_count)

In [None]:
# Number of distinct mssis_wpi values, for comparison with the combination count.
len(gdf['mssis_wpi'].unique())

In [None]:
# Show the per-combination record counts.
mssis_ais_records_location_count

In [None]:
# Write the per-(mssis_wpi, mssis_eez, mssis_ao) record counts to a JSON file
# holding one object per combination.
region_counts = (
    gdf.groupby(['mssis_wpi', 'mssis_eez', 'mssis_ao'])
    .size()
    .reset_index(name='ais_count')
)
region_target = out_path + 'mssis-ais-region_v1.json'
region_counts.to_json(region_target, orient='records')

In [None]:
# Export every AIS record with its point geometry as GeoJSON.
geojson_target = out_path + 'mssis-ais-records_v1.geojson'
gdf.to_file(geojson_target, driver='GeoJSON')

In [None]:
# Export the same records as a GeoPackage.
gpkg_target = out_path + 'mssis-ais-records_v1.gpkg'
gdf.to_file(gpkg_target, driver='GPKG')