# MobiML Nautilus Demo

In [None]:
import os
import sys
import pandas as pd

sys.path.append("..")
from mobiml.datasets import BrestAIS, TIMESTAMP, MOVER_ID, TRAJ_ID
from mobiml.preprocessing import TrajectorySubsampler, TrajectoryFilter, TrajectoryEnricher, TrajectorySplitter

## Loading Brest / Nari Data

### Dynamic

In [None]:
%%time
# Load the dynamic AIS messages through mobiml's BrestAIS loader.
# NOTE(review): nrows=10000 presumably caps the number of CSV rows read so the
# demo stays small, and filter_mid=True presumably filters on the MMSI country
# code (MID) — confirm both against the mobiml BrestAIS implementation.
ais = BrestAIS(r"/mnt/c/Users/GraserA/Documents/Zenodo/Integrated Maritime/nari_dynamic.csv", filter_mid=True, nrows=10000)
# Preview the first rows of the loaded dataframe.
ais.df.head()

### Static

In [None]:
# Read the static AIS messages and reduce them to one ship type per vessel:
# order by time 't', discard rows without a ship type, then keep only the
# last remaining row for every 'sourcemmsi'.
df_static = pd.read_csv(r"/mnt/c/Users/GraserA/Documents/Zenodo/Integrated Maritime/nari_static.csv")
df_static = (
    df_static
    .sort_values('t')
    .dropna(subset=['shiptype'])
    .drop_duplicates(subset=['sourcemmsi'], keep='last')
    [['sourcemmsi', 'shiptype']]
)
# Display the resulting vessel -> ship type lookup table.
df_static

## Preprocessing 

### Subsample Trajectories with $\Delta t_{min}$

In [None]:
%%time
# Subsample the dataset with a minimum time delta of 10 seconds.
# NOTE(review): presumably this keeps consecutive records of a mover at least
# min_dt_sec apart — confirm against mobiml's TrajectorySubsampler.
ais = TrajectorySubsampler(ais).subsample(min_dt_sec=10)

# Report how many records remain after subsampling.
print(f'[Subsampling] Dataset AIS Positions: {len(ais.df)}')
# Summary statistics of the time gaps (in seconds) between consecutive records
# of each mover; the trailing '=' makes the f-string echo the expression itself.
print(f'{ais.df.sort_values(TIMESTAMP).groupby(MOVER_ID)[TIMESTAMP].diff().dt.total_seconds().describe().astype(str)=}')

### Drop Trajectories with fewer than $Points_{min}$ Locations

In [None]:
%%time
# Apply the minimum-points filter with a threshold of 20.
# NOTE(review): presumably drops trajectories with fewer than 20 records —
# confirm the exact boundary (< vs <=) in mobiml's TrajectoryFilter.
ais = TrajectoryFilter(ais).filter_min_pts(min_pts=20)

# Report how many records remain after pruning.
print(f'[Trajectory Pruning] Dataset AIS Positions: {len(ais.df)}')

### Re-calculate Speed and Course over Ground

In [None]:
# Display the dataframe before speed and direction are recalculated.
ais.df

In [None]:
%%time
# Recompute speed, overwriting any existing values.
# NOTE(review): units=('nm','h') presumably means nautical miles per hour
# (knots) — confirm against mobiml's TrajectoryEnricher.
ais = TrajectoryEnricher(ais).add_speed(units=('nm','h'), overwrite=True)
# Recompute the direction and store it under 'courseoverground', overwriting
# any existing values in that column.
ais = TrajectoryEnricher(ais).add_direction(name='courseoverground', overwrite=True)

In [None]:
# Display the dataframe after speed and direction have been recalculated.
ais.df

### Drop Speed Outliers 

In [None]:
%%time
# Apply the speed filter with bounds 1 and 50.
# NOTE(review): the bounds are presumably in the units computed by add_speed
# earlier in the notebook (nm/h) — confirm against mobiml's TrajectoryFilter.
ais = TrajectoryFilter(ais).filter_speed(min_speed=1, max_speed=50)
# Report how many records remain after the speed filter.
print(f'[Speed Outliers] Dataset AIS Positions: {len(ais.df)}')
# Summary statistics of the "speed" column, rounded to 5 decimals; the trailing
# '=' makes the f-string echo the expression itself.
print(f'{ais.df["speed"].describe().round(5).astype(str)=}')

### Temporal Segmentation / Splitting Trajectories

In [None]:
%%time
from datetime import timedelta
# Split trajectories using an observation gap of 30 minutes.
# NOTE(review): presumably a new trajectory segment starts wherever consecutive
# records are more than 30 minutes apart — confirm against mobiml's
# TrajectorySplitter.
ais = TrajectorySplitter(ais).split(observation_gap=timedelta(minutes=30))
# Re-apply the minimum-points filter, this time with a threshold of 10.
ais = TrajectoryFilter(ais).filter_min_pts(min_pts=10)
# Report how many records remain after segmentation.
print(f'[Temporal Segmentation] Dataset AIS Positions: {len(ais.df)}')

In [None]:
# Number of records per (mover, trajectory) group, sorted ascending.
# size() counts the rows of each group directly, which replaces the
# per-group Python call that apply(len) performed.
ais.df.groupby([MOVER_ID, TRAJ_ID]).size().sort_values()

### Save Results

In [None]:
# Write the preprocessed records, ordered by timestamp, to a CSV file.
output_dir = 'data'
# to_csv does not create missing parent directories, so make sure the target
# directory exists (a fresh checkout may not have it yet).
os.makedirs(output_dir, exist_ok=True)
ais.df.sort_values(TIMESTAMP).to_csv(
    # Join directory and file name as separate components so os.path.join
    # actually builds the path instead of passing a single string through.
    os.path.join(output_dir, 'nautilus_trajectories_preprocessed.csv'),
    index=True,
    header=True,
)

## Training