# MobiML Nautilus Demo

In [None]:
import os
import sys
import pandas as pd
from torch.utils.data import DataLoader

sys.path.append("..")
from mobiml.datasets import BrestAIS, TIMESTAMP, MOVER_ID, TRAJ_ID, PreprocessedBrestAIS
from mobiml.preprocessing import TrajectorySubsampler, TrajectoryFilter, TrajectoryEnricher, TrajectorySplitter
from mobiml.loaders import TemporalSplitter
from mobiml.transforms import DeltaDatasetCreator

## Loading Brest / Nari data

### Dynamic

##### This dataset can be downloaded from: https://zenodo.org/record/1167595/files/%5BP1%5D%20AIS%20Data.zip?download=1

In [None]:
%%time
# Load the dynamic (positional) AIS messages, limited to the first 100,000 CSV rows
# to keep the demo fast. filter_mid is passed through to BrestAIS unchanged
# (presumably it filters on the MMSI's Maritime Identification Digits — TODO confirm).
ais = BrestAIS(
    r"../examples/data/nari_dynamic.csv",
    filter_mid=True,
    nrows=100_000,
)
# Preview the first rows of the loaded DataFrame.
ais.df.head()

### Static

In [None]:
# Build a (sourcemmsi, shiptype) lookup table from the static AIS messages:
# order rows by 't', discard rows without a ship type, and keep only the last
# remaining row for every sourcemmsi.
df_static = (
    pd.read_csv(r"../examples/data/nari_static.csv")
    .sort_values('t')
    .dropna(subset=['shiptype'])
    .drop_duplicates(subset=['sourcemmsi'], keep='last')
    [['sourcemmsi', 'shiptype']]
)
df_static

## Preprocessing 

### Subsample trajectories with $\Delta t_{min}$

In [None]:
%%time
# Subsample the dataset with min_dt_sec=10 (per the section heading, the minimum
# time delta between consecutive records; presumably in seconds — TODO confirm).
ais = TrajectorySubsampler(ais).subsample(min_dt_sec=10)

# Report how many AIS positions remain after subsampling.
print(f'[Subsampling] Dataset AIS Positions: {len(ais.df)}')
# Summary statistics of the time gap (in seconds) between consecutive records of
# each mover. The '=' specifier also prints the expression text itself.
print(f'{ais.df.sort_values(TIMESTAMP).groupby(MOVER_ID)[TIMESTAMP].diff().dt.total_seconds().describe().astype(str)=}')

### Drop trajectories with fewer than $Points_{min}$ locations

In [None]:
%%time
# Apply the minimum-points filter with min_pts=20 (per the section heading, this
# drops trajectories that have fewer locations than the threshold).
ais = TrajectoryFilter(ais).filter_min_pts(min_pts=20)

# Report how many AIS positions remain after pruning.
print(f'[Trajectory Pruning] Dataset AIS Positions: {len(ais.df)}')

### Re-calculate speed and course over ground

In [None]:
# Display the DataFrame as it looks before speed and direction are re-calculated.
ais.df

In [None]:
%%time
# Re-calculate speed in nautical miles per hour, overwriting any existing values.
ais = TrajectoryEnricher(ais).add_speed(units=('nm','h'), overwrite=True)
# Re-calculate the direction (course over ground, per the section heading),
# again overwriting existing values.
ais = TrajectoryEnricher(ais).add_direction(overwrite=True)

In [None]:
# Display the DataFrame again to inspect the re-calculated speed and direction.
ais.df

### Drop speed outliers 

In [None]:
%%time
# Apply the speed filter with bounds 1 and 50 (presumably in the nm/h units
# used when the speed was computed — TODO confirm).
ais = TrajectoryFilter(ais).filter_speed(min_speed=1, max_speed=50)
# Report how many AIS positions remain after the speed filter.
print(f'[Speed Outliers] Dataset AIS Positions: {len(ais.df)}')
# Summary statistics of the "speed" column, rounded to 5 decimals. The '='
# specifier also prints the expression text itself.
print(f'{ais.df["speed"].describe().round(5).astype(str)=}')

### Temporal segmentation / splitting trajectories

In [None]:
%%time
from datetime import timedelta
# Split trajectories using an observation gap of 30 minutes.
ais = TrajectorySplitter(ais).split(observation_gap=timedelta(minutes=30))
# Re-apply the minimum-points filter, this time with min_pts=10, to the split result.
ais = TrajectoryFilter(ais).filter_min_pts(min_pts=10)
# Report how many AIS positions remain after segmentation.
print(f'[Temporal Segmentation] Dataset AIS Positions: {len(ais.df)}')

In [None]:
# Number of records in each (mover, trajectory) group, smallest first.
# GroupBy.size() is the built-in equivalent of applying len to every group: it
# does not need the include_groups keyword (only available in recent pandas
# releases) and always returns a Series, so sort_values() also works when the
# DataFrame is empty.
ais.df.groupby([MOVER_ID, TRAJ_ID]).size().sort_values()

### Save results

In [None]:
# Persist the preprocessed trajectories as CSV, with a header row and without
# the DataFrame index.
ais.df.to_csv('data/nautilus_trajectories_preprocessed.csv', index=False, header=True)

In [None]:
# Reload the preprocessed trajectories from the CSV written in the previous step.
ais = PreprocessedBrestAIS('data/nautilus_trajectories_preprocessed.csv')
# Display the reloaded DataFrame.
ais.df

### Temporal train/dev/test split

50/25/25 split (e.g., given 3 months of data, ~1.5 months are used for training and ~0.75 months each for validation and testing)

In [None]:
# Apply the temporal train/dev/test split with TemporalSplitter's default settings.
# Later cells read a 'split' column from the result.
ais = TemporalSplitter(ais).split()
# Display the DataFrame after splitting.
ais.df

In [None]:
# Sanity check: within every (mover, trajectory, split) group the timestamps must
# be monotonically increasing; prints True only if this holds for all groups.
# The '=' specifier also prints the expression text itself.
print(f"Sanity Check #1;\n\t{ais.df.groupby([MOVER_ID, TRAJ_ID, 'split'])[TIMESTAMP].is_monotonic_increasing.all()=}")

### Create VRF training dataset 

Create delta dataset (with x, y, and t deltas) and split it into constant-length windows for ML model training

In [None]:
# Build the delta dataset, passing the 'split' column name and using 4 jobs
# (presumably parallel workers — TODO confirm).
traj_delta = DeltaDatasetCreator(ais).get_delta_dataset('split', njobs=4)
# Display the result.
traj_delta

In [None]:
# Build the windowed delta dataset with the same arguments as the delta dataset.
traj_delta_windows = DeltaDatasetCreator(ais).get_windowed_dataset('split', njobs=4)
# Cache the windows on disk so the training section can start from this file.
traj_delta_windows.to_pickle('data/traj_delta_windows.pickle')
# Display the result.
traj_delta_windows

## Training

In [None]:
import torch
import numpy as np
from mobiml.models.vrf import VesselRouteForecasting, RMSELoss, train_model, vrf_evaluate_model_singlehead
from mobiml.models.vrf_dataset import VRFDataset

### Create unified train/dev/test dataset(s)

In [None]:
# Reload the windowed dataset from the pickle file written during preprocessing.
# NOTE: unpickling executes arbitrary code; only load files you created yourself.
traj_delta_windows = pd.read_pickle('data/traj_delta_windows.pickle')

In [None]:
# Take an independent copy of the cross-section at index level 1 for each of the
# keys 1, 2 and 3, which are used as the train, dev and test sets respectively.
train_delta_windows, dev_delta_windows, test_delta_windows = (
    traj_delta_windows.xs(split_key, level=1).copy()
    for split_key in (1, 2, 3)
)

In [None]:
# Display the training windows.
train_delta_windows

### Create kinematic features' temporal sequence (i.e. training dataset)

In [None]:
# Batch size shared by all three data loaders.
BS = 1

# Build the training dataset first: its scaler is handed to the dev and test
# datasets so that all three use the same scaler object.
train_dataset = VRFDataset(train_delta_windows)
dev_dataset = VRFDataset(dev_delta_windows, scaler=train_dataset.scaler)
test_dataset = VRFDataset(test_delta_windows, scaler=train_dataset.scaler)

# Only the training loader shuffles; each loader collates with its own
# dataset's pad_collate.
train_loader = DataLoader(
    train_dataset, batch_size=BS, shuffle=True, collate_fn=train_dataset.pad_collate
)
dev_loader = DataLoader(
    dev_dataset, batch_size=BS, shuffle=False, collate_fn=dev_dataset.pad_collate
)
test_loader = DataLoader(
    test_dataset, batch_size=BS, shuffle=False, collate_fn=test_dataset.pad_collate
)

In [None]:


# Index of the CUDA device to use when CUDA is available.
GPUID = 0

# Run on the selected GPU when CUDA is available, otherwise on the CPU.
device = torch.device(f'cuda:{GPUID}' if torch.cuda.is_available() else 'cpu')

# Created without a dataset; only its input_feats attribute is read below.
ddc = DeltaDatasetCreator(None)

# Keyword arguments for the VesselRouteForecasting constructor.
model_params = {
    'input_size': len(ddc.input_feats),
    # First two entries of the training scaler's scale_ and mean_ arrays
    # (presumably the statistics of the two predicted coordinates — TODO confirm).
    'scale': {
        'sigma': torch.Tensor(train_dataset.scaler.scale_[:2]),
        'mu': torch.Tensor(train_dataset.scaler.mean_[:2]),
    },
    'bidirectional': True,
    'num_layers': 1,
    'hidden_size': 350,
    'fc_layers': [150],
}

In [None]:


# Instantiate the forecasting model and move it to the selected device.
model = VesselRouteForecasting(**model_params).to(device)

# Show the model and the device it runs on.
print(model)
print(f'{device=}')

# Loss function and optimiser used for training.
criterion = RMSELoss(eps=1e-4)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [None]:

# Upper edge of the evaluation bins built below (presumably seconds — TODO confirm).
MAX_DT = 1800

# Early-stopping patience. The original value was 10; the original author's note
# suspects that value is why the best model was not saved in this short demo.
# NOTE(review): that note mentions 3 training rounds, but train_model below is
# called with 10 — confirm which is intended.
PATIENCE = 0
# File to which both the current and the best model are saved.
model_path = 'data/nautilus.model'

# Bin edges from 0 to MAX_DT inclusive, in steps of 300.
evaluate_fun_params = {'bins': np.arange(0, MAX_DT + 1, 300)}

early_stop_params = {
    'patience': PATIENCE,
    'save_best': True,
    'path': model_path,
}

save_current_params = {'path': model_path}

train_model(
    model, device, criterion, optimizer, 10,  # originally: 100 rounds
    train_loader, dev_loader,
    early_stop=True,
    save_current=True,
    evaluate_fun=vrf_evaluate_model_singlehead,
    evaluate_fun_params=evaluate_fun_params,
    early_stop_params=early_stop_params,
    save_current_params=save_current_params,
)

In [None]:
# Load the checkpoint saved during training. map_location remaps the stored
# tensors onto the device selected in this session, so a checkpoint written on
# a GPU can still be loaded on a CPU-only machine or a different GPU index.
checkpoint = torch.load(model_path, map_location=device)

# Restore the model weights and the optimizer state from the checkpoint.
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Evaluate the restored model on the held-out test set, using the same
# evaluation parameters (bins) as during training.
vrf_evaluate_model_singlehead(model, device, criterion, test_loader, desc='ADE @ Test Set...', **evaluate_fun_params)