# MobiML FL demo

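This notebook demonstrates federated learning (FL) with Flower and MobiML: it extracts AIS data for stationary clients (antennas), prepares trajectory training data, and starts a Flower server.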

In [2]:
import os
import sys
import pickle
import pandas as pd
import geopandas as gpd
import numpy as np
from datetime import datetime, timedelta
from copy import deepcopy
from typing import Dict, List, Tuple
from pathlib import Path
from sklearn.metrics import log_loss
from sklearn.preprocessing import MultiLabelBinarizer

import flwr as fl
from flwr.common import Metrics

import sys
sys.path.append("..")
from mobiml.datasets import AISDK, MOVER_ID, SHIPTYPE
from mobiml.transforms import StationaryClientExtractor, AISTripExtractor, TrajectoryAggregator
from mobiml.models import SummarizedAISTrajectoryClassifier
from mobiml.models.ais_trajectory_classifier import AISLoader, get_evaluate_fn, fit_round, weighted_average
from mobiml.utils import convert_wgs_to_utm

## Extract stationary client (antenna) data

In [15]:
# Input AIS archive and the antenna (stationary client) locations as WKT points.
path = "./data/aisdk-2018-02.zip"
antennas = [
    'Point (11.96524 57.70730)',
    'Point (11.63979 57.71941)',
    'Point (11.78460 57.57255)',
]
# Radius of the buffer drawn around each antenna.
antenna_radius_meters = 25_000

In [16]:
# Parse the antenna WKT strings once; the parsed points feed both the UTM
# lookup and the client GeoDataFrame below.
antenna_points = gpd.GeoSeries.from_wkt(antennas)

# Derive the projected CRS from the first antenna rather than repeating its
# coordinates as literals, so editing `antennas` keeps the projection in sync.
first_antenna = antenna_points.iloc[0]
epsg_code = convert_wgs_to_utm(first_antenna.x, first_antenna.y)

# One row per antenna; the positional index doubles as the client id.
ids = [{'client': i} for i in range(len(antennas))]
df = pd.DataFrame(ids)
df['geometry'] = antenna_points
gdf = gpd.GeoDataFrame(df, geometry=df.geometry, crs=4326)

# Buffer in the projected CRS so that antenna_radius_meters is applied in the
# CRS units (assumes convert_wgs_to_utm returns a metric UTM EPSG code -- TODO confirm).
gdf = gdf.to_crs(epsg_code)
gdf['geometry'] = gdf.buffer(antenna_radius_meters)

# Back to EPSG:4326 to obtain the overall bounding box of all buffers.
buffered_antennas = gdf.to_crs(4326)
min_lon, min_lat, max_lon, max_lat = buffered_antennas.geometry.total_bounds

In [17]:
# Directory for intermediate files; created on first run only.
out_dir = "temp"
out_dir_missing = not os.path.exists(out_dir)
if out_dir_missing:
    print(f"{datetime.now()} Creating output directory {out_dir} ...")
    os.makedirs(out_dir)

In [18]:
# Build the AISDK dataset from the archive, handing over the bounding box of
# the buffered antennas (presumably used to restrict the loaded records --
# confirm in AISDK).
load_started = datetime.now()
print(f"{load_started} Loading data from {path}")
aisdk = AISDK(path, min_lon, min_lat, max_lon, max_lat)

2024-07-22 23:06:08.633591 Loading data from ./data/aisdk-2018-02.zip
2024-07-22 23:06:08.647426 Loading aisdk_20180201.csv ...
2024-07-22 23:06:35.785940 Loading aisdk_20180202.csv ...
2024-07-22 23:07:00.968983 Loading aisdk_20180203.csv ...
2024-07-22 23:07:27.307468 Loading aisdk_20180204.csv ...
2024-07-22 23:07:53.457659 Loading aisdk_20180205.csv ...
2024-07-22 23:08:19.629252 Loading aisdk_20180206.csv ...
2024-07-22 23:08:45.465551 Loading aisdk_20180207.csv ...
2024-07-22 23:09:12.192734 Loading aisdk_20180208.csv ...
2024-07-22 23:09:39.439480 Loading aisdk_20180209.csv ...
2024-07-22 23:10:07.348422 Loading aisdk_20180210.csv ...
2024-07-22 23:10:34.896663 Loading aisdk_20180211.csv ...
2024-07-22 23:11:02.491208 Loading aisdk_20180212.csv ...
2024-07-22 23:11:28.398044 Loading aisdk_20180213.csv ...
2024-07-22 23:11:56.487325 Loading aisdk_20180214.csv ...
2024-07-22 23:12:24.529539 Loading aisdk_20180215.csv ...
2024-07-22 23:12:51.019803 Loading aisdk_20180216.csv ...
20

In [19]:
# Combine the loaded AIS data with the buffered antenna polygons to obtain
# the per-client data.
extraction_started = datetime.now()
print(f"{extraction_started} Extracting client data ...")
client_gdf = StationaryClientExtractor(aisdk, buffered_antennas)

2024-07-22 23:20:00.818317 Extracting client data ...
2024-07-22 23:20:00.818393 Converting to GeoDataFrame ...
2024-07-22 23:22:22.505560 Computing overlay ...


In [20]:
# Persist the extracted client data; the training-data section reads this
# file back via gpd.read_feather.
client_feather_path = "temp/ais-antenna.feather"
write_message = f"{datetime.now()} Writing output to {client_feather_path}"
print(write_message)
client_gdf.to_feather(client_feather_path)

2024-07-22 23:26:08.384170 Writing output to temp/ais-antenna.feather


## Prepare training data

In [21]:
# H3 resolution handed to TrajectoryAggregator.aggregate_trajs below.
h3_resolution: int = 8

In [22]:
# Reload the client data written by the extraction section.
load_message = f"{datetime.now()} Loading data from {client_feather_path} ..."
print(load_message)
gdf = gpd.read_feather(client_feather_path)

# Per-mover lookup of ship type and name, taking the mode of each column.
# NOTE(review): pd.Series.mode can yield several values on ties -- confirm
# downstream code copes with non-scalar entries.
vessel_attributes = gdf.groupby(MOVER_ID)[["ship_type", "Name"]]
vessels = vessel_attributes.agg(pd.Series.mode)

2024-07-22 23:26:29.960500 Loading data from temp/ais-antenna.feather ...


In [23]:
# Turn the client records into trips, using a one-hour observation gap
# as the gap_duration argument.
print(f"{datetime.now()} Extracting trips ...")
observation_gap = timedelta(hours=1)
trip_extractor = AISTripExtractor(gdf)
trajs = trip_extractor.get_trips(gap_duration=observation_gap)

2024-07-22 23:26:56.212449 Extracting trips ...
Original Dataframe size: 17414504 rows
   Reduced to: 17414504 rows after removing records with speed=0
Creating TrajectoryCollection ...
   Created: TrajectoryCollection with 1493 trajectories
Generalizing ...


  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.head(1)["t"][0]
  prev_t = temp_df.h

Splitting at observation gaps (1:00:00) ...
   Split: TrajectoryCollection with 5928 trajectories


In [24]:
# Aggregate each trajectory into summary features, enriched with the
# per-vessel attributes.
# NOTE(review): `trajs` is rebound here, so re-running this cell would feed
# the aggregated result back into TrajectoryAggregator.
print(f"{datetime.now()} Computing trajectory features ...")
aggregator = TrajectoryAggregator(trajs, vessels)
trajs = aggregator.aggregate_trajs(h3_resolution)

2024-07-22 23:42:42.102760 Computing trajectory features ...
2024-07-22 23:42:42.102844 Enriching trajectories ...
Enriched dataset columns: Index(['traj_id', 'start_t', 'end_t', 'geometry', 'length', 'direction',
       'client', 'mover_id', 'speed_max', 'speed_median', 'H3_seq',
       'speed_start', 'direction_start', 'x_start', 'y_start', 'speed_end',
       'direction_end', 'x_end', 'y_end', 'ship_type'],
      dtype='object')


In [25]:
# Persist the per-vessel attribute table.
with Path("temp/vessels-stationary.pickle").open("wb") as vessels_file:
    pickle.dump(vessels, vessels_file)

In [26]:
# Persist the aggregated trajectories; the federated-learning section passes
# this file to AISLoader via `data_path`.
with Path("temp/training-data-stationary.pickle").open("wb") as training_file:
    pickle.dump(trajs, training_file)

## Start Flower server for federated learning

Reference: the Flower "flower-in-30-minutes" tutorial notebook — https://github.com/adap/flower/blob/main/examples/flower-in-30-minutes/tutorial.ipynb

In [27]:
# Seed NumPy's global RNG before the loader and model are created.
np.random.seed(0)

data_path = "temp/training-data-stationary.pickle"
# File stem with the "training-data-" prefix removed.
scenario_name = Path(data_path).stem.replace("training-data-", "")

vessel_types = ['Cargo', 'Passenger', 'Tanker']
# Alternative feature sets noted by the author:
# ['SOG_max', 'SOG_median', 'LON_start', 'LAT_start', 'LON_end', 'LAT_end', 'length']  'H3_seq'
traj_features = [
    'speed_max', 'speed_median',
    'x_start', 'y_start',
    'x_end', 'y_end',
    'length',
]
n_features = 7  # 1804  # depends on the number of H3 cells in H3_seq
test_size = 0.33

data_loader = AISLoader(vessel_types, traj_features, test_size, path=data_path)
model = SummarizedAISTrajectoryClassifier(vessel_types, n_features)

# FedAvg with server-side evaluation; fit and evaluate metrics from the
# clients are both combined with weighted_average.
server_side_evaluate = get_evaluate_fn(model, data_loader, scenario_name)
strategy = fl.server.strategy.FedAvg(
    min_available_clients=2,
    evaluate_fn=server_side_evaluate,
    on_fit_config_fn=fit_round,
    evaluate_metrics_aggregation_fn=weighted_average,
    fit_metrics_aggregation_fn=weighted_average,
)

# NOTE: this call occupies the kernel while the server runs; the recorded
# run was interrupted (KeyboardInterrupt) while requesting initial
# parameters from a client.
server_config = fl.server.ServerConfig(num_rounds=10)
fl.server.start_server(
    server_address="0.0.0.0:8080",
    strategy=strategy,
    config=server_config,
)


Vessel types: ['Cargo', 'Passenger', 'Tanker']
Trajectory features: ['speed_max', 'speed_median', 'x_start', 'y_start', 'x_end', 'y_end', 'length']
Test size: 0.33


[92mINFO [0m:      Starting Flower server, config: num_rounds=10, no round_timeout
07/22/2024 23:45:12:INFO:Starting Flower server, config: num_rounds=10, no round_timeout
[92mINFO [0m:      Flower ECE: gRPC server running (10 rounds), SSL is disabled
07/22/2024 23:45:13:INFO:Flower ECE: gRPC server running (10 rounds), SSL is disabled
[92mINFO [0m:      [INIT]
07/22/2024 23:45:13:INFO:[INIT]
[92mINFO [0m:      Requesting initial parameters from one random client
07/22/2024 23:45:13:INFO:Requesting initial parameters from one random client


Filtering ship_type to ['Cargo', 'Passenger', 'Tanker'] ...
... 3960 found.
Available trajectory columns: Index(['traj_id', 'start_t', 'end_t', 'geometry', 'length', 'direction',
       'client', 'mover_id', 'speed_max', 'speed_median', 'H3_seq',
       'speed_start', 'direction_start', 'x_start', 'y_start', 'speed_end',
       'direction_end', 'x_end', 'y_end', 'ship_type'],
      dtype='object')
2024-07-22 23:45:12.963562 Splitting dataset ...
Using 807 movers for training and 398 for testing ...
(2647 trajectories for training and 1313 for testing)


KeyboardInterrupt: 

In [None]:
# NOTE(review): unexecuted scratch cell, apparently left over from development;
# nothing else in the notebook refers to it.
1+2