In [None]:
%load_ext autoreload
%autoreload 2


import os
import sys

# Put the parent directory on the import path (once), presumably so that the
# project modules imported below (`preprocess`, `road_network`) can be resolved.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import pandas as pd
import geopandas as gpd
import folium
from shapely.geometry import LineString
from shapely import wkt
import numpy as np
import swifter
from preprocess import *
from preprocess import remove_outlier_trajectories
from road_network import RoadNetwork

In [None]:
# Original Porto trajectory data.
df = pd.read_csv("../data/trajectories/Porto/train.csv")

# Road network of Porto; its bounds are used for clipping and the network
# itself for map matching further down.
network = RoadNetwork(
    "Porto, Portugal",
    network_type="drive",
    retain_all=True,
    truncate_by_edge=True,
)

In [None]:
# Preprocess: clip the trajectories to the Porto bounds and filter by the
# minimum number of points per trajectory (takes around 2h on pascal).
# Trajectories that leave the Porto area and later re-enter it are split into
# separate trajectories.
fdf = preprocess_trajectories_porto(
    df,
    city_bounds=network.bounds,
    polyline_convert=True,
)
# Optionally persist the preprocessed dataframe:
# fdf.to_csv("../datasets/trajectories/Porto/clipped_porto_full_10pmin_2mil.csv")

In [None]:
from ast import literal_eval

# Bring the trajectories into the format used for FMM matching and add a
# timestamp column.
# .copy() makes the column assignments below act on an independent frame
# rather than on a possible view of `fdf`.
df_fmm = fdf.loc[:, ["TRIP_ID", "POLYLINE", "coords"]].copy()

# `coords` is parsed with literal_eval, i.e. it is expected to hold the string
# representation of a Python literal (presumably the list of points).
df_fmm["coords"] = df_fmm["coords"].swifter.apply(literal_eval)

# One timestamp per point, spaced 15 apart: 0, 15, 30, ...
# .tolist() yields plain Python ints. list(np.arange(...)) would keep NumPy
# scalars, which NumPy >= 2 renders as "np.int64(0)" when the list is written
# to CSV -- that text can no longer be parsed back or fed to FMM.
df_fmm["timestamp"] = df_fmm["coords"].swifter.apply(
    lambda x: (np.arange(len(x)) * 15).tolist()
)

# Running 1-based id for every trajectory.
df_fmm["id"] = np.arange(1, df_fmm.shape[0] + 1)

# The parsed coordinates were only needed to count the points.
df_fmm = df_fmm.drop(["coords"], axis=1)
df_fmm.to_csv("../data/trajectories/Porto/mapped_id_poly_clipped_timestamp.csv", sep=";", index=False)

In [None]:
# Re-read the saved table and strip the list brackets from the timestamp
# column, then write the result to the file used as FMM input.
tdf = pd.read_csv("../data/trajectories/Porto/mapped_id_poly_clipped_timestamp.csv", sep=";")

# regex=False: "[" and "]" must be treated as literal characters. The default
# of `regex` differs between pandas versions, and "[" alone is not a valid
# regular expression.
tdf["timestamp"] = (
    tdf["timestamp"]
    .str.replace("[", "", regex=False)
    .str.replace("]", "", regex=False)
)
tdf.to_csv("../datasets/trajectories/Porto/mapped_id_poly_clipped_timestamp.csv", sep=";", index=False)

In [None]:
from ast import literal_eval

# Check that every trajectory has exactly one timestamp per GPS point.
# The timestamps are read back from the saved file, the points come from `fdf`.
tdf = pd.read_csv("../data/trajectories/Porto/mapped_id_poly_clipped_timestamp.csv", sep=";")
cdf = fdf.loc[:, ["coords"]].copy()
cdf["coords"] = cdf["coords"].swifter.apply(literal_eval)
tdf["timestamp"] = tdf["timestamp"].swifter.apply(literal_eval)

# Compare by position: the file was written without its index, so the row
# labels of `tdf` need not match those of `fdf`.
n_points = cdf["coords"].str.len().to_numpy()
n_stamps = tdf["timestamp"].str.len().to_numpy()
assert len(n_points) == len(n_stamps), "row count of fdf and the saved file differ"
assert (n_points == n_stamps).all(), "number of timestamps differs from number of GPS points"

In [None]:
# Mean number of timestamps per trajectory.
df_fmm["timestamp"].apply(len).mean()

### Map Matching

Next, we need to map-match the trajectories. We use Fast Map Matching (FMM, https://fmm-wiki.github.io/). For faster map matching, we recommend using the command-line program instead of the Python wrapper that is used in the next cell.

In [None]:
# Map the Porto GPS points to road segments using FMM -> takes really long!
fmm_files = dict(
    network_file="../osm_data/porto/edges.shp",
    input_file="../datasets/trajectories/Porto/mapped_id_poly_clipped_timestamp.csv",
    output_file="../datasets/trajectories/Porto/road-segment-mapping.txt",
)
network.fmm_trajectorie_mapping(**fmm_files)

In [None]:
from ast import literal_eval

# Preprocess the road-segment mapping; especially the speed and distance
# values need to be verified.
# NOTE(review): this reads "road-segment-mapping.csv", whereas the FMM cell
# above writes "road-segment-mapping.txt" -- confirm which file is intended.
df = pd.read_csv(
    "../datasets/trajectories/Porto/road-segment-mapping.csv",
    sep=";",
)
df = remove_outlier_trajectories(
    df,
    min_edges_traversed=3,
    max_speed=10.0,
)
df.to_csv(
    "../datasets/trajectories/Porto/road_segment_map_final.csv",
    sep=";",
)

### Speed Features Normalized 



In [None]:

from trajectory import Trajectory

# Build the Trajectory object from the outlier-filtered road-segment mapping
# saved by the preprocessing cell above.
traj = Trajectory(
    "../datasets/trajectories/Porto/road_segment_map_final.csv"
)

In [None]:
"""
Generate traj features 
"""
# Compute the speed features of the trajectories on the road network.
features = traj.generate_speed_features(network)

# Clamp negative average speeds to 0. Only the "avg_speed" column is written:
# a bare boolean-mask assignment (`features[mask] = 0`) would overwrite every
# column of the matching rows, including any identifier columns.
features.loc[features["avg_speed"] < 0, "avg_speed"] = 0

# NOTE(review): the output goes to the "sf" directory although all inputs in
# this notebook are Porto files -- confirm the target path.
features.to_csv("../datasets/trajectories/sf/speed_features_unnormalized.csv")