In [8]:
import geopandas as gpd
import pandas as pd

import os
import json

import trackintel as ti
from trackintel.io.dataset_reader import read_geolife, geolife_add_modes_to_triplegs

In [2]:
# Report the installed trackintel version. The package is imported under the
# alias `ti`, so the bare name `trackintel` is not bound on a fresh kernel.
print(ti.__version__)

1.2.3


In [3]:
# Load the JSON configuration stored two directory levels above the working directory.
# NOTE(review): despite its name, DATA_DIR holds the path of the config file itself.
DATA_DIR = os.path.join(os.pardir, os.pardir, "paths.json")
with open(DATA_DIR) as config_file:
    CONFIG = json.load(config_file)

In [4]:
# Read the GeoLife data; the folder is resolved from the "data_dir" entry of CONFIG.
geolife_dir = os.path.join("..", "..", CONFIG["data_dir"], "Data")
pfs, mode_labels = read_geolife(geolife_dir, print_progress=True)

100%|██████████| 182/182 [00:56<00:00,  3.23it/s]


In [5]:
# Generate staypoints from the positionfixes.
pfs, sp = pfs.generate_staypoints(
    time_threshold=5.0,
    gap_threshold=1e6,
    print_progress=True,
    n_jobs=-1,
)
# Duration of every staypoint in seconds.
stay_span = sp["finished_at"] - sp["started_at"]
sp["duration"] = stay_span.dt.total_seconds()

100%|██████████| 182/182 [00:30<00:00,  5.90it/s]


In [7]:
# Generate triplegs from the positionfixes and staypoints, then attach the
# mode labels returned by the GeoLife reader.
pfs, tpls = pfs.generate_triplegs(
    sp,
    gap_threshold=15,
    print_progress=True,
)
tpls = geolife_add_modes_to_triplegs(tpls, mode_labels)

 20967947 20967948   123109   123110   118896   118897   119237   119238
   121376   121377   126464   126465 23445938 23445939  9935061  9935062] lead to invalid tripleg geometries. The resulting triplegs were omitted and the tripleg id of the positionfixes was set to nan


In [10]:
# Flag activity staypoints; the resulting "is_activity" column is used for
# filtering further below.
# NOTE(review): threshold is presumably in minutes — confirm against the trackintel docs.
activity_time_threshold = 15
sp = sp.create_activity_flag(time_threshold=activity_time_threshold)

In [11]:
# Generate trips from the staypoints and triplegs; no trip geometry is requested.
trip_gap_threshold = 15
sp, tpls, trips = sp.generate_trips(
    tpls,
    gap_threshold=trip_gap_threshold,
    add_geometry=False,
)

In [None]:

# assign mode: keep the labelled mode where one exists and fall back to the
# predicted mode for triplegs without a label
# NOTE(review): `predict_transport_mode` was referenced unqualified but is not imported
# in the visible cells, so it is called through the `ti` alias here.
predicted_mode = ti.analysis.predict_transport_mode(tpls)["mode"]
# fillna aligns on the tripleg index, so only missing labels are replaced and no
# temporary column has to be created and dropped again
tpls["mode"] = tpls["mode"].fillna(predicted_mode)

# get the length of every tripleg
# NOTE(review): `calculate_haversine_length` was referenced unqualified but is not
# imported in the visible cells, so it is called through the `ti` alias here.
tpls["length_m"] = ti.geogr.calculate_haversine_length(tpls)

# only triplegs that carry a trip id contribute to the per-trip statistics
trip_tpls = tpls.dropna(subset=["trip_id"])

# main mode of a trip = mode of its longest tripleg; this also covers trips with a
# single tripleg, so no special case is needed (stable sort keeps ties reproducible)
main_mode = (
    trip_tpls.sort_values(by="length_m", ascending=False, kind="stable")
    .drop_duplicates(subset="trip_id", keep="first")
    .set_index("trip_id")["mode"]
)
# length of a trip = summed length of its triplegs
trip_length = trip_tpls.groupby("trip_id")["length_m"].sum()

# one row per trip, indexed by the trip id under the name "id" so it joins onto `trips`
res = pd.concat([trip_length, main_mode], axis=1).rename_axis("id")

trips_with_main_mode = trips.join(res, how="left")
# trips without any mode information are discarded
trips_with_main_mode = trips_with_main_mode.dropna(subset=["mode"])
# NOTE(review): `get_mode_geolife` is not defined or imported in the visible cells —
# confirm where it comes from before running this cell.
trips_with_main_mode_cate = get_mode_geolife(trips_with_main_mode)

print(trips_with_main_mode_cate["mode"].value_counts())

# keep activity staypoints only and discard the flag and trip bookkeeping columns
activity_mask = sp["is_activity"].eq(True)
sp = sp.loc[activity_mask].drop(columns=["is_activity", "trip_id", "next_trip_id"])

# generate locations from the remaining staypoints
# NOTE(review): `epsilon` is not defined in the visible cells — confirm it is set in a
# configuration cell before this one runs.
sp, locs = sp.as_staypoints.generate_locations(
    epsilon=epsilon,
    num_samples=2,
    distance_metric="haversine",
    agg_level="dataset",
    n_jobs=-1,
    print_progress=True,
)
# staypoints without a location id are treated as noise and left out
has_location = sp["location_id"].notna()
valid_sp = sp.loc[has_location].copy()

# save locations: keep the first row per location id and only the locations that
# are referenced by a staypoint
locs = locs[~locs.index.duplicated(keep="first")]
filtered_locs = locs.loc[locs.index.isin(sp["location_id"].unique())]

# create the output folder if it is missing; exist_ok replaces the separate existence
# check, and plain os.path is used because `Path` is never imported in this notebook
path = os.path.join(".", "data")
os.makedirs(path, exist_ok=True)
# NOTE(review): `dataset` is not defined in the visible cells — confirm it is set in a
# configuration cell before this one runs.
filtered_locs.as_locations.to_csv(os.path.join(".", "data", f"locations_{dataset}.csv"))

# merge staypoint with trips info
# staypoints: keep rows with a non-null prev_trip_id and move the index into a column
has_prev_trip = valid_sp["prev_trip_id"].notna()
sp = valid_sp.loc[has_prev_trip].reset_index().copy()

# trips: drop the time and user columns and expose the "id" index as "trip_id"
trip_info = trips_with_main_mode_cate.drop(columns=["started_at", "finished_at", "user_id"])
trips = trip_info.reset_index().rename(columns={"id": "trip_id"}).copy()
