In [44]:
import pandas as pd
import numpy as np
import geopandas as gpd
import numpy.linalg as la
from tqdm.notebook import tqdm

In [45]:
# Default locations, used when the notebook is run by hand
input_path, output_path = "../data/raw/entd_2008/K_deploc.csv", "../data/entd_trips.csv"

# If a `snakemake` object exists in the namespace, use its first input and output instead
if "snakemake" in locals():
    input_path, output_path = snakemake.input[0], snakemake.output[0]

In [46]:
# Ordered (V2_MTP code prefix, mode) pairs. The order is significant: the pairs are
# applied one after the other with a prefix match, so a more specific prefix listed
# later (e.g. "2.20") overrides the assignment made by a shorter one (e.g. "2").
MODES_MAP = list({
    "1": "walk",
    "2": "car",
    "2.20": "bike", # bike
    "2.23": "car_passenger", # motorcycle passenger
    "2.25": "car_passenger", # same as above
    "3": "car",
    "3.32": "car_passenger",
    "4": "pt", # taxi
    "5": "pt",
    "6": "pt",
    "7": "pt", # plane
    "8": "pt", # boat
    # "9": "pt", # other (entry disabled)
}.items())

In [47]:
# Load only the trip columns that are used further down.
#
# The code-like columns (mode code and departement identifiers) are forced to text.
# If pandas inferred a numeric type for them, a mode code such as "2.20" would be
# parsed as 2.2 and no longer match the "2.20" prefix in MODES_MAP, and a departement
# such as "75" could end up as "75.0" and fail the departement filter.
#
# Columns of the file that are currently not loaded: IDENT_IND, V2_MMOTIFDES,
# V2_MMOTIFORI, V2_MORIHDEP, V2_MDESHARR, NDEP, V2_MOBILREF
df_trips = pd.read_csv(input_path, sep = ";", encoding = "latin1", usecols = [
    "V2_TYPJOUR", # day type, used for the weekday filter
    "V2_MDISTTOT", # distance, converted into routed_distance
    "IDENT_JOUR",
    "V2_MTP", # transport mode code, matched against MODES_MAP
    "V2_MDESDEP", "V2_MORIDEP", # destination / origin departement
    "PONDKI" # used as the trip weight
], dtype = {
    "V2_MTP": str, "V2_MORIDEP": str, "V2_MDESDEP": str
})

In [48]:
# Clean departement: missing identifiers become the label "undefined" and the
# resulting columns are stored as categoricals
for raw_column, clean_column in (
    ("V2_MORIDEP", "origin_departement_id"),
    ("V2_MDESDEP", "destination_departement_id"),
):
    df_trips[clean_column] = df_trips[raw_column].fillna("undefined").astype("category")

In [49]:
# Trip mode: every trip starts out as "pt"; afterwards each (prefix, mode) pair of
# MODES_MAP overwrites the mode of the trips whose V2_MTP code starts with the prefix.
# Pairs are applied in list order, so later pairs win over earlier ones.
mode_codes = df_trips["V2_MTP"].astype(str)
df_trips["mode"] = "pt"

for code_prefix, mode_name in MODES_MAP:
    has_prefix = mode_codes.str.startswith(code_prefix)
    df_trips.loc[has_prefix, "mode"] = mode_name

In [50]:
# Further trip attributes: scale the reported distance by 1000 (presumably km -> m,
# to be confirmed against the survey documentation) and replace missing values by zero.
# Original note: there should be just one such trip within Île-de-France.
df_trips["routed_distance"] = (df_trips["V2_MDISTTOT"] * 1000.0).fillna(0.0)

In [51]:
# Only leave weekday trips (V2_TYPJOUR == 1). The explicit copy makes the result an
# independent frame, so that adding columns to it afterwards does not trigger
# pandas' SettingWithCopyWarning.
df_trips = df_trips[df_trips["V2_TYPJOUR"] == 1].copy()

In [52]:
# Add weight to trips: expose the PONDKI column under the name "weight"
df_trips = df_trips.assign(weight = df_trips["PONDKI"])

In [53]:
# Filter for trips within the region: both the origin and the destination
# departement must be among the requested ones
requested_departments = [
    "75", "92", "94", "91", "93", "77", "95", "78"
]

# The explicit copy makes the filtered result an independent frame, so that adding
# columns to it afterwards does not trigger pandas' SettingWithCopyWarning.
df_trips = df_trips[
    df_trips["origin_departement_id"].astype(str).isin(requested_departments) &
    df_trips["destination_departement_id"].astype(str).isin(requested_departments)
].copy()

In [54]:
# Flag trips that start / end in departement "75".
# The comparison has to be made on the values themselves: comparing the `.str`
# accessor object with "75" evaluates to a single False, which would mark every
# trip as non-urban.
df_trips["urban_origin"] = df_trips["origin_departement_id"].astype(str) == "75"
df_trips["urban_destination"] = df_trips["destination_departement_id"].astype(str) == "75"

In [55]:
# Write the trip attributes (together with the frame index) as a ";"-separated file
output_columns = ["mode", "weight", "routed_distance", "urban_origin", "urban_destination"]
df_trips[output_columns].to_csv(output_path, sep = ";")