In [62]:
import pandas as pd
import numpy as np
import geopandas as gpd
import numpy.linalg as la
from tqdm.notebook import tqdm

In [63]:
# Default locations of the two input tables and of the result file.
input_person_path, input_trip_path, output_path = (
    "../data/raw/egt_2010/Personnes_semaine.csv",
    "../data/raw/egt_2010/Deplacements_semaine.csv",
    "../data/egt_trips.csv",
)

# If a `snakemake` object is present in the namespace, its inputs/outputs
# take precedence over the defaults above.
if "snakemake" in locals():
    input_person_path, input_trip_path = (
        snakemake.input["persons"],
        snakemake.input["trips"],
    )
    output_path = snakemake.output[0]

In [64]:
# Lookup from the numeric MODP_H7 category to a mode label.
# Category 6 ("other") is intentionally not listed: it is meant to fall back
# to the default mode ("pt").
MODES_MAP = dict([
    (1, "pt"),
    (2, "car"),
    (3, "car_passenger"),
    (4, "car"),
    (5, "bike"),
    (7, "walk"),
])

In [65]:
# Columns requested from the person table.
person_usecols = [
    "RESDEP", "NP", "POIDSP", "NQUEST", "SEXE", "AGE", "PERMVP",
    "ABONTC", "OCCP", "PERM2RM", "NBDEPL", "CS8"
]

# Columns requested from the trip table.
trip_usecols = [
    "NQUEST", "NP", "ND",
    "ORDEP", "DESTDEP", "ORH", "DESTH", "ORM", "DESTM", "ORCOMM", "DESTCOMM",
    "DPORTEE", "MODP_H7", "DESTMOT_H9", "ORMOT_H9"
]

# Both files are comma-separated and decoded as latin1.
df_persons = pd.read_csv(
    input_person_path, sep = ",", encoding = "latin1", usecols = person_usecols
)

df_trips = pd.read_csv(
    input_trip_path, sep = ",", encoding = "latin1", usecols = trip_usecols
)

In [66]:
# Derive integer identifiers on both tables from the raw NP / NQUEST columns,
# so that persons and trips share the same key columns.
for frame in (df_persons, df_trips):
    frame["person_id"] = frame["NP"].astype(int)
    frame["household_id"] = frame["NQUEST"].astype(int)

# Person weight, taken from POIDSP and stored as float.
df_persons["weight"] = df_persons["POIDSP"].astype(float)

In [67]:
# Trip mode: translate the MODP_H7 category through MODES_MAP; every category
# without an entry in the map (including missing values) becomes "pt".
df_trips["mode"] = df_trips["MODP_H7"].map(MODES_MAP).fillna("pt")

In [68]:
# Departments: expose the origin/destination department codes as strings.
for target_column, raw_column in (
    ("origin_departement_id", "ORDEP"),
    ("destination_departement_id", "DESTDEP"),
):
    df_trips[target_column] = df_trips[raw_column].astype(str)

In [69]:
# Further trip attributes
# DPORTEE is scaled by a factor of 1000 (presumably km -> m; confirm against
# the survey documentation).
distance_factor = 1000.0
df_trips["euclidean_distance"] = distance_factor * df_trips["DPORTEE"]

In [70]:
# Filter for trips within the region: a trip is kept only if both its origin
# and its destination department are in the requested list.
requested_departments = [
    "75", "92", "94", "91", "93", "77", "95", "78"
]

within_region = (
    df_trips["origin_departement_id"].astype(str).isin(requested_departments) &
    df_trips["destination_departement_id"].astype(str).isin(requested_departments)
)

# Take an explicit copy: columns are assigned on the filtered frame afterwards,
# and assigning on a boolean-mask slice raises SettingWithCopyWarning.
df_trips = df_trips[within_region].copy()

In [71]:
# Flag trip ends whose department code is "75".
urban_department = "75"

for flag_column, department_column in (
    ("urban_origin", "origin_departement_id"),
    ("urban_destination", "destination_departement_id"),
):
    df_trips[flag_column] = df_trips[department_column].astype(str) == urban_department

In [72]:
# Keep only the join keys and the trip attributes used from here on.
kept_columns = [
    "person_id", "household_id",
    "mode", "euclidean_distance",
    "urban_origin", "urban_destination",
]
df_trips = df_trips[kept_columns]

In [73]:
# Merge in weight
df_persons = df_persons[["person_id", "household_id", "weight"]]

# Inner join on the (person, household) key. `validate` makes pandas raise a
# MergeError if that key is duplicated in df_persons; without the check,
# duplicated persons would silently duplicate trips and inflate the weighted
# totals.
df_trips = pd.merge(
    df_trips, df_persons,
    on = ["person_id", "household_id"],
    how = "inner", validate = "many_to_one"
)

In [74]:
# Write the weighted trip table as a semicolon-separated file (the frame's
# index is written as the first column, as to_csv does by default).
output_columns = [
    "mode", "weight", "euclidean_distance", "urban_origin", "urban_destination"
]
df_trips[output_columns].to_csv(output_path, sep = ";")