In [15]:
import numpy as np
import pandas as pd
import gzip

In [16]:
# Load the semicolon-separated alternatives table from its gzip-compressed CSV.
alternatives_path = "../../astra1/alternatives.csv.gz"
with gzip.open(alternatives_path) as compressed_file:
    df = pd.read_csv(compressed_file, sep = ";")

In [17]:
# Transport modes treated as choice alternatives. The position in this list
# fixes the numeric choice code assigned later in the notebook
# (first entry -> 1, second -> 2, ...), so do not reorder casually.
MODES = [
    "car",
    "pt",
    "bike",
    "walk",
]

In [18]:
# Store the "selected" flag as a plain integer column so it can be sorted and
# summed in the following cells.
# - `np.int` was only an alias of the builtin `int`; it was deprecated in
#   NumPy 1.20 and removed in 1.24, so the builtin is used instead.
# - The column is replaced outright (rather than written through
#   `df.loc[:, ...]`) because an in-place `.loc` write can keep the previous
#   dtype on recent pandas versions, which would defeat the cast.
df["selected"] = df["selected"].astype(int)

In [19]:
# Columns that together identify one alternative (one mode of one trip).
alternative_key = ["person_id", "trip_index", "mode"]
kept_columns = alternative_key + ["vehicle_time", "vehicle_distance", "selected"]

# Sorting everything in descending order places, within each alternative, the
# row with the largest "selected" value first ...
ordered_df = df.sort_values(alternative_key + ["selected"], ascending = False)

# ... so that first() (first non-null value per column, in that order)
# collapses duplicates to a single row per alternative.
alt_df = ordered_df.groupby(alternative_key).first().reset_index()
alt_df = alt_df[kept_columns]

In [20]:
trip_key = ["person_id", "trip_index"]

# Per-trip totals over all alternatives; a "selected" total of zero means that
# none of the trip's alternatives was marked as chosen.
nosel_df = alt_df.groupby(trip_key).sum()
has_no_selection = nosel_df["selected"] == 0.0
unselected_trips = nosel_df[has_no_selection].index

# Remove every alternative belonging to such a trip.
alt_df = alt_df.set_index(trip_key).drop(unselected_trips).reset_index()

In [21]:
# Scale distances down by a factor of 1000
# (presumably metres -> kilometres; confirm against the data source).
# NOTE: this cell is not idempotent; running it twice rescales twice.
alt_df["vehicle_distance"] = alt_df["vehicle_distance"] / 1e3

In [22]:
# Monetary cost per unit of (already rescaled) vehicle_distance for each priced
# mode; modes not listed here keep a cost of zero.
# NOTE(review): the currency and distance unit are not stated in this notebook.
COST_PER_DISTANCE = {"car": 0.27, "pt": 0.5}

# Initialise as float: the values written below are floats, and writing floats
# into an integer column relies on silent upcasting, which recent pandas
# versions deprecate.
alt_df["cost"] = 0.0

for priced_mode, rate in COST_PER_DISTANCE.items():
    is_priced_mode = alt_df["mode"] == priced_mode
    alt_df.loc[is_priced_mode, "cost"] = rate * alt_df.loc[is_priced_mode, "vehicle_distance"]

In [23]:
# Reshape from one row per (trip, mode) to one row per trip, with one column
# per (attribute, mode) pair. pivot_table aggregates with its default function
# (the mean) whenever several rows share the same trip and mode.
wide_df = pd.pivot_table(
    alt_df,
    values = ["vehicle_time", "vehicle_distance", "cost", "selected"],
    index = ["person_id", "trip_index"],
    columns = 'mode',
)

# Bring the trip key back as ordinary columns and discard every trip that is
# missing a value for any attribute/mode combination.
alt_df = wide_df.reset_index().dropna()

In [24]:
# Collapse the two-level column labels produced by the pivot into flat names
# of the form "<attribute>_<mode>", then rename the "vehicle_" prefix to
# "travel_". Labels whose second level is empty (the trip key columns) keep
# just their first level.
flat_names = []
for column in alt_df.columns.values:
    if column[1] == "":
        name = column[0]
    else:
        name = '_'.join(column).strip()
    flat_names.append(name.replace("vehicle_", "travel_"))

alt_df.columns = flat_names

In [25]:
# Encode the chosen mode as its 1-based position in MODES; 0 means that no
# mode was flagged. Modes are processed in list order, so if several are
# flagged for one trip the later entry in MODES wins.
alt_df["choice_biogeme"] = 0

for code, mode in enumerate(MODES, start = 1):
    was_chosen = alt_df["selected_%s" % mode] > 0.0
    alt_df.loc[was_chosen, "choice_biogeme"] = code

In [26]:
# The per-mode "selected_*" indicator columns are now redundant with
# "choice_biogeme", so remove them.
selected_columns = ["selected_%s" % mode_name for mode_name in MODES]
alt_df = alt_df.drop(columns = selected_columns)

In [27]:
# Write the table as a tab-separated file, with the trip key
# (person_id, trip_index) emitted as the leading index columns.
# NOTE(review): absolute, machine-specific output path; consider making it
# configurable.
output_path = "/home/sebastian/temp/biotest/biogeme.csv"
indexed_df = alt_df.set_index(["person_id", "trip_index"])
indexed_df.to_csv(output_path, sep = "\t")