In [18]:
import os
import pandas as pd

In [21]:
# Read the input table. Cells containing "-" are parsed as NaN (na_values), and
# the line at position 2 (0-based, blank lines skipped) supplies the column header.
with open(f"{os.getcwd()}/Quellen/mit-output.csv", "r") as file:
    probs = pd.read_csv(
        file, sep=";", header=2, index_col=0, skip_blank_lines=True, na_values=["-"]
    )

# Remove rows without any value, the two "Basis ..." rows and the column
# whose header is a single blank.
probs = probs.dropna(how="all", axis=0)
probs = probs.drop(index=["Basis ungewichtet", "Basis gewichtet"])
probs = probs.drop(columns=[" "])

# Convert "12 %"-style strings to fractions (12 % -> 0.12). NaN cells pass
# through the string accessor and the float cast unchanged.
for column in probs.columns:
    probs[column] = probs[column].str.rstrip(" %").astype("float") / 100.0

# "-" stands for 0 %. Fill every such cell instead of patching a single
# hard-coded position, so the cleanup neither depends on the row/column order
# of the file nor leaves NaN behind when more than one cell is "-".
probs = probs.fillna(0.0)

# Rename the row labels to be more descriptive
new_labels = {
    "Pkw (Fahrer)": "car_driver",
    "Pkw (Mitfahrer)": "car_passenger",
    "Motorrad/Moped/Mofa": "motorcycle",
    "Taxi": "taxi",
    "Fahrrad": "bicycle",
    "zu Fuß": "pedestrian",
}
probs = probs.rename(index=new_labels)

# Combine all motorised rows into a single "auto" row
# (car driver, car passenger, motorcycle, taxi).
probs.loc["auto"] = (
    probs.loc["car_driver"]
    + probs.loc["car_passenger"]
    + probs.loc["motorcycle"]
    + probs.loc["taxi"]
)

# Keep only the auto, bicycle and pedestrian rows, then transpose so that each
# row is one distance class and each column one mode.
probs = probs.loc[["auto", "bicycle", "pedestrian"]]
probs = probs.T

# Replace the string-based index with interval-based equivalents. The labels
# are assigned by position, so the distance classes of the input must appear in
# the same order as the consecutive pairs of `breaks`; a differing number of
# classes raises a ValueError on assignment.
breaks = [0, 0.5, 1, 2, 5, 10, 20, 50, 100, float("inf")]
index = pd.IntervalIndex.from_breaks(breaks, closed="left", name="distance")
probs.index = index

# Normalize each row so that its three shares sum to 1
probs = probs.div(probs.sum(axis=1), axis=0)

In [23]:
# Export payload: the distance-class boundaries plus one record per distance
# class. orient="records" drops the interval index, so the i-th record belongs
# to the class [breaks[i], breaks[i + 1]).
output = {
    "breaks": breaks,
    "data": probs.to_dict(orient="records"),
}

In [26]:
import json

# Serialize the export payload. As the last expression of the cell, the
# resulting JSON string is shown as the cell output.
# NOTE(review): `breaks` ends with float("inf"), which json.dumps writes as the
# bare token `Infinity` (allow_nan defaults to True). That token is not part of
# the JSON standard - confirm that the consumer's parser accepts it.
json.dumps(output)

'{"breaks": [0, 0.5, 1, 2, 5, 10, 20, 50, 100, Infinity], "data": [{"auto": 0.12371134020618557, "bicycle": 0.09278350515463918, "pedestrian": 0.7835051546391752}, {"auto": 0.3010752688172043, "bicycle": 0.1827956989247312, "pedestrian": 0.5161290322580645}, {"auto": 0.47727272727272724, "bicycle": 0.2159090909090909, "pedestrian": 0.3068181818181818}, {"auto": 0.6790123456790124, "bicycle": 0.16049382716049382, "pedestrian": 0.16049382716049382}, {"auto": 0.8552631578947368, "bicycle": 0.09210526315789475, "pedestrian": 0.05263157894736842}, {"auto": 0.948051948051948, "bicycle": 0.03896103896103895, "pedestrian": 0.012987012987012984}, {"auto": 0.961038961038961, "bicycle": 0.025974025974025972, "pedestrian": 0.012987012987012986}, {"auto": 0.9726027397260274, "bicycle": 0.027397260273972605, "pedestrian": 0.0}, {"auto": 1.0, "bicycle": 0.0, "pedestrian": 0.0}]}'