## Clean the forecasted data

In [1]:
import pandas as pd

In [5]:
# 1) Load the final ensemble results
df = pd.read_csv("final_traffic_ensemble.csv", parse_dates=["DATE"])

# 2) Load the location file (with longitude, latitude, ZNAME and BEZIRK_NAME)
loc = pd.read_csv("../data/dauerzaehlstellen_location.csv")

# 3) Merge on ZNR to bring in geo-info and names.
#    validate="many_to_one" makes pandas raise a MergeError if a ZNR appears
#    more than once in the location file, instead of silently duplicating
#    the matching rows of df.
df = df.merge(
    loc[["ZNR", "LONGITUDE", "LATITUDE", "ZNAME", "BEZIRK_NAME"]],
    on="ZNR",
    how="left",
    validate="many_to_one",
)

# 4) Sort by ZNR and DATE, then reset the index
df = df.sort_values(["ZNR", "DATE"]).reset_index(drop=True)


In [6]:
# Print every column name of df, one per line.
for column_name in df.columns.tolist():
    print(column_name)

ZNR
DATE
DTVMS
ISTCOVID19
BEZIRK
AUSPENDLER
POP
PKW_DENSITY
BICYCLE
BIKESHARING
BY_FOOT
CAR
CARSHARING
MOTORBIKE
PUBLIC_TRANSPORT
DTVMS_fc_exog
DTVMS_fc_noex
DTVMS_fc_prophet
DTVMS_full_exog
DTVMS_full_noex
DTVMS_full_prophet
DTVMS_ensemble
LONGITUDE
LATITUDE
ZNAME
BEZIRK_NAME


In [8]:
import pandas as pd

# Mode-share columns that get folded into the BIKE / CAR aggregates and are
# then removed from the frame.
folded_columns = ["BICYCLE", "BIKESHARING", "CARSHARING", "MOTORBIKE"]

# Column layout of the exported file.
new_order = [
    "DATE", "ZNR", "ZNAME",
    "BEZIRK", "BEZIRK_NAME",
    "LONGITUDE", "LATITUDE",
    "DTVMS",
    "DTVMS_fc_exog", "DTVMS_fc_noex", "DTVMS_fc_prophet",
    "DTVMS_full_exog", "DTVMS_full_noex", "DTVMS_full_prophet",
    "DTVMS_ensemble",
    "ISTCOVID19",
    "POP", "AUSPENDLER", "PKW_DENSITY",
    "CAR", "PUBLIC_TRANSPORT", "BY_FOOT", "BIKE",
]

# BIKE = BICYCLE + BIKESHARING; CAR absorbs CARSHARING and MOTORBIKE.
bike_total = df["BICYCLE"] + df["BIKESHARING"]
car_total = df["CAR"] + df["CARSHARING"] + df["MOTORBIKE"]

# Attach the aggregates, remove the folded columns, and apply the layout.
df = (
    df.assign(BIKE=bike_total, CAR=car_total)
      .drop(columns=folded_columns)
      .loc[:, new_order]
)

# Write out the cleaned file (without the index column).
df.to_csv("traffic_dashboard_final.csv", index=False)

In [9]:
# Count missing values per column of df and show them as a two-column table.
# The counts column is named through reset_index(name=...): after a plain
# reset_index() its label is the integer 0, so a rename keyed on the string
# "0" never matches and the column stays labelled 0.
missing_df = (
    df.isna()
      .sum()
      .rename_axis("column")
      .reset_index(name="n_missing")
)
print(missing_df)

                column     0
0                 DATE     0
1                  ZNR     0
2                ZNAME     0
3               BEZIRK     0
4          BEZIRK_NAME     0
5            LONGITUDE     0
6             LATITUDE     0
7                DTVMS  3864
8        DTVMS_fc_exog  5724
9        DTVMS_fc_noex  5724
10    DTVMS_fc_prophet  5724
11     DTVMS_full_exog    48
12     DTVMS_full_noex    48
13  DTVMS_full_prophet    48
14      DTVMS_ensemble    48
15          ISTCOVID19     0
16                 POP     0
17          AUSPENDLER     0
18         PKW_DENSITY     0
19                 CAR     0
20    PUBLIC_TRANSPORT     0
21             BY_FOOT     0
22                BIKE     0
