In [None]:
import polars as pl
from analysis.visualization.characterisation.clustering import cluster_timeseries_usage, usage_probabilities
from analysis.visualization.characterisation.features import build_feature_df
from data_io.loader.data_loader import DataLoader

# Shared data-access handle; every later cell reads from this loader.
dl = DataLoader(city="Stadt_Heidelberg")

# --- Configuration forwarded to the usage clustering below ---
N_CLUSTERS = 3
DATASET_START = "2016-01-01"
DATASET_END = "2025-01-01"
TIME_SERIES_MODE = "sliding"
WINDOW_MONTHS = 24

X = build_feature_df(dl)

# Columns that must never be used as clustering features.
EXCLUDE = {"station", "valid", "cluster", "date"}

# Keep every floating-point column that is not explicitly excluded,
# preserving the column order of the feature frame.
FEATURES = [
    name
    for name, dtype in zip(X.columns, X.dtypes)
    if dtype in (pl.Float32, pl.Float64) and name not in EXCLUDE
]

usage = cluster_timeseries_usage(
    loader=dl,
    features=FEATURES,
    k=N_CLUSTERS,
    mode=TIME_SERIES_MODE,
    window_months=WINDOW_MONTHS,
    start=DATASET_START,
    end=DATASET_END,
)

# A single `descending=True` applies to BOTH sort keys (station and probability).
usage_probs = usage_probabilities(usage).sort(
    ["station", "probability"],
    descending=True,
)


## Impact of Public Holidays
To quantify the impact of public holidays on station usage patterns, we compare feature vectors computed from holiday periods with a baseline excluding holidays and analyse the resulting changes in the Double Peak Index (ΔDPI) and the Weekend Shape Difference (ΔWSD).


### Station Usage Patterns

In [None]:
from analysis.visualization.characterisation.event import compute_event_deltas

# Public-holiday intervals only; school vacations are switched off via the flag.
holiday_intervals = dl.get_all_holiday_intervals(school_vacation=False)

# Compute the event deltas for those intervals, then keep just the station key
# and the two delta columns analysed in this section.
delta_columns = ["station", "DPI_delta", "WSD_delta"]
event_deltas = compute_event_deltas(loader=dl, intervals=holiday_intervals)
delta_df = event_deltas.select(delta_columns)

In [None]:
from analysis.visualization.characterisation.helpers import impact_by_usage, label_deltas_with_usage

# Attach the usage information from `usage_probs` to the per-station deltas.
delta_labeled = label_deltas_with_usage(
    delta_df=delta_df,
    usage_probs=usage_probs,
)

# Summarise the labelled deltas per usage type.
impact_holiday = impact_by_usage(
    delta_labeled=delta_labeled,
)

# Bare trailing expression so the notebook renders the result with its rich repr.
impact_holiday

In [None]:
from analysis.visualization.characterisation.plotting import plot_holiday_impact

# Visualise the usage-labelled holiday deltas produced by label_deltas_with_usage.
plot_holiday_impact(delta_labeled=delta_labeled)


**Overall effects:**
- **ΔDPI < 0 for all stations:** Weekday morning and evening commuter peaks are weakened during public holidays.
- **ΔWSD < 0 for all stations:** Weekday traffic patterns become more similar to weekend profiles.

Mixed-use stations exhibit the largest absolute holiday-induced change in feature space.

Recreational stations show the smallest change.

### Utilitarian Score of Stations

In [None]:
from analysis.visualization.characterisation.plotting import plot_event_utilitarian_spline
# Plot the utilitarian-score view for the same public-holiday intervals used above.
# NOTE(review): `k=2` is an unexplained literal and differs from N_CLUSTERS (3);
# presumably it is a spline/plot parameter rather than a cluster count — confirm.
plot_event_utilitarian_spline(
    loader=dl,
    intervals=holiday_intervals,
    title="Impact of holidays on utilitarian score of stations",
    k=2,
)