In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import STL
from datetime import datetime, date, timedelta

In [None]:

def mondays_from(start: date, end_inclusive: date):
    """Yield every Monday between ``start`` and ``end_inclusive`` (both inclusive).

    If ``start`` is not itself a Monday, iteration begins at the first Monday
    after it, so every yielded date is guaranteed to be a Monday.

    Args:
        start: First candidate date.
        end_inclusive: Last candidate date; it is yielded if it is a Monday.

    Yields:
        ``date`` objects in ascending order, one per week. Nothing is yielded
        when no Monday lies inside the range.
    """
    # date.weekday() is 0 for Monday, so (-weekday) % 7 is the distance in
    # days to the next Monday (0 when start already is one).
    current = start + timedelta(days=(-start.weekday()) % 7)
    while current <= end_inclusive:
        yield current
        current += timedelta(weeks=1)


In [None]:
# Shell step: runs extract_git_history.py through `uv run`, with occupancy.csv as
# input (-i), occupancy_history.csv as output (-o) and a fixed starting commit (--start-at).
# NOTE(review): presumably this replays the git history of occupancy.csv into one
# long CSV; confirm against the script itself.
!uv run python extract_git_history.py -i occupancy.csv -o occupancy_history.csv --start-at be9808b76526d4b8646232e1d63148f10930576b

In [None]:
# Load the extracted history; the first CSV column is parsed as datetimes.
df = pd.read_csv("occupancy_history.csv", parse_dates=[0])

# NOTE(review): tz_localize assumes the stored timestamps are timezone-naive UTC;
# it raises if they already carry a timezone. Confirm against the extractor output.
df["timestamp_utc"] = df.timestamp_utc.dt.tz_localize("UTC")
df["timestamp_cet"] = df.timestamp_utc.dt.tz_convert("Europe/Zurich")
df['hour'] = df['timestamp_cet'].dt.hour
df['dow'] = df['timestamp_cet'].dt.dayofweek  # 0 = Monday
# German weekday names via an explicit lookup instead of dt.day_name("de_CH"):
# day_name depends on that OS locale being installed and fails otherwise.
weekday_names_de = ['Montag', 'Dienstag', 'Mittwoch', 'Donnerstag', 'Freitag', 'Samstag', 'Sonntag']
df['weekday'] = df['dow'].map(dict(enumerate(weekday_names_de)))
df['date'] = df['timestamp_cet'].dt.date

In [None]:
# Drop samples recorded outside each studio's opening hours.

# One row per studio: (weekday open, weekday close, weekend open, weekend close),
# all given as hour of the day. Expanded below into the nested dict form.
_HOUR_FIELDS = ('start_hour', 'end_hour', 'start_hour_weekend', 'end_hour_weekend')
opening_hours = {
    name: dict(zip(_HOUR_FIELDS, hours))
    for name, hours in [
        ('Fitnesspark Zürich Stadelhofen', (6, 22, 9, 20)),
        ('Fitnesspark Zug Eichstätte', (6, 23, 8, 21)),
        ('Fitnesspark Greifensee Milandia', (8, 22, 9, 20)),
        ('Fitnesspark Regensdorf', (8, 22, 8, 20)),
        ('Fitnesspark Winterthur', (6, 22, 8, 20)),
        ('Fitnesspark Zürich Glattpark', (6, 22, 8, 20)),
        ('Fitnesspark Zürich Puls 5', (6, 22, 9, 20)),
        ('Fitnesspark Zürich Sihlcity', (6, 22, 9, 20)),
        ('Fitnesspark Zürich Stockerhof', (6, 22, 8, 20)),
        ('Fitnesspark Luzern National', (7, 23, 8, 22)),
        ('Fitnesspark Luzern Allmend', (6, 22, 8, 20)),
        ('Fitnesspark Baden Trafo', (6, 22, 9, 20)),
        ('Fitnesspark Basel Heuwaage', (6, 22, 9, 19)),
        ('Fitnesspark Bern City', (6, 22, 8, 20)),
        ('Fitnesspark Oberhofen', (6, 22, 8, 18)),
        ('Fitnesspark Ostermundigen Time-Out', (6, 22, 8, 20)),
    ]
}

# Per-row opening window, filled in studio by studio below.
df['start_hour'] = np.nan
df['end_hour'] = np.nan

studios = df['gym'].unique()

# dow: 0-4 are Monday-Friday, 5-6 the weekend.
is_weekday = df['dow'] < 5
is_weekend = df['dow'] >= 5

for studio in studios:
    # A gym missing from opening_hours raises KeyError here.
    hours = opening_hours[studio]
    at_studio = df['gym'] == studio
    weekday_rows = at_studio & is_weekday
    weekend_rows = at_studio & is_weekend
    df.loc[weekday_rows, "start_hour"] = hours['start_hour']
    df.loc[weekday_rows, "end_hour"] = hours['end_hour']
    df.loc[weekend_rows, "start_hour"] = hours['start_hour_weekend']
    df.loc[weekend_rows, "end_hour"] = hours['end_hour_weekend']

# NOTE(review): the upper bound is inclusive, so samples taken during the hour
# that *starts* at end_hour are kept - confirm this is intended.
within_opening_hours = df['hour'].ge(df['start_hour']) & df['hour'].le(df['end_hour'])
df = df[within_opening_hours]

# Remove today (its data is most probably incomplete) and everything up to and
# including 2025-03-23.
today = datetime.now().date()
last_excluded_day = date(2025, 3, 23)
df = df[(df['date'] < today) & (df['date'] > last_excluded_day)]

df

In [None]:
# Resample to hourly means per gym.
# The lowercase 'h' alias is used because the uppercase 'H' offset alias is
# deprecated since pandas 2.2 (it emits a FutureWarning there).
hourly = (df.set_index('timestamp_cet')
            .groupby('gym')
            .resample('1h')['occupancy'].mean()
            .dropna()  # hours without any sample yield NaN bins; discard them
            .reset_index())

hourly

In [None]:
# Hourly occupancy over time for a single studio.
fig, ax = plt.subplots(figsize=(20,10))
df_stadi = hourly[hourly.gym == "Fitnesspark Zürich Stadelhofen"]
df_stadi.plot(kind='line', y='occupancy', x="timestamp_cet", ax=ax)
ax.set(title='Stündliche Belegung: Fitnesspark Zürich Stadelhofen', xlabel='Zeit', ylabel='Belegung')
# plt.show() rather than fig.show(): the latter warns on non-GUI (inline)
# backends, and every other plotting cell in this notebook uses plt.show().
plt.show()

In [None]:
# Heatmap matrix: median of the hourly means per (gym, day of week, hour).
# NOTE(review): the original comment called this an average, but the code
# aggregates with median() - confirm which one is intended.

# German weekday names via an explicit lookup instead of dt.day_name("de_CH"),
# which depends on that OS locale being installed.
weekday_names_de = ['Montag', 'Dienstag', 'Mittwoch', 'Donnerstag', 'Freitag', 'Samstag', 'Sonntag']

heat = (hourly.assign(dow=lambda x: x['timestamp_cet'].dt.dayofweek,
                      weekday=lambda x: x['timestamp_cet'].dt.dayofweek.map(dict(enumerate(weekday_names_de))),
                      hour=lambda x: x['timestamp_cet'].dt.hour)
               .groupby(['gym','dow','weekday', 'hour'])['occupancy']
               .median()
               .reset_index())

heat

In [None]:
# Mean occupancy per weekday and hour, pooled over all rows of df.
agg = (
    df.groupby(['weekday', 'hour'])
      .agg(occupancy=('occupancy', 'mean'))
      .reset_index()
)
agg

In [None]:
# Weekdays in calendar order (pivot would otherwise leave them sorted alphabetically)
order = [
    'Montag', 'Dienstag', 'Mittwoch', 'Donnerstag',
    'Freitag', 'Samstag', 'Sonntag',
]
pivot_agg = agg.pivot(index='weekday', columns='hour', values='occupancy').reindex(order)

# Heatmap weekday x hour
plt.figure(figsize=(12,6))
heatmap_ax = sns.heatmap(pivot_agg, cmap='viridis', annot=False)
heatmap_ax.set(
    title='Aggregierte Belegung: Wochentag × Stunde',
    xlabel='Stunde',
    ylabel='Wochentag',
)
plt.show()


In [None]:
# Cover image: large heatmap without tick marks or axis labels.

# Weekdays in calendar order
order = ['Montag','Dienstag','Mittwoch','Donnerstag','Freitag','Samstag','Sonntag']
pivot_agg = agg.pivot(index='weekday', columns='hour', values='occupancy').reindex(order)

# Heatmap weekday x hour
fig, ax = plt.subplots(figsize=(20,12))
sns.heatmap(pivot_agg, cmap='viridis', annot=False, ax=ax)
ax.set_xticks([])
ax.set_yticks([])
# seaborn labels the axes with the pivot's column/index names ('hour' and
# 'weekday'); blank them explicitly so the image really carries no labels.
ax.set(xlabel='', ylabel='')
plt.show()


In [None]:
# Occupancy per 100 m² of training area, so studios of different size can be compared.
area_in_100m2 = df['training_area_m2'].div(100)
df['density_100m2'] = df['occupancy'].div(area_in_100m2)

In [None]:
# Mean density per weekday and hour, pooled over all rows of df.
heat = df.groupby(['weekday','hour'])['density_100m2'].mean().reset_index()
# reindex(order) puts the rows into calendar order; without it the weekday
# names appear alphabetically (Dienstag, Donnerstag, Freitag, ...).
pivot = heat.pivot(index='weekday', columns='hour', values='density_100m2').reindex(order)
sns.heatmap(pivot, cmap='viridis')
plt.title('Belegungsdichte (Personen pro 100 m²)')
plt.xlabel('Stunde')
plt.ylabel('Wochentag')
plt.show()

In [None]:
studios = df['gym'].unique()

# One weekday x hour heatmap of mean occupancy per studio.
for studio in studios:
    subset = df.loc[df['gym'] == studio]
    heat = subset.groupby(['weekday','hour'])['occupancy'].mean().reset_index()
    pivot = heat.pivot(index='weekday', columns='hour', values='occupancy').reindex(order)

    plt.figure(figsize=(10,5))
    studio_ax = sns.heatmap(pivot, cmap='viridis', annot=False)
    studio_ax.set(
        title=f'Belegung für Studio: {studio}',
        xlabel='Stunde',
        ylabel='Wochentag',
    )
    plt.show()

In [None]:
# Sanity check: smallest density value among the filtered samples.
df['density_100m2'].min()

In [None]:
# Mean density per weekday and hour over all studios, rows in calendar order.
heat_all = df.groupby(['weekday','hour'])['density_100m2'].mean().reset_index()
pivot_all = heat_all.pivot(index='weekday', columns='hour', values='density_100m2').reindex(order)

plt.figure(figsize=(10,5))
density_ax = sns.heatmap(pivot_all, cmap='viridis', annot=False)
density_ax.set(
    title='Belegungsdichte (Personen pro 100 m²)',
    xlabel='Stunde',
    ylabel='Wochentag',
)
plt.show()

In [None]:
# Smallest cell of the all-studio density aggregate; the per-studio density
# heatmaps use this value as the lower bound of their colour scale.
heat_all["density_100m2"].min()

In [None]:
studios = df['gym'].unique()

# Colour limits come from the all-studio aggregate so every panel shares one scale.
density_floor = heat_all["density_100m2"].min()
density_ceiling = heat_all["density_100m2"].max()

for studio in studios:
    # Basel is skipped: no training area is available for it
    if studio != "Fitnesspark Basel Heuwaage":
        subset = df.loc[df['gym'] == studio]
        heat = subset.groupby(['weekday','hour'])['density_100m2'].mean().reset_index()
        pivot = heat.pivot(index='weekday', columns='hour', values='density_100m2').reindex(order)

        plt.figure(figsize=(10,5))
        studio_ax = sns.heatmap(pivot, cmap='viridis', annot=False, vmin=density_floor, vmax=density_ceiling)
        studio_ax.set(
            title=f'Belegungsdichte (Personen pro 100 m²) für Studio: {studio}',
            xlabel='Stunde',
            ylabel='Wochentag',
        )
        plt.show()

In [None]:
# Aggregate to daily means (gives a clear seasonality signal)
df_stadi = df[df.gym == "Fitnesspark Zürich Stockerhof"].dropna().reset_index()
daily = df_stadi.groupby('date')['occupancy'].mean()
daily.index = pd.to_datetime(daily.index)

# STL decomposition; period=7 models the weekly pattern (7 days)
stl = STL(daily, period=7, robust=True)
result = stl.fit()

# Individual components
trend, seasonal, resid = result.trend, result.seasonal, result.resid

# Plot the original series and the three components, one panel each
fig, axes = plt.subplots(4, 1, figsize=(15,15))
fig.suptitle('STL-Dekomposition der Fitnesscenter-Belegung (Zürich Stockerhof)', fontsize=16)

start_date = date(2025, 3, 24)
# Dashed vertical lines, one week apart from start_date up to today, drawn on every panel
week_starts = list(mondays_from(start_date, datetime.now().date()))

panels = (
    (daily, 'Original Zeitreihe'),
    (trend, 'Trend'),
    (seasonal, 'Saisonalität'),
    (resid, 'Rest'),
)
for panel_ax, (series, panel_title) in zip(axes, panels):
    panel_ax.plot(series)
    for monday in week_starts:
        panel_ax.axvline(monday, color="k", linestyle="--", alpha=0.5)
    panel_ax.set_title(panel_title)

fig.tight_layout()
plt.show()

In [None]:
# Model estimate: trend plus seasonal component (the series without the residual)
estimated = trend + seasonal

plt.figure(figsize=(10,4))
curves = (
    (daily, 'Tatsächliche Werte', 'blue'),
    (estimated, 'Geschätzte Werte', 'orange'),
)
for series, legend_label, line_color in curves:
    plt.plot(series, label=legend_label, color=line_color)
plt.legend()
plt.show()

In [None]:
# Three-standard-deviation band around the mean residual
resid_mean, resid_dev = resid.mean(), resid.std()
band_halfwidth = 3*resid_dev

lower = resid_mean - band_halfwidth
upper = resid_mean + band_halfwidth



In [None]:
# Daily means on days whose residual falls outside the [lower, upper] band
outside_band = resid.lt(lower) | resid.gt(upper)
anomalies = daily[outside_band]
anomalies

In [None]:
# Residual series with the tolerance band shaded across the whole date range
resid_ax = plt.figure(figsize=(10,4)).gca()
resid_ax.plot(resid, label='Rest', color='blue')
band_span = [daily.index.min(), daily.index.max()]
resid_ax.fill_between(band_span, lower, upper, color="g", alpha=0.25, linestyle="dashed")

resid_ax.legend()
plt.show()

In [None]:
# Daily means with the flagged days marked as red diamonds
anomaly_ax = plt.figure(figsize=(10,4)).gca()
anomaly_ax.plot(daily, label='Tagesmittelwerte', color='blue')
anomaly_ax.scatter(anomalies.index, anomalies.values, color="r", marker="D", label="Ausreisser")
anomaly_ax.legend()
plt.show()

In [None]:
# Display the flagged days again for reference.
anomalies

In [None]:
# Daily means for 2025-08-03 through 2025-08-10 (label-based slice, both ends included)
daily.loc['2025-08-03':'2025-08-10']