In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import STL

In [None]:
# Run extract_git_history.py through uv to produce occupancy_history.csv, the file loaded in the next cell.
# NOTE(review): flag meanings (-i input file, -o output file, --start-at first commit to consider)
# are inferred from their names — confirm against the script.
!uv run python extract_git_history.py -i occupancy.csv -o occupancy_history.csv --start-at be9808b76526d4b8646232e1d63148f10930576b

In [None]:
# Load the occupancy history; the first CSV column is parsed as a datetime.
df = pd.read_csv("occupancy_history.csv", parse_dates=[0])

# Mark the parsed timestamps as UTC, derive Zurich local time from them and add
# the calendar fields plus the occupancy density used by the cells below.
df = df.assign(
    timestamp_utc=lambda d: d["timestamp_utc"].dt.tz_localize("UTC"),
    timestamp_cet=lambda d: d["timestamp_utc"].dt.tz_convert("Europe/Zurich"),
    hour=lambda d: d["timestamp_cet"].dt.hour,
    dow=lambda d: d["timestamp_cet"].dt.dayofweek,  # 0 = Monday
    weekday=lambda d: d["timestamp_cet"].dt.day_name("de_CH"),
    date=lambda d: d["timestamp_cet"].dt.date,
    # Occupancy per 100 m² of training area.
    density_100m2=lambda d: d["occupancy"] / (d["training_area_m2"] / 100),
)

In [None]:
# Opening hours per studio, used to remove entries outside of opening hours.
# One row per studio: (name, start_hour, end_hour, start_hour_weekend, end_hour_weekend).
_opening_hours_rows = [
    ('Fitnesspark Zürich Stadelhofen', 6, 22, 9, 20),
    ('Fitnesspark Zug Eichstätte', 6, 23, 8, 21),
    ('Fitnesspark Greifensee Milandia', 8, 22, 9, 20),
    ('Fitnesspark Regensdorf', 8, 22, 8, 20),
    ('Fitnesspark Winterthur', 6, 22, 8, 20),
    ('Fitnesspark Zürich Glattpark', 6, 22, 8, 20),
    ('Fitnesspark Zürich Puls 5', 6, 22, 9, 20),
    ('Fitnesspark Zürich Sihlcity', 6, 22, 9, 20),
    ('Fitnesspark Zürich Stockerhof', 6, 22, 8, 20),
    ('Fitnesspark Luzern National', 7, 23, 8, 22),
    ('Fitnesspark Luzern Allmend', 6, 22, 8, 20),
    ('Fitnesspark Baden Trafo', 6, 22, 9, 20),
    ('Fitnesspark Basel Heuwaage', 6, 22, 9, 19),
    ('Fitnesspark Bern City', 6, 22, 8, 20),
    ('Fitnesspark Oberhofen', 6, 22, 8, 18),
    ('Fitnesspark Ostermundigen Time-Out', 6, 22, 8, 20),
]
_opening_hours_fields = ('start_hour', 'end_hour', 'start_hour_weekend', 'end_hour_weekend')

# Mapping: studio name -> {'start_hour', 'end_hour', 'start_hour_weekend', 'end_hour_weekend'}.
opening_hours = {
    studio_name: dict(zip(_opening_hours_fields, studio_hours))
    for studio_name, *studio_hours in _opening_hours_rows
}

# Attach each row's opening window (weekday or weekend) and drop readings taken outside of it.
studios = df['gym'].unique()

# Fail early, naming every studio that has no configured opening hours
# (a plain dict lookup would only report the first one it hits).
missing_studios = [studio for studio in studios if studio not in opening_hours]
if missing_studios:
    raise KeyError(f"No opening hours configured for: {missing_studios}")

# dow uses the dayofweek convention (0 = Monday), so 5 and 6 are Saturday and Sunday.
is_weekend = df['dow'] >= 5

for bound in ('start_hour', 'end_hour'):
    weekday_hours = df['gym'].map({gym: hours[bound] for gym, hours in opening_hours.items()})
    weekend_hours = df['gym'].map({gym: hours[f'{bound}_weekend'] for gym, hours in opening_hours.items()})
    # Cast to float so the columns keep the dtype they had when initialised with NaN.
    df[bound] = np.where(is_weekend, weekend_hours, weekday_hours).astype(float)

# NOTE(review): the upper bound is inclusive, so readings from end_hour:00 up to
# end_hour:59 are kept — confirm this matches the studios' actual closing times.
df = df[(df.hour >= df.start_hour) & (df.hour <= df.end_hour)]
df

In [None]:
# Resample to hourly mean occupancy per studio.
# The lower-case 'h' alias replaces 'H', which pandas 2.2 deprecated (FutureWarning).
hourly = (
    df.set_index('timestamp_cet')
      .groupby('gym')
      .resample('1h')['occupancy']
      .mean()
      .dropna()  # hourly bins without any reading come back as NaN; discard them
      .reset_index()
)

hourly


In [None]:

# Heatmap matrix: median of the hourly means per (gym, dow, weekday, hour).
hourly_ts = hourly['timestamp_cet']
heat = (
    hourly
    .assign(
        dow=hourly_ts.dt.dayofweek,
        weekday=hourly_ts.dt.day_name("de_CH"),
        hour=hourly_ts.dt.hour,
    )
    .groupby(['gym', 'dow', 'weekday', 'hour'])['occupancy']
    .median()
    .reset_index()
)

heat

In [None]:

# Mean occupancy per weekday and hour (rows of all studios pooled together).
agg = (
    df.groupby(['weekday', 'hour'], as_index=False)
      .agg(occupancy=('occupancy', 'mean'))
)
agg

In [None]:

# Weekdays in calendar order (German day names), used to order heatmap rows.
order = ['Montag', 'Dienstag', 'Mittwoch', 'Donnerstag', 'Freitag', 'Samstag', 'Sonntag']
pivot_agg = agg.pivot(index='weekday', columns='hour', values='occupancy')
pivot_agg = pivot_agg.reindex(order)

# Heatmap: weekday × hour
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(pivot_agg, cmap='viridis', annot=False, ax=ax)
ax.set_title('Aggregierte Belegung: Wochentag × Stunde')
ax.set_xlabel('Stunde')
ax.set_ylabel('Wochentag')
plt.show()


In [None]:
# Mean occupancy density per weekday and hour (rows of all studios pooled together).
heat = df.groupby(['weekday', 'hour'])['density_100m2'].mean().reset_index()
# Reindex to Monday-to-Sunday order; without it the weekday rows are sorted
# alphabetically, unlike in the other heatmaps of this notebook.
pivot = heat.pivot(index='weekday', columns='hour', values='density_100m2').reindex(order)

plt.figure(figsize=(12, 6))
sns.heatmap(pivot, cmap='viridis')
plt.title('Belegungsdichte (Personen pro 100 m²)')
plt.xlabel('Stunde')
plt.ylabel('Wochentag')
plt.show()


In [None]:
# One weekday × hour heatmap of mean occupancy per studio.
studios = df['gym'].unique()

for studio in studios:
    # Mean occupancy per (weekday, hour), restricted to this studio's rows.
    subset = df.loc[df['gym'] == studio]
    heat = (
        subset.groupby(['weekday', 'hour'])['occupancy']
        .mean()
        .reset_index()
    )
    pivot = heat.pivot(index='weekday', columns='hour', values='occupancy')
    pivot = pivot.reindex(order)  # rows in the order given by `order`

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.heatmap(pivot, cmap='viridis', annot=False, ax=ax)
    ax.set(title=f'Belegung für Studio: {studio}', xlabel='Stunde', ylabel='Wochentag')
    plt.show()


In [None]:
# Display the DataFrame (at this point already restricted to opening hours).
df

In [None]:
# List every studio together with its training area.
# Deduplicating the two columns keeps studios whose area is missing (a groupby
# drops rows with a NaN key by default) and avoids counting every other column
# only to throw the counts away.
(
    df[["gym", "training_area_m2"]]
    .drop_duplicates()
    .sort_values(["gym", "training_area_m2"])
    .reset_index(drop=True)
)

In [None]:
# One weekday × hour heatmap of mean occupancy density per studio.
studios = df['gym'].unique()

for studio in studios:
    if studio != "Fitnesspark Basel Heuwaage":  # no training area available for Basel
        # Mean density per (weekday, hour), restricted to this studio's rows.
        subset = df.loc[df['gym'] == studio]
        heat = (
            subset.groupby(['weekday', 'hour'])['density_100m2']
            .mean()
            .reset_index()
        )
        pivot = heat.pivot(index='weekday', columns='hour', values='density_100m2')
        pivot = pivot.reindex(order)  # rows in the order given by `order`

        fig, ax = plt.subplots(figsize=(10, 5))
        sns.heatmap(pivot, cmap='viridis', annot=False, ax=ax)
        ax.set(
            title=f'Belegungsdichte (Personen pro 100 m²) für Studio: {studio}',
            xlabel='Stunde',
            ylabel='Wochentag',
        )
        plt.show()

In [None]:

# Aggregate to daily mean occupancy (rows of all studios pooled) so the weekly pattern is visible.
daily = df.groupby('date')['occupancy'].mean()
daily.index = pd.to_datetime(daily.index)

# STL with an explicit period treats consecutive observations as consecutive
# days. Put the series on a gap-free daily grid and bridge missing days by
# linear interpolation, so that period=7 stays aligned with calendar weeks.
# When no day is missing this leaves the values unchanged.
daily = daily.asfreq('D').interpolate()

# STL decomposition; period=7 models the weekly pattern (7 days).
stl = STL(daily, period=7, robust=True)
result = stl.fit()

# Plot all components (observed, trend, seasonal, residual).
fig = result.plot()
fig.set_size_inches(10, 8)
fig.suptitle('STL-Dekomposition der Fitnesscenter-Belegung', fontsize=14)
plt.show()

# Individual components, kept for further inspection.
trend = result.trend
seasonal = result.seasonal
resid = result.resid

# Example: plot the trend component on its own.
plt.figure(figsize=(10, 4))
plt.plot(trend, label='Trend', color='blue')
plt.title('Trend-Komponente')
plt.legend()
plt.show()
