In [None]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# Apply seaborn's default theme once so every figure below shares the same look.
sns.set_theme()

In [None]:
# Load the formatted occupancy table: one row per timestamp, one column per
# studio. Column labels are title-cased and the column axis is named "studio".
df = (
    pd.read_csv(
        "out/formatted/formatted.csv",
        index_col="timestamp",
        parse_dates=["timestamp"],
    )
    .rename(columns=str.title)
    .rename_axis(columns="studio")
)

In [None]:
# Show the freshly loaded frame as this cell's output for a quick visual check.
df

In [None]:
import numpy as np

# (timestamp, studio) readings that are blanked out before any analysis.
# NOTE(review): presumably these are the spurious "100" readings that the
# following cell enumerates in exactly this tuple format -- confirm.
ignore = [
    ("2023-01-10T06:13:00+01:00", "Eppendorf"),
    ("2023-01-11T14:24:00+01:00", "Eppendorf"),
    ("2023-01-11T17:25:00+01:00", "Eppendorf"),
    ("2023-01-12T12:17:00+01:00", "Eppendorf"),
    ("2023-01-12T23:14:00+01:00", "Stephansplatz"),
    ("2023-01-13T18:13:00+01:00", "Eppendorf"),
    ("2023-01-14T10:13:00+01:00", "Eppendorf"),
    ("2023-01-15T03:42:00+01:00", "Stephansplatz"),
    ("2023-01-15T03:42:00+01:00", "Eppendorf"),
    ("2023-01-15T03:42:00+01:00", "Wandsbek"),
    ("2023-01-15T03:42:00+01:00", "Altona"),
    ("2023-01-15T03:42:00+01:00", "St-Georg"),
    ("2023-01-15T03:42:00+01:00", "Niendorf"),
    ("2023-01-15T03:42:00+01:00", "Jungfernstieg"),
    ("2023-01-15T03:42:00+01:00", "Neustadt"),
    ("2023-01-17T21:14:00+01:00", "Eppendorf"),
    ("2023-01-18T15:13:00+01:00", "Eppendorf"),
    ("2023-01-18T19:16:00+01:00", "Eppendorf"),
    ("2023-01-21T05:16:00+01:00", "Niendorf"),
    ("2023-01-21T23:12:00+01:00", "Eppendorf"),
    ("2023-01-22T16:13:00+01:00", "Eppendorf"),
    ("2023-01-24T05:16:00+01:00", "Eppendorf"),
    ("2023-01-25T11:14:00+01:00", "Eppendorf"),
    ("2023-01-25T14:20:00+01:00", "Eppendorf"),
    ("2023-01-26T15:12:00+01:00", "Eppendorf"),
    ("2023-01-29T03:45:00+01:00", "Stephansplatz"),
    ("2023-01-29T03:45:00+01:00", "Eppendorf"),
    ("2023-01-29T03:45:00+01:00", "Wandsbek"),
    ("2023-01-29T03:45:00+01:00", "Altona"),
    ("2023-01-29T03:45:00+01:00", "St-Georg"),
    ("2023-01-29T03:45:00+01:00", "Niendorf"),
    ("2023-01-29T03:45:00+01:00", "Jungfernstieg"),
    ("2023-01-29T03:45:00+01:00", "Neustadt"),
    ("2023-02-01T12:12:00+01:00", "Eppendorf"),
    ("2023-02-02T07:17:00+01:00", "Stephansplatz"),
    ("2023-02-02T07:17:00+01:00", "Wandsbek"),
]

# Overwrite each listed cell with NaN so it is treated as missing downstream.
# Re-running this cell is harmless: the same cells are simply set to NaN again.
for timestamp, studio in ignore:
    df.at[timestamp, studio] = np.nan

In [None]:
# Enumerate every reading that is exactly 100 as an (ISO timestamp, studio)
# tuple -- the same shape as the entries of `ignore`.
[
    (timestamp.isoformat(), studio)
    for timestamp, studio in (
        df.stack().to_frame("occupancy").query("occupancy == 100").index
    )
]

In [None]:
import matplotlib.dates as mdates

# Raw occupancy of every studio over time, one line per column.
fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
sns.lineplot(df, ax=ax)

# Keep the date axis readable: 3-7 major ticks with concise labels.
tick_locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
ax.xaxis.set_major_locator(tick_locator)
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(tick_locator))

In [None]:
# Hourly mean occupancy; hours without data are filled by interpolation.
hourly_mean = df.resample("H").mean().interpolate()

fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
sns.lineplot(hourly_mean, ax=ax)

# Keep the date axis readable: 3-7 major ticks with concise labels.
tick_locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
ax.xaxis.set_major_locator(tick_locator)
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(tick_locator))

In [None]:
# Daily mean occupancy restricted to readings whose hour-of-day lies in
# [hour_min, hour_max] (both bounds inclusive).
hour_min, hour_max = 8, 22
reading_hours = df.index.hour
in_window = (reading_hours >= hour_min) & (reading_hours <= hour_max)
daily_mean = df[in_window].resample("D").mean().interpolate()

fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
sns.lineplot(daily_mean, ax=ax)
ax.set_title(f"Mean occupancy between {hour_min} and {hour_max}")

# Keep the date axis readable: 3-7 major ticks with concise labels.
tick_locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
ax.xaxis.set_major_locator(tick_locator)
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(tick_locator))

In [None]:
# Daily maximum occupancy per studio; days without data are interpolated.
daily_max = df.resample("D").max().interpolate()

fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
sns.lineplot(daily_max, ax=ax)
ax.set_title("Max occupancy")

# Keep the date axis readable: 3-7 major ticks with concise labels.
tick_locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
ax.xaxis.set_major_locator(tick_locator)
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(tick_locator))

In [None]:
# Daily maximum occupancy using only readings taken at or after `hour`.
hour = 17
evening_readings = df[df.index.hour >= hour]
evening_daily_max = evening_readings.resample("D").max().interpolate()

fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
sns.lineplot(evening_daily_max, ax=ax)
ax.set_title(f"Max occupancy after {hour}")

# Keep the date axis readable: 3-7 major ticks with concise labels.
tick_locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
ax.xaxis.set_major_locator(tick_locator)
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(tick_locator))

In [None]:
# Regular hourly series used by the seasonality plots below.
df_resampled = (
    df.resample("H")  # bucket readings into hourly bins
    .mean()  # average occupancy within each hour
    .interpolate()  # fill gaps between observed hours
    .dropna()  # discard rows that still contain a missing value
)

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np


def lineplot_with_colorbar(**kwargs):
    """Draw ``sns.lineplot`` and replace its hue legend with a colorbar.

    Every keyword argument is forwarded to ``sns.lineplot``. ``palette``
    (default ``"viridis"``) is first resolved to a matplotlib colormap so the
    lines and the colorbar share the same colors.

    The colorbar range is derived from the hue values that are actually
    plotted: ``data[hue]`` when ``hue`` is a key into the ``data`` argument,
    or ``hue`` itself when it is passed as a vector. Ticks are placed at unit
    steps from the smallest to the largest hue value.

    If no ``hue`` is given there is nothing for a colorbar to encode, so the
    plain line plot is returned without a colorbar.

    Returns:
        The matplotlib ``Axes`` the lines were drawn on.
    """
    palette = kwargs.pop("palette", "viridis")
    cmap = plt.get_cmap(palette)
    hue = kwargs.get("hue")
    ax = sns.lineplot(**kwargs, palette=cmap)
    if hue is None:
        return ax

    # Read the hue values from the data handed to this call rather than from
    # a notebook-global frame, so the colorbar always matches the plot.
    data = kwargs.get("data")
    if data is not None and not pd.api.types.is_list_like(hue):
        hue_values = data[hue]
    else:
        hue_values = hue
    hue_min, hue_max = np.nanmin(hue_values), np.nanmax(hue_values)

    ax.figure.colorbar(
        mpl.cm.ScalarMappable(
            norm=mpl.colors.Normalize(vmin=hue_min, vmax=hue_max, clip=False),
            cmap=cmap,
        ),
        ticks=np.arange(hue_min, hue_max + 1),
        ax=ax,
    )

    # The colorbar replaces the discrete hue legend; the legend may be absent
    # (e.g. the caller passed legend=False), so only remove it when it exists.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    return ax

In [None]:
import seaborn.objects as so


def seasonality_plot(df: pd.DataFrame, title: str) -> None:
    """Display occupancy by hour of day, one line per (weekday, ISO week).

    Args:
        df: Frame with an ``occupancy`` column and a datetime index; the
            weekday name, ISO week number and hour are derived from the index.
            The caller's frame is not modified.
        title: Title shown above the plot.

    The plot is rendered with ``display`` and nothing is returned. Weekday is
    encoded as line color (``tab10`` palette) and week as line style.
    """
    timestamps = df.index
    plot_data = df.assign(
        day=timestamps.day_name(),
        week=timestamps.isocalendar().week.astype(int),
        hour=timestamps.hour,
    )

    plot = so.Plot(
        data=plot_data,
        x="hour",
        y="occupancy",
        color="day",
        linestyle="week",
    )
    plot = plot.add(so.Lines())
    plot = plot.scale(color="tab10")
    plot = plot.layout(size=(10, 5))
    plot = plot.label(title=title)
    display(plot)


# One seasonality plot per studio, using every day of the week.
for studio in df_resampled:
    occupancy = df_resampled[studio].to_frame("occupancy")
    seasonality_plot(occupancy, studio)
    plt.show()

In [None]:
# One seasonality plot per studio, restricted to Monday-Friday
# (day_of_week 0-4).
for studio in df_resampled:
    occupancy = df_resampled[studio].to_frame("occupancy")
    is_workday = occupancy.index.day_of_week < 5
    seasonality_plot(occupancy[is_workday], f"{studio} Arbeitstag")
    plt.show()

In [None]:
# One seasonality plot per studio, restricted to Saturday and Sunday
# (day_of_week 5-6).
for studio in df_resampled:
    occupancy = df_resampled[studio].to_frame("occupancy")
    is_weekend = occupancy.index.day_of_week >= 5
    seasonality_plot(occupancy[is_weekend], f"{studio} Wochenende")
    plt.show()