In [None]:
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Load the logged hours and the name list.
df_hours = pd.read_csv("hours_exploded.csv")

# Derive two columns on the name list:
#   Familie         - "Nachname" alone when the second surname is missing or
#                     identical, otherwise both surnames joined by " + "
#   alleinerziehend - True where the second surname ("Nachname (1)") is missing
df_names = pd.read_csv("names.csv").assign(
    Familie=lambda names: np.where(
        names["Nachname (1)"].notna()
        & (names["Nachname"] != names["Nachname (1)"]),
        names["Nachname"] + " + " + names["Nachname (1)"],
        names["Nachname"],
    ),
    alleinerziehend=lambda names: names["Nachname (1)"].isna(),
)

In [None]:
# Total "Stunden" per "wer?_id" value, as a plain dict for later lookups.
hours_dict = (
    df_hours.groupby("wer?_id")["Stunden"]
    .agg("sum")
    .to_dict()
)

In [None]:
# Preview the first five rows of the enriched name list.
df_names.head(n=5)

In [None]:
# Show only the rows flagged as alleinerziehend.
df_names.loc[df_names["alleinerziehend"]]

In [None]:
# Target hours, looked up by (alleinerziehend, number of children):
#   not alleinerziehend: 1 child -> 102, 2 children -> 132, 3 children -> undecided
#   alleinerziehend:     1 child -> 50,  2 children -> 60
# There is no entry for alleinerziehend with 3 children, nor for more than
# 3 children.
target_hours_dict = {
    (False, 1): 102,
    (False, 2): 132,
    (False, 3): 132,  # placeholder until the 3-children target is decided
    (True, 1): 50,
    (True, 2): 60,
}

# Number of children per family: non-null "Vorname" entries per "Familie".
children_count = (
    df_names.groupby("Familie")["Vorname"]
    .count()
    .sort_values()
    .to_dict()
)

In [None]:
# Per-family summary: hours logged by the family's Nextcloud accounts versus
# the family's target, plus the resulting progress in percent.
#
# df_names is counted per "Vorname" to obtain n_children, so a family with
# several children occupies several rows that repeat the same parent accounts.
# hours_dict already holds the *total* hours per account, so every
# (Familie, alleinerziehend, account) combination must be kept only once
# before summing; otherwise a family's hours are counted once per child.
family_hours = (
    df_names[["Familie", "alleinerziehend", "Nextcloudaccount Mutter", "Nextcloudaccount Vater"]]
    .melt(id_vars=["Familie", "alleinerziehend"], value_name="nextcloud_account")
    .drop(columns="variable")
    # Remove the per-child repeats of the same account (see note above).
    .drop_duplicates()
    .assign(n_children=lambda x: x["Familie"].map(children_count))
    # Look up the target by (alleinerziehend, n_children); a combination that
    # is missing from target_hours_dict raises KeyError.
    .assign(
        target_hours=lambda x: [
            target_hours_dict[key]
            for key in zip(x["alleinerziehend"], x["n_children"])
        ]
    )
    # Accounts without any logged hours (or missing accounts) count as 0.
    .assign(actual_hours=lambda x: x["nextcloud_account"].map(hours_dict).fillna(0))
    .groupby(["Familie", "alleinerziehend", "target_hours", "n_children"])
    .sum(numeric_only=True)
    .reset_index()
    .assign(progress=lambda x: x["actual_hours"] / x["target_hours"] * 100)
    # Casting to int truncates the progress percentage towards zero.
    .astype({"n_children": int, "progress": int})
    .sort_values(by="progress", ascending=False)
    .reset_index(drop=True)
)
family_hours

In [None]:
# Length of the Kita year in days, used to express elapsed time as a percentage.
KITA_YEAR_DAYS = 365


def percent_of_year_elapsed(today, start_date, year_days=KITA_YEAR_DAYS):
    """Return the share of the year elapsed at `today`, in percent.

    Args:
        today: datetime for which the progress is computed.
        start_date: datetime marking the first day of the year.
        year_days: number of days that count as 100 %.

    Returns:
        The elapsed percentage rounded to one decimal place and clamped to
        the range [0, 100], so dates before the start or after the end of
        the year do not yield negative values or values above 100.
    """
    elapsed = (today - start_date).days
    return np.round(np.clip(elapsed / year_days * 100, 0, 100), 1)


start_date = datetime(2025, 9, 1)
today = datetime.now()
elapsed_days = (today - start_date).days
percent_elapsed = percent_of_year_elapsed(today, start_date)
percent_elapsed

In [None]:
# Horizontal bar chart of every family's progress, sorted ascending.
# Bar colour encodes the status stored in the "okay" column:
#   "done" - progress >= 100
#   "yes"  - progress >= percent_elapsed (but below 100)
#   "no"   - everything else
fig, ax = plt.subplots(figsize=(8, 8))

plot_data = family_hours.sort_values(by="progress", ascending=True)
plot_data["okay"] = np.select(
    [plot_data["progress"] >= 100, plot_data["progress"] >= percent_elapsed],
    ["done", "yes"],
    default="no",
)
colors = {"yes": "black", "no": "darkred", "done": "green"}

ax.barh(
    plot_data["Familie"],
    plot_data["progress"],
    color=plot_data["okay"].map(colors),
)

# Reference lines: the 100 % goal and the share of the year elapsed so far.
ax.axvline(100, color="green", linestyle="-")
ax.axvline(percent_elapsed, color="blue", linestyle="-")
ax.text(
    percent_elapsed + 2,
    0,
    f"Fortschritt Kita-Jahr: {percent_elapsed}%",
    verticalalignment='top',
    color='blue',
    fontsize=10,
)

ax.grid(axis="x", alpha=0.2)
ax.set_xlabel("Stunden in %")
ax.set_title("Kita Stundenliste")
fig.tight_layout()
plt.show()

In [None]:
# Total "Stunden" per "Kategorie", largest first, as a one-column frame.
category_hours = (
    df_hours.groupby("Kategorie")["Stunden"]
    .sum()
    .sort_values(ascending=False)
    .to_frame()
)
category_hours

In [None]:
# Pie chart of the hours per category, labelled with each slice's percentage.
plt.pie(
    x=category_hours["Stunden"],
    labels=category_hours.index,
    autopct="%1.1f%%",
)
plt.show()