In [186]:
import polars as pl
import altair as alt
from camminapy.plot import altair_theme
from camminapy.plot import Footer

# Presumably registers/enables the author's Altair theme; `altair_theme` comes
# from camminapy.plot and its body is not visible here — TODO confirm.
altair_theme()
# Lift Altair's default limit on the number of rows a chart dataset may have.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [322]:
# Load the Abitur grade counts and add, for every row, the share (in percent)
# that its "Anzahl" has of the total "Anzahl" within the same
# ("Jahr", "Bundesland") group.
df = pl.read_csv("../data/Abiturnoten.csv").with_columns(
    (100 * pl.col("Anzahl") / pl.col("Anzahl").sum().over("Jahr", "Bundesland")).alias(
        "Anteil in Prozent"
    )
)

# Nationwide aggregate: sum the counts over all rows sharing ("Jahr", "Note")
# and label the result with the pseudo-state "Gesamt". The share is recomputed
# on the summed counts and rounded to one decimal (the per-state shares above
# are left unrounded).
# NOTE(review): polars >= 0.19 renames `groupby` to `group_by` (the old name
# was later removed); switch the call below when upgrading polars.
df_deutschland = (
    df.groupby("Jahr", "Note", maintain_order=True)
    .agg(pl.col("Anzahl").sum(), pl.lit("Gesamt").alias("Bundesland"))
    .with_columns(
        (100 * pl.col("Anzahl") / pl.col("Anzahl").sum().over("Jahr", "Bundesland"))
        .round(1)
        .alias("Anteil in Prozent")
    )
)

# Append the nationwide rows to the per-state rows (columns selected in a fixed
# order before concatenating) and tag every row with the group it belongs to.
df = pl.concat(
    [
        df,
        df_deutschland.select(
            "Jahr", "Note", "Bundesland", "Anzahl", "Anteil in Prozent"
        ),
    ]
).with_columns(
    # The labels must be wrapped in pl.lit(): bare strings passed to
    # then()/otherwise() are parsed as column names by current polars.
    pl.when(pl.col("Bundesland") == "Gesamt")
    .then(pl.lit("Deutschland"))
    .otherwise(pl.lit("Bundesländer"))
    .alias("Gruppierung")
)

In [349]:
# German version of the chart: share of rows with "Note" == 1.0 per year, one
# line per "Bundesland" (faded) plus the nationwide line (fully opaque).
chart = (
    alt.Chart(df)
    .mark_line(clip=True, point=False, size=4)
    .encode(
        # format="d" prints the years as plain integers.
        x=alt.X("Jahr:Q").scale(domain=(2005.6, 2022.5)).axis(format="d"),
        y=alt.Y("Anteil in Prozent:Q").scale(zero=True),
        # Pin the domain explicitly so that the colour assignment does not
        # silently depend on the alphabetical order of the category labels.
        color=alt.Color("Gruppierung:N").scale(
            domain=["Bundesländer", "Deutschland"], range=["gray", "blue"]
        ),
        # Same predicate as the layer filters at the end of this cell
        # ("Gruppierung" is "Deutschland" exactly for the "Gesamt" rows).
        opacity=alt.condition(
            alt.datum["Gruppierung"] == "Deutschland",
            alt.value(1.0),
            alt.value(0.2),
        ),
        detail="Bundesland:N",
    )
    .transform_filter(alt.datum["Note"] == 1.0)
    .properties(
        width=1300,
        height=700,
        title={
            "text": "Abitur mit Bestnote (1,0)",
            "subtitle": [
                "Daten: https://www.kmk.org/dokumentation-statistik/statistik/schulstatistik/abiturnoten.html",
                "Analyse und Visualisierung: Thomas Camminady",
            ],
            "subtitleFontSize": 8,
            "subtitleFontWeight": "lighter",
            "subtitleColor": "gray",
            "anchor": "middle",
        },
    )
)

# Optional annotation layers. They are NOT part of the figure rendered at the
# end of this cell; see the commented-out composition further down.
# Area from x="from" to x2="to" filled with a white-to-white gradient.
background_pre = (
    alt.Chart(pl.DataFrame({"from": [2006, 2019.5], "to": [2019.5, 2022]}))
    .mark_area(
        line={"color": "white"},
        color=alt.Gradient(
            gradient="linear",
            stops=[
                alt.GradientStop(color="white", offset=0),
                alt.GradientStop(color="white", offset=1),
            ],
            x1=0,
            x2=1,
            y1=1,
            y2=1,
        ),
    )
    .encode(
        x=alt.X("from:Q").title("Jahr"),
        x2=alt.X2("to:Q"),
        # Fixed pixel values; 700 equals the chart height set above.
        y=alt.value(0.0),
        y2=alt.value(700),
        opacity=alt.value(0.1),
    )
)
# Same construction, but the gradient runs from white to black.
background_post = (
    alt.Chart(pl.DataFrame({"from": [2019.5, 2022], "to": [2022, 2022]}))
    .mark_area(
        line={"color": "white"},
        color=alt.Gradient(
            gradient="linear",
            stops=[
                alt.GradientStop(color="white", offset=0),
                alt.GradientStop(color="black", offset=1),
            ],
            x1=0,
            x2=1,
            y1=1,
            y2=1,
        ),
    )
    .encode(
        x=alt.X("from:Q").title("Jahr"),
        x2=alt.X2("to:Q"),
        y=alt.value(0.0),
        y2=alt.value(700),
        opacity=alt.value(0.1),
    )
)
# Single right-aligned text label positioned in data coordinates.
text1 = (
    alt.Chart(
        pl.DataFrame(
            {
                "Jahr": [2021.9],
                "Anteil in Prozent": [0.2],
                "Text": ["COVID-19 Pandemie"],
            }
        )
    )
    .mark_text(fontSize=18, align="right")
    .encode(x="Jahr:Q", y="Anteil in Prozent:Q", text="Text:N")
)
# Alternative composition using the annotation layers above (currently unused):
# background_pre + background_post + chart + text1

# Rendered figure: all lines, plus value labels and point markers that are
# restricted to the nationwide series.
(
    chart
    + chart.mark_text(dx=-7, dy=-13, fontSize=14, fontWeight="bold", clip=True)
    .encode(text="Anteil in Prozent:N")
    .transform_filter(alt.datum["Gruppierung"] == "Deutschland")
    + chart.mark_point(size=100, filled=True, clip=True).transform_filter(
        alt.datum["Gruppierung"] == "Deutschland"
    )
)

In [374]:
# English version of the chart: same data and layout as the German figure, with
# the "Gruppierung" labels, axis titles and chart title translated.
chart = (
    alt.Chart(
        df.with_columns(
            # Wrap the labels in pl.lit(): bare strings passed to
            # then()/otherwise() are parsed as column names by current polars.
            pl.when(pl.col("Gruppierung") == "Deutschland")
            .then(pl.lit("Germany"))
            .otherwise(pl.lit("Individual states"))
            .alias("Gruppierung")
        ),
    )
    .mark_line(clip=True, point=False, size=4)
    .encode(
        # format="d" prints the years as plain integers.
        x=alt.X("Jahr:Q").scale(domain=(2005.6, 2022.5)).axis(format="d").title("Year"),
        y=alt.Y("Anteil in Prozent:Q").scale(zero=True).title("Share in %"),
        # Pin the domain explicitly so that the colour assignment does not
        # silently depend on the alphabetical order of the (translated) labels.
        color=alt.Color("Gruppierung:N")
        .scale(domain=["Germany", "Individual states"], range=["blue", "gray"])
        .title("Grouping"),
        opacity=alt.condition(
            alt.datum["Gruppierung"] == "Germany", alt.value(1.0), alt.value(0.2)
        ),
        detail="Bundesland:N",
    )
    .transform_filter(alt.datum["Note"] == 1.0)
    .properties(
        width=1300,
        height=700,
        # NOTE(review): the headline says 78% while the subtitle says 78.9%;
        # confirm which rounding is intended.
        title={
            "text": "COVID-19: share of high-school diplomas with top grade up by 78%",
            "subtitle": [
                "Numbers up by 78.9% when comparing the years 2019 and 2022. Top grade refers to an Abitur with grade 1.0.",
                "Data: https://www.kmk.org/dokumentation-statistik/statistik/schulstatistik/abiturnoten.html",
                "Analysis and visualization: Thomas Camminady",
            ],
            "subtitleFontSize": 8,
            "subtitleFontWeight": "lighter",
            "subtitleColor": "gray",
            "anchor": "middle",
        },
    )
)

# Optional annotation layers. They are NOT part of the figure rendered at the
# end of this cell; see the commented-out composition further down.
# NOTE(review): the axis title "Jahr" and the label "COVID-19 Pandemie" below
# are still German; translate them before enabling these layers here.
# Area from x="from" to x2="to" filled with a white-to-white gradient.
background_pre = (
    alt.Chart(pl.DataFrame({"from": [2006, 2019.5], "to": [2019.5, 2022]}))
    .mark_area(
        line={"color": "white"},
        color=alt.Gradient(
            gradient="linear",
            stops=[
                alt.GradientStop(color="white", offset=0),
                alt.GradientStop(color="white", offset=1),
            ],
            x1=0,
            x2=1,
            y1=1,
            y2=1,
        ),
    )
    .encode(
        x=alt.X("from:Q").title("Jahr"),
        x2=alt.X2("to:Q"),
        # Fixed pixel values; 700 equals the chart height set above.
        y=alt.value(0.0),
        y2=alt.value(700),
        opacity=alt.value(0.1),
    )
)
# Same construction, but the gradient runs from white to black.
background_post = (
    alt.Chart(pl.DataFrame({"from": [2019.5, 2022], "to": [2022, 2022]}))
    .mark_area(
        line={"color": "white"},
        color=alt.Gradient(
            gradient="linear",
            stops=[
                alt.GradientStop(color="white", offset=0),
                alt.GradientStop(color="black", offset=1),
            ],
            x1=0,
            x2=1,
            y1=1,
            y2=1,
        ),
    )
    .encode(
        x=alt.X("from:Q").title("Jahr"),
        x2=alt.X2("to:Q"),
        y=alt.value(0.0),
        y2=alt.value(700),
        opacity=alt.value(0.1),
    )
)
# Single right-aligned text label positioned in data coordinates.
text1 = (
    alt.Chart(
        pl.DataFrame(
            {
                "Jahr": [2021.9],
                "Anteil in Prozent": [0.2],
                "Text": ["COVID-19 Pandemie"],
            }
        )
    )
    .mark_text(fontSize=18, align="right")
    .encode(x="Jahr:Q", y="Anteil in Prozent:Q", text="Text:N")
)
# Alternative composition using the annotation layers above (currently unused):
# background_pre + background_post + chart + text1

# Rendered figure: all lines, plus value labels and point markers that are
# restricted to the nationwide ("Germany") series.
(
    chart
    + chart.mark_text(dx=-7, dy=-13, fontSize=14, fontWeight="bold", clip=True)
    .encode(text="Anteil in Prozent:N")
    .transform_filter(alt.datum["Gruppierung"] == "Germany")
    + chart.mark_point(size=100, filled=True, clip=True).transform_filter(
        alt.datum["Gruppierung"] == "Germany"
    )
)

In [368]:
# Ratio of two shares; a value of ~1.789 corresponds to an increase of ~78.9 %.
# Presumably 3.4 and 1.9 are the nationwide grade-1.0 shares (in percent) for
# 2022 and 2019 that the English chart's subtitle refers to — TODO confirm.
3.4 / 1.9

1.7894736842105263