In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Build the raw-file URL for the Alta NOAA CSV hosted on GitHub.
github = "https://github.com/mattharrison/datasets/"
url = github + "raw/refs/heads/master/data/alta-noaa-1980-2019.csv"

# Load with pyarrow-backed dtypes and preview a handful of columns.
alta_raw = pd.read_csv(url, dtype_backend="pyarrow")
preview_columns = ["NAME", "DATE", "PRCP", "SNOW", "TOBS", "TMIN"]
print(alta_raw[preview_columns])


# Applying the CLEAR principles to the plot
from highlight_text import ax_text
from matplotlib import colormaps
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import pandas as pd


def plot_temps(df_):
    """Draw ski-season temperature trends and return the main plot axes.

    The figure is a three-row mosaic: a highlighted title, the line chart,
    and a source note. Every season whose ``SEASON`` label contains "Ski"
    is drawn as a faint line, per-decade means are drawn on top in shades
    of blue, and the last season column is emphasized in red.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must provide ``SEASON``, ``DAY_OF_SEASON`` and ``TMEAN`` columns.

    Returns
    -------
    matplotlib.axes.Axes
        The axes of the "plot" panel.

    Notes
    -----
    Sets ``plt.rcParams["font.family"]`` to "Roboto", which affects every
    figure drawn afterwards in the same session.
    """
    plt.rcParams["font.family"] = "Roboto"

    # The figure size is specified in points; matplotlib expects inches.
    figsize = (160, 165)  # pts
    figsize_inches = [dim / 72 for dim in figsize]

    heading_fontsize = 9.5
    heading_fontweight = "bold"
    subheading_fontsize = 8
    subheading_fontweight = "normal"
    source_fontsize = 6.5
    source_fontweight = "light"
    axis_fontsize = 7
    axis_fontweight = "normal"
    gray = "#aaaaaa"
    # Colors are defined locally so the title highlights and the plotted
    # lines agree, and so the function does not rely on module globals.
    red = "#e3120b"
    blue = "#0000ff"
    cmap = colormaps.get_cmap("Blues")
    # x position (in DAY_OF_SEASON units) of the end-of-line labels.
    label_x = 185

    def mark_endpoints(axes, series, color, markersize):
        """Put a dot on the first and the last point of *series*."""
        for position in (0, -1):
            axes.plot(
                series.index[position],
                series.iloc[position],
                marker="o",
                color=color,
                markersize=markersize,
                zorder=2,
            )

    layout = [["title"], ["plot"], ["notes"]]

    fig, axs = plt.subplot_mosaic(
        layout,
        figsize=figsize_inches,
        dpi=300,
        gridspec_kw={"height_ratios": [6, 12, 1]},
        constrained_layout=True,
    )

    # ----- Title -----
    ax_title = axs["title"]
    ax_title.axis("off")
    sub_props = {
        "fontsize": subheading_fontsize,
        "fontweight": subheading_fontweight,
    }
    # Each <...> span in the text is styled by the matching entry, in order,
    # of highlight_textprops.
    ax_text(
        s="<Alta Ski Resort>\n<Temperature trends by >\n"
        "<decade>< and ><last year>",
        x=0,
        y=0,
        fontsize=heading_fontsize,
        ax=ax_title,
        va="bottom",
        ha="left",
        zorder=5,
        highlight_textprops=[
            {
                "fontsize": heading_fontsize,
                "fontweight": heading_fontweight,
            },
            sub_props,
            {"color": blue, **sub_props},
            sub_props,
            {"color": red, **sub_props},
        ],
    )

    # ----- Plot -----
    ax = axs["plot"]
    # Rows are DAY_OF_SEASON, columns are the individual ski seasons.
    season_temps = (
        df_.query('SEASON.str.contains("Ski")')
        .groupby(["DAY_OF_SEASON", "SEASON"])
        .TMEAN.first()
        .unstack()
    )
    # One colormap sample per season column, drawn faintly as background.
    n_seasons = len(season_temps.columns)
    season_temps.plot(
        ax=ax,
        color=[cmap(i / n_seasons) for i in range(n_seasons)],
        legend=False,
        alpha=0.2,
        linewidth=1,
        zorder=1,
    )

    # ---- Decade Averages ----
    decades = [1980, 1990, 2000, 2010]
    blues = ["#000099", "#0000aa", "#0000cc", "#0000ff"]
    # Substrings of the season label that select each decade's columns.
    matches = ["198", "199", "200", "201"]
    for decade, color, match in zip(decades, blues, matches):
        decade_temps = season_temps.loc[
            :, season_temps.columns.str.contains(match)
        ].mean(axis="columns")
        decade_temps.plot(ax=ax, color=color, linewidth=1, legend=False)
        # add label to right of line
        ax.text(
            label_x,
            decade_temps.iloc[-1],
            f"{decade}",
            va="center",
            ha="left",
            fontsize=axis_fontsize,
            fontweight=axis_fontweight,
            color=color,
        )
        mark_endpoints(ax, decade_temps, color, markersize=1)

    # ------ Last year ------
    # The last season column is emphasized with a thicker red line.
    last_year = season_temps.iloc[:, -1].dropna()
    last_year.plot(ax=ax, color=red, linewidth=2, legend=False)
    # NOTE(review): the label text is hard-coded; confirm it matches the
    # season held in the last column.
    ax.text(
        label_x - 10,
        last_year.iloc[-1],
        "2019",
        va="center",
        ha="right",
        fontsize=axis_fontsize,
        fontweight=axis_fontweight,
        color=red,
    )
    mark_endpoints(ax, last_year, red, markersize=2)

    # ------ Ticks & Lines ------
    # remove spines
    for side in ["top", "left", "right"]:
        ax.spines[side].set_visible(False)
    # add horizontal line at 32F
    ax.axhline(32, color="black", linestyle="--", linewidth=1, zorder=1)
    # set y ticks
    ax.set_yticks(ticks=[10, 32, 40])
    # set x label
    ax.set_xlabel(
        "Day of season", fontsize=axis_fontsize, fontweight=axis_fontweight
    )

    # ------ Source ------
    ax_notes = axs["notes"]
    ax_notes.axis("off")
    ax_notes.text(
        0,
        0,
        "Source: NOAA",
        fontsize=source_fontsize,
        fontweight=source_fontweight,
        color=gray,
    )
    return ax


# Module-level hex color constants.
black, blue, red = "#000000", "#0000ff", "#ff0000"


def get_season(df_):
    return (
        pd.Series("Summer ", index=df_.index)
        .case_when(
            [
                (
                    (df_.DATE.dt.month <= 4) | (df_.DATE.dt.month >= 11),
                    "Ski ",
                )
            ]
        )
        .add(
            pd.Series(df_.DATE.dt.year.astype(str), index=df_.index).where(
                df_.DATE.dt.month < 11,
                (df_.DATE.dt.year).add(1).astype(str),
            )
        )
    )


def add_day_of_season(df_):
    """Return, per row, the whole days elapsed since its season's first DATE.

    Rows are grouped by ``SEASON``; the earliest ``DATE`` in each group is
    day 0.
    """

    def since_first_date(dates):
        return dates - dates.min()

    elapsed = df_.groupby("SEASON")["DATE"].transform(since_first_date)
    return elapsed.dt.days


def debug(df_):
    """Show *df_* and return it unchanged, for use inside a ``.pipe`` chain.

    Uses the ``display`` function that IPython/Jupyter makes available and
    falls back to ``print`` when the code runs as a plain script, where
    ``display`` is not defined.
    """
    try:
        show = display  # only defined inside IPython/Jupyter sessions
    except NameError:
        show = print
    show(df_)
    return df_

# Parse dates, label seasons, smooth the observed temperature, then plot.
# Each derived column is added in its own step because later steps read
# the columns created by earlier ones.
(
    alta_raw.assign(DATE=lambda raw: pd.to_datetime(raw["DATE"]))
    .assign(SEASON=get_season)
    .assign(DAY_OF_SEASON=add_day_of_season)
    .set_index("DATE")
    # Fill gaps in TOBS before taking the centered 28-row rolling mean.
    .assign(TOBS=lambda daily: daily["TOBS"].interpolate())
    .assign(
        TMEAN=lambda daily: daily["TOBS"].rolling(28, center=True).mean()
    )
    .pipe(plot_temps)
)