In [None]:
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'  # always print last expr.

from io import StringIO
from zipfile import ZipFile

import pandas as pd

## What is wrong?

I believe that, contrary to what is stated on https://archive.ics.uci.edu/ml/datasets/PEMS-SF,
the weekday encoding is not 1=Monday to 7=Sunday, but American calendar style: 1=Sunday to 7=Saturday.

This is because, if we use the former, then the decoded labels do not agree with the supposed dates!
I was able to reverse engineer what I think are the correct dates.

## Load the labels

In [None]:
def _reformat(s: str, replacements: dict) -> str:
    r"""Replace multiple substrings via dict.

    References:
        https://stackoverflow.com/a/64500851/9318372
    """
    *_, result = (s := s.replace(c, r) for c, r in replacements.items())  # noqa: F841
    return result


with ZipFile("PEMS-SF.zip") as archive:
    # For every member the text is rewritten before parsing: square brackets
    # are removed and each space becomes a newline, so that read_csv sees a
    # single column with one value per row.

    # --- permutation ---
    content = archive.read("randperm").decode("utf8")
    content = _reformat(content, {"[": "", "]": "", " ": "\n"})
    randperm = pd.read_csv(StringIO(content), names=["randperm"], dtype="uint16")
    randperm = randperm.squeeze()
    randperm -= 1  # the stored values are 1-based, python positions are 0-based
    # argsort of a permutation yields its inverse
    invperm = randperm.argsort()

    # --- train labels ---
    # Note: here the closing bracket is turned into a newline instead of
    # being dropped.
    content = archive.read("PEMS_trainlabels").decode("utf8")
    content = _reformat(content, {"[": "", "]": "\n", " ": "\n"})
    trainlabels = pd.read_csv(StringIO(content), names=["label"], dtype="uint8")
    trainlabels = trainlabels.squeeze()

    # --- test labels ---
    content = archive.read("PEMS_testlabels").decode("utf8")
    content = _reformat(content, {"[": "", "]": "", " ": "\n"})
    testlabels = pd.read_csv(StringIO(content), names=["label"], dtype="uint8")
    testlabels = testlabels.squeeze()

# combine labels: train part first, test part second
labels = pd.concat([trainlabels, testlabels])

## Using Anomalies listed by N-BEATS

In [None]:
# Label decoding under the documented convention: 1=Monday, ..., 7=Sunday.
weekdays = dict(
    enumerate(
        [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ],
        start=1,
    )
)

# Days to exclude. The keys are the dates; the values are annotations for the
# reader (the index below is built from the mapping itself, i.e. presumably
# from its keys only).
anomaly_names = {
    "Jan. 1, 2008": "New Year’s Day",
    "Jan. 21, 2008": "Martin Luther King Jr. Day",
    "Feb. 18, 2008": "Washington’s Birthday",
    "Mar. 9, 2008": "Anomaly day",
    "May 26, 2008": "Memorial Day",
    "Jul. 4, 2008": "Independence Day",
    "Sep. 1, 2008": "Labor Day",
    "Oct. 13, 2008": "Columbus Day",
    "Nov. 11, 2008": "Veterans Day",
    "Nov. 27, 2008": "Thanksgiving",
    "Dec. 25, 2008": "Christmas Day",
    "Jan. 1, 2009": "New Year’s Day",
    "Jan. 19, 2009": "Martin Luther King Jr. Day",
    "Feb. 16, 2009": "Washington’s Birthday",
    "Mar. 8, 2009": "Anomaly day",
}
anomalies = pd.DatetimeIndex(anomaly_names)

# Every calendar day in the candidate range ...
dates = pd.date_range("2008-01-01", "2009-03-30", freq="d", name="day")

# ... minus the anomalous ones.
is_anomaly = dates.isin(anomalies)
valid_dates = dates[~is_anomaly]

In [None]:
# Combine the train and test labels, then apply the inverse permutation.
labels = pd.concat([trainlabels, testlabels]).iloc[invperm]

# Attach the candidate dates (positionally) as the index.
labels.index = valid_dates

# Decode the numeric labels to day names and put the calendar day name of each
# index date next to them.
labels = (
    labels.map(weekdays)
    .to_frame()
    .assign(actual_weekday=lambda frame: frame.index.day_name())
)

# Compare the decoded label with the day name implied by the date.
matches = labels["label"].eq(labels["actual_weekday"])
if not matches.all():
    print("Mismatches detected!")
    print(labels.loc[~matches])
else:
    print("All encoded labels match with the day name!")

## Using reverse-engineered dates

In [None]:
# Label decoding under the American calendar convention: 1=Sunday, ..., 7=Saturday.
weekdays = dict(
    enumerate(
        [
            "Sunday",
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
        ],
        start=1,
    )
)

# Days to exclude. The keys are the dates; the values are annotations for the
# reader ("???" marks days without a known explanation). The index below is
# built from the mapping itself, i.e. presumably from its keys only.
anomaly_names = {
    "2008-01-01": "New Year’s Day",
    "2008-01-21": "Martin Luther King Jr. Day",
    "2008-02-18": "Washington’s Birthday",
    "2008-03-09": "anomaly",
    "2008-05-26": "Memorial Day",
    "2008-07-04": "Independence Day",
    "2008-09-01": "Labor Day",
    "2008-10-20": "???",
    "2008-11-17": "???",
    "2008-12-07": "???",
    "2009-02-23": "???",
}
anomalies = pd.DatetimeIndex(anomaly_names)

# Every calendar day in the candidate range ...
dates = pd.date_range("2008-01-01", "2009-03-26", freq="d", name="day")

# ... minus the anomalous ones.
is_anomaly = dates.isin(anomalies)
valid_dates = dates[~is_anomaly]

In [None]:
# Combine the train and test labels, then apply the inverse permutation.
labels = pd.concat([trainlabels, testlabels]).iloc[invperm]

# Attach the candidate dates (positionally) as the index.
labels.index = valid_dates

# Decode the numeric labels to day names and put the calendar day name of each
# index date next to them.
labels = (
    labels.map(weekdays)
    .to_frame()
    .assign(actual_weekday=lambda frame: frame.index.day_name())
)

# Compare the decoded label with the day name implied by the date.
matches = labels["label"].eq(labels["actual_weekday"])
if not matches.all():
    print("Mismatches detected!")
    print(labels.loc[~matches])
else:
    print("All encoded labels match with the day name!")