In [16]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Load the 1-minute power-load table from the "silver" data folder.
# The path is relative, so it is resolved against the kernel's working directory.
df = pd.read_parquet("../data/silver/power_load_1m.parquet")

In [17]:
# Row and column counts of the loaded frame.
df.shape

(44640, 44)

In [18]:
# Preview the first rows to check column names and sample values.
df.head()

Unnamed: 0,timestamp,avg_load,day_class,workday,year,quarter,month,day,weekday,hour,...,rolling_std_1440m,rolling_max_1440m,rolling_min_1440m,delta_5m,delta_15m,delta_60m,delta_1440m,slope_5m,slope_15m,slope_60m
0,2025-11-28 00:00:00,1912.4,full,2,2025,4,11,28,5,0,...,,,,,,,,,,
1,2025-11-28 00:01:00,1908.233333,full,2,2025,4,11,28,5,0,...,,,,,,,,,,
2,2025-11-28 00:02:00,1643.183333,full,2,2025,4,11,28,5,0,...,,,,,,,,,,
3,2025-11-28 00:03:00,2992.95,full,2,2025,4,11,28,5,0,...,,,,,,,,,,
4,2025-11-28 00:04:00,1106.0,full,2,2025,4,11,28,5,0,...,,,,,,,,,,


In [19]:
# Count the rows without an avg_load reading, report their share of the
# whole frame, then preview the first few affected rows.
is_missing = df["avg_load"].isna()
nan_load = is_missing.sum()
print(f"Number of NaN values in 'load' column: {nan_load} ({nan_load / len(df) * 100:.2f}%)")
df[is_missing].head()

Number of NaN values in 'load' column: 40 (0.09%)


Unnamed: 0,timestamp,avg_load,day_class,workday,year,quarter,month,day,weekday,hour,...,rolling_std_1440m,rolling_max_1440m,rolling_min_1440m,delta_5m,delta_15m,delta_60m,delta_1440m,slope_5m,slope_15m,slope_60m
759,2025-11-28 12:39:00,,,,2025,4,11,28,5,12,...,,,,1750.037037,1538.740741,-4755.444444,,158.461111,7.821296,136.958165
6021,2025-12-02 04:21:00,,,,2025,4,12,2,2,4,...,,,,869.433413,-271.472727,222.077273,-322.439394,-49.588963,6.301601,4.312446
7900,2025-12-03 11:40:00,,,,2025,4,12,3,3,11,...,,,,158.866667,2734.266667,1211.633333,-308.15,-63.273333,-113.413036,-12.694768
8011,2025-12-03 13:31:00,,,,2025,4,12,3,3,13,...,,,,3256.11,3604.283333,1460.466667,729.133333,-619.803818,-234.182314,-96.648163
8012,2025-12-03 13:32:00,,,,2025,4,12,3,3,13,...,,,,,,,,,,


In [20]:
def plot_load_data(df, title_suffix, n_days=6, resolution=1):
    """Overlay the intraday load curves of the earliest calendar days in one figure.

    Every selected day becomes one line trace: x is the time of day in hours
    (seconds since midnight / 3600) and y is ``avg_load / 1e3``. The figure is
    displayed with ``fig.show()``; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``timestamp`` column (datetime-like, or parseable by
        ``pd.to_datetime``) and an ``avg_load`` column. The frame passed in
        is left untouched; all derived columns live on an internal copy.
    title_suffix : str
        Free text appended to the figure title.
    n_days : int, default 6
        How many of the earliest distinct calendar days to draw.
    resolution : int, default 1
        Row stride applied inside each day (1 keeps every row, 5 keeps every
        fifth row, ...).

    Raises
    ------
    ValueError
        If ``n_days`` is negative or ``resolution`` is smaller than 1.
        ``pd.to_datetime(..., errors="raise")`` may also raise for
        timestamps it cannot parse.
    """
    if n_days < 0:
        raise ValueError(f"n_days must be >= 0, got {n_days}")
    if resolution < 1:
        raise ValueError(f"resolution must be >= 1, got {resolution}")

    # -----------------------------
    # Ensure types (on a private copy)
    # -----------------------------
    # Only the two columns that are plotted are copied, so the caller's frame
    # keeps its original dtypes and does not gain helper columns.
    data = df[["timestamp", "avg_load"]].copy()
    data["timestamp"] = pd.to_datetime(data["timestamp"], errors="raise")
    data = data.sort_values("timestamp")

    # -----------------------------
    # Create day label + seconds-of-day
    # -----------------------------
    data["date"] = data["timestamp"].dt.normalize()  # midnight of that date
    data["second_of_day"] = (
        (data["timestamp"] - data["date"]).dt.total_seconds().astype(int)
    )

    # -----------------------------
    # Select which days to plot (earliest n_days)
    # -----------------------------
    # `data` is already in timestamp order, so the distinct dates come out
    # ascending without a second sort.
    selected_dates = data["date"].drop_duplicates().iloc[:n_days]

    fig = go.Figure()

    for day in selected_dates:
        # Rows are already chronological within the day; only thin them out.
        day_df = data.loc[data["date"] == day].iloc[::resolution]

        fig.add_trace(
            go.Scatter(
                x=day_df["second_of_day"].to_numpy() / 3600,
                y=day_df["avg_load"].to_numpy() / 1e3,
                mode="lines",
                name=str(day.date()),
            )
        )

    # NOTE(review): `resolution` is a row stride, yet the x-axis label calls it
    # "sec."; the label text is kept unchanged here - confirm the wording.
    # NOTE(review): the "kW" label presumes avg_load is stored in watts - confirm.
    fig.update_layout(
        title=f"Daily power/demand data over {len(selected_dates)} day(s) - {title_suffix}",
        xaxis_title=f"time [hours in {resolution} sec. resolution]",
        yaxis_title="power/demand [kW]",
        template="plotly_white",
    )

    fig.show()

# Draw the daily load curves for the loaded frame; the second argument is only
# used as a suffix in the figure title.
plot_load_data(df, "1-minute resolution")

In [23]:
# Mean avg_load per weekday code, drawn as one bar (one trace) per day of the week.
profiles = df.groupby("weekday", as_index=False)["avg_load"].mean()

fig = go.Figure()

# NOTE(review): these labels assume the `weekday` column is coded 0=Sunday ... 6=Saturday
# (not pandas' Monday=0). The sample row for 2025-11-28, a Friday, carries weekday 5,
# which fits this mapping but would also fit an ISO-style 1=Monday ... 7=Sunday coding,
# in which case Sunday (7) would be missing here - confirm against the upstream table.
weekday_labels = dict(
    enumerate(
        [
            "Sunday",
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
        ]
    )
)

# Weekend codes (0 and 6) are highlighted in orange, every other day in steelblue.
colors = {code: ("orange" if code in (0, 6) else "steelblue") for code in range(7)}

for wd, day_name in weekday_labels.items():
    df_wd = profiles.loc[profiles["weekday"] == wd]
    load_kw = df_wd["avg_load"].div(1e3)  # kW

    bar = go.Bar(
        x=[day_name],
        y=load_kw,
        name=day_name,
        marker_color=colors[wd],
        text=load_kw.round(1),
        textposition="inside",
    )
    fig.add_trace(bar)

fig.update_layout(
    title="Average Load by Day of Week",
    yaxis_title="Average Power/Demand [kW]",
    template="plotly_white",
    showlegend=False,
)

fig.show()


In [7]:
# Mean avg_load for each workday class, drawn as one bar (one trace) per class.
profiles = df.groupby("workday", as_index=False)["avg_load"].mean()

fig = go.Figure()

# Display name and bar colour for each workday code.
workday_labels = dict(enumerate(["Non-Workday", "Half Workday", "Full Workday"]))
colors = dict(enumerate(["orange", "steelblue", "seagreen"]))

for wd, class_name in workday_labels.items():
    df_wd = profiles.loc[profiles["workday"] == wd]
    load_kw = df_wd["avg_load"].div(1e3)  # kW

    bar = go.Bar(
        x=[class_name],  # single category
        y=load_kw,
        name=class_name,
        marker_color=colors[wd],
        text=load_kw.round(1),
        textposition="inside",
    )
    fig.add_trace(bar)

fig.update_layout(
    title="Average Load by Workday Type",
    yaxis_title="Average Power/Demand [kW]",
    template="plotly_white",
    showlegend=False,
    width=600,
)

fig.show()


In [56]:
import plotly.graph_objects as go

# Mean avg_load for every (workday class, hour of day) pair: one hourly
# profile per workday class, shown as grouped bars.
profiles = (
    df
    .assign(hour=df["timestamp"].dt.hour)
    .groupby(["workday", "hour"], as_index=False)["avg_load"]
    .mean()
)

fig = go.Figure()

# Display name and bar colour for each workday code.
workday_labels = dict(enumerate(["Non-Workday", "Half Workday", "Full Workday"]))
colors = dict(enumerate(["orange", "steelblue", "seagreen"]))

for wd, class_name in workday_labels.items():
    df_wd = profiles.loc[profiles["workday"] == wd]
    bar = go.Bar(
        x=df_wd["hour"],
        y=df_wd["avg_load"].div(1e3),
        name=class_name,
        marker_color=colors[wd],
    )
    fig.add_trace(bar)

# Horizontal legend placed just above the plotting area, shifted slightly left.
legend_style = {
    "orientation": "h",
    "yanchor": "bottom",
    "y": 1.02,
    "x": -0.06,
}

fig.update_layout(
    title="Average Daily Load Profile by Workday Type (1-Minute Resolution)",
    xaxis_title="Hour of Day",
    yaxis_title="Average Power/Demand [kW]",
    barmode="group",
    template="plotly_white",
    legend=legend_style,
)

fig.show()


In [55]:
# Mean avg_load per calendar day. The workday class is kept as a grouping key
# so each day's bar can be coloured by its class.
profiles = (
    df
    .assign(date=df["timestamp"].dt.date)
    .groupby(["date", "workday"])["avg_load"]
    .mean()
    .reset_index()
)

fig = go.Figure()

# Display name and bar colour for each workday code.
workday_labels = {
    0: "Non-Workday",
    1: "Half Workday",
    2: "Full Workday"
}

colors = {
    0: "orange",
    1: "steelblue",
    2: "seagreen"
}

# One trace per workday class so the legend explains the colours.
for wd in [0, 1, 2]:
    df_wd = profiles[profiles["workday"] == wd]

    fig.add_trace(
        go.Bar(
            x=df_wd["date"],
            y=df_wd["avg_load"] / 1e3,   # kW
            name=workday_labels[wd],
            marker_color=colors[wd],
        )
    )

fig.update_layout(
    title="Average Daily Load",
    xaxis_title="Date",
    yaxis_title="Average Power/Demand [kW]",
    barmode="group",          # safe even though there's 1 bar/day
    template="plotly_white",
    legend=dict(
        orientation="h",   # horizontal
        yanchor="bottom",
        y=1.02,
        x=-0.06,
    )
)

# Single x-axis configuration. Previously two consecutive update_xaxes calls
# set tickangle to 0 and then to -45; only the last value took effect, so it
# is now stated once. Every date gets an explicit tick label.
# NOTE(review): with explicit ticktext the tick labels come from ticktext;
# tickformat is kept unchanged in case it is still wanted elsewhere
# (e.g. hover text) - confirm whether it is needed.
fig.update_xaxes(
    tickmode="array",
    tickvals=profiles["date"],
    ticktext=pd.to_datetime(profiles["date"]).dt.strftime("%a %m/%d").tolist(),  # e.g. "Mon 01/05"
    tickformat="%b %d",       # e.g. "Jan 05"
    tickangle=-45,
)

fig.show()
