# Task 6 · Time-Series Decomposition & Typical Profiles

This notebook performs seasonal-trend decomposition of the household demand series, quantifies seasonality strength across multiple horizons, and derives typical demand profiles for reporting and dashboard integration.

In [1]:
import calendar
import sys
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.tsa.seasonal import STL, DecomposeResult

# Resolve the project root so that `src` is importable no matter where the
# notebook was launched: use the working directory when it contains `src`,
# otherwise fall back to its parent directory.
ROOT = Path.cwd().resolve()
ROOT = ROOT if (ROOT / 'src').exists() else ROOT.parent
# Register the root on the import path only once, so re-running the cell is harmless.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

from src.plotting import (
    plot_stl_components,
    plot_typical_profiles_weekday_weekend,
    plot_typical_profiles_monthly,
)


In [2]:
# Keep DataFrame previews short in the rendered notebook.
pd.set_option("display.max_rows", 12)

# Project locations, all derived from ROOT.
FIG_PATH = ROOT.joinpath("reports", "figures")
TABLE_PATH = ROOT.joinpath("reports", "tables")
DATA_PATH = ROOT.joinpath("data", "raw", "train_252145.csv")

# Create the output folders up front so later exports do not fail on a missing directory.
for output_dir in (FIG_PATH, TABLE_PATH):
    output_dir.mkdir(parents=True, exist_ok=True)


def save_figure(fig: go.Figure, filename: str, width: int = 1280, height: int = 720, scale: int = 2) -> None:
    '''Export a Plotly figure as both PNG and PDF under FIG_PATH.

    Parameters
    ----------
    fig : figure to export; its ``write_image`` method is called once per format.
    filename : base file name without extension.
    width, height, scale : forwarded unchanged to ``write_image`` for both formats.
    '''
    # PNG is written first, then PDF, using identical rendering settings.
    for extension in ("png", "pdf"):
        target = FIG_PATH / f"{filename}.{extension}"
        fig.write_image(str(target), width=width, height=height, scale=scale)


# Load the raw readings, coerce Demand to numeric (unparseable values become NaN),
# index by timestamp, and keep only the first row for any duplicated timestamp.
df = pd.read_csv(DATA_PATH, parse_dates=["timestamp"]).sort_values("timestamp")
df["Demand"] = pd.to_numeric(df["Demand"], errors="coerce")
df = df.set_index("timestamp").sort_index()
df = df[~df.index.duplicated(keep="first")]

# Aggregate to an hourly cadence. The lowercase "h" alias is used because the
# uppercase "H" alias is deprecated since pandas 2.2.
hourly_demand = df["Demand"].resample("h").mean()
# Number of hourly slots without a valid reading, counted before they are filled.
interpolated_points = hourly_demand.isna().sum()
# Fill missing hours by time-based interpolation in both directions. No `limit`
# is set, so gaps of any length are filled; check the count printed below.
hourly_demand = hourly_demand.interpolate(method="time", limit_direction="both")
hourly_demand = hourly_demand.dropna()

print(
    f"Hourly demand: {hourly_demand.index.min()} → {hourly_demand.index.max()} | "
    f"Observations: {len(hourly_demand):,} | Interpolated gaps: {interpolated_points}"
)


Hourly demand: 2013-07-01 00:00:00+00:00 → 2014-06-30 23:00:00+00:00 | Observations: 8,760 | Interpolated gaps: 1


In [3]:
def run_stl(series: pd.Series, period: int, seasonal: int | None = None) -> tuple[DecomposeResult, pd.DataFrame]:
    """Fit a robust STL decomposition and return the fit plus a tidy component frame.

    Parameters
    ----------
    series : the series to decompose.
    period : number of observations per seasonal cycle, passed to ``STL(period=...)``.
    seasonal : optional value for STL's ``seasonal`` argument; when ``None`` the
        argument is omitted and STL uses its own default.

    Returns
    -------
    (result, components)
        ``result`` is the object returned by ``STL(...).fit()`` (not the ``STL``
        estimator itself); ``components`` is a DataFrame with columns ``trend``,
        ``seasonal`` and ``resid`` on ``series.index``.

    Warns
    -----
    RuntimeWarning
        If ``series`` holds fewer than two full cycles of ``period``. The fit is
        still returned, but seasonality cannot be separated reliably from trend
        and residual with a single cycle.
    """
    if len(series) < 2 * period:
        warnings.warn(
            f"STL with period={period} on {len(series)} observations covers fewer than two full cycles; "
            "the seasonal component cannot be separated reliably from trend and residual.",
            RuntimeWarning,
            stacklevel=2,
        )

    # Only forward `seasonal` when the caller supplied it, so STL's default applies otherwise.
    seasonal_kw = {} if seasonal is None else {"seasonal": seasonal}
    result = STL(series, period=period, robust=True, **seasonal_kw).fit()
    components = pd.DataFrame(
        {
            "trend": result.trend,
            "seasonal": result.seasonal,
            "resid": result.resid,
        },
        index=series.index,
    )
    return result, components


# Daily means feed the weekly and annual fits, which are run on the coarser
# series for stability.
daily_avg_demand = hourly_demand.resample("D").mean()

# Intraday cycle: 24 observations per period on the hourly series.
stl_daily_result, stl_daily_components = run_stl(series=hourly_demand, period=24)

# Weekly cycle: 7 observations per period on the daily means.
stl_weekly_result, stl_weekly_components = run_stl(series=daily_avg_demand, period=7)

# Annual cycle: 365 observations per period on the daily means.
# NOTE(review): the load cell reports 8,760 hourly observations, i.e. about one
# 365-day cycle, so this fit sees a single annual period; interpret with care.
stl_annual_result, stl_annual_components = run_stl(series=daily_avg_demand, period=365, seasonal=31)

stl_daily_components.head()


Unnamed: 0_level_0,trend,seasonal,resid
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-07-01 00:00:00+00:00,0.510776,-0.244343,0.003567
2013-07-01 01:00:00+00:00,0.509786,-0.293644,0.013858
2013-07-01 02:00:00+00:00,0.508822,-0.268554,0.019732
2013-07-01 03:00:00+00:00,0.507883,-0.252718,0.024835
2013-07-01 04:00:00+00:00,0.50697,-0.249343,0.032373


In [4]:
def seasonality_strength(residual: pd.Series, component: pd.Series) -> float:
    """Strength of a decomposition component relative to the residual.

    Computes ``1 - Var(residual) / Var(residual + component)`` with NaN-aware
    variances and clips the result to the interval [0, 1].

    Returns NaN when the combined variance is numerically zero, because the
    ratio is undefined in that case.
    """
    noise_var = float(np.nanvar(residual))
    total_var = float(np.nanvar(residual + component))
    if not np.isclose(total_var, 0.0):
        return float(np.clip(1.0 - noise_var / total_var, 0.0, 1.0))
    return np.nan


# One record per (period, component) pair, in the order 24h, 7d, 365d with
# Seasonal before Trend inside each period.
strength_records = [
    {
        "period": period_label,
        "strength_type": strength_type,
        "value": seasonality_strength(stl_fit.resid, getattr(stl_fit, attribute)),
    }
    for period_label, stl_fit in (
        ("24h", stl_daily_result),
        ("7d", stl_weekly_result),
        ("365d", stl_annual_result),
    )
    for strength_type, attribute in (("Seasonal", "seasonal"), ("Trend", "trend"))
]

# Rows whose strength is NaN (undefined) are dropped before rounding and export.
seasonality_strength_df = (
    pd.DataFrame(strength_records)
    .dropna()
    .assign(value=lambda frame: frame["value"].round(3))
)
seasonality_strength_df.to_csv(TABLE_PATH / "seasonality_strength.csv", index=False)
seasonality_strength_df


Unnamed: 0,period,strength_type,value
0,24h,Seasonal,0.242
1,24h,Trend,0.107
2,7d,Seasonal,0.221
3,7d,Trend,0.448
4,365d,Seasonal,1.0


In [5]:
# Plot the three components of the 24h decomposition; each column of the
# component frame is passed under the keyword of the same name.
fig_daily_components = plot_stl_components(
    timestamps=stl_daily_components.index,
    **{name: stl_daily_components[name] for name in ("trend", "seasonal", "resid")},
    title="Hourly demand STL decomposition (24h period)",
    style="academic",
)

# Exported larger than the save_figure defaults.
save_figure(fig_daily_components, "demand_stl_components", width=1400, height=900)
fig_daily_components




In [6]:
# Grouped bars: one group per period, one bar per component type.
fig_strength = px.bar(
    seasonality_strength_df,
    x="period",
    y="value",
    color="strength_type",
    barmode="group",
    title="Seasonality and trend strength across horizons",
    labels={"period": "Seasonality period", "value": "Strength (0-1)", "strength_type": "Component"},
    color_discrete_map={"Seasonal": "#FFA500", "Trend": "#1f77b4"},
)

# White background, serif font and a boxed legend for the report.
fig_strength.update_layout(
    paper_bgcolor="#ffffff",
    plot_bgcolor="#ffffff",
    font={"family": "CMU Serif, 'Times New Roman', serif", "size": 14, "color": "#222"},
    legend={"bgcolor": "rgba(255,255,255,0.85)", "bordercolor": "#d0d0d0", "borderwidth": 1},
    margin={"t": 60, "r": 40, "b": 60, "l": 70},
)
# Light grid on both axes; the y-axis is pinned to the [0, 1] range of the strength values.
fig_strength.update_xaxes(showgrid=True, gridcolor="#e5e5e5")
fig_strength.update_yaxes(showgrid=True, gridcolor="#e5e5e5", range=[0, 1])

save_figure(fig_strength, "demand_seasonality_strength", width=900, height=600)
fig_strength


In [7]:
# Tag every hourly observation with its hour of day and a weekend flag
# (dayofweek 5 and 6 count as weekend).
# NOTE(review): both are derived from the index as stored; the load cell output
# shows +00:00 timestamps, so confirm this matches the household's local time.
profiles_df = hourly_demand.to_frame(name="Demand").assign(
    hour=lambda frame: frame.index.hour,
    is_weekend=lambda frame: frame.index.dayofweek >= 5,
)

# Mean demand per hour of day, computed separately for weekdays and weekends.
weekday_profile = profiles_df[~profiles_df["is_weekend"]].groupby("hour")["Demand"].mean()
weekend_profile = profiles_df[profiles_df["is_weekend"]].groupby("hour")["Demand"].mean()

fig_weekday_weekend = plot_typical_profiles_weekday_weekend(
    weekday=weekday_profile,
    weekend=weekend_profile,
    value_label="Demand (kW)",
    style="academic",
)
fig_weekday_weekend.update_layout(title="Typical hourly demand profile – weekday vs weekend")

save_figure(fig_weekday_weekend, "demand_typical_hourly_weekday_vs_weekend", width=1100, height=650)
fig_weekday_weekend




In [8]:
# Calendar-ordered month names, used as ordered categories so that months
# follow January..December instead of alphabetical order.
month_order = list(calendar.month_name[1:])
profiles_df["month"] = pd.Categorical(profiles_df.index.month_name(), categories=month_order, ordered=True)

# Mean demand per (month, hour). `observed=False` is passed explicitly: it pins
# the current behaviour for the categorical key and avoids the pandas
# FutureWarning about the changing default.
monthly_profile = (
    profiles_df.groupby(["month", "hour"], observed=False)["Demand"]
    .mean()
    .reset_index()
    .rename(columns={"Demand": "value"})
)

fig_monthly_profiles = plot_typical_profiles_monthly(
    monthly_profile,
    value_label="Demand (kW)",
    style="academic",
)

save_figure(fig_monthly_profiles, "demand_typical_hourly_by_month", width=1280, height=720)
fig_monthly_profiles




## Interpretation

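- **Dominant seasonality.** Daily (24h) and weekly (7d) seasonal strengths are both moderate and close to each other (0.242 and 0.221 in the strength table above), so the daily cycle is only marginally stronger than the weekly one. The annual (365d) seasonal strength of 1.0 is an artefact of the dataset spanning only a single year: with one cycle, STL cannot separate seasonality from trend and residual, and the corresponding 365d trend strength is undefined and therefore absent from the table. The annual figures should not be read as evidence of strong yearly seasonality.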
- **Operational insights.** Morning and evening peaks suggest scheduling battery discharge and demand response during those windows. Weekend profiles are flatter, highlighting flexibility for storage pre-charging. Monthly profiles emphasise winter demand increases, guiding tariff-aware scheduling and solar self-consumption planning.
