In [None]:
# Cell 0: Imports & helper functions

# If needed in Colab:
# !pip install --quiet openpyxl statsmodels plotly

import os
from datetime import timedelta

import numpy as np
import pandas as pd

import plotly.graph_objects as go
import plotly.io as pio

# -----------------------------------------------------------------------------
# HTML / Plot saving helpers
# -----------------------------------------------------------------------------

# Global registries for L1 and L3 plots
ALL_L1_FIGS = []   # list of tuples: (name, fig)
ALL_L3_FIGS = []   # list of tuples: (name, fig)


def register_fig(fig, name, model="L1"):
    """
    Register a plot for later saving / combined HTML export, then show it.

    Registering a name that already exists in the target registry replaces
    the earlier figure in place (keeping its position) instead of appending a
    duplicate. This keeps a re-executed plotting cell from listing the same
    plot twice in the combined export.

    Parameters
    ----------
    fig : plotly.graph_objects.Figure
        Figure to store; its ``show()`` method is called before returning.
    name : str
        A unique, descriptive name (used for filenames).
    model : str
        "L3" (case-insensitive) selects ALL_L3_FIGS; any other value selects
        ALL_L1_FIGS.
    """
    registry = ALL_L3_FIGS if model.upper() == "L3" else ALL_L1_FIGS
    entry = (name, fig)

    # Mutate the list in place so every reference to the registry stays valid.
    for pos, (existing_name, _) in enumerate(registry):
        if existing_name == name:
            registry[pos] = entry
            break
    else:
        registry.append(entry)

    fig.show()


def ensure_dir(path):
    """Make sure `path` exists as a directory, creating missing parents."""
    already_there = os.path.isdir(path)
    if not already_there:
        # exist_ok guards against another process creating it in between.
        os.makedirs(path, exist_ok=True)


def save_plot_html(fig, filename, output_dir="plots"):
    """
    Write one Plotly figure to a standalone HTML page.

    Parameters
    ----------
    fig : plotly.graph_objects.Figure
        Figure to export.
    filename : str
        Bare file name, no directory part (e.g. 'daily_error_L1.html').
    output_dir : str
        Destination directory; created if it does not exist.
    """
    ensure_dir(output_dir)
    target = os.path.join(output_dir, filename)
    # "cdn": the page references plotly.js from the CDN instead of embedding it.
    fig.write_html(target, include_plotlyjs="cdn", full_html=True)
    print("Saved: {}".format(target))


def save_all_plots_individual(output_dir="plots"):
    """
    Export every registered figure to its own HTML file.

    Files are named ``<model>_<registered name>.html`` where the model prefix
    is "L1" or "L3" depending on the registry the figure lives in. L1 figures
    are written first, then L3 figures.
    """
    ensure_dir(output_dir)

    registries = (("L1", ALL_L1_FIGS), ("L3", ALL_L3_FIGS))
    for prefix, registry in registries:
        for name, fig in registry:
            save_plot_html(fig, f"{prefix}_{name}.html", output_dir=output_dir)


def save_combined_html(fig_list, output_file, title="Combined Plots"):
    """
    Combine multiple Plotly figures into a single HTML file.

    The page declares UTF-8 (the encoding the file is written in) so that
    non-ASCII characters in headings and titles render correctly, and the
    document title and per-figure headings are HTML-escaped so names
    containing ``<``, ``>`` or ``&`` cannot break the markup.

    Parameters
    ----------
    fig_list : list of (name, fig)
        Figures in the order they should appear; each gets a numbered
        ``<h2>`` heading followed by the plot and a horizontal rule.
    output_file : str
        Path to output HTML file. Its parent directory is created if needed.
    title : str
        Title of the combined HTML document.
    """
    # Imported locally because this function's original version used `html`
    # as a local variable name; only `escape` is needed.
    from html import escape

    ensure_dir(os.path.dirname(output_file) or ".")

    parts = [
        "<!DOCTYPE html>",
        '<html><head><meta charset="utf-8">',
        f"<title>{escape(str(title))}</title></head><body>",
    ]

    for i, (name, fig) in enumerate(fig_list, start=1):
        parts.append(f"<h2>{i}. {escape(str(name))}</h2>")
        fragment = pio.to_html(
            fig,
            include_plotlyjs=(i == 1),  # embed plotly.js only with the first figure
            full_html=False
        )
        parts.append(fragment)
        parts.append("<hr>")

    parts.append("</body></html>")

    with open(output_file, "w", encoding="utf-8") as f:
        f.write("".join(parts))

    print(f"Combined HTML saved to: {output_file}")


def save_combined_L1_L3(output_dir="plots"):
    """
    Write two combined HTML reports into `output_dir`: one holding every
    registered L1 figure and one holding every registered L3 figure.
    """
    ensure_dir(output_dir)

    # L1 report first, then L3.
    for label, registry in (("L1", ALL_L1_FIGS), ("L3", ALL_L3_FIGS)):
        report_path = os.path.join(output_dir, f"{label}_all_plots_combined.html")
        save_combined_html(registry, output_file=report_path, title=f"All {label} Plots")


In [None]:
# Cell 1: Read Excel and parse time, compute alpha_L2, alpha_L1, and errors

time_col = "Time"       # adjust if different
actual_col = "Actual"   # adjust if different
pred_col = "Predicted"  # adjust if different
sheet_name = 0          # or sheet name string

# Load the workbook; the path is relative to the current working directory.
df = pd.read_excel('alpha_prediction_metal.xlsx', sheet_name=sheet_name, engine='openpyxl')

# Parse time column to datetime (adjust format if needed)
df[time_col] = pd.to_datetime(df[time_col])

# Keep only the three needed columns and drop rows where any of them is missing
df = df[[time_col, actual_col, pred_col]].dropna(subset=[time_col, actual_col, pred_col])

# Sort by time
df = df.sort_values(time_col).reset_index(drop=True)

# Handle duplicate timestamps before setting index
# (keeps the first occurrence in the sorted order)
df = df.drop_duplicates(subset=[time_col], keep='first')

# Set datetime index
df = df.set_index(time_col)

# OPTIONAL: reindex to a continuous hourly index (fills missing hours with NaN)
# NOTE(review): reindex() keeps only labels present in full_idx, so any
# timestamp that is not exactly on the hour would be dropped here — confirm
# the source data is strictly hourly.
# NOTE: the `> 1` filter at the end of this block removes every NaN row again,
# so the hours inserted here do not survive into the later cells.
start = df.index.min().floor('h')
end   = df.index.max().ceil('h')
full_idx = pd.date_range(start=start, end=end, freq='h')
df = df.reindex(full_idx)

# If any numeric columns have NaN due to missing hours, you can:
# 1) leave them as NaN (some diagnostics will skip), or
# 2) interpolate: df[actual_col] = df[actual_col].interpolate(limit=6)
# Choose based on whether interpolation is appropriate.
# errors='coerce' turns non-numeric cells into NaN instead of raising.
df[actual_col] = pd.to_numeric(df[actual_col], errors='coerce')
df[pred_col]   = pd.to_numeric(df[pred_col], errors='coerce')

# Keep only rows whose Actual is strictly greater than 1. Rows with a NaN
# Actual fail the comparison and are dropped as well. Predicted is NOT checked
# here, so rows with Predicted == 0 or NaN can remain.
# NOTE(review): the threshold 1 is a magic number — confirm it is intended.
df = df[(df[actual_col] > 1)]

print("Data range:", df.index.min(), "to", df.index.max())
print("Rows:", len(df))
# NOTE: this is not the last expression of the cell, so under Jupyter's default
# display settings the preview is not rendered; wrap it in display(...) to see it.
df.head()


# -------------------------------------------------------------------
# Alpha estimation helpers (from your notebook, cleaned)
# -------------------------------------------------------------------
def alpha_l2(actual, pred):
    """
    Least-squares scale factor between two arrays.

    Minimises ||a - αp||_2^2 over the scalar α, which gives
    α = Σ(a_i p_i) / Σ(p_i^2). Pairs where either value is NaN are ignored.
    Returns NaN when Σ(p_i^2) is zero (including when no pair is usable).
    """
    usable = ~(np.isnan(actual) | np.isnan(pred))
    a_ok, p_ok = actual[usable], pred[usable]

    sum_pp = np.sum(p_ok ** 2)
    if sum_pp == 0:
        return np.nan

    sum_ap = np.sum(a_ok * p_ok)
    return sum_ap / sum_pp


def alpha_l1_weighted_median(actual, pred):
    """
    Least-absolute-deviation scale factor between two arrays.

    Computed as the weighted median of the ratios a_i / p_i with weights
    |p_i|. Pairs containing NaN or with p_i == 0 are skipped; NaN is returned
    when no pair remains.
    """
    usable = ~(np.isnan(actual) | np.isnan(pred) | (pred == 0))
    a_ok = actual[usable]
    p_ok = pred[usable]
    if len(a_ok) == 0:
        return np.nan

    # Rank the ratios, carrying each ratio's weight along.
    rank = np.argsort(a_ok / p_ok)
    ratio_ranked = (a_ok / p_ok)[rank]
    weight_ranked = np.abs(p_ok)[rank]

    # First ratio at which the running weight reaches half of the total.
    running_weight = np.cumsum(weight_ranked)
    half_total = 0.5 * np.sum(weight_ranked)
    return ratio_ranked[np.searchsorted(running_weight, half_total)]


# Compute scalar alphas
# Both factors are fitted on every row of df and then evaluated on the same
# rows below, so the reported MAE / RMSE are in-sample figures.
alpha_L2 = alpha_l2(df[actual_col].values, df[pred_col].values)
alpha_L1 = alpha_l1_weighted_median(df[actual_col].values, df[pred_col].values)

print(f"alpha_L2 = {alpha_L2:.6f}")
print(f"alpha_L1 = {alpha_L1:.6f}")

# Scaled predictions
df["ScaledPred_L2"] = df[pred_col] * alpha_L2
df["ScaledPred_L1"] = df[pred_col] * alpha_L1

# L1 error terms
# Error is signed (Actual - scaled prediction); FracError is the error relative
# to Actual. Actual is > 1 for every row after the filter in the loading step,
# so the division cannot hit zero.
df["Error_L1"] = df[actual_col] - df["ScaledPred_L1"]
df["AbsError_L1"] = df["Error_L1"].abs()
df["FracError_L1"] = df["Error_L1"] / df[actual_col]

# L2 error terms (if needed)
df["Error_L2"] = df[actual_col] - df["ScaledPred_L2"]
df["AbsError_L2"] = df["Error_L2"].abs()
df["FracError_L2"] = df["Error_L2"] / df[actual_col]

# Summary metrics; both .mean() and np.nanmean skip rows whose error is NaN
# (e.g. rows with a NaN prediction).
print("Actual Mean:", df[actual_col].mean())
print("L1: MAE =", df["AbsError_L1"].mean(),
      "RMSE =", np.sqrt(np.nanmean(df["Error_L1"] ** 2)))
print("L2: MAE =", df["AbsError_L2"].mean(),
      "RMSE =", np.sqrt(np.nanmean(df["Error_L2"] ** 2)))


Data range: 2025-04-01 07:00:00 to 2025-07-31 18:00:00
Rows: 1528
alpha_L2 = 0.663779
alpha_L1 = 0.732446
Actual Mean: 88.03475130890052
L1: MAE = 29.93681850354622 RMSE = 44.507209052341246
L2: MAE = 30.90377731711452 RMSE = 43.17680127801914


In [None]:
## Hourly
import numpy as np
import pandas as pd

# ---------- helpers ----------
def weighted_median_ratio(a, p):
    """
    Weighted median of the ratios a/p, each ratio weighted by |p|.

    Pairs with a NaN on either side or with p == 0 are discarded. The result
    is a plain Python float; np.nan is returned when nothing is left.
    """
    keep = ~(np.isnan(a) | np.isnan(p) | (p == 0))
    a_valid, p_valid = a[keep], p[keep]
    if len(a_valid) == 0:
        return np.nan

    ratios = a_valid / p_valid
    weights = np.abs(p_valid)

    order = np.argsort(ratios)
    ratios_sorted, weights_sorted = ratios[order], weights[order]

    # Position where the running weight first reaches half of the total.
    cumw = np.cumsum(weights_sorted)
    pos = np.searchsorted(cumw, cumw[-1] / 2.0)

    # Clamp to a valid index before reading the ratio.
    last = len(ratios_sorted) - 1
    pos = min(max(pos, 0), last)
    return float(ratios_sorted[pos])

# ---------- prepare datetime / hour ----------
# If your df index is DatetimeIndex, use that; otherwise convert timestamp column.
if isinstance(df.index, pd.DatetimeIndex):
    # Rebind df to a copy before adding columns.
    # NOTE(review): presumably this avoids chained-assignment warnings on a
    # frame that came out of a boolean filter — confirm.
    df = df.copy()  # avoid modifying original if needed
    df['hour'] = df.index.hour
else:
    # replace 'timestamp' with your datetime column name if needed
    if 'timestamp' in df.columns:
        df['hour'] = pd.to_datetime(df['timestamp']).dt.hour
    else:
        raise ValueError("Dataframe has no DatetimeIndex and no 'timestamp' column. Add one or set index = pd.to_datetime(...).")

# ---------- global (fallback) alphas (optional) ----------
# Raw arrays; only used by the L2 fallback below.
actual = df[actual_col].values
pred   = df[pred_col].values

# global L1 alpha (weighted median of Actual/Predicted over all rows)
global_alpha_l1 = weighted_median_ratio(df[actual_col].values, df[pred_col].values)

# a small safety: if the global L1 alpha is NaN, fall back to the L2 alpha
# (no other fallback is implemented here)
if np.isnan(global_alpha_l1):
    # L2 fallback
    # NOTE(review): when this branch runs, this definition rebinds the
    # module-level name alpha_l2 that Cell 1 also defines; the arithmetic is
    # the same least-squares ratio.
    def alpha_l2(a, p):
        mask = (~np.isnan(a)) & (~np.isnan(p))
        a_ = a[mask]; p_ = p[mask]
        denom = np.sum(p_**2)
        if denom == 0:
            return np.nan
        return np.sum(a_ * p_) / denom
    global_alpha_l1 = alpha_l2(actual, pred)
    print("Global L1 was NaN; using L2 fallback alpha:", global_alpha_l1)

# ---------- hourly alphas ----------
# Minimum number of valid rows per hour to trust the hourly alpha
# (hours with fewer valid rows get NaN from compute_hour_alpha)
MIN_SAMPLES_PER_HOUR = 10

def compute_hour_alpha(group):
    """
    Estimate the L1 alpha for the rows of a single hour of day.

    Returns a tuple ``(alpha, n_valid)``. ``n_valid`` counts rows where both
    values are present and the prediction is non-zero. ``alpha`` is NaN when
    fewer than MIN_SAMPLES_PER_HOUR valid rows exist or when the weighted
    median itself is NaN.
    """
    actual_vals = group[actual_col].values
    pred_vals = group[pred_col].values

    alpha = weighted_median_ratio(actual_vals, pred_vals)

    usable = ~np.isnan(actual_vals) & ~np.isnan(pred_vals) & (pred_vals != 0)
    n_valid = np.sum(usable)

    trusted = n_valid >= MIN_SAMPLES_PER_HOUR and not np.isnan(alpha)
    if trusted:
        return float(alpha), int(n_valid)
    return np.nan, int(n_valid)

# apply groupby: one alpha and one valid-row count per hour present in df
hour_results = {}
hour_counts = {}
for h, group in df.groupby('hour'):
    alpha_h, cnt = compute_hour_alpha(group)
    hour_results[h] = alpha_h
    hour_counts[h] = cnt

hour_alpha = pd.Series(hour_results).sort_index()   # index 0..23 (some may be missing)
# NOTE: hour_counts is rebound here from a dict to a Series.
hour_counts = pd.Series(hour_counts).sort_index()

# fill missing hours (0..23) with NaN
# (start from an all-NaN 24-slot Series and copy the computed alphas into it)
all_hours = pd.Series(index=np.arange(24), dtype=float)
all_hours.update(hour_alpha)
hour_alpha = all_hours

# fallback: where hourly alpha is NaN, use global_alpha_l1
hour_alpha_filled = hour_alpha.fillna(global_alpha_l1)

# map back to df
df['alpha_hourly_L1'] = df['hour'].map(hour_alpha_filled)

# ---------- scaled preds and errors ----------
df['ScaledPred_hourly_L1'] = df[pred_col] * df['alpha_hourly_L1']
df['Error_hourly_L1'] = df[actual_col] - df['ScaledPred_hourly_L1']
df['AbsError_hourly_L1'] = df['Error_hourly_L1'].abs()

# overall and per-hour MAE
# The MAE averages over every row of the hour, while the valid-sample count
# excludes rows with Predicted == 0. An hour can therefore show samples=0 and
# still have a MAE (hour 06 in the captured output).
overall_mae = df['AbsError_hourly_L1'].mean()
hourly_mae = df.groupby('hour')['AbsError_hourly_L1'].mean().reindex(np.arange(24))

# print summary
print(f"Global fallback alpha (L1): {global_alpha_l1:.6f}")
print("\nHour | Alpha(L1) | ValidSamples | Hourly MAE")
for h in range(24):
    a = hour_alpha.loc[h]
    a_f = hour_alpha_filled.loc[h]
    cnt = hour_counts.get(h, 0)
    mae_h = hourly_mae.loc[h] if not np.isnan(hourly_mae.loc[h]) else np.nan
    # NOTE: `:>9` only sets a minimum width; the raw alpha and the MAE are
    # printed at full float precision, so the columns do not line up.
    print(f"{h:02d}   {a if (not np.isnan(a)) else '   NaN':>9}  -> used {a_f:.6f}   samples={cnt:3d}    MAE={mae_h if not np.isnan(mae_h) else 'NaN'}")

print(f"\nOverall MAE using hourly-L1 alphas = {overall_mae:.6f}")

# optional: show table of hour alphas and counts
# (this table is also read by the later L3 cell through its 'alpha_used' column)
hour_alpha_table = pd.DataFrame({
    'alpha_raw': hour_alpha,
    'alpha_used': hour_alpha_filled,
    'valid_count': hour_counts.reindex(np.arange(24)).fillna(0).astype(int),
    'hourly_mae': hourly_mae
})
display(hour_alpha_table)

# If you want to export per-row scaled predictions:
# df[['ScaledPred_hourly_L1','Error_hourly_L1','AbsError_hourly_L1']].to_csv('scaled_preds_hourly.csv', index=True)


Global fallback alpha (L1): 0.732446

Hour | Alpha(L1) | ValidSamples | Hourly MAE
00         NaN  -> used 0.732446   samples=  0    MAE=NaN
01         NaN  -> used 0.732446   samples=  0    MAE=NaN
02         NaN  -> used 0.732446   samples=  0    MAE=NaN
03         NaN  -> used 0.732446   samples=  0    MAE=NaN
04         NaN  -> used 0.732446   samples=  0    MAE=NaN
05         NaN  -> used 0.732446   samples=  0    MAE=NaN
06         NaN  -> used 0.732446   samples=  0    MAE=1.791086956521739
07   0.803920343555145  -> used 0.803920   samples=106    MAE=5.600242364108181
08   0.5728918869799947  -> used 0.572892   samples=111    MAE=13.903251117817467
09   0.6559215405447842  -> used 0.655922   samples=114    MAE=20.96831056064342
10   0.7118885151577543  -> used 0.711889   samples=117    MAE=29.24696810961231
11   0.73479298341201  -> used 0.734793   samples=116    MAE=33.86566972847047
12   0.7464119595357956  -> used 0.746412   samples=117    MAE=44.18118108510617
13   0.748971

Unnamed: 0,alpha_raw,alpha_used,valid_count,hourly_mae
0,,0.732446,0,
1,,0.732446,0,
2,,0.732446,0,
3,,0.732446,0,
4,,0.732446,0,
5,,0.732446,0,
6,,0.732446,0,1.791087
7,0.80392,0.80392,106,5.600242
8,0.572892,0.572892,111,13.903251
9,0.655922,0.655922,114,20.968311


In [None]:
# Cell 2: Build L3 (hourly alpha) predictions and errors

# Create hour column (0–23)
df["hour"] = df.index.hour

# Per-hour alpha lookup: the 'alpha_used' column of hour_alpha_table built in
# the hourly cell, i.e. the hourly L1 alpha with the global L1 alpha filled in
# for hours that had too few valid samples.
# This cell therefore requires the hourly cell to have been run first.
# NOTE: because the same mapping is applied, Pred_L3 below matches the
# ScaledPred_hourly_L1 column from the hourly cell.
hour_alpha_filled = hour_alpha_table['alpha_used']

df["alpha_L3"] = df["hour"].map(hour_alpha_filled)

# L3 prediction and errors
df["Pred_L3"] = df[pred_col] * df["alpha_L3"]
df["Error_L3"] = df[actual_col] - df["Pred_L3"]
df["AbsError_L3"] = df["Error_L3"].abs()
df["FracError_L3"] = df["Error_L3"] / df[actual_col]

print("L3: MAE =", df["AbsError_L3"].mean(),
      "RMSE =", np.sqrt(np.nanmean(df["Error_L3"] ** 2)))


L3: MAE = 28.889906119736086 RMSE = 44.52192120643279


In [None]:
# Cell 3: Hourly analysis – ONLY these 2 plots (for L1 & L3)

def plot_hourly_mean_std_error(df, error_col, model_label="L1"):
    """
    Plot the mean of `error_col` per hour of day with ±1 standard deviation
    error bars.

    `df` must have a DatetimeIndex; the hour is taken from the index and the
    input frame is not modified. Returns the Plotly figure.
    """
    # Group the error column directly by the index hour.
    hour_stats = (
        df[error_col]
        .groupby(df.index.hour)
        .agg(["mean", "std"])
        .rename_axis("hour")
        .reset_index()
    )

    mean_std_trace = go.Scatter(
        x=hour_stats["hour"],
        y=hour_stats["mean"],
        error_y={"type": "data", "array": hour_stats["std"]},
        mode="markers+lines",
        name=f"Mean ± Std ({model_label})",
    )

    fig = go.Figure()
    fig.add_trace(mean_std_trace)
    fig.update_layout(
        title=f"Mean ± STD of Error by Hour of Day ({model_label})",
        xaxis_title="Hour of Day (0–23)",
        yaxis_title="Error (Actual - Pred)",
        template="plotly_white",
    )
    return fig


def plot_hourly_actual_vs_pred(df, pred_col, model_label="L1"):
    """
    Line plot of the actual series and one predicted series over time.

    The actual series is read from the module-level `actual_col` column; the
    prediction comes from `pred_col`. Returns the Plotly figure.
    """
    series_specs = (
        (actual_col, "Actual"),
        (pred_col, f"Predicted ({model_label})"),
    )

    fig = go.Figure()
    for column, trace_name in series_specs:
        fig.add_trace(go.Scatter(x=df.index, y=df[column], mode="lines", name=trace_name))

    layout_options = {
        "title": f"Time Series – Actual vs Predicted ({model_label})",
        "xaxis_title": "Time",
        "yaxis_title": "Value",
        "hovermode": "x unified",
        "template": "plotly_white",
    }
    fig.update_layout(**layout_options)
    return fig


# Each figure is built, then handed to register_fig, which stores it under the
# given name in the L1 or L3 registry and displays it.

# ---- L1 hourly plots ----
# L1 = global weighted-median alpha (columns Error_L1 / ScaledPred_L1)
fig_hour_L1 = plot_hourly_mean_std_error(df, "Error_L1", model_label="L1")
register_fig(fig_hour_L1, "Hourly_Mean_Std_Error", model="L1")

fig_ts_L1 = plot_hourly_actual_vs_pred(df, "ScaledPred_L1", model_label="L1")
register_fig(fig_ts_L1, "Hourly_TimeSeries_Actual_vs_Pred", model="L1")

# ---- L3 hourly plots ----
# L3 = per-hour alpha (columns Error_L3 / Pred_L3)
fig_hour_L3 = plot_hourly_mean_std_error(df, "Error_L3", model_label="L3")
register_fig(fig_hour_L3, "Hourly_Mean_Std_Error", model="L3")

fig_ts_L3 = plot_hourly_actual_vs_pred(df, "Pred_L3", model_label="L3")
register_fig(fig_ts_L3, "Hourly_TimeSeries_Actual_vs_Pred", model="L3")


In [None]:
# Cell 4: Daily analysis (Actual vs Pred, Error, %Error) for L1 & L3
# - Drops outliers where |%Error| > 100% (the cutoff the code actually applies)
# - Applies that outlier filter to Signed_Error, Abs_Error, Perc_Error
# - No dots in plots
# - Daily Actual vs Pred curves are smoothed (spline)

def daily_weekly_aggregates(df, pred_col, model_label="L1", freq="D",
                            outlier_threshold=100.0):
    """
    Sum actual and predicted values per period and derive error columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a DatetimeIndex (needed by ``resample``) that contains the
        module-level ``actual_col`` column and ``pred_col``.
    pred_col : str
        Name of the prediction column to aggregate.
    model_label : str
        Not used by the computation; kept so existing calls keep working.
    freq : str
        Resample frequency: "D" -> daily, "W" -> weekly.
    outlier_threshold : float or None
        Periods whose |Perc_Error| is strictly greater than this value (in
        percent) are treated as outliers: their Signed_Error, Abs_Error and
        Perc_Error are set to NaN. The default of 100 is the cutoff this
        function has always applied (the surrounding notes mention 75 %; pass
        ``outlier_threshold=75`` for that). ``None`` disables the filter.

    Returns
    -------
    pandas.DataFrame
        One row per period with the summed actual and predicted columns plus
        ``Signed_Error`` (actual - predicted), ``Abs_Error`` and
        ``Perc_Error`` (signed error as a percentage of actual).
    """
    # min_count=1: a period without a single observation sums to NaN rather
    # than 0. Otherwise an empty day/week would show up as actual = pred = 0,
    # i.e. as a perfect prediction, and drag the mean errors towards zero.
    agg = df[[actual_col, pred_col]].resample(freq).sum(min_count=1)

    # Signed and absolute error
    agg["Signed_Error"] = agg[actual_col] - agg[pred_col]
    agg["Abs_Error"] = agg["Signed_Error"].abs()

    # Percentage error (%). It is undefined when the aggregated actual is 0,
    # so those periods get NaN instead of a misleading 0 %.
    nonzero_actual = agg[actual_col].where(agg[actual_col] != 0)
    agg["Perc_Error"] = (agg["Signed_Error"] / nonzero_actual) * 100

    # Outlier handling: blank out the error columns (NaN) so the points drop
    # out of the plots and of the summary means.
    if outlier_threshold is not None:
        outlier_mask = agg["Perc_Error"].abs() > outlier_threshold
        agg.loc[outlier_mask, ["Signed_Error", "Abs_Error", "Perc_Error"]] = np.nan

    return agg


def plot_agg_actual_vs_pred(agg, pred_col, model_label, freq_label):
    """
    Aggregated actual vs predicted curves, drawn as spline-smoothed lines
    without markers. `freq_label` (e.g. "Daily") is used in the titles.
    """
    series_specs = (
        (actual_col, "Actual"),
        (pred_col, f"Predicted ({model_label})"),
    )

    fig = go.Figure()
    for column, trace_name in series_specs:
        fig.add_trace(go.Scatter(
            x=agg.index,
            y=agg[column],
            mode="lines",               # lines only
            name=trace_name,
            line={"shape": "spline"},   # smoothed curve
        ))

    layout_options = {
        "title": f"{freq_label} Actual vs Predicted ({model_label})",
        "xaxis_title": f"{freq_label} Date",
        "yaxis_title": "Value",
        "hovermode": "x unified",
        "template": "plotly_white",
    }
    fig.update_layout(**layout_options)
    return fig


def plot_agg_error(agg, model_label, freq_label):
    """
    Signed error (Actual - Pred) per period as a plain line with a dashed
    zero reference. NaN entries in `agg["Signed_Error"]` (e.g. outliers
    blanked upstream) are passed through unchanged.
    """
    error_trace = go.Scatter(
        x=agg.index,
        y=agg["Signed_Error"],
        mode="lines",
        name=f"Signed Error ({model_label})",
    )

    fig = go.Figure()
    fig.add_trace(error_trace)
    fig.add_hline(y=0, line_dash="dash", annotation_text="Zero Line")

    layout_options = {
        "title": f"{freq_label} Signed Error (Actual - Pred) ({model_label})",
        "xaxis_title": f"{freq_label} Date",
        "yaxis_title": "Error",
        "hovermode": "x unified",
        "template": "plotly_white",
    }
    fig.update_layout(**layout_options)
    return fig


def plot_agg_perc_error(agg, model_label, freq_label):
    """
    Percentage error per period as a plain line with a dashed zero reference.
    Outlier periods are expected to be blanked (NaN) before this is called;
    no filtering happens here.
    """
    perc_trace = go.Scatter(
        x=agg.index,
        y=agg["Perc_Error"],
        mode="lines",
        name=f"% Error ({model_label})",
    )

    fig = go.Figure()
    fig.add_trace(perc_trace)
    fig.add_hline(y=0, line_dash="dash", annotation_text="Zero Line")

    layout_options = {
        "title": f"{freq_label} Percentage Error (Actual - Pred, %) ({model_label})",
        "xaxis_title": f"{freq_label} Date",
        "yaxis_title": "Percentage Error (%)",
        "hovermode": "x unified",
        "template": "plotly_white",
    }
    fig.update_layout(**layout_options)
    return fig


# ---------- DAILY (freq="D") for L1 ----------
# Aggregate hourly rows to daily sums, then build and register three figures.
daily_L1 = daily_weekly_aggregates(df, "ScaledPred_L1", "L1", freq="D")

fig_daily_L1_actual_pred = plot_agg_actual_vs_pred(
    daily_L1, "ScaledPred_L1", "L1", "Daily"
)
register_fig(fig_daily_L1_actual_pred, "Daily_Actual_vs_Pred", model="L1")

fig_daily_L1_err = plot_agg_error(daily_L1, "L1", "Daily")
register_fig(fig_daily_L1_err, "Daily_Signed_Error", model="L1")

fig_daily_L1_perc = plot_agg_perc_error(daily_L1, "L1", "Daily")
register_fig(fig_daily_L1_perc, "Daily_Percentage_Error", model="L1")

# "Mean % Error" is the mean of the SIGNED percentage error (a bias measure),
# not a mean absolute percentage error. Periods flagged as outliers are NaN
# and are skipped by .mean().
print("L1 Daily Mean ABS Error:", daily_L1["Abs_Error"].mean())
print("L1 Daily Mean % Error:", daily_L1["Perc_Error"].mean())


# ---------- DAILY (freq="D") for L3 ----------
# Same steps for the per-hour-alpha prediction column.
daily_L3 = daily_weekly_aggregates(df, "Pred_L3", "L3", freq="D")

fig_daily_L3_actual_pred = plot_agg_actual_vs_pred(
    daily_L3, "Pred_L3", "L3", "Daily"
)
register_fig(fig_daily_L3_actual_pred, "Daily_Actual_vs_Pred", model="L3")

fig_daily_L3_err = plot_agg_error(daily_L3, "L3", "Daily")
register_fig(fig_daily_L3_err, "Daily_Signed_Error", model="L3")

fig_daily_L3_perc = plot_agg_perc_error(daily_L3, "L3", "Daily")
register_fig(fig_daily_L3_perc, "Daily_Percentage_Error", model="L3")

print("L3 Daily Mean ABS Error:", daily_L3["Abs_Error"].mean())
print("L3 Daily Mean % Error:", daily_L3["Perc_Error"].mean())


L1 Daily Mean ABS Error: 141.8075130221354
L1 Daily Mean % Error: -7.031389166039728


L3 Daily Mean ABS Error: 148.64879077333262
L3 Daily Mean % Error: -9.423249333210006


In [None]:
# Cell 5: Weekly analysis (Actual vs Pred, Error, %Error) for L1 & L3
# Uses the same helpers above (daily_weekly_aggregates, plot_agg_*)

# ---------- WEEKLY (freq="W") for L1 ----------
# Same pipeline as the daily cell, aggregated with freq="W".
# NOTE(review): pandas' "W" alias bins by weeks ending on Sunday, so the first
# and last bins may cover partial weeks — confirm this is acceptable.
weekly_L1 = daily_weekly_aggregates(df, "ScaledPred_L1", "L1", freq="W")

fig_weekly_L1_actual_pred = plot_agg_actual_vs_pred(
    weekly_L1, "ScaledPred_L1", "L1", "Weekly"
)
register_fig(fig_weekly_L1_actual_pred, "Weekly_Actual_vs_Pred", model="L1")

fig_weekly_L1_err = plot_agg_error(weekly_L1, "L1", "Weekly")
register_fig(fig_weekly_L1_err, "Weekly_Signed_Error", model="L1")

fig_weekly_L1_perc = plot_agg_perc_error(weekly_L1, "L1", "Weekly")
register_fig(fig_weekly_L1_perc, "Weekly_Percentage_Error", model="L1")

# Means skip NaN entries; "Mean % Error" is the mean signed percentage error.
print("L1 Weekly Mean ABS Error:", weekly_L1["Abs_Error"].mean())
print("L1 Weekly Mean % Error:", weekly_L1["Perc_Error"].mean())


# ---------- WEEKLY (freq="W") for L3 ----------
weekly_L3 = daily_weekly_aggregates(df, "Pred_L3", "L3", freq="W")

fig_weekly_L3_actual_pred = plot_agg_actual_vs_pred(
    weekly_L3, "Pred_L3", "L3", "Weekly"
)
register_fig(fig_weekly_L3_actual_pred, "Weekly_Actual_vs_Pred", model="L3")

fig_weekly_L3_err = plot_agg_error(weekly_L3, "L3", "Weekly")
register_fig(fig_weekly_L3_err, "Weekly_Signed_Error", model="L3")

fig_weekly_L3_perc = plot_agg_perc_error(weekly_L3, "L3", "Weekly")
register_fig(fig_weekly_L3_perc, "Weekly_Percentage_Error", model="L3")

print("L3 Weekly Mean ABS Error:", weekly_L3["Abs_Error"].mean())
print("L3 Weekly Mean % Error:", weekly_L3["Perc_Error"].mean())


L1 Weekly Mean ABS Error: 695.7550407233512
L1 Weekly Mean % Error: -8.035165560069226


L3 Weekly Mean ABS Error: 771.8309854037483
L3 Weekly Mean % Error: -9.759922488978114


In [None]:
# Cell 6: Save all plots

# Both calls write into ./plots (relative to the working directory) and export
# whatever is currently held in the L1 / L3 figure registries.

# 1. Save individual HTMLs, one per registered figure,
#    named L1_<name>.html / L3_<name>.html
save_all_plots_individual(output_dir="plots")

# 2. Save single combined HTML file for all L1 plots
#    and single combined HTML file for all L3 plots
#    (L1_all_plots_combined.html and L3_all_plots_combined.html)
save_combined_L1_L3(output_dir="plots")


Saved: plots/L1_Hourly_Mean_Std_Error.html
Saved: plots/L1_Hourly_TimeSeries_Actual_vs_Pred.html
Saved: plots/L1_Daily_Actual_vs_Pred.html
Saved: plots/L1_Daily_Signed_Error.html
Saved: plots/L1_Daily_Percentage_Error.html
Saved: plots/L1_Weekly_Actual_vs_Pred.html
Saved: plots/L1_Weekly_Signed_Error.html
Saved: plots/L1_Weekly_Percentage_Error.html
Saved: plots/L3_Hourly_Mean_Std_Error.html
Saved: plots/L3_Hourly_TimeSeries_Actual_vs_Pred.html
Saved: plots/L3_Daily_Actual_vs_Pred.html
Saved: plots/L3_Daily_Signed_Error.html
Saved: plots/L3_Daily_Percentage_Error.html
Saved: plots/L3_Weekly_Actual_vs_Pred.html
Saved: plots/L3_Weekly_Signed_Error.html
Saved: plots/L3_Weekly_Percentage_Error.html
Combined HTML saved to: plots/L1_all_plots_combined.html
Combined HTML saved to: plots/L3_all_plots_combined.html
