Cell 1

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Make "plotly_white" the default Plotly template so figures created after
# this point share the same styling.
import plotly.io as pio
pio.templates.default = "plotly_white"

print("Libraries imported successfully!")

Cell 2

In [None]:
# --- CONFIGURATION ---
MODEL_TO_ANALYZE = 'price'
log_file = f'{MODEL_TO_ANALYZE}_forecast_log.csv'
# ---------------------

# Load the forecast log and prepare it in a single pipeline:
#   1. parse both timestamp columns (ISO 8601 strings),
#   2. index the frame by the time each forecast is *for*,
#   3. discard rows whose actual value has not been backfilled yet,
#   4. add the residual column: error = prediction - actual.
df = (
    pd.read_csv(log_file)
    .assign(
        forecast_target_time=lambda d: pd.to_datetime(d['forecast_target_time'], format='ISO8601'),
        forecast_creation_time=lambda d: pd.to_datetime(d['forecast_creation_time'], format='ISO8601'),
    )
    .set_index('forecast_target_time')
    .dropna(subset=['actual'])
    .assign(error=lambda d: d['prediction'] - d['actual'])
)

print(f"Loaded and prepared {len(df)} records for the '{MODEL_TO_ANALYZE}' model.")
df[['forecast_creation_time', 'prediction', 'actual', 'error']].head()

Cell 3

In [None]:
# Headline accuracy over every record that has a known actual value.
mae = mean_absolute_error(df['actual'], df['prediction'])
# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(df['actual'], df['prediction']))

# Emit the four-line summary with a single print call.
print(
    f"Overall Model Performance ({MODEL_TO_ANALYZE}):\n"
    "-----------------------------------\n"
    f"Mean Absolute Error (MAE):   {mae:.4f}\n"
    f"Root Mean Squared Error (RMSE): {rmse:.4f}"
)

Cell 4

In [None]:
# Two stacked panels sharing one time axis: forecast vs. actual on top,
# residuals underneath.
# NOTE(review): if the log holds several forecasts for the same target time
# (one per creation time), these line traces will double back on themselves --
# confirm the log has one row per target time or filter before plotting.
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    subplot_titles=(
        f'{MODEL_TO_ANALYZE.capitalize()} Forecast vs. Actual',
        'Prediction Error (Residuals)',
    ),
    vertical_spacing=0.1,
)

# Top panel: actual values (solid blue) against predictions (dotted red).
fig.add_trace(
    go.Scatter(
        x=df.index, y=df['actual'], name='Actual', mode='lines',
        line={'color': 'blue', 'width': 2},
    ),
    row=1, col=1,
)
fig.add_trace(
    go.Scatter(
        x=df.index, y=df['prediction'], name='Prediction', mode='lines',
        line={'color': 'red', 'dash': 'dot', 'width': 2},
    ),
    row=1, col=1,
)

# Bottom panel: residuals, with a dashed zero line as the "no error" reference.
fig.add_trace(
    go.Scatter(
        x=df.index, y=df['error'], name='Error', mode='lines',
        line={'color': 'purple', 'width': 1},
    ),
    row=2, col=1,
)
fig.add_hline(y=0, line_width=2, line_dash="dash", line_color="green", row=2, col=1)

fig.update_layout(height=600, title_text="Model Performance Over Time")
fig.show()

Cell 5

In [None]:
# Distribution of residuals (prediction - actual) across all records.
fig = px.histogram(
    df,
    x='error',
    nbins=100,
    title=f'Histogram of Prediction Errors for {MODEL_TO_ANALYZE.capitalize()} Model',
)
fig.update_layout(
    xaxis_title="Prediction Error (Prediction - Actual)",
    yaxis_title="Count",
)
fig.show()

Cell 6

In [None]:
# Tag every record with the hour of day of its target timestamp (the index).
df['hour'] = df.index.hour

# Mean absolute error per hour of day, returned as a two-column frame
# ('hour', 'error') so it can be passed straight to the bar chart.
hourly_error = df['error'].abs().groupby(df['hour']).mean().reset_index()

fig = px.bar(
    hourly_error,
    x='hour',
    y='error',
    title=f'Average Absolute Error by Hour of Day ({MODEL_TO_ANALYZE.capitalize()})',
)
fig.update_layout(xaxis_title="Hour of Day", yaxis_title="Mean Absolute Error")
fig.show()

Cell 7

In [None]:
# Per-model-version accuracy: MAE, RMSE and the number of records behind them.
#
# Only the two columns the metrics need are selected before `apply`, so the
# function never operates on the grouping column (pandas 2.2 deprecates that).
# The record count comes from `.size()` rather than being packed into the same
# float Series as the metrics, which keeps it an integer column.
version_groups = df.groupby('model_version')[['actual', 'prediction']]
performance_by_version = (
    version_groups
    .apply(
        lambda g: pd.Series({
            'MAE': mean_absolute_error(g['actual'], g['prediction']),
            'RMSE': np.sqrt(mean_squared_error(g['actual'], g['prediction'])),
        })
    )
    # `.size()` is indexed by model_version, so it aligns with the metrics rows.
    .assign(record_count=version_groups.size())
    .reset_index()
)

print("--- Model Performance by Version ---")
performance_by_version

Cell 8: Feature Engineering for Advanced Analysis

In [None]:
# --- Step 1: Feature Engineering (on the main DataFrame) ---

# Lead time of each forecast: target timestamp (the index) minus the moment
# the forecast was created.
delta = df.index - df['forecast_creation_time']

# Express the lead time in hours, rounded UP to the next half-hour step:
# count 30-minute (1800 s) steps, take the ceiling, then halve to get hours.
df['forecast_horizon_hours'] = np.ceil(delta.dt.total_seconds().div(1800)).div(2)

# Wall-clock time of the target timestamp (e.g., 14:30:00), without the date.
df['target_time_of_day'] = df.index.time

print("Feature engineering complete. 'forecast_horizon_hours' and 'target_time_of_day' are now in the main DataFrame.")


# --- Step 2: Define the SIMPLIFIED Analysis Function ---

def analyze_forecast_accuracy(df_prepared, prediction_col, actual_col, analysis_name):
    """
    Print MAE/RMSE and display two error heatmaps for one forecast/actual pair.

    Rows where either the prediction or the actual value is missing are
    ignored. If nothing is left, a message is printed and the function returns
    without plotting. Otherwise two heatmaps are shown, both laid out as
    target time of day (rows) by forecast horizon (columns):

    1. Error magnitude: mean absolute error per cell.
    2. Error bias: mean signed error (prediction - actual) per cell, on a
       diverging colour scale centred on zero.

    Parameters
    ----------
    df_prepared : pandas.DataFrame
        Must already contain 'forecast_horizon_hours' and 'target_time_of_day'
        in addition to `prediction_col` and `actual_col`. It is not modified.
    prediction_col : str
        Name of the column holding the forecast values.
    actual_col : str
        Name of the column holding the observed values.
    analysis_name : str
        Label used in the printed summary and in the figure titles.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If any of the required columns is missing from `df_prepared`.
    """
    # Work on just the columns this analysis needs; `dropna` returns a new
    # frame, so the caller's DataFrame is left untouched.
    cols_to_keep = [prediction_col, actual_col, 'forecast_horizon_hours', 'target_time_of_day']
    analysis_df = df_prepared[cols_to_keep].dropna(subset=[prediction_col, actual_col])

    if analysis_df.empty:
        print(f"\nNo data available for analysis: {analysis_name}. Skipping.")
        return

    # --- Metrics ---
    mae = mean_absolute_error(analysis_df[actual_col], analysis_df[prediction_col])
    rmse = np.sqrt(mean_squared_error(analysis_df[actual_col], analysis_df[prediction_col]))
    print(f"\n--- Performance Analysis for: {analysis_name} ---")
    print(f"MAE: {mae:.4f} | RMSE: {rmse:.4f}")

    # --- Error columns ---
    # Signed error drives the bias heatmap. The absolute error is precomputed
    # so the magnitude heatmap can use the built-in 'mean' aggregation instead
    # of a per-group Python lambda.
    analysis_df = analysis_df.assign(
        error=analysis_df[prediction_col] - analysis_df[actual_col],
        abs_error=lambda d: d['error'].abs(),
    )

    # --- Heatmap 1: Error Magnitude ---
    heatmap_mae = pd.pivot_table(analysis_df, values='abs_error', index='target_time_of_day',
                                 columns='forecast_horizon_hours', aggfunc='mean')
    fig_mae = px.imshow(heatmap_mae, labels=dict(x="Forecast Horizon (Hours)", y="Time of Day", color="MAE"),
                        title=f"<b>Error Magnitude (MAE) for {analysis_name}</b>", aspect='auto')
    fig_mae.update_yaxes(tickformat='%H:%M', autorange="reversed")
    fig_mae.show()

    # --- Heatmap 2: Error Bias ---
    # The string alias 'mean' replaces `np.mean`, which recent pandas versions
    # warn about when passed as an aggregation callable.
    heatmap_bias = pd.pivot_table(analysis_df, values='error', index='target_time_of_day',
                                  columns='forecast_horizon_hours', aggfunc='mean')
    fig_bias = px.imshow(heatmap_bias, labels=dict(x="Forecast Horizon (Hours)", y="Time of Day", color="Mean Error"),
                         title=f"<b>Error Bias (Mean Error) for {analysis_name}</b>", aspect='auto',
                         color_continuous_scale='RdBu_r', color_continuous_midpoint=0)
    fig_bias.update_yaxes(tickformat='%H:%M', autorange="reversed")
    fig_bias.show()

print("Analysis function is now defined and ready to use.")

Cell 9: Heatmaps

In [None]:
# --- Run Analysis for the Main Model ---
# The main 'df' DataFrame now has all the required columns from Cell 8.
analyze_forecast_accuracy(df, 'prediction', 'actual', f"{MODEL_TO_ANALYZE.capitalize()} Model")

# --- Run Analysis for the Input Covariates ---
# Each entry is (forecast column, actual column, display name).
covariate_analyses = [
    ('power_pv', 'power_pv_actual', "Solcast PV Forecast"),
    ('temperature_adelaide', 'temperature_adelaide_actual', "BOM Temperature Forecast"),
    ('humidity_adelaide', 'humidity_adelaide_actual', "BOM Humidity Forecast"),
    ('wind_speed_adelaide', 'wind_speed_adelaide_actual', "BOM Wind Speed Forecast"),
]
for forecast_col, observed_col, display_name in covariate_analyses:
    analyze_forecast_accuracy(df, forecast_col, observed_col, display_name)