# Naive Forecasting

A naive forecast uses simple rules to generate predictions. It is the **most important baseline** in time series. If your model does not beat this, stop and rethink.

---

## Mathematical Foundation

### Naive (Last Value) Forecast
The simplest forecast uses the last observed value:

$$\hat{y}_{T+h} = y_T \quad \text{for all } h$$

### Seasonal Naive Forecast
For seasonal data, use the value from the same season in the previous cycle:

$$\hat{y}_{T+h} = y_{T+h-km}$$

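where $m$ is the seasonal period and $k = \lceil h/m \rceil$ is the number of whole seasonal cycles to step back so that the index $T+h-km$ falls inside the observed data. For example, with monthly data ($m = 12$) the forecast for every future February is the last observed February.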

### Drift Method
Extends the naive method by allowing the forecast to increase/decrease over time at a constant rate (drift):

$$\hat{y}_{T+h} = y_T + h \cdot \frac{y_T - y_1}{T-1}$$

### Prediction Intervals
For naive forecasts, assuming the one-step residuals are uncorrelated and normally distributed, the $h$-step-ahead prediction interval is:

$$\hat{y}_{T+h} \pm z_{\alpha/2} \cdot \hat{\sigma}\sqrt{h}$$

where $\hat{\sigma}$ is the standard deviation of the one-step naive residuals $y_t - y_{t-1}$ and $z_{\alpha/2}$ is the standard normal critical value (e.g., 1.96 for a 95% prediction interval).

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sktime.datasets import load_airline
from sktime.forecasting.model_selection import temporal_train_test_split

# Reproducibility
np.random.seed(42)

# Monthly airline passenger counts used throughout the notebook
y = load_airline()
y.name = "Passengers"

# Hold out the final 24 observations right away: the NumPy test, comparison
# and fan-chart cells below read y_train / y_test before the dedicated
# train/test split section is reached, so defining them here keeps the
# notebook runnable from top to bottom.
y_train, y_test = temporal_train_test_split(y, test_size=24)



---

## Low-Level NumPy Implementation

Pure NumPy implementations for understanding the core algorithms.

In [None]:
def naive_forecast(y: np.ndarray, h: int) -> np.ndarray:
    """
    Produce a flat forecast by carrying the final observation forward.

    Parameters
    ----------
    y : np.ndarray
        Observed series, ordered oldest to newest
    h : int
        Number of future steps to forecast

    Returns
    -------
    np.ndarray
        Array of length h in which every entry equals y[-1]
    """
    last_observed = y[-1]
    return np.full(shape=h, fill_value=last_observed)


def seasonal_naive_forecast(y: np.ndarray, h: int, period: int) -> np.ndarray:
    """
    Seasonal naive forecast: tile the last observed seasonal cycle.

    The forecast for step i (0-based) is the observation at position
    ``i % period`` within the final ``period`` observations of ``y``.

    Parameters
    ----------
    y : np.ndarray
        Historical time series values
    h : int
        Forecast horizon
    period : int
        Seasonal period (e.g., 12 for monthly data with yearly seasonality)

    Returns
    -------
    np.ndarray
        Forecasted values of length h (float dtype)

    Raises
    ------
    ValueError
        If period < 1, h < 0, or y holds fewer than ``period`` observations
        (a full seasonal cycle is needed to look back into).
    """
    y = np.asarray(y)
    if period < 1:
        raise ValueError(f"period must be a positive integer, got {period}")
    if h < 0:
        raise ValueError(f"h must be non-negative, got {h}")
    if len(y) < period:
        # Without a complete cycle there is no "same season last cycle" value;
        # fail loudly instead of indexing out of range or picking an
        # unrelated observation.
        raise ValueError(
            f"need at least one full seasonal cycle: len(y)={len(y)} < period={period}"
        )

    # Only the final cycle is ever referenced, so slice it once and index
    # into it with the within-cycle position of each forecast step.
    last_cycle = y[len(y) - period:]
    return last_cycle[np.arange(h) % period].astype(float)


def drift_forecast(y: np.ndarray, h: int) -> np.ndarray:
    """
    Drift method: naive forecast plus a constant per-step trend.

    The trend is the average change per step between the first and last
    observations, i.e. the slope of the line joining y[0] and y[-1]; the
    forecast extrapolates that line beyond y[-1].

    Parameters
    ----------
    y : np.ndarray
        Historical time series values
    h : int
        Forecast horizon

    Returns
    -------
    np.ndarray
        Forecasted values of length h

    Raises
    ------
    ValueError
        If y has fewer than two observations, since the drift
        (y[-1] - y[0]) / (T - 1) is undefined for T < 2.
    """
    n_obs = len(y)
    if n_obs < 2:
        raise ValueError(
            f"drift_forecast needs at least 2 observations to estimate the drift, got {n_obs}"
        )
    drift = (y[-1] - y[0]) / (n_obs - 1)
    # Steps ahead are 1..h, so the first forecast already includes one drift increment
    return y[-1] + np.arange(1, h + 1) * drift


def naive_prediction_intervals(
    y: np.ndarray,
    h: int,
    alpha: float = 0.05
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Naive point forecast together with symmetric normal prediction bounds.

    The spread is estimated from the one-step naive residuals (first
    differences of y) and scaled by sqrt(step), so the band widens as the
    forecast step increases.

    Parameters
    ----------
    y : np.ndarray
        Observed series, ordered oldest to newest
    h : int
        Number of future steps to forecast
    alpha : float
        Two-sided significance level; the bounds target 1 - alpha coverage
        (default 0.05, i.e. a 95% interval)

    Returns
    -------
    tuple
        (point_forecast, lower_bound, upper_bound), each of length h
    """
    from scipy.stats import norm

    center = naive_forecast(y, h)

    # One-step naive errors are the successive differences of the series
    one_step_errors = np.diff(y)
    resid_sd = np.std(one_step_errors, ddof=1)

    # Two-sided standard-normal quantile for the requested level
    critical = norm.ppf(1 - alpha / 2)

    # Half-width at step k is critical * resid_sd * sqrt(k), k = 1..h
    steps = np.arange(1, h + 1)
    half_width = critical * resid_sd * np.sqrt(steps)

    return center, center - half_width, center + half_width


# Test the implementations
# Smoke test: run each NumPy forecaster on the training split with a
# 3-step horizon and print the raw output for a quick visual check.
y_array = y_train.values  # y_train is created by the train/test split

print("=== NumPy Implementation Test ===")
print(f"\nNaive forecast (h=3): {naive_forecast(y_array, 3)}")
# period=12 is passed as the seasonal period for the seasonal-naive forecaster
print(f"Seasonal naive (h=3, period=12): {seasonal_naive_forecast(y_array, 3, 12)}")
print(f"Drift forecast (h=3): {drift_forecast(y_array, 3)}")

---

## Plotly Visualization: Method Comparison

Compare all three naive methods on the airline dataset.

In [None]:
# Forecast the whole hold-out window with each NumPy baseline
h = len(y_test)
y_array = y_train.values

naive_pred = naive_forecast(y_array, h)
seasonal_pred = seasonal_naive_forecast(y_array, h, period=12)
drift_pred = drift_forecast(y_array, h)

# Timestamp axes shared by the traces below
train_dates = y_train.index.to_timestamp()
test_dates = y_test.index.to_timestamp()

# One (x, y, legend label, line style) entry per trace, in drawing order:
# history, held-out actuals, then the three baseline forecasts
trace_specs = [
    (train_dates, y_train.values, "Training Data", dict(color="blue")),
    (test_dates, y_test.values, "Actual", dict(color="black", width=2)),
    (test_dates, naive_pred, "Naive (Last Value)", dict(color="red", dash="dash")),
    (test_dates, seasonal_pred, "Seasonal Naive", dict(color="green", dash="dot")),
    (test_dates, drift_pred, "Drift Method", dict(color="purple", dash="dashdot")),
]

fig = go.Figure()
for trace_x, trace_y, trace_name, trace_line in trace_specs:
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, name=trace_name, line=trace_line))

fig.update_layout(
    title="Comparison of Naive Forecasting Methods",
    xaxis_title="Date",
    yaxis_title="Passengers",
    hovermode="x unified",
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)
fig.show()

# Score every baseline against the held-out observations
from sktime.performance_metrics.forecasting import mean_absolute_error, mean_squared_error

print("\n=== Method Comparison (MAE / RMSE) ===")
print(f"Naive (Last):     MAE={mean_absolute_error(y_test, naive_pred):.2f}, RMSE={mean_squared_error(y_test, naive_pred, square_root=True):.2f}")
print(f"Seasonal Naive:   MAE={mean_absolute_error(y_test, seasonal_pred):.2f}, RMSE={mean_squared_error(y_test, seasonal_pred, square_root=True):.2f}")
print(f"Drift:            MAE={mean_absolute_error(y_test, drift_pred):.2f}, RMSE={mean_squared_error(y_test, drift_pred, square_root=True):.2f}")

---

## Prediction Intervals Fan Chart

Visualize forecast uncertainty with expanding prediction intervals.

In [None]:
# Naive prediction intervals at three nominal coverage levels
point_fc, lower_95, upper_95 = naive_prediction_intervals(y_array, h, alpha=0.05)
_, lower_80, upper_80 = naive_prediction_intervals(y_array, h, alpha=0.20)
_, lower_50, upper_50 = naive_prediction_intervals(y_array, h, alpha=0.50)

forecast_dates = y_test.index.to_timestamp()

# A filled band is drawn as one closed outline: forward along the upper
# bound, then backward along the lower bound.
band_x = list(forecast_dates) + list(forecast_dates[::-1])

fig = go.Figure()

# Historical data
fig.add_trace(go.Scatter(
    x=y_train.index.to_timestamp(),
    y=y_train.values,
    name="Historical",
    line=dict(color="blue"),
))

# Widest band first so the narrower, more opaque bands are layered on top
for band_upper, band_lower, band_fill, band_name in [
    (upper_95, lower_95, "rgba(255,0,0,0.1)", "95% PI"),
    (upper_80, lower_80, "rgba(255,0,0,0.2)", "80% PI"),
    (upper_50, lower_50, "rgba(255,0,0,0.3)", "50% PI"),
]:
    fig.add_trace(go.Scatter(
        x=band_x,
        y=list(band_upper) + list(band_lower[::-1]),
        fill="toself",
        fillcolor=band_fill,
        line=dict(color="rgba(255,0,0,0)"),
        name=band_name,
        showlegend=True,
    ))

# Point forecast and held-out actuals go on top of the bands
fig.add_trace(go.Scatter(
    x=forecast_dates,
    y=point_fc,
    name="Point Forecast",
    line=dict(color="red", width=2),
))
fig.add_trace(go.Scatter(
    x=forecast_dates,
    y=y_test.values,
    name="Actual",
    line=dict(color="black", width=2, dash="dot"),
))

fig.update_layout(
    title="Naive Forecast with Prediction Intervals (Fan Chart)",
    xaxis_title="Date",
    yaxis_title="Passengers",
    hovermode="x unified",
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)
fig.show()

# Empirical coverage: percentage of held-out points inside each band
actual = y_test.values
coverage_95 = ((lower_95 <= actual) & (actual <= upper_95)).mean() * 100
coverage_80 = ((lower_80 <= actual) & (actual <= upper_80)).mean() * 100
coverage_50 = ((lower_50 <= actual) & (actual <= upper_50)).mean() * 100

print("\n=== Prediction Interval Coverage ===")
print(f"95% PI coverage: {coverage_95:.1f}% (expected: 95%)")
print(f"80% PI coverage: {coverage_80:.1f}% (expected: 80%)")
print(f"50% PI coverage: {coverage_50:.1f}% (expected: 50%)")

## Train/Test split and forecasting horizon


In [None]:
# ForecastingHorizon is defined in sktime.forecasting.base; it is not part of
# the public exports of sktime.forecasting.model_selection.
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.model_selection import temporal_train_test_split

# Chronological split: the final 24 observations become the test set
y_train, y_test = temporal_train_test_split(y, test_size=24)
# Absolute horizon: forecast exactly the time points present in the test index
fh = ForecastingHorizon(y_test.index, is_relative=False)



## Fit the model


In [None]:
from sktime.forecasting.naive import NaiveForecaster

# Fit the last-value baseline on the training split, then forecast the
# time points described by fh
model = NaiveForecaster(strategy="last").fit(y_train)
pred = model.predict(fh=fh)



## Visualize forecast


In [None]:
# One line per series, in order: training data, held-out data, forecast
fig = go.Figure(
    data=[
        go.Scatter(x=series.index.to_timestamp(), y=series, name=label)
        for series, label in ((y_train, "Train"), (y_test, "Test"), (pred, "Forecast"))
    ]
)
fig.update_layout(title="Naive forecast vs actual")
fig

## Evaluate


In [None]:
from sktime.performance_metrics.forecasting import mean_absolute_error, mean_squared_error

# Score the sktime naive forecast against the held-out observations
mae = mean_absolute_error(y_test, pred)
# square_root=True turns the mean squared error into RMSE
rmse = mean_squared_error(y_test, pred, square_root=True)
print(f"MAE: {mae:.3f}, RMSE: {rmse:.3f}")


## When to use

- **Naive** is surprisingly strong for short horizons when the series is persistent.
- **Seasonal naive** is a must‑beat baseline for seasonal series.

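These are also excellent debugging tools: if your model scores *worse* than naive, suspect a pipeline bug (misaligned timestamps, an incorrect train/test split) or a mis‑specified model. Conversely, a model that beats naive by an implausibly large margin often points to leakage.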
