### [Confidence Intervals for Time Series Forecasts](https://medium.com/@kylejones_47003/confidence-intervals-for-time-series-forecasts-95d2d3f81415)

> Best practices for confidence intervals in time series analysis

#### Computing Forecast Confidence Intervals

In [None]:
import warnings
# Silence every warning for the rest of the session so the notebook output
# stays readable.
# NOTE(review): this blanket filter also hides any warnings raised by the
# libraries used below -- consider scoping it more narrowly.
warnings.filterwarnings("ignore")

# Message-specific filters for the "numpy.dtype / numpy.ufunc size changed"
# warnings.
# NOTE(review): these look redundant given the blanket "ignore" above --
# confirm before removing.
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

In [None]:
!pip install -q pandas "numpy<2.0.0"
!pip install -q matplotlib statsmodels
!pip install -q scikit-learn pmdarima

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import StandardScaler
from pmdarima import auto_arima

In [None]:
# Report the library versions this notebook is running against.
version_lines = [
    f"NumPy version: {np.__version__}",
    f"Pandas version: {pd.__version__}",
]
print("\n".join(version_lines))

In [None]:
# Load and preprocess data
def load_and_preprocess_data(url):
    """Read the CSV, regularise it to an hourly grid, and standardise it.

    Args:
        url: Location of a CSV file (anything ``pd.read_csv`` accepts) that
            has a ``date`` column and a ``values`` column.

    Returns:
        A ``(frame, scaler)`` tuple. ``frame`` is indexed by the parsed
        ``date`` column at hourly frequency, with gaps in ``values`` filled
        by interpolation and an added ``scaled_values`` column. ``scaler``
        is the fitted ``StandardScaler``, which callers need to map results
        back to the original units.
    """
    raw = pd.read_csv(url)
    raw['date'] = pd.to_datetime(raw['date'])
    indexed = raw.set_index('date')

    # Average the readings inside each hour; hours without readings are NaN
    # until the interpolation step below fills them.
    hourly = indexed.resample('h').mean().asfreq('h')
    hourly['values'] = hourly['values'].interpolate()

    # The scaler is fit on the whole series; no train/test split is known here.
    scaler = StandardScaler()
    hourly['scaled_values'] = scaler.fit_transform(hourly[['values']])

    return hourly, scaler

# Forecast with ARIMA
def forecast_with_confidence(data, order, steps=48, confidence=0.95):
    """Fit an ARIMA model and forecast ahead with the model's own interval.

    Args:
        data: Series the model is fitted on.
        order: Order tuple passed straight to ``ARIMA(order=...)``.
        steps: Number of periods to forecast.
        confidence: Coverage level of the interval, e.g. ``0.95``.

    Returns:
        ``(mean, lower, upper)``: the predicted mean, followed by the first
        and second columns of the interval table returned by ``conf_int``.
    """
    fit = ARIMA(data, order=order).fit()
    prediction = fit.get_forecast(steps=steps)

    # ``conf_int`` expects the tail probability rather than the coverage.
    bounds = prediction.conf_int(alpha=1 - confidence)
    lower, upper = bounds.iloc[:, 0], bounds.iloc[:, 1]

    return prediction.predicted_mean, lower, upper

# Plot function
def plot_forecast_with_ci(historical_data, test_data, forecasts, lower_ci, upper_ci, title="Forecast with Confidence Intervals", *, confidence=0.95):
    """Plot history, held-out actuals, the forecast and its interval band.

    The figure is written to ``'<title>.png'`` in the current working
    directory and then displayed.

    Args:
        historical_data: Series drawn as the historical line (index on x).
        test_data: Series of held-out actuals; its index is also used as the
            x-axis for the forecast and the interval band.
        forecasts: Forecast values, aligned with ``test_data.index``.
        lower_ci: Lower interval bound, aligned with ``test_data.index``.
        upper_ci: Upper interval bound, aligned with ``test_data.index``.
        title: Figure title; also used as the stem of the saved file name.
        confidence: Coverage level of the supplied bounds. Only used for the
            legend label, so that intervals computed at a level other than
            95% are not mislabelled. The default reproduces ``'95% CI'``.
    """
    # ``:g`` drops the trailing ``.0`` so 0.95 -> '95% CI', 0.995 -> '99.5% CI'.
    ci_label = f'{confidence * 100:g}% CI'
    forecast_index = test_data.index

    plt.figure(figsize=(12, 6))
    plt.plot(historical_data.index, historical_data.values, label='Historical Data', color='blue')
    plt.plot(test_data.index, test_data, label='Actual Test Data', color='green')

    plt.plot(forecast_index, forecasts, 'r-', label='Forecast')
    plt.fill_between(forecast_index, lower_ci, upper_ci, color='r', alpha=0.2, label=ci_label)

    # Mark where the held-out window begins.
    plt.axvline(x=forecast_index[0], color='black', linestyle='--', label="Test Data Start")
    plt.title(title)
    plt.xlabel('Date')
    plt.ylabel('Value')
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    # Save before show(): some backends clear the figure once it is shown.
    plt.savefig(f'{title}.png')
    plt.show()

In [None]:
# Bootstrap-based forecast confidence intervals
def bootstrap_forecast_ci(model_order, data, steps=48, n_bootstraps=100, confidence=0.95, *, random_state=None):
    """Estimate a forecast interval by refitting ARIMA on resampled data.

    Each iteration draws ``len(data)`` rows from ``data`` with replacement,
    sorts them by index, fits ``ARIMA(order=model_order)`` and forecasts
    ``steps`` periods. The interval bounds are percentiles taken across the
    successful iterations at every forecast step.

    NOTE(review): rows are drawn independently and re-sorted by timestamp, so
    a sample can repeat some timestamps and omit others. Whether that is an
    appropriate bootstrap for serially dependent data should be confirmed; a
    residual or block bootstrap is the usual alternative.

    Args:
        model_order: Order tuple passed to ``ARIMA(order=...)``.
        data: pandas Series (anything with ``.sample``) to resample.
        steps: Number of periods to forecast.
        n_bootstraps: Number of resample/refit iterations to attempt.
        confidence: Coverage level in ``[0, 1]``, e.g. ``0.95``.
        random_state: Seed (or ``numpy.random.Generator``) for the resampling.
            ``None`` keeps the previous non-deterministic behaviour.

    Returns:
        ``(mean_forecast, lower_ci, upper_ci)`` as 1-D arrays of length
        ``steps``.

    Raises:
        ValueError: If ``confidence`` is outside ``[0, 1]``.
        RuntimeError: If no iteration produced a forecast.
    """
    # A negative level used to yield silently inverted bounds.
    if not 0 <= confidence <= 1:
        raise ValueError(f"confidence must be between 0 and 1, got {confidence!r}")

    # One generator shared by all iterations: seeded runs are reproducible
    # while each iteration still gets a different sample.
    rng = np.random.default_rng(random_state)
    n_obs = len(data)
    forecasts = []

    for i in range(n_bootstraps):
        # Best-effort: a fit that fails on one sample is reported and skipped.
        try:
            bootstrap_sample = data.sample(n=n_obs, replace=True, random_state=rng).sort_index()
            fitted_model = ARIMA(bootstrap_sample, order=model_order).fit()
            forecasts.append(fitted_model.forecast(steps=steps).values)
        except Exception as e:
            print(f"Bootstrap iteration {i} failed: {e}")

    if not forecasts:
        raise RuntimeError("All bootstrap iterations failed.")

    # Rows are bootstrap iterations, columns are forecast steps.
    forecasts = np.array(forecasts)
    lower_ci, upper_ci = np.percentile(
        forecasts,
        [(1 - confidence) / 2 * 100, (1 + confidence) / 2 * 100],
        axis=0,
    )
    mean_forecast = np.mean(forecasts, axis=0)

    return mean_forecast, lower_ci, upper_ci

In [None]:
# Main workflow
url = "https://raw.githubusercontent.com/kylejones200/time_series/refs/heads/main/ercot_load_data.csv"
df, scaler = load_and_preprocess_data(url)

# Number of trailing observations held out for evaluation. The same value is
# the forecast length, so the split and both forecasts cannot drift apart.
forecast_horizon = 48

train_data = df['scaled_values'].iloc[:-forecast_horizon]
test_data = df['scaled_values'].iloc[-forecast_horizon:]

# Find best ARIMA order
auto_model = auto_arima(train_data, seasonal=False, trace=True, suppress_warnings=True, stepwise=True)
best_order = auto_model.order
print(f"Using ARIMA order: {best_order}")

# ARIMA forecast with confidence intervals
forecasts, lower_ci, upper_ci = forecast_with_confidence(train_data, best_order, steps=forecast_horizon)

# Bootstrapped confidence intervals
boot_forecasts, boot_lower_ci, boot_upper_ci = bootstrap_forecast_ci(best_order, train_data, steps=forecast_horizon, n_bootstraps=50)

def inverse_transform_and_flatten(scaler, data):
    """Undo the scaling of ``data`` and return the result as a 1-D array.

    ``data`` is reshaped to a single column before it is handed to
    ``scaler.inverse_transform``; the result is flattened afterwards.
    """
    as_column = np.array(data).reshape(-1, 1)
    restored = scaler.inverse_transform(as_column)
    return restored.flatten()


# Map the forecasts and interval bounds from scaled units back to the
# original units of the data.
forecasts, lower_ci, upper_ci = (
    inverse_transform_and_flatten(scaler, scaled)
    for scaled in (forecasts, lower_ci, upper_ci)
)

boot_forecasts, boot_lower_ci, boot_upper_ci = (
    inverse_transform_and_flatten(scaler, scaled)
    for scaled in (boot_forecasts, boot_lower_ci, boot_upper_ci)
)

# Held-out actuals in original units, re-attached to their timestamps so
# they can be plotted against the date axis.
test_data_original = inverse_transform_and_flatten(scaler, test_data)
test_data_original_series = pd.Series(test_data_original, index=test_data.index)

# Plot results
plot_forecast_with_ci(
    df['values'],
    test_data_original_series,
    forecasts,
    lower_ci,
    upper_ci,
    title="ARIMA Forecast with Confidence Intervals",
)
plot_forecast_with_ci(
    df['values'],
    test_data_original_series,
    boot_forecasts,
    boot_lower_ci,
    boot_upper_ci,
    title="Bootstrapped Forecast with Confidence Intervals",
)

##### Interpreting Confidence Bounds

Confidence intervals require careful interpretation. A 95% confidence interval doesn’t mean there is a 95% probability that the true value lies within this particular interval. Instead, it means that if we repeated the whole procedure many times, about 95% of the intervals constructed this way would contain the true value.