# DeepAR Probabilistic Forecasting

This notebook demonstrates probabilistic production forecasting with DeepAR, including uncertainty quantification.

## Key Features
- Probabilistic forecasts (P10, P50, P90)
- Uncertainty intervals
- Multi-phase support
- Multi-well training
- Ensemble forecasting


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from decline_curve.forecast_deepar import DeepARForecaster
from decline_curve.utils.real_data_loader import load_north_dakota_production
from decline_curve import dca
from decline_curve.ensemble import EnsembleWeights

# Configure logging
# Set up logging through the project's helper, requesting INFO level.
import logging
from decline_curve.logging_config import configure_logging, get_logger

configure_logging(level=logging.INFO)
# Logger used by the cells below for all status messages.
logger = get_logger(__name__)

# 'seaborn-v0_8-darkgrid' is the style-sheet name used by matplotlib >= 3.6
# (older releases call it 'seaborn-darkgrid').
plt.style.use('seaborn-v0_8-darkgrid')
# IPython magic: render figures inline in the notebook output.
# NOTE(review): kept after plt.style.use as in the original; activating the
# inline backend may adjust some rcParams, so do not reorder without checking.
%matplotlib inline



## 1. Load Real Production Data


In [None]:
# Load North Dakota production data
import os
from pathlib import Path

# The CSV location is machine-specific. Set the NORTH_DAKOTA_PRODUCTION_CSV
# environment variable to point at your own copy of the file; the original
# author's path is kept only as the fallback so existing setups keep working.
data_path = os.environ.get(
    'NORTH_DAKOTA_PRODUCTION_CSV',
    '/Users/kylejonespatricia/Library/CloudStorage/GoogleDrive-kyletjones@gmail.com/My Drive/applandia/bana_4373/data/north_dakota_production.csv',
)

# Flag a missing file up front so a wrong path is easy to diagnose. This only
# warns; the loader call below still runs exactly as before.
if not Path(data_path).is_file():
    logger.warning(
        f"Production CSV not found at {data_path!r}; "
        "set NORTH_DAKOTA_PRODUCTION_CSV to the correct location"
    )

# The loader returns two tables: the production records and a second frame
# bound to static_df (unused in this notebook).
production_df, static_df = load_north_dakota_production(
    data_path=data_path,
    max_wells=50,  # Use subset for faster training
    min_months=24,  # Require at least 24 months of history
    phases=['oil', 'gas', 'water']
)

# Quick sanity summary of what was loaded.
logger.info(f"Loaded {production_df['well_id'].nunique()} wells")
logger.info(f"Total records: {len(production_df)}")
logger.info(f"Date range: {production_df['date'].min()} to {production_df['date'].max()}")


## 2. Train DeepAR Model


In [None]:
# Model hyperparameters, gathered in one place so they are easy to tweak.
deepar_config = dict(
    phases=['oil'],  # Focus on oil production
    horizon=12,
    sequence_length=24,
    hidden_size=64,
    num_layers=2,
    distribution='normal',
    normalization_method='minmax',
)
deepar = DeepARForecaster(**deepar_config)

# Training options passed straight through to fit().
fit_options = dict(
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    verbose=True,
)

logger.info("Training DeepAR model...")
history = deepar.fit(production_data=production_df, **fit_options)

# Report the last recorded entry of the 'loss' history.
logger.info(f"✓ Model trained. Final loss: {history['loss'][-1]:.4f}")


## 3. Generate Probabilistic Forecast


In [None]:
# Use the well that appears in the first row of the production table.
well_ids = production_df['well_id']
test_well = well_ids.iloc[0]
logger.info(f"Generating probabilistic forecast for well: {test_well}")

# Ask the trained model for the P10 / P50 / P90 quantiles over 12 steps,
# requesting 1000 samples.
forecasts = deepar.predict_quantiles(
    production_data=production_df,
    well_id=test_well,
    horizon=12,
    n_samples=1000,
    quantiles=[0.1, 0.5, 0.9],  # P10, P50, P90
)

# Pull the three oil quantile series out of the result.
oil_quantiles = forecasts['oil']
p10, p50, p90 = (oil_quantiles[key] for key in ('q10', 'q50', 'q90'))

logger.info(f"P50 (median) forecast: {p50.mean():.0f} bbl/month")
logger.info(f"Uncertainty range: {p10.mean():.0f} to {p90.mean():.0f} bbl/month")


## 4. Visualize Probabilistic Forecast


In [None]:
# Oil history of the selected well, ordered by date and indexed by it.
is_test_well = production_df['well_id'] == test_well
well_data = production_df.loc[is_test_well].sort_values('date')
historical = well_data.set_index('date')['oil']

fig, ax = plt.subplots(figsize=(12, 6))

# Observed production.
ax.plot(
    historical.index,
    historical.values,
    'o-',
    label='Historical',
    linewidth=2,
    markersize=4,
)

# Median forecast with the P10-P90 band shaded around it.
ax.plot(p50.index, p50.values, 'r-', label='P50 (Median)', linewidth=2)
ax.fill_between(
    p50.index,
    p10.values,
    p90.values,
    alpha=0.3,
    color='red',
    label='P10-P90 Range',
)

# Dashed marker at the last historical observation.
forecast_start = historical.index[-1]
ax.axvline(
    x=forecast_start,
    color='gray',
    linestyle='--',
    linewidth=1,
    label='Forecast Start',
)

ax.set(
    xlabel='Date',
    ylabel='Oil Production (bbl/month)',
    title=f'DeepAR Probabilistic Forecast - Well {test_well}',
)
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

logger.info("✓ Probabilistic forecast with uncertainty bands displayed")


## 5. Ensemble Forecasting


In [None]:
# Create ensemble combining Arps, ARIMA, and DeepAR
oil_series = historical

# Single source of truth for the forecast length, used both when requesting
# forecasts and when slicing them for the summary below.
forecast_horizon = 12

# Generate individual forecasts
arps_forecast = dca.forecast(oil_series, model='arps', horizon=forecast_horizon)
arima_forecast = dca.forecast(oil_series, model='arima', horizon=forecast_horizon)

# DeepAR forecast via main API (median only), reusing the model trained above
deepar_forecast = dca.forecast(
    oil_series,
    model='deepar',
    deepar_model=deepar,
    production_data=production_df,
    well_id=test_well,
    quantiles=[0.5],
    horizon=forecast_horizon
)

# Ensemble forecast
# NOTE(review): 'arima' is listed in ensemble_models but no arima weight is
# passed to EnsembleWeights (an lstm weight of 0.0 is passed instead) - confirm
# against the EnsembleWeights API how ARIMA is weighted here.
weights = EnsembleWeights(arps=0.4, lstm=0.0, deepar=0.6)
ensemble_forecast = dca.forecast(
    oil_series,
    model='ensemble',
    ensemble_models=['arps', 'arima', 'deepar'],
    ensemble_weights=weights,
    ensemble_method='weighted',
    deepar_model=deepar,
    production_data=production_df,
    well_id=test_well,
    horizon=forecast_horizon
)

# Compare the models over the same window: the last `forecast_horizon` points
# of each result. The ensemble was previously averaged over its full length,
# which is not comparable if the returned series also contains history.
logger.info(f"Arps forecast: {arps_forecast.iloc[-forecast_horizon:].mean():.0f} bbl/month")
logger.info(f"ARIMA forecast: {arima_forecast.iloc[-forecast_horizon:].mean():.0f} bbl/month")
logger.info(f"DeepAR P50: {deepar_forecast.iloc[-forecast_horizon:].mean():.0f} bbl/month")
logger.info(f"Ensemble forecast: {ensemble_forecast.iloc[-forecast_horizon:].mean():.0f} bbl/month")


## Summary

This notebook demonstrated:
1. ✓ Loading real production data
2. ✓ Training DeepAR probabilistic model
3. ✓ Generating P10/P50/P90 forecasts
4. ✓ Visualizing uncertainty bands
5. ✓ Ensemble forecasting combining multiple models

### Key Takeaways
- **Probabilistic forecasting** provides uncertainty quantification (P10/P50/P90)
- **DeepAR** pairs an LSTM with a probabilistic output, so each forecast is a distribution (summarized here as P10/P50/P90) rather than a single point estimate
- **Ensemble methods** can improve accuracy by combining multiple models
- **Uncertainty bands** help assess forecast reliability
