# Prophet Modeling for Chili Price Forecasting

**Algorithm:** Prophet (Facebook's Time Series Forecasting)

**Features:**
- Separate model for each market (correct approach for Prophet)
- Prophet without holidays
- Prophet with Indonesian holidays
- Automatic seasonality detection

**Prerequisites:** Run `01_data_cleaning_and_eda.ipynb` first


In [2]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error
import joblib
import os

# Suppress Prophet output
import logging
logging.getLogger('prophet').setLevel(logging.WARNING)

# Ensure directories exist
os.makedirs('../models/prophet', exist_ok=True)
os.makedirs('../results/metrics', exist_ok=True)

print('âœ“ Libraries imported successfully')

Importing plotly failed. Interactive plots will not work.


âœ“ Libraries imported successfully


In [3]:
# Load preprocessed data
df_with_holidays = pd.read_csv('../data/processed/data_with_holidays.csv', index_col=0, parse_dates=True)

print(f"Data loaded: {df_with_holidays.shape}")
print(f"Date range: {df_with_holidays.index.min()} to {df_with_holidays.index.max()}")
df_with_holidays.head()

Data loaded: (471, 6)
Date range: 2024-01-01 00:00:00 to 2025-10-24 00:00:00


Unnamed: 0_level_0,Pasar Aksara,Pasar Brayan,Pasar Petisah,Pasar Sukaramai,Pusat Pasar,is_holiday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-01-01,30000.0,26500.0,30000.0,30000.0,27500.0,1
2024-01-02,35000.0,32500.0,35000.0,38000.0,30000.0,0
2024-01-03,35000.0,32500.0,35000.0,38000.0,30000.0,0
2024-01-04,30000.0,30000.0,30000.0,38000.0,30000.0,0
2024-01-05,30000.0,30000.0,30000.0,30000.0,24500.0,0


In [4]:
# Define parameters
market_columns = ['Pasar Sukaramai', 'Pasar Aksara', 'Pasar Petisah', 'Pusat Pasar', 'Pasar Brayan']
TEST_SIZE = 0.2
SPLIT_INDEX = int(len(df_with_holidays) * (1 - TEST_SIZE))

# Split data
train_data = df_with_holidays.iloc[:SPLIT_INDEX]
test_data = df_with_holidays.iloc[SPLIT_INDEX:]

print(f"Training set: {len(train_data)} samples")
print(f"Test set: {len(test_data)} samples")

Training set: 376 samples
Test set: 95 samples


In [5]:
# Helper function for MAPE calculation
def calculate_mape(actual, predicted):
    """Calculate Mean Absolute Percentage Error"""
    mask = actual != 0
    if mask.sum() == 0:
        return np.nan
    mape = np.mean(np.abs((actual[mask] - predicted[mask]) / actual[mask])) * 100
    return min(mape, 999.99)

# Define Indonesian holidays for Prophet
prophet_holidays = pd.DataFrame({
    'holiday': [
        'New Year 2024', 'New Year 2024', 'New Year 2024', 'New Year 2024', 'New Year 2024', 'New Year 2024', 'New Year 2024', 'New Year 2024',
        'New Year 2025', 'New Year 2025', 'New Year 2025', 'New Year 2025', 'New Year 2025', 'New Year 2025', 'New Year 2025', 'New Year 2025', 'New Year 2025', 'New Year 2025',
        'Imlek 2024', 'Imlek 2024', 'Imlek 2024', 'Imlek 2024', 'Imlek 2024', 'Imlek 2024', 'Imlek 2024', 'Imlek 2024',
        'Imlek 2025', 'Imlek 2025', 'Imlek 2025', 'Imlek 2025', 'Imlek 2025', 'Imlek 2025', 'Imlek 2025', 'Imlek 2025',
        'Ramadhan 2024', 'Ramadhan 2024', 'Ramadhan 2024', 'Ramadhan 2024', 'Ramadhan 2024', 'Ramadhan 2024', 'Ramadhan 2024', 'Ramadhan 2024',
        'Ramadhan 2025', 'Ramadhan 2025', 'Ramadhan 2025', 'Ramadhan 2025', 'Ramadhan 2025', 'Ramadhan 2025', 'Ramadhan 2025', 'Ramadhan 2025',
        'Lebaran 2024', 'Lebaran 2024', 'Lebaran 2024', 'Lebaran 2024', 'Lebaran 2024', 'Lebaran 2024', 'Lebaran 2024', 'Lebaran 2024', 'Lebaran 2024', 'Lebaran 2024',
        'Lebaran 2025', 'Lebaran 2025', 'Lebaran 2025', 'Lebaran 2025', 'Lebaran 2025', 'Lebaran 2025', 'Lebaran 2025', 'Lebaran 2025',
        'Idul Adha 2024', 'Idul Adha 2024', 'Idul Adha 2024', 'Idul Adha 2024', 'Idul Adha 2024', 'Idul Adha 2024', 'Idul Adha 2024', 'Idul Adha 2024',
        'Idul Adha 2025', 'Idul Adha 2025', 'Idul Adha 2025', 'Idul Adha 2025', 'Idul Adha 2025', 'Idul Adha 2025', 'Idul Adha 2025', 'Idul Adha 2025',
        'Christmas 2024', 'Christmas 2024', 'Christmas 2024', 'Christmas 2024', 'Christmas 2024', 'Christmas 2024', 'Christmas 2024', 'Christmas 2024', 'Christmas 2024', 'Christmas 2024'
    ],
    'ds': pd.to_datetime([
        '2023-12-25', '2023-12-26', '2023-12-27', '2023-12-28', '2023-12-29', '2023-12-30', '2023-12-31', '2024-01-01',
        '2024-12-23', '2024-12-24', '2024-12-25', '2024-12-26', '2024-12-27', '2024-12-28', '2024-12-29', '2024-12-30', '2024-12-31', '2025-01-01',
        '2024-02-05', '2024-02-06', '2024-02-07', '2024-02-08', '2024-02-09', '2024-02-10', '2024-02-11', '2024-02-12',
        '2025-01-27', '2025-01-28', '2025-01-29', '2025-01-30', '2025-01-31', '2025-02-01', '2025-02-02', '2025-02-03',
        '2024-03-11', '2024-03-12', '2024-03-13', '2024-03-14', '2024-03-15', '2024-03-16', '2024-03-17', '2024-03-18',
        '2025-02-24', '2025-02-25', '2025-02-26', '2025-02-27', '2025-02-28', '2025-03-01', '2025-03-02', '2025-03-03',
        '2024-04-01', '2024-04-02', '2024-04-03', '2024-04-04', '2024-04-05', '2024-04-06', '2024-04-07', '2024-04-08', '2024-04-09', '2024-04-10',
        '2025-03-24', '2025-03-25', '2025-03-26', '2025-03-27', '2025-03-28', '2025-03-29', '2025-03-30', '2025-03-31',
        '2024-06-10', '2024-06-11', '2024-06-12', '2024-06-13', '2024-06-14', '2024-06-15', '2024-06-16', '2024-06-17',
        '2025-06-02', '2025-06-03', '2025-06-04', '2025-06-05', '2025-06-06', '2025-06-07', '2025-06-08', '2025-06-09',
        '2024-12-16', '2024-12-17', '2024-12-18', '2024-12-19', '2024-12-20', '2024-12-21', '2024-12-22', '2024-12-23', '2024-12-24', '2024-12-25'
    ])
})

print(f"âœ“ Defined {len(prophet_holidays)} holiday days for Prophet")
print(f"âœ“ Holiday periods: {prophet_holidays['holiday'].nunique()} unique holidays")

âœ“ Defined 94 holiday days for Prophet
âœ“ Holiday periods: 11 unique holidays


In [11]:
# Results storage
prophet_results = {}

# Train Prophet models for each market
for market in market_columns:
    print(f"\n{'='*50}")
    print(f"Training Prophet for {market}")
    print(f"{'='*50}")

    # Prepare data for Prophet with additional regressors
    # First create base features
    lag_7_series = train_data[market].shift(7).fillna(train_data[market].mean())
    lag_14_series = train_data[market].shift(14).fillna(train_data[market].mean())
    ma_7_series = train_data[market].rolling(window=7, min_periods=1).mean()
    ma_30_series = train_data[market].rolling(window=30, min_periods=1).mean()
    
    # Create DataFrame with all features at once
    prophet_train = pd.DataFrame({
        'ds': train_data.index,
        'y': train_data[market].values,
        'lag_7': lag_7_series.values,
        'lag_14': lag_14_series.values,
        'ma_7': ma_7_series.values,
        'ma_30': ma_30_series.values
    })
    
    # Ensure no NaN values exist
    prophet_train = prophet_train.fillna(prophet_train.mean(numeric_only=True))
    
    # Prepare test data with same features
    prophet_test_full = pd.concat([train_data[market], test_data[market]])
    
    test_lag_7 = prophet_test_full.shift(7).iloc[-len(test_data):].fillna(prophet_test_full.mean())
    test_lag_14 = prophet_test_full.shift(14).iloc[-len(test_data):].fillna(prophet_test_full.mean())
    test_ma_7 = prophet_test_full.rolling(window=7, min_periods=1).mean().iloc[-len(test_data):].values
    test_ma_30 = prophet_test_full.rolling(window=30, min_periods=1).mean().iloc[-len(test_data):].values

    # Model 1: Prophet Baseline (for comparison)
    print("Training Prophet Baseline...")
    prophet_baseline = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        changepoint_prior_scale=0.05
    )
    prophet_baseline.fit(prophet_train[['ds', 'y']])
    
    # Model 2: Prophet Optimized (tuned hyperparameters + regressors)
    print("Training Prophet Optimized (with regressors and tuned parameters)...")
    prophet_model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        seasonality_mode='multiplicative',  # Better for volatile data
        changepoint_prior_scale=0.15,  # Increased flexibility
        seasonality_prior_scale=15.0,  # Stronger seasonality
        n_changepoints=30,  # More changepoints for volatile commodity
        changepoint_range=0.9  # Allow changes throughout series
    )
    
    # Add regressors
    prophet_model.add_regressor('lag_7', standardize=True)
    prophet_model.add_regressor('lag_14', standardize=True)
    prophet_model.add_regressor('ma_7', standardize=True)
    prophet_model.add_regressor('ma_30', standardize=True)
    
    # Add custom monthly seasonality (important for commodity prices)
    prophet_model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    
    prophet_model.fit(prophet_train)

    # Model 3: Prophet with holidays + optimization
    print("Training Prophet with Holidays + Optimization...")
    prophet_holiday_model = Prophet(
        holidays=prophet_holidays,
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        seasonality_mode='multiplicative',
        changepoint_prior_scale=0.15,
        seasonality_prior_scale=15.0,
        n_changepoints=30,
        changepoint_range=0.9,
        holidays_prior_scale=10.0  # Moderate holiday effect
    )
    
    # Add regressors
    prophet_holiday_model.add_regressor('lag_7', standardize=True)
    prophet_holiday_model.add_regressor('lag_14', standardize=True)
    prophet_holiday_model.add_regressor('ma_7', standardize=True)
    prophet_holiday_model.add_regressor('ma_30', standardize=True)
    
    # Add custom monthly seasonality
    prophet_holiday_model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    
    prophet_holiday_model.fit(prophet_train)

    # Create future dataframe for predictions
    future_df_baseline = prophet_baseline.make_future_dataframe(periods=len(test_data), freq='D')
    
    future_df = pd.DataFrame({
        'ds': pd.concat([prophet_train['ds'], pd.Series(test_data.index)]).reset_index(drop=True),
        'lag_7': pd.concat([prophet_train['lag_7'], test_lag_7]).reset_index(drop=True),
        'lag_14': pd.concat([prophet_train['lag_14'], test_lag_14]).reset_index(drop=True),
        'ma_7': pd.concat([prophet_train['ma_7'], pd.Series(test_ma_7)]).reset_index(drop=True),
        'ma_30': pd.concat([prophet_train['ma_30'], pd.Series(test_ma_30)]).reset_index(drop=True)
    })

    # Predictions
    prophet_baseline_forecast = prophet_baseline.predict(future_df_baseline)
    prophet_baseline_pred = prophet_baseline_forecast['yhat'].iloc[-len(test_data):].values
    
    prophet_forecast = prophet_model.predict(future_df)
    prophet_pred = prophet_forecast['yhat'].iloc[-len(test_data):].values
    
    prophet_holiday_forecast = prophet_holiday_model.predict(future_df)
    prophet_holiday_pred = prophet_holiday_forecast['yhat'].iloc[-len(test_data):].values

    # Calculate metrics
    actual_values = test_data[market].values

    # Baseline metrics
    prophet_baseline_rmse = np.sqrt(mean_squared_error(actual_values, prophet_baseline_pred))
    prophet_baseline_mae = mean_absolute_error(actual_values, prophet_baseline_pred)
    prophet_baseline_mape = calculate_mape(actual_values, prophet_baseline_pred)
    
    # Optimized metrics
    prophet_rmse = np.sqrt(mean_squared_error(actual_values, prophet_pred))
    prophet_mae = mean_absolute_error(actual_values, prophet_pred)
    prophet_mape = calculate_mape(actual_values, prophet_pred)

    # Holiday + Optimized metrics
    prophet_h_rmse = np.sqrt(mean_squared_error(actual_values, prophet_holiday_pred))
    prophet_h_mae = mean_absolute_error(actual_values, prophet_holiday_pred)
    prophet_h_mape = calculate_mape(actual_values, prophet_holiday_pred)

    # Store results
    prophet_results[market] = {
        'prophet_baseline_pred': prophet_baseline_pred,
        'prophet_pred': prophet_pred,
        'prophet_holiday_pred': prophet_holiday_pred,
        'actual': actual_values,
        'prophet_baseline_rmse': prophet_baseline_rmse,
        'prophet_baseline_mae': prophet_baseline_mae,
        'prophet_baseline_mape': prophet_baseline_mape,
        'prophet_rmse': prophet_rmse,
        'prophet_mae': prophet_mae,
        'prophet_mape': prophet_mape,
        'prophet_h_rmse': prophet_h_rmse,
        'prophet_h_mae': prophet_h_mae,
        'prophet_h_mape': prophet_h_mape,
        'test_dates': test_data.index
    }

    print(f"Prophet Baseline: RMSE={prophet_baseline_rmse:.2f}, MAE={prophet_baseline_mae:.2f}, MAPE={prophet_baseline_mape:.2f}%")
    print(f"Prophet Optimized: RMSE={prophet_rmse:.2f}, MAE={prophet_mae:.2f}, MAPE={prophet_mape:.2f}%")
    print(f"Prophet+Holiday Optimized: RMSE={prophet_h_rmse:.2f}, MAE={prophet_h_mae:.2f}, MAPE={prophet_h_mape:.2f}%")
    
    improvement = ((prophet_baseline_mape - prophet_mape) / prophet_baseline_mape) * 100
    print(f"ðŸ“ˆ Improvement over baseline: {improvement:.1f}%")

    # Save Prophet models
    joblib.dump(prophet_baseline, f'../models/prophet/prophet_baseline_{market.replace(" ", "_")}.joblib')
    joblib.dump(prophet_model, f'../models/prophet/prophet_model_{market.replace(" ", "_")}.joblib')
    joblib.dump(prophet_holiday_model, f'../models/prophet/prophet_holiday_model_{market.replace(" ", "_")}.joblib')

print("\nAll Prophet models saved to: models/prophet/")

07:02:14 - cmdstanpy - INFO - Chain [1] start processing
07:02:14 - cmdstanpy - INFO - Chain [1] done processing
07:02:14 - cmdstanpy - INFO - Chain [1] start processing
07:02:14 - cmdstanpy - INFO - Chain [1] done processing



Training Prophet for Pasar Sukaramai
Training Prophet Baseline...
Training Prophet Optimized (with regressors and tuned parameters)...
Training Prophet with Holidays + Optimization...


07:02:14 - cmdstanpy - INFO - Chain [1] start processing
07:02:14 - cmdstanpy - INFO - Chain [1] done processing
07:02:14 - cmdstanpy - INFO - Chain [1] start processing
07:02:14 - cmdstanpy - INFO - Chain [1] done processing
07:02:14 - cmdstanpy - INFO - Chain [1] start processing
07:02:15 - cmdstanpy - INFO - Chain [1] done processing


Prophet Baseline: RMSE=51973.15, MAE=41617.71, MAPE=74.44%
Prophet Optimized: RMSE=21181.83, MAE=15748.58, MAPE=26.13%
Prophet+Holiday Optimized: RMSE=23623.45, MAE=17392.84, MAPE=28.86%
ðŸ“ˆ Improvement over baseline: 64.9%

Training Prophet for Pasar Aksara
Training Prophet Baseline...
Training Prophet Optimized (with regressors and tuned parameters)...
Training Prophet with Holidays + Optimization...


07:02:15 - cmdstanpy - INFO - Chain [1] start processing
07:02:15 - cmdstanpy - INFO - Chain [1] done processing
07:02:15 - cmdstanpy - INFO - Chain [1] start processing
07:02:15 - cmdstanpy - INFO - Chain [1] done processing
07:02:15 - cmdstanpy - INFO - Chain [1] start processing
07:02:15 - cmdstanpy - INFO - Chain [1] done processing


Prophet Baseline: RMSE=48698.06, MAE=39279.24, MAPE=73.09%
Prophet Optimized: RMSE=20926.88, MAE=15294.74, MAPE=25.69%
Prophet+Holiday Optimized: RMSE=23780.24, MAE=17225.83, MAPE=28.74%
ðŸ“ˆ Improvement over baseline: 64.9%

Training Prophet for Pasar Petisah
Training Prophet Baseline...
Training Prophet Optimized (with regressors and tuned parameters)...
Training Prophet with Holidays + Optimization...


07:02:15 - cmdstanpy - INFO - Chain [1] start processing
07:02:15 - cmdstanpy - INFO - Chain [1] done processing
07:02:16 - cmdstanpy - INFO - Chain [1] start processing
07:02:16 - cmdstanpy - INFO - Chain [1] done processing
07:02:16 - cmdstanpy - INFO - Chain [1] start processing
07:02:16 - cmdstanpy - INFO - Chain [1] done processing


Prophet Baseline: RMSE=49026.18, MAE=39205.30, MAPE=71.94%
Prophet Optimized: RMSE=20682.94, MAE=15094.38, MAPE=25.04%
Prophet+Holiday Optimized: RMSE=26327.15, MAE=19137.42, MAPE=31.26%
ðŸ“ˆ Improvement over baseline: 65.2%

Training Prophet for Pusat Pasar
Training Prophet Baseline...
Training Prophet Optimized (with regressors and tuned parameters)...
Training Prophet with Holidays + Optimization...


07:02:16 - cmdstanpy - INFO - Chain [1] start processing
07:02:16 - cmdstanpy - INFO - Chain [1] done processing
07:02:16 - cmdstanpy - INFO - Chain [1] start processing
07:02:16 - cmdstanpy - INFO - Chain [1] done processing
07:02:16 - cmdstanpy - INFO - Chain [1] start processing
07:02:16 - cmdstanpy - INFO - Chain [1] done processing


Prophet Baseline: RMSE=51243.95, MAE=40871.74, MAPE=74.28%
Prophet Optimized: RMSE=21623.13, MAE=16034.16, MAPE=26.91%
Prophet+Holiday Optimized: RMSE=23112.67, MAE=16939.11, MAPE=28.00%
ðŸ“ˆ Improvement over baseline: 63.8%

Training Prophet for Pasar Brayan
Training Prophet Baseline...
Training Prophet Optimized (with regressors and tuned parameters)...
Training Prophet with Holidays + Optimization...


07:02:16 - cmdstanpy - INFO - Chain [1] start processing
07:02:16 - cmdstanpy - INFO - Chain [1] done processing


Prophet Baseline: RMSE=54510.22, MAE=43852.75, MAPE=75.75%
Prophet Optimized: RMSE=23856.14, MAE=17854.96, MAPE=28.51%
Prophet+Holiday Optimized: RMSE=32122.19, MAE=24060.18, MAPE=38.05%
ðŸ“ˆ Improvement over baseline: 62.4%

All Prophet models saved to: models/prophet/


In [12]:
# Save Prophet results for inference notebook
prophet_summary = {
    'algorithm': 'Prophet',
    'avg_baseline_rmse': np.mean([prophet_results[m]['prophet_baseline_rmse'] for m in market_columns]),
    'avg_baseline_mape': np.mean([prophet_results[m]['prophet_baseline_mape'] for m in market_columns]),
    'avg_rmse': np.mean([prophet_results[m]['prophet_rmse'] for m in market_columns]),
    'avg_rmse_with_holiday': np.mean([prophet_results[m]['prophet_h_rmse'] for m in market_columns]),
    'avg_mape': np.mean([prophet_results[m]['prophet_mape'] for m in market_columns]),
    'avg_mape_with_holiday': np.mean([prophet_results[m]['prophet_h_mape'] for m in market_columns]),
    'markets': market_columns,
    'results': prophet_results
}

joblib.dump(prophet_summary, '../results/metrics/prophet_summary.pkl')
joblib.dump(prophet_results, '../results/metrics/prophet_detailed_results.pkl')

print('\n' + '='*60)
print('PROPHET PERFORMANCE SUMMARY')
print('='*60)
print(f'âœ“ Prophet Baseline Average MAPE: {prophet_summary["avg_baseline_mape"]:.2f}%')
print(f'âœ“ Prophet Optimized Average MAPE: {prophet_summary["avg_mape"]:.2f}%')
print(f'âœ“ Prophet+Holiday Optimized Average MAPE: {prophet_summary["avg_mape_with_holiday"]:.2f}%')
improvement = ((prophet_summary["avg_baseline_mape"] - prophet_summary["avg_mape"]) / prophet_summary["avg_baseline_mape"]) * 100
print(f'\nðŸ“ˆ Overall Improvement: {improvement:.1f}%')
print(f'\nâœ“ Results saved to results/metrics/')
print('='*60)


PROPHET PERFORMANCE SUMMARY
âœ“ Prophet Baseline Average MAPE: 73.90%
âœ“ Prophet Optimized Average MAPE: 26.46%
âœ“ Prophet+Holiday Optimized Average MAPE: 30.98%

ðŸ“ˆ Overall Improvement: 64.2%

âœ“ Results saved to results/metrics/
