In [None]:
!pip install optuna
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
import optuna

# 1. Generate Synthetic Data
# Seed NumPy's legacy global RNG so the noise and outlier draws below repeat on every run.
np.random.seed(42)

# Daily timestamps from 2020-01-01 through 2023-12-31 (both endpoints included).
dates = pd.date_range('2020-01-01', '2023-12-31', freq='D')
n = dates.size

# Deterministic components: a linear ramp from 50 to 150 plus two sinusoids,
# one with a 365.25-day period (amplitude 20) and one with a 7-day period (amplitude 10).
trend = np.linspace(50, 150, num=n)
yearly_phase = 2 * np.pi * dates.dayofyear / 365.25
weekly_phase = 2 * np.pi * dates.dayofweek / 7
yearly = 20 * np.sin(yearly_phase)
weekly = 10 * np.sin(weekly_phase)

# Random components. The draw order (normal first, then choice) determines the
# generated values under the fixed seed, so it must not be swapped.
noise = np.random.normal(loc=0, scale=5, size=n)
outlier_levels = [0, 50, -50]          # no spike, upward spike, downward spike
outlier_probs = [0.98, 0.01, 0.01]     # spikes hit roughly 2% of days in total
outliers = np.random.choice(outlier_levels, size=n, p=outlier_probs)

# Observed series = Trend + Yearly Seasonality + Weekly Seasonality + Noise + Outliers,
# stored under the `ds` / `y` column names that the Prophet calls below consume.
y = trend + yearly + weekly + noise + outliers
df = pd.DataFrame({'ds': dates, 'y': y})

print(f"Dataset generated with {len(df)} rows.")
df.head()

Collecting optuna
  Downloading optuna-4.7.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.7.0-py3-none-any.whl (413 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m413.9/413.9 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.7.0
Dataset generated with 1461 rows.


Unnamed: 0,ds,y
0,2020-01-01,62.576881
1,2020-01-02,54.40397
2,2020-01-03,50.068279
3,2020-01-04,-0.553542
4,2020-01-05,43.003013


In [None]:
# Metric helpers used by this cell (and by the final comparison further down).
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Split data: hold out the last 180 days for testing; everything earlier is training data.
HOLDOUT_DAYS = 180
train_df = df.iloc[:-HOLDOUT_DAYS]
test_df = df.iloc[-HOLDOUT_DAYS:]

# Baseline Prophet model with default settings, used as the reference point
# for the tuned model.
baseline_model = Prophet()
baseline_model.fit(train_df)

# Extend the training dates by the holdout length and predict over the whole
# range (training history followed by the held-out days).
future = baseline_model.make_future_dataframe(periods=HOLDOUT_DAYS)
forecast_baseline = baseline_model.predict(future)

# The trailing HOLDOUT_DAYS rows of the forecast are the appended future dates,
# so they are compared positionally against the held-out actuals.
y_true = test_df['y'].to_numpy()
y_pred_baseline = forecast_baseline['yhat'].to_numpy()[-HOLDOUT_DAYS:]

# Baseline error metrics on the holdout window.
baseline_mse = mean_squared_error(y_true, y_pred_baseline)
rmse_base = np.sqrt(baseline_mse)
mae_base = mean_absolute_error(y_true, y_pred_baseline)
# MAPE divides by the actuals, so it is only meaningful while y_true stays away from zero.
baseline_abs_pct_err = np.abs((y_true - y_pred_baseline) / y_true)
mape_base = np.mean(baseline_abs_pct_err) * 100

print(f"Baseline RMSE: {rmse_base:.2f}")

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Baseline RMSE: 6.27


In [None]:
def objective(trial):
    """Score one Optuna trial: cross-validated RMSE of a Prophet candidate.

    Samples four Prophet settings from the trial, fits a model on the
    module-level ``train_df``, runs Prophet's rolling-origin
    ``cross_validation`` and returns the RMSE reported by
    ``performance_metrics``.

    Args:
        trial: The Optuna trial object used to draw hyperparameter values.

    Returns:
        The ``rmse`` value from the first row of the metrics frame; the study
        is expected to minimize it.
    """
    # Search space. The suggest_* calls stay in this order so the sampled
    # parameter order matches the keys reported in ``study.best_params``.
    cp_scale = trial.suggest_float('changepoint_prior_scale', 0.001, 0.5, log=True)
    seas_scale = trial.suggest_float('seasonality_prior_scale', 0.01, 10.0, log=True)
    # NOTE(review): no holidays are passed to Prophet in this function, so this
    # value presumably has no influence on the fit -- confirm, and consider
    # dropping it from the search space.
    hol_scale = trial.suggest_float('holidays_prior_scale', 0.01, 10.0, log=True)
    seas_mode = trial.suggest_categorical('seasonality_mode', ['additive', 'multiplicative'])

    # Fit a candidate model with the sampled settings.
    candidate = Prophet(
        changepoint_prior_scale=cp_scale,
        seasonality_prior_scale=seas_scale,
        holidays_prior_scale=hol_scale,
        seasonality_mode=seas_mode,
    )
    candidate.fit(train_df)

    # Rolling-origin cross-validation: 730 days of initial history, a new cutoff
    # every 90 days, each forecasting 180 days ahead. The number of folds follows
    # from the training span (the recorded run logged 5 cutoffs, not 3).
    cv_forecasts = cross_validation(
        candidate,
        initial='730 days',
        period='90 days',
        horizon='180 days',
        parallel="processes",
    )
    # rolling_window=1 yields a single summary row, read below.
    cv_metrics = performance_metrics(cv_forecasts, rolling_window=1)

    return cv_metrics['rmse'].to_numpy()[0]

# Run Optimization
# Trial budget for the demonstration. A single trial only evaluates one random
# draw from the search space, so nothing is actually optimized; 20 trials give
# the sampler a chance to improve on its first guesses.
N_TRIALS = 20

# Optuna's sampler keeps its own random state, which is configured through the
# sampler's `seed` argument; seeding it makes the sequence of sampled
# hyperparameters repeat on a fresh Restart & Run All.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=N_TRIALS)

print("\nBest Parameters:", study.best_params)

[I 2026-01-31 04:49:58,611] A new study created in memory with name: no-name-8406443c-c371-44e1-817d-a1bc90b900ec
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 5 forecasts with cutoffs between 2022-01-10 00:00:00 and 2023-01-05 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7c186208b410>
[I 2026-01-31 04:50:04,198] Trial 0 finished with value: 8.399319766641598 and parameters: {'changepoint_prior_scale': 0.0012616299148802662, 'seasonality_prior_scale': 0.915139031923048, 'holidays_prior_scale': 5.119571707940332, 'seasonality_mode': 'additive'}. Best is trial 0 with value: 8.399319766641598.



Best Parameters: {'changepoint_prior_scale': 0.0012616299148802662, 'seasonality_prior_scale': 0.915139031923048, 'holidays_prior_scale': 5.119571707940332, 'seasonality_mode': 'additive'}


In [None]:
# Train the final model on the training split using the best hyperparameters
# found by the Optuna study.
best_params = study.best_params
optimized_model = Prophet(**best_params)
optimized_model.fit(train_df)

# Forecast over `future`, the same date frame the baseline forecast used
# (training dates plus the 180 held-out days), then keep the trailing 180
# predictions so they line up positionally with `y_true`.
forecast_opt = optimized_model.predict(future)
y_pred_opt = forecast_opt['yhat'].to_numpy()[-180:]

# Final metrics, computed the same way as the baseline metrics for a like-for-like comparison.
optimized_mse = mean_squared_error(y_true, y_pred_opt)
rmse_opt = np.sqrt(optimized_mse)
mae_opt = mean_absolute_error(y_true, y_pred_opt)
# MAPE divides by the actuals, so it is only meaningful while y_true stays away from zero.
optimized_abs_pct_err = np.abs((y_true - y_pred_opt) / y_true)
mape_opt = np.mean(optimized_abs_pct_err) * 100

# Results summary: one row per metric, baseline next to optimized.
print("\n--- FINAL COMPARISON ---")
print(f"Metric | Baseline | Optimized")
print(f"RMSE   | {rmse_base:.2f} | {rmse_opt:.2f}")
print(f"MAE    | {mae_base:.2f} | {mae_opt:.2f}")
print(f"MAPE   | {mape_base:.2f}% | {mape_opt:.2f}%")

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.



--- FINAL COMPARISON ---
Metric | Baseline | Optimized
RMSE   | 6.27 | 6.29
MAE    | 4.39 | 4.39
MAPE   | 3.48% | 3.49%
