In [1]:
import mlflow
import mlflow.prophet
import pandas as pd
import numpy as np
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
from dotenv import load_dotenv
import datetime
# Load environment variables from .env file
load_dotenv()

True

In [2]:
# Load sample time series data (Prophet expects 'ds' and 'y' columns)
# This example uses the classic Peyton Manning Wikipedia page views dataset
url = "https://raw.githubusercontent.com/facebook/prophet/main/examples/example_wp_log_peyton_manning.csv"
df = pd.read_csv(url)

print(f"Data shape: {df.shape}")
print(f"Date range: {df['ds'].min()} to {df['ds'].max()}")
print(f"Data preview:\n{df.head()}")


Data shape: (2905, 2)
Date range: 2007-12-10 to 2016-01-20
Data preview:
           ds         y
0  2007-12-10  9.590761
1  2007-12-11  8.519590
2  2007-12-12  8.183677
3  2007-12-13  8.072467
4  2007-12-14  7.893572


In [7]:
df.to_csv("wp_log_peyton_manning.csv", index =False)

In [5]:
experiment_name = "/Users/j.huertas@closerstillmedia.com/prophet"
try:
    mlflow.create_experiment(experiment_name)
except:
    print("experiment exists")
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='dbfs:/databricks/mlflow-tracking/3039328626124250', creation_time=1755624146266, experiment_id='3039328626124250', last_update_time=1755624146266, lifecycle_stage='active', name='/Users/j.huertas@closerstillmedia.com/prophet', tags={'mlflow.experiment.sourceName': '/Users/j.huertas@closerstillmedia.com/prophet',
 'mlflow.experimentType': 'MLFLOW_EXPERIMENT',
 'mlflow.ownerEmail': 'b.relf@closerstillmedia.com',
 'mlflow.ownerId': '7931383772120950'}>

In [6]:
with mlflow.start_run(run_name="Basic Prophet Forecast"):
    # Create Prophet model with specific parameters
    model = Prophet(
        changepoint_prior_scale=0.05,  # Flexibility of trend changes
        seasonality_prior_scale=10,  # Strength of seasonality
        holidays_prior_scale=10,  # Strength of holiday effects
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
    )

    # Fit the model
    model.fit(df)

    # Extract and log model parameters
    def extract_prophet_params(prophet_model):
        """Extract Prophet model parameters for logging."""
        from prophet.serialize import SIMPLE_ATTRIBUTES

        params = {}
        for attr in SIMPLE_ATTRIBUTES:
            if hasattr(prophet_model, attr):
                value = getattr(prophet_model, attr)
                if isinstance(value, (int, float, str, bool)):
                    params[attr] = value
        return params

    params = extract_prophet_params(model)
    mlflow.log_params(params)

    # Create future dataframe for forecasting
    future = model.make_future_dataframe(periods=365)  # Forecast 1 year ahead
    forecast = model.predict(future)

    # Cross-validation for model evaluation
    cv_results = cross_validation(
        model,
        initial="730 days",  # Initial training period
        period="180 days",  # Spacing between cutoff dates
        horizon="365 days",  # Forecast horizon
        parallel="threads",  # Use threading for speed
    )

    # Calculate performance metrics
    metrics = performance_metrics(cv_results)
    avg_metrics = metrics[["mse", "rmse", "mae", "mape"]].mean().to_dict()
    mlflow.log_metrics(avg_metrics)

    # Log the model with input example
    mlflow.prophet.log_model(
        pr_model=model, name="prophet_model", input_example=df[["ds"]].head(10)
    )

    print(f"Model trained and logged successfully!")
    print(f"Average MAPE: {avg_metrics['mape']:.2f}%")

18:22:46 - cmdstanpy - INFO - Chain [1] start processing
18:22:48 - cmdstanpy - INFO - Chain [1] done processing
18:22:51 - cmdstanpy - INFO - Chain [1] start processing
18:22:51 - cmdstanpy - INFO - Chain [1] start processing
18:22:51 - cmdstanpy - INFO - Chain [1] done processing
18:22:51 - cmdstanpy - INFO - Chain [1] done processing
18:22:52 - cmdstanpy - INFO - Chain [1] start processing
18:22:52 - cmdstanpy - INFO - Chain [1] start processing
18:22:52 - cmdstanpy - INFO - Chain [1] start processing
18:22:52 - cmdstanpy - INFO - Chain [1] done processing
18:22:52 - cmdstanpy - INFO - Chain [1] done processing
18:22:52 - cmdstanpy - INFO - Chain [1] done processing
18:22:52 - cmdstanpy - INFO - Chain [1] start processing
18:22:52 - cmdstanpy - INFO - Chain [1] start processing
18:22:53 - cmdstanpy - INFO - Chain [1] start processing
18:22:53 - cmdstanpy - INFO - Chain [1] start processing
18:22:53 - cmdstanpy - INFO - Chain [1] start processing
18:22:53 - cmdstanpy - INFO - Chain [

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

2025/08/19 18:23:04 INFO mlflow.models.model: Found the following environment variables used during model inference: [AZURE_API_KEY, AZURE_OPENAI_API_KEY, DATABRICKS_HOST, ... ]. Please check if you need to set them when deploying the model. To disable this message, set environment variable `MLFLOW_RECORD_ENV_VARS_IN_MODEL_LOGGING` to `false`.


Model trained and logged successfully!
Average MAPE: 0.07%
🏃 View run Basic Prophet Forecast at: https://adb-3191716161457605.5.azuredatabricks.net/ml/experiments/3039328626124250/runs/82f51f69037a42a58daf8e889f1b256e
🧪 View experiment at: https://adb-3191716161457605.5.azuredatabricks.net/ml/experiments/3039328626124250


In [9]:
forecast.head()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2007-12-10,8.039995,8.240314,9.468886,8.039995,8.039995,0.803072,0.803072,0.803072,0.352285,0.352285,0.352285,0.450787,0.450787,0.450787,0.0,0.0,0.0,8.843067
1,2007-12-11,8.038455,7.960559,9.20231,8.038455,8.038455,0.553164,0.553164,0.553164,0.11964,0.11964,0.11964,0.433524,0.433524,0.433524,0.0,0.0,0.0,8.591619
2,2007-12-12,8.036915,7.79729,9.002588,8.036915,8.036915,0.350535,0.350535,0.350535,-0.066661,-0.066661,-0.066661,0.417196,0.417196,0.417196,0.0,0.0,0.0,8.387451
3,2007-12-13,8.035375,7.792014,9.022092,8.035375,8.035375,0.32998,0.32998,0.32998,-0.072268,-0.072268,-0.072268,0.402248,0.402248,0.402248,0.0,0.0,0.0,8.365355
4,2007-12-14,8.033835,7.742005,9.004836,8.033835,8.033835,0.319506,0.319506,0.319506,-0.069578,-0.069578,-0.069578,0.389085,0.389085,0.389085,0.0,0.0,0.0,8.353342


In [10]:
forecast.to_csv("simple_prediction.csv", index=False)