In [1]:
import mlflow
import mlflow.prophet
import pandas as pd
import numpy as np
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
from dotenv import load_dotenv
import datetime
# Load environment variables from .env file
load_dotenv()


experiment_name = "/Users/j.huertas@closerstillmedia.com/prophet"
try:
    mlflow.create_experiment(experiment_name)
except:
    print("experiment exists")
mlflow.set_experiment(experiment_name)

experiment exists


<Experiment: artifact_location='dbfs:/databricks/mlflow-tracking/3039328626124250', creation_time=1755624146266, experiment_id='3039328626124250', last_update_time=1755874967812, lifecycle_stage='active', name='/Users/j.huertas@closerstillmedia.com/prophet', tags={'mlflow.databricks.filesystem.experiment_permissions_check': 'test',
 'mlflow.experiment.sourceName': '/Users/j.huertas@closerstillmedia.com/prophet',
 'mlflow.experimentKind': 'custom_model_development',
 'mlflow.experimentType': 'MLFLOW_EXPERIMENT',
 'mlflow.ownerEmail': 'b.relf@closerstillmedia.com',
 'mlflow.ownerId': '7931383772120950'}>

# Forecast Accuracy Monitoring

In [2]:
actuals_df = pd.read_csv("wp_log_peyton_manning.csv")

In [3]:
actuals_df["ds"].max()

'2016-01-20'

In [4]:
actuals_df["ds"]=pd.to_datetime(actuals_df["ds"])

In [5]:
actuals_df.columns

Index(['ds', 'y'], dtype='object')

In [6]:
import matplotlib.pyplot as plt
import seaborn as sns

run_id = "1655d93c38a4421cb6aa50a49c28e127"
model_uri = f"runs:/{run_id}/best_model"

def monitor_forecast_accuracy(model_uri, actuals_df, prediction_horizon_days=30):
    """Monitor Prophet model accuracy against actual values."""

    with mlflow.start_run(run_name="Forecast Accuracy Monitoring"):
        # Load model
        model = mlflow.prophet.load_model(model_uri)

        # Generate historical predictions for comparison
        cutoff_date = actuals_df["ds"].max() - pd.Timedelta(
            days=prediction_horizon_days
        )
        historical_data = actuals_df[actuals_df["ds"] <= cutoff_date]

        # Refit model on historical data
        temp_model = Prophet()
        temp_model.fit(historical_data)

        # Generate predictions for the monitoring period
        future = temp_model.make_future_dataframe(periods=prediction_horizon_days)
        if temp_model.growth == "logistic":
            future["cap"] = (
                historical_data["cap"].iloc[-1]
                if "cap" in historical_data.columns
                else 10000
            )

        forecast = temp_model.predict(future)

        # Get actual values for the prediction period
        actual_values = actuals_df[actuals_df["ds"] > cutoff_date]
        forecast_values = forecast[forecast["ds"] > cutoff_date]

        # Align dates
        merged = actual_values.merge(
            forecast_values[["ds", "yhat", "yhat_lower", "yhat_upper"]], on="ds"
        )

        if len(merged) > 0:
            # Calculate accuracy metrics
            mae = np.mean(np.abs(merged["y"] - merged["yhat"]))
            mape = np.mean(np.abs((merged["y"] - merged["yhat"]) / merged["y"])) * 100
            rmse = np.sqrt(np.mean((merged["y"] - merged["yhat"]) ** 2))

            # Coverage (percentage of actuals within prediction intervals)
            coverage = (
                np.mean(
                    (merged["y"] >= merged["yhat_lower"])
                    & (merged["y"] <= merged["yhat_upper"])
                )
                * 100
            )

            # Log metrics
            accuracy_metrics = {
                "monitoring_mae": mae,
                "monitoring_mape": mape,
                "monitoring_rmse": rmse,
                "prediction_coverage": coverage,
            }
            mlflow.log_metrics(accuracy_metrics)

            # Create accuracy visualization
            plt.figure(figsize=(12, 6))
            plt.plot(merged["ds"], merged["y"], label="Actual", marker="o")
            plt.plot(merged["ds"], merged["yhat"], label="Predicted", marker="s")
            plt.fill_between(
                merged["ds"],
                merged["yhat_lower"],
                merged["yhat_upper"],
                alpha=0.3,
                label="Prediction Interval",
            )
            plt.title(f"Forecast Accuracy Monitoring (MAPE: {mape:.2f}%)")
            plt.xlabel("Date")
            plt.ylabel("Value")
            plt.legend()
            plt.xticks(rotation=45)
            plt.tight_layout()
            plt.savefig("accuracy_monitoring.png", dpi=300, bbox_inches="tight")
            mlflow.log_artifact("accuracy_monitoring.png")
            plt.close()

            return accuracy_metrics
        else:
            print("No overlapping dates found for accuracy assessment")
            return None


# Usage
accuracy_metrics = monitor_forecast_accuracy(model_uri, actuals_df, prediction_horizon_days=30)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

16:05:16 - cmdstanpy - INFO - Chain [1] start processing
16:05:17 - cmdstanpy - INFO - Chain [1] done processing


🏃 View run Forecast Accuracy Monitoring at: https://adb-3191716161457605.5.azuredatabricks.net/ml/experiments/3039328626124250/runs/779ed5196b034107b11c9feb71c2e020
🧪 View experiment at: https://adb-3191716161457605.5.azuredatabricks.net/ml/experiments/3039328626124250


# Automated Model Retraining


In [7]:
def automated_prophet_retraining(
    model_uri, new_data, performance_threshold_mape=10.0, min_data_points=100
):
    """Automated Prophet model retraining pipeline."""

    with mlflow.start_run(run_name="Automated Prophet Retraining"):
        # Load current production model
        current_model_uri = model_uri

        try:
            current_model = mlflow.prophet.load_model(current_model_uri)
            mlflow.log_param("current_model_loaded", True)
        except Exception as e:
            print(f"Could not load current model: {e}")
            current_model = None
            mlflow.log_param("current_model_loaded", False)

        # Data quality checks
        data_quality_passed = True
        quality_issues = []

        # Check data quantity
        if len(new_data) < min_data_points:
            data_quality_passed = False
            quality_issues.append(
                f"Insufficient data: {len(new_data)} < {min_data_points}"
            )

        # Check for missing values
        missing_values = new_data[["ds", "y"]].isnull().sum().sum()
        if missing_values > 0:
            quality_issues.append(f"Missing values found: {missing_values}")

        # Check date continuity
        new_data = new_data.sort_values("ds")
        date_gaps = pd.to_datetime(new_data["ds"]).diff().dt.days
        large_gaps = (date_gaps > 7).sum()  # Gaps larger than 7 days
        if large_gaps > 0:
            quality_issues.append(f"Large date gaps found: {large_gaps}")

        mlflow.log_params(
            {
                "data_quality_passed": data_quality_passed,
                "data_points": len(new_data),
                "quality_issues": "; ".join(quality_issues),
            }
        )

        if not data_quality_passed:
            print("Data quality checks failed. Skipping retraining.")
            return None

        # Train new model
        new_model = Prophet(
            yearly_seasonality=True,
            weekly_seasonality=True,
            daily_seasonality=False,
            changepoint_prior_scale=0.05,
        )

        new_model.fit(new_data)

        # Evaluate new model performance
        cv_results = cross_validation(
            new_model,
            initial="365 days",
            period="90 days",
            horizon="30 days",
            parallel="threads",
        )

        metrics = performance_metrics(cv_results)
        new_mape = metrics["mape"].mean()

        mlflow.log_metric("new_model_mape", new_mape)

        # Compare with current model if available
        should_deploy = True
        if current_model is not None:
            try:
                # Test current model on new data
                current_cv = cross_validation(
                    current_model,
                    initial="365 days",
                    period="90 days",
                    horizon="30 days",
                )
                current_metrics = performance_metrics(current_cv)
                current_mape = current_metrics["mape"].mean()

                mlflow.log_metric("current_model_mape", current_mape)

                # Deploy if new model is significantly better
                improvement = (current_mape - new_mape) / current_mape * 100
                mlflow.log_metric("performance_improvement_percent", improvement)

                should_deploy = improvement > 5.0  # Deploy if >5% improvement

            except Exception as e:
                print(f"Could not evaluate current model: {e}")
                should_deploy = new_mape < performance_threshold_mape
        else:
            should_deploy = new_mape < performance_threshold_mape

        mlflow.log_params(
            {
                "should_deploy": should_deploy,
                "performance_threshold": performance_threshold_mape,
            }
        )

        # Log and potentially deploy new model
        model_info = mlflow.prophet.log_model(
            pr_model=new_model,
            name="retrained_model",
            registered_model_name=current_model_name if should_deploy else None,
        )

        if should_deploy:
            # Transition to production
            client = mlflow.MlflowClient()
            latest_version = client.get_latest_versions(
                current_model_name, stages=["None"]
            )[0]

            client.transition_model_version_stage(
                name=current_model_name,
                version=latest_version.version,
                stage="Production",
            )

            print(f"New model deployed to production with MAPE: {new_mape:.2f}%")
        else:
            print(
                f"New model not deployed. MAPE: {new_mape:.2f}% did not meet criteria."
            )

        return new_model, should_deploy

In [8]:
run_id = "1655d93c38a4421cb6aa50a49c28e127"
model_uri = f"runs:/{run_id}/best_model"

In [9]:
def prepare_prophet_data(data, date_col, value_col, freq="D"):
    """
    Prepare data for Prophet training.

    Args:
        data: DataFrame with time series data
        date_col: Name of date column
        value_col: Name of value column
        freq: Frequency of the time series
    """

    # Prophet requires columns named 'ds' (datestamp) and 'y' (value)
    prophet_df = data[[date_col, value_col]].copy()
    prophet_df.columns = ["ds", "y"]

    # Ensure ds is datetime
    prophet_df["ds"] = pd.to_datetime(prophet_df["ds"])

    # Sort by date
    prophet_df = prophet_df.sort_values("ds").reset_index(drop=True)

    # Handle missing dates if needed
    if freq:
        full_date_range = pd.date_range(
            start=prophet_df["ds"].min(), end=prophet_df["ds"].max(), freq=freq
        )

        # Reindex to fill missing dates
        prophet_df = prophet_df.set_index("ds").reindex(full_date_range).reset_index()
        prophet_df.columns = ["ds", "y"]

        # Log data quality metrics
        missing_dates = prophet_df["y"].isna().sum()
        print(f"Missing dates filled: {missing_dates}")

    return prophet_df

In [11]:
raw_data =  pd.read_csv("sales_data_2023_2025_v3.csv")
new_data = prepare_prophet_data(raw_data, 'date', 'sales', freq='D')
new_data.head()

Missing dates filled: 0


Unnamed: 0,ds,y
0,2021-01-01,941
1,2021-01-02,1157
2,2021-01-03,1149
3,2021-01-04,944
4,2021-01-05,1071


In [12]:
new_model, should_deploy = automated_prophet_retraining( model_uri, new_data)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

16:05:43 - cmdstanpy - INFO - Chain [1] start processing
16:05:43 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.
16:05:44 - cmdstanpy - INFO - Chain [1] start processing
16:05:44 - cmdstanpy - INFO - Chain [1] done processing
16:05:46 - cmdstanpy - INFO - Chain [1] start processing
16:05:46 - cmdstanpy - INFO - Chain [1] start processing
16:05:46 - cmdstanpy - INFO - Chain [1] done processing
16:05:47 - cmdstanpy - INFO - Chain [1] start processing
16:05:47 - cmdstanpy - INFO - Chain [1] done processing
16:05:47 - cmdstanpy - INFO - Chain [1] start processing
16:05:47 - cmdstanpy - INFO - Chain [1] start processing
16:05:47 - cmdstanpy - INFO - Chain [1] start processing
16:05:47 - cmdstanpy - INFO - Chain [1] start processing
16:05:47 - cmdstanpy - INFO - Chain [1] start processing
16:05:47 - cmdstanpy - INFO - Chain [1] start processing
16:05:47 - cmdstanpy - INFO - Chain [1] start

  0%|          | 0/16 [00:00<?, ?it/s]

16:05:49 - cmdstanpy - INFO - Chain [1] start processing
16:05:49 - cmdstanpy - INFO - Chain [1] done processing
16:05:50 - cmdstanpy - INFO - Chain [1] start processing
16:05:50 - cmdstanpy - INFO - Chain [1] done processing
16:05:50 - cmdstanpy - INFO - Chain [1] start processing
16:05:50 - cmdstanpy - INFO - Chain [1] done processing
16:05:51 - cmdstanpy - INFO - Chain [1] start processing
16:05:51 - cmdstanpy - INFO - Chain [1] done processing
16:05:51 - cmdstanpy - INFO - Chain [1] start processing
16:05:52 - cmdstanpy - INFO - Chain [1] done processing
16:05:52 - cmdstanpy - INFO - Chain [1] start processing
16:05:52 - cmdstanpy - INFO - Chain [1] done processing
16:05:53 - cmdstanpy - INFO - Chain [1] start processing
16:05:53 - cmdstanpy - INFO - Chain [1] done processing
16:05:53 - cmdstanpy - INFO - Chain [1] start processing
16:05:53 - cmdstanpy - INFO - Chain [1] done processing
16:05:54 - cmdstanpy - INFO - Chain [1] start processing
16:05:54 - cmdstanpy - INFO - Chain [1]

New model not deployed. MAPE: 0.04% did not meet criteria.
🏃 View run Automated Prophet Retraining at: https://adb-3191716161457605.5.azuredatabricks.net/ml/experiments/3039328626124250/runs/a8fa80d3063945ccb6550b1d31fe9473
🧪 View experiment at: https://adb-3191716161457605.5.azuredatabricks.net/ml/experiments/3039328626124250


In [13]:
should_deploy

np.False_