___
<a name="setup"></a>
## (1) Initial Setup

# Demand Forecasting Retail

## Part 3 - Prophet AutoML with MLflow


### Install pre-requisite dependencies

### Import dependencies and datasets

In [1]:
import pandas as pd
import mlflow
import pickle
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings

warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


In [2]:
# Read the train and test partitions from the processed-data folder.
train_path = 'data/processed/train.csv'
test_path = 'data/processed/test.csv'
train_df, test_df = pd.read_csv(train_path), pd.read_csv(test_path)

In [3]:
# Rename columns to the names Prophet works with: `ds` for the time column
# and `y` for the target. Only the training frame gets `y`; the test frame
# keeps its `Weekly_Sales` column under the original name.
train_column_map = {'Year': 'ds', 'Weekly_Sales': 'y'}
test_column_map = {'Year': 'ds'}
train_df = train_df.rename(columns=train_column_map)
test_df = test_df.rename(columns=test_column_map)

In [4]:
# Coerce the `ds` column of both frames to pandas datetimes.
# NOTE(review): `ds` was renamed from `Year`. If that column is numeric,
# pd.to_datetime interprets the values as nanoseconds since the Unix epoch
# (not as calendar years), which collapses every row to ~1970-01-01.
# Confirm the parsed dates are the intended ones before fitting.
for frame in (train_df, test_df):
    frame['ds'] = pd.to_datetime(frame['ds'])

In [5]:
# Select the MLflow experiment that the runs below are logged under.
experiment_name = "prophet-automl-forecasting"
mlflow.set_experiment(experiment_name=experiment_name)

<Experiment: artifact_location='file:///C:/Ml_Project/Demand-Forecasting-Retail/notebooks/mlruns/2', experiment_id='2', lifecycle_stage='active', name='prophet-automl-forecasting', tags={}>

In [6]:
with mlflow.start_run():
    # Fit Prophet with its default settings on the training frame
    # (which must already carry the `ds` and `y` columns).
    model = Prophet()
    model.fit(train_df)

    # Predict for exactly the timestamps present in the test frame.
    future = test_df[['ds']].copy()
    forecast = model.predict(future)

    # Keep only the timestamp and the point forecast.
    predictions = forecast[['ds', 'yhat']].rename(columns={'yhat': 'forecast'})

    # Evaluate against the held-out target.
    # NOTE(review): the comparison is positional, so it assumes `forecast`
    # comes back in the same row order as `test_df` -- confirm this holds when
    # `test_df` is not already sorted by `ds`.
    y_true = test_df['Weekly_Sales'].values
    y_pred = predictions['forecast'].values

    mae = mean_absolute_error(y_true, y_pred)
    # RMSE computed as sqrt(MSE). The `squared=False` keyword used previously
    # is deprecated in scikit-learn 1.4 and removed in 1.6, so taking the
    # square root explicitly keeps this cell working on every version.
    rmse = mean_squared_error(y_true, y_pred) ** 0.5

    # Log metrics
    mlflow.log_metric("MAE", mae)
    mlflow.log_metric("RMSE", rmse)

    # Save model manually (avoiding mlflow.prophet issues) and attach the
    # pickle file to the run as a plain artifact.
    model_path = "data/processed/prophet_model.pkl"
    with open(model_path, "wb") as f:
        pickle.dump(model, f)
    mlflow.log_artifact(model_path)

    print(f"MAE: {mae}, RMSE: {rmse}")

    # Save predictions
    predictions.to_csv("data/processed/forecast_results.csv", index=False)
    print("Forecast saved!")


22:47:55 - cmdstanpy - INFO - Chain [1] start processing
22:48:03 - cmdstanpy - INFO - Chain [1] done processing


MAE: 0.0230521324529331, RMSE: 0.034546070030004185
Forecast saved!


In [7]:
# Reload the pickled model from disk and forecast with the reloaded copy.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load pickle files that you created yourself.
with open(model_path, "rb") as model_file:
    best_model = pickle.load(model_file)

# Forecast on the test timestamps and show the last few rows.
test_forecast = best_model.predict(test_df[['ds']])
forecast_tail = test_forecast[['ds', 'yhat']].tail()
print(forecast_tail)

                                 ds      yhat
84309 1970-01-01 00:00:00.000000001  0.030038
84310 1970-01-01 00:00:00.000000001  0.030038
84311 1970-01-01 00:00:00.000000001  0.030038
84312 1970-01-01 00:00:00.000000001  0.030038
84313 1970-01-01 00:00:00.000000001  0.030038


### Setup MLflow

- First open a terminal (e.g. PowerShell or Git Bash) and change to the directory hosting this notebook (e.g. `newth@latest MINGW64 /c/Ml_Project/Demand-Forecasting-Retail`)
- Enter `mlflow ui` to start the MLflow server
- Once done, access the MLflow UI served at http://127.0.0.1:5000

![image.png](C://Ml_Project//Demand-Forecasting-Retail//image.png)

In [8]:
import mlflow
from mlflow.tracking import MlflowClient

# Client used to query experiment metadata.
client = MlflowClient()

# Activate the experiment, then fetch its record by name.
experiment_name = "prophet-automl-forecasting"
mlflow.set_experiment(experiment_name)
experiment = client.get_experiment_by_name(experiment_name)

# Summarise the experiment, one field per line.
print(
    f"Name: {experiment.name}",
    f"Experiment ID: {experiment.experiment_id}",
    f"Artifact Location: {experiment.artifact_location}",
    f"Tags: {experiment.tags}",
    f"Lifecycle Stage: {experiment.lifecycle_stage}",
    f"Tracking URI: {mlflow.get_tracking_uri()}",
    sep="\n",
)


Name: prophet-automl-forecasting
Experiment ID: 2
Artifact Location: file:///C:/Ml_Project/Demand-Forecasting-Retail/notebooks/mlruns/2
Tags: {}
Lifecycle Stage: active
Tracking URI: file:///C:/Ml_Project/Demand-Forecasting-Retail/notebooks/mlruns


___
<a name="automl"></a>
## (2) Prophet AutoML MLFlow Tracking

In [9]:
import mlflow
from mlflow.tracking import MlflowClient

# Initialize MLflow client
client = MlflowClient()

# Look the experiment up by name. get_experiment_by_name returns None when no
# such experiment exists, so fail with a readable message instead of an
# AttributeError on `.experiment_id`.
experiment = client.get_experiment_by_name("prophet-automl-forecasting")
if experiment is None:
    raise RuntimeError(
        "Experiment 'prophet-automl-forecasting' not found at tracking URI "
        f"{mlflow.get_tracking_uri()}"
    )

# search_runs is documented to take a list of experiment IDs.
runs = client.search_runs([experiment.experiment_id])

# Print details of each run
for run in runs:
    print(f"Run ID: {run.info.run_id}")
    print(f"Status: {run.info.status}")
    print(f"Start Time: {run.info.start_time}")
    print(f"Metrics: {run.data.metrics}")
    print(f"Params: {run.data.params}")
    print("-" * 50)


Run ID: 9993ff5ecf20489f835b198fea6a29c7
Status: FINISHED
Start Time: 1739639861713
Metrics: {'MAE': 0.0230521324529331, 'RMSE': 0.034546070030004185}
Params: {}
--------------------------------------------------
Run ID: 8d83ecd49e424f60a6dfed9887aa4882
Status: FINISHED
Start Time: 1739639611410
Metrics: {'accuracy': 0.92}
Params: {'param1': '5'}
--------------------------------------------------
Run ID: bbe200e629934f1a8036f6c2711a7412
Status: FINISHED
Start Time: 1739639603526
Metrics: {'accuracy': 0.92}
Params: {'param1': '5'}
--------------------------------------------------
Run ID: c8f3ef1108a4434b92112900eb653f72
Status: FINISHED
Start Time: 1739639019430
Metrics: {}
Params: {}
--------------------------------------------------
Run ID: f6034adbe8b745739954d6b014f3172d
Status: FINISHED
Start Time: 1739638876016
Metrics: {'accuracy': 0.92}
Params: {'param1': '5'}
--------------------------------------------------
Run ID: 66bbd674508a423a9b578465b73b53dc
Status: FINISHED
Start Time

# MLflow Tracking UI
mlflow ui --backend-store-uri file:///C:/Ml_Project/Demand-Forecasting-Retail/notebooks/mlruns


mlflow server \
    --backend-store-uri sqlite:///mlflow.db \
    --default-artifact-root file:///C:/Ml_Project/Demand-Forecasting-Retail/notebooks/mlruns \
    --host 127.0.0.1 --port 5000


In [None]:
# Run this in Git Bash to start the tracking server
# mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root file:///C:/Ml_Project/Demand-Forecasting-Retail/notebooks/mlruns --host 0.0.0.0 --port 5000

In [15]:
# Point MLflow at the tracking server on localhost, port 5000.
# NOTE(review): assumes that server is already running -- start it before
# executing this cell.
mlflow.set_tracking_uri("http://127.0.0.1:5000")

In [16]:
import mlflow

# Close any run that is still open so the new run below starts cleanly.
if mlflow.active_run() is not None:
    mlflow.end_run()

# Open a fresh run and record one placeholder parameter and metric.
with mlflow.start_run():
    mlflow.log_param(key="param1", value=5)
    mlflow.log_metric(key="accuracy", value=0.92)


In [17]:
# Log a placeholder run to the tracking server under the forecasting experiment.
import mlflow

# Target the local tracking server and pick the experiment to log into.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("prophet-automl-forecasting")

# Record one placeholder parameter and one placeholder metric in a new run.
with mlflow.start_run():
    mlflow.log_param(key="param1", value=5)
    mlflow.log_metric(key="accuracy", value=0.92)


### View best model

In [22]:
import mlflow
import mlflow.pyfunc
from mlflow.tracking import MlflowClient

client = MlflowClient()

# Choose the run to register by lowest RMSE rather than a pasted run ID.
# The filter keeps only runs that actually logged an RMSE metric, which
# skips the placeholder runs that logged nothing but `accuracy`.
experiment = client.get_experiment_by_name("prophet-automl-forecasting")
if experiment is None:
    raise RuntimeError(
        "Experiment 'prophet-automl-forecasting' not found at tracking URI "
        f"{mlflow.get_tracking_uri()}"
    )
best_runs = client.search_runs(
    [experiment.experiment_id],
    filter_string="metrics.RMSE >= 0",
    order_by=["metrics.RMSE ASC"],
    max_results=1,
)
if not best_runs:
    raise RuntimeError(
        "No run with an RMSE metric was found; run the training cell first."
    )
run_id = best_runs[0].info.run_id

# The training cell attaches the pickled model with mlflow.log_artifact, so
# the file sits at the root of the run's artifacts under its own file name;
# no "model" directory is ever created for the run.
model_uri = f"runs:/{run_id}/prophet_model.pkl"
model_name = "ProphetForecastModel"

# Register the model in MLflow Model Registry.
# NOTE: the registry requires a tracking server whose backend store is a
# database (postgresql, mysql, sqlite or mssql). Against a file-based store
# this call fails with INVALID_PARAMETER_VALUE, so start the server with e.g.
# `--backend-store-uri sqlite:///mlflow.db` first.
# NOTE(review): the registered version points at a raw pickle file rather than
# an MLflow-format model, so it presumably cannot be loaded through
# mlflow.pyfunc -- confirm how downstream code is expected to load it.
mlflow.register_model(model_uri, model_name)


RestException: INVALID_PARAMETER_VALUE:  Model registry functionality is unavailable; got unsupported URI 'file:///C:/Ml_Project/Demand-Forecasting-Retail/notebooks/mlruns' for model registry data storage. Supported URI schemes are: ['postgresql', 'mysql', 'sqlite', 'mssql']. See https://www.mlflow.org/docs/latest/tracking.html#storage for how to run an MLflow server against one of the supported backend storage locations.