# Deploy Best Model to an Online Endpoint

Use this notebook to convert the best trained model into an MLflow artifact, register it with Azure ML, and deploy it to a managed online endpoint.

## 0. Prerequisites

Make sure you have:

- Run `run_pipeline.py` (or the pipeline from `main.ipynb`) so that `outputs/model_output/<model_name>_model.pkl` exists locally.
- `azure-ai-ml>=1.14.0`, `mlflow`, and `azure-identity` installed in the current environment.
- `config.env` populated with your workspace and data asset settings.

If you are on a compute instance, these requirements should already be satisfied.


In [None]:
from __future__ import annotations

import json
import os
import time
from pathlib import Path
from typing import Dict, Any

import joblib
import mlflow
import mlflow.sklearn
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Model, ManagedOnlineEndpoint, ManagedOnlineDeployment
from azure.ai.ml.constants import AssetTypes

from src.utils import load_azure_config

NOTEBOOK_ROOT = Path.cwd().resolve()
PROJECT_ROOT = NOTEBOOK_ROOT if (NOTEBOOK_ROOT / "src").exists() else NOTEBOOK_ROOT.parent
os.chdir(PROJECT_ROOT)

print(f"Project root: {PROJECT_ROOT}")


Project root: /workspaces/customer-churn-prediction-azureml


In [60]:
# --- User Inputs -----------------------------------------------------------
OUTPUTS_DIR = PROJECT_ROOT / "outputs"
MODEL_OUTPUT_DIR = OUTPUTS_DIR / "model_output"
MODEL_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Check for existing MLflow bundles first (preferred - preserves original Python version)
MLFLOW_BUNDLE_DIR = None
env_mlflow_path = os.getenv("AML_MLFLOW_BUNDLE_PATH")
if env_mlflow_path:
    MLFLOW_BUNDLE_DIR = Path(env_mlflow_path)
elif OUTPUTS_DIR.exists():
    mlflow_bundles = sorted(
        [d for d in OUTPUTS_DIR.iterdir() if d.is_dir() and d.name.endswith("_mlflow")],
        key=lambda p: p.stat().st_mtime if p.exists() else 0,
        reverse=True,
    )
    if mlflow_bundles:
        MLFLOW_BUNDLE_DIR = mlflow_bundles[0]

# If no MLflow bundle, look for pickle files
MODEL_PICKLE_PATH = None
if not MLFLOW_BUNDLE_DIR:
    env_model_path = os.getenv("AML_MODEL_PICKLE_PATH")
    if env_model_path:
        MODEL_PICKLE_PATH = Path(env_model_path)
    else:
        candidate_pkls = []
        for search_dir in [MODEL_OUTPUT_DIR, OUTPUTS_DIR]:
            if search_dir.exists():
                candidate_pkls.extend(search_dir.glob("*_model.pkl"))
        
        if candidate_pkls:
            MODEL_PICKLE_PATH = sorted(candidate_pkls, key=lambda p: p.stat().st_mtime, reverse=True)[0]
        else:
            available = "\n".join(str(p) for p in MODEL_OUTPUT_DIR.glob("*")) or "(directory is empty)"
            raise FileNotFoundError(
                "No MLflow bundles (*_mlflow/) or *_model.pkl files were found. \n"
                "Run run_pipeline.py locally or download the best-model artifacts from the pipeline run, "
                "then place them inside outputs/ or set AML_MLFLOW_BUNDLE_PATH/AML_MODEL_PICKLE_PATH.\n"
                f"Current directory listing:\n{available}"
            )

# Directory to store the temporary MLflow model artifact (if converting from pickle)
MLFLOW_MODEL_DIR = PROJECT_ROOT / "artifacts" / "mlflow_online_model"
MLFLOW_MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Names for Azure resources
MODEL_NAME = os.getenv("AML_DEPLOY_MODEL_NAME", "bank-churn-best-model")
ENDPOINT_NAME = os.getenv("AML_ONLINE_ENDPOINT_NAME", f"churn-endpoint-{int(time.time())}")
DEPLOYMENT_NAME = os.getenv("AML_ONLINE_DEPLOYMENT_NAME", "blue")

if MLFLOW_BUNDLE_DIR:
    print(f"✓ Using existing MLflow bundle: {MLFLOW_BUNDLE_DIR}")
    MLFLOW_MODEL_DIR = MLFLOW_BUNDLE_DIR
else:
    print(f"Using model pickle: {MODEL_PICKLE_PATH}")
    print(f"MLflow artifact dir: {MLFLOW_MODEL_DIR}")

print(f"Model asset name: {MODEL_NAME}")
print(f"Endpoint name: {ENDPOINT_NAME}")
print(f"Deployment name: {DEPLOYMENT_NAME}")


✓ Using existing MLflow bundle: /workspaces/customer-churn-prediction-azureml/outputs/xgboost_mlflow
Model asset name: bank-churn-best-model
Endpoint name: churn-endpoint-1763511924
Deployment name: blue


In [63]:
# Connect to Azure ML workspace
load_dotenv(PROJECT_ROOT / "config.env")

azure_cfg = load_azure_config()

try:
    credential = DefaultAzureCredential()
    credential.get_token("https://management.azure.com/.default")
except Exception:
    credential = InteractiveBrowserCredential()

ml_client = MLClient(
    credential,
    subscription_id=azure_cfg["subscription_id"],
    resource_group_name=azure_cfg["resource_group"],
    workspace_name=azure_cfg["workspace_name"],
)
print(
    f"Connected to workspace: {ml_client.workspace_name} | "
    f"resource group: {ml_client.resource_group_name}"
)


Overriding of current TracerProvider is not allowed
Overriding of current LoggerProvider is not allowed
Overriding of current MeterProvider is not allowed
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented


Connected to workspace: churn-ml-workspace | resource group: rg-churn-ml-project-2025-11-15


In [64]:
# Register MLflow model asset
model_asset = Model(
    name=MODEL_NAME,
    path=str(MLFLOW_MODEL_DIR),
    type=AssetTypes.MLFLOW_MODEL,
    description="Best churn model exported from training pipeline",
)
registered_model = ml_client.models.create_or_update(model_asset)
print(f"Registered model: {registered_model.name}:{registered_model.version}")


[32mUploading xgboost_mlflow (0.43 MBs): 100%|██████████| 430941/430941 [00:01<00:00, 309419.14it/s]
[39m



Registered model: bank-churn-best-model:2


In [None]:
# Create or update managed online endpoint
# Check if endpoint already exists and delete if in failed state
try:
    existing_endpoint = ml_client.online_endpoints.get(ENDPOINT_NAME)
    if existing_endpoint.provisioning_state in ["Failed", "Canceled"]:
        print(f"Endpoint {ENDPOINT_NAME} is in {existing_endpoint.provisioning_state} state. Deleting...")
        ml_client.online_endpoints.begin_delete(ENDPOINT_NAME).result()
        print(f"Deleted failed endpoint {ENDPOINT_NAME}")
        time.sleep(5)  # Wait for deletion to propagate
except Exception:
    # Endpoint doesn't exist or other error - proceed with creation
    pass

endpoint = ManagedOnlineEndpoint(
    name=ENDPOINT_NAME,
    auth_mode="key",
    description="Online endpoint serving the churn model",
)

endpoint = ml_client.begin_create_or_update(endpoint).result()
print(f"Endpoint ready: {endpoint.name}")


In [None]:
# Deploy the model
deployment = ManagedOnlineDeployment(
    name=DEPLOYMENT_NAME,
    endpoint_name="churn-endpoint-1763505617",
    model=registered_model,
    instance_type=os.getenv("AML_ONLINE_INSTANCE_TYPE", "Standard_D2as_v4"),
    instance_count=int(os.getenv("AML_ONLINE_INSTANCE_COUNT", "1")),
)

ml_client.online_deployments.begin_create_or_update(deployment).result()
print(f"Deployment '{DEPLOYMENT_NAME}' is live")


Check: endpoint churn-endpoint-1763505617 exists


...................................................................................................................................................................Deployment 'blue' is live


In [68]:
# --- Route traffic to the deployment --------------------------------------
endpoint.traffic = {DEPLOYMENT_NAME: 100}
ml_client.begin_create_or_update(endpoint).result()
print(f"Endpoint traffic updated: {endpoint.traffic}")


Readonly attribute principal_id will be ignored in class <class 'azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity'>
Readonly attribute tenant_id will be ignored in class <class 'azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity'>


Endpoint traffic updated: {'blue': 100}


## 6. Invoke the endpoint

Create a JSON file with inference data (for example `sample-data.json`) that matches the shape expected by your model. Use the cell below to invoke the endpoint and inspect predictions.


In [86]:
REQUEST_FILE = PROJECT_ROOT / "sample-data.json"

if not REQUEST_FILE.exists():
    raise FileNotFoundError(f"{REQUEST_FILE} not found.")

response = ml_client.online_endpoints.invoke(
    endpoint_name="churn-endpoint-1763505617",
    deployment_name=DEPLOYMENT_NAME,
    request_file=str(REQUEST_FILE),
)
print("Raw response:", response)


Raw response: [0]


## 7. (Optional) Delete the endpoint

Managed online endpoints accrue cost while running. Use the following cell to delete it when you're done testing.


In [None]:
# Uncomment to delete the managed endpoint when no longer needed
# ml_client.online_endpoints.begin_delete(name=ENDPOINT_NAME)
# print(f"Deleted endpoint {ENDPOINT_NAME}")
