In [0]:
import hashlib
import time

import mlflow
import pandas as pd
import requests
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import EndpointCoreConfigInput, ServedEntityInput
from lightgbm import LGBMClassifier
from mlflow import MlflowClient
from mlflow.models import infer_signature
# from pyspark.sql import SparkSession
from databricks.connect import DatabricksSession
from sklearn.compose import ColumnTransformer
from sklearn.metrics import roc_auc_score  # classification_report, confusion_matrix,
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler

from utils import load_config

In [0]:
# Set up MLflow for tracking and model registry.
# Tracking URI "databricks" and registry URI "databricks-uc" are set explicitly
# so every later run/registration in this notebook uses them.
mlflow.set_tracking_uri("databricks")
mlflow.set_registry_uri("databricks-uc")

In [0]:
# Load the project configuration from the YAML file one directory up and
# print it so the values used by this run are visible in the notebook output.
config = load_config("../project_config.yml")
print(config)

[32m2025-03-28 21:22:22.256[0m | [1mINFO    [0m | [36mutils[0m:[36mload_config[0m:[36m66[0m - [1mLoaded configuration from ../project_config.yml[0m


catalog_name='test01' schema_name='default' pipeline_id='4be66e88-11e8-4988-9fa3-459b9b28a83c' parameters={'learning_rate': 0.05, 'random_state': 42, 'force_col_wise': True} ab_test={'learning_rate_a': 0.05, 'learning_rate_b': 0.1, 'force_col_wise': True} num_features=[NumFeature(name='ID', dtype='int64'), NumFeature(name='LIMIT_BAL', dtype='float64'), NumFeature(name='SEX', dtype='int64'), NumFeature(name='EDUCATION', dtype='int64'), NumFeature(name='MARRIAGE', dtype='int64'), NumFeature(name='AGE', dtype='int64'), NumFeature(name='PAY_0', dtype='int64'), NumFeature(name='PAY_2', dtype='int64'), NumFeature(name='PAY_3', dtype='int64'), NumFeature(name='PAY_4', dtype='int64'), NumFeature(name='PAY_5', dtype='int64'), NumFeature(name='PAY_6', dtype='int64'), NumFeature(name='BILL_AMT1', dtype='float64'), NumFeature(name='BILL_AMT2', dtype='float64'), NumFeature(name='BILL_AMT3', dtype='float64'), NumFeature(name='BILL_AMT4', dtype='float64'), NumFeature(name='BILL_AMT5', dtype='float64'

In [0]:
# Initialize the MLflow client for model management
# (used further down to assign aliases to registered model versions).
client = MlflowClient()

In [0]:
# Pull the settings this notebook needs out of the loaded config object.
# Unity Catalog location of the tables and registered models:
catalog_name, schema_name = config.catalog_name, config.schema_name

# Baseline model parameters and the columns routed through the robust scaler.
parameters = config.parameters
features_robust = config.features.robust

# Per-variant settings for the A/B test.
ab_test_params = config.ab_test

In [0]:
# Hyper-parameters for variant A of the A/B test, taken from the ab_test config.
parameters_a = dict(
    learning_rate=ab_test_params["learning_rate_a"],
    force_col_wise=ab_test_params["force_col_wise"],
)

print(parameters_a)

{'learning_rate': 0.05, 'force_col_wise': True}


In [0]:
# Hyper-parameters for variant B of the A/B test, taken from the ab_test config.
parameters_b = dict(
    learning_rate=ab_test_params["learning_rate_b"],
    force_col_wise=ab_test_params["force_col_wise"],
)

print(parameters_b)

{'learning_rate': 0.1, 'force_col_wise': True}


In [0]:
## Load and Prepare Training and Testing Datasets

spark = DatabricksSession.builder.getOrCreate()

# Fully qualified names of the source tables.
train_table = f"{catalog_name}.{schema_name}.train_set"
test_table = f"{catalog_name}.{schema_name}.test_set"

# The Spark handle to the training table is kept alongside the pandas copy;
# the training cells pass it to mlflow.data.from_spark.
train_set_spark = spark.table(train_table)
train_set = train_set_spark.toPandas()
test_set = spark.table(test_table).toPandas()

# Columns excluded from the model inputs: the target plus Id and the update timestamp.
non_feature_columns = ["Default", "Id", "Update_timestamp_utc"]

# Features / target for each split.
X_train, y_train = train_set.drop(columns=non_feature_columns), train_set["Default"]
X_test, y_test = test_set.drop(columns=non_feature_columns), test_set["Default"]

In [0]:
# Preprocessing: robust-scale the configured columns and pass every other
# column through unchanged.
robust_scaling_step = ("robust_scaler", RobustScaler(), features_robust)
preprocessor = ColumnTransformer(transformers=[robust_scaling_step], remainder="passthrough")

In [0]:
# Pipeline for variant A: shared preprocessor followed by a LightGBM
# classifier built from parameters_a.
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", LGBMClassifier(**parameters_a)),
    ]
)

In [0]:
# Registered-model name under which both A/B variants are versioned.
model_name = f"{catalog_name}.{schema_name}.credit_default_model_ab"

# Experiment that collects the training runs of this A/B testing project.
mlflow.set_experiment("/Shared/credit_default-ab")

2025/03/28 21:24:30 INFO mlflow.tracking.fluent: Experiment with name '/Shared/credit_default-ab' does not exist. Creating a new experiment.


In [0]:
# Start MLflow run to track training of Model A
with mlflow.start_run(tags={"model_class": "A", "branch": "serving"}) as run:
    run_id = run.info.run_id

    # Train the model
    pipeline.fit(X_train, y_train)

    # Hard class labels: used only as the output example for the model signature.
    y_pred = pipeline.predict(X_test)

    # Evaluate the model performance.
    # ROC AUC is a ranking metric, so it is computed from the predicted
    # probability of the positive class (second column of predict_proba).
    # Passing hard labels would score a single operating point only.
    y_score = pipeline.predict_proba(X_test)[:, 1]
    auc_test = roc_auc_score(y_test, y_score)

    # Log parameters, metrics, and the model to MLflow
    mlflow.log_param("model_type", "LightGBM with preprocessing")
    mlflow.log_params(parameters_a)
    mlflow.log_metric("AUC", auc_test)

    # Log the input dataset
    dataset = mlflow.data.from_spark(train_set_spark, table_name=f"{catalog_name}.{schema_name}.train_set", version="0")

    mlflow.log_input(dataset, context="training")

    # Log the model
    signature = infer_signature(model_input=X_train, model_output=y_pred)
    mlflow.sklearn.log_model(sk_model=pipeline, artifact_path="lightgbm-pipeline-model", signature=signature)

[LightGBM] [Info] Number of positive: 5323, number of negative: 18677
[LightGBM] [Info] Total Bins 3249
[LightGBM] [Info] Number of data points in the train set: 24000, number of used features: 23
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.221792 -> initscore=-1.255256
[LightGBM] [Info] Start training from score -1.255256


2025/03/28 21:25:12 INFO mlflow.tracking._tracking_service.client: 🏃 View run amazing-pig-460 at: https://adb-4478913909061743.3.azuredatabricks.net/ml/experiments/327870133523278/runs/9a7e5d0242dd445d9d1b74b8f79a5179.
2025/03/28 21:25:12 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://adb-4478913909061743.3.azuredatabricks.net/ml/experiments/327870133523278.


In [0]:
# Register Model A from the artifact logged in the run above
model_a_tags = {"model_class": "A", "branch": "serving"}
model_version = mlflow.register_model(
    model_uri=f"runs:/{run_id}/lightgbm-pipeline-model",
    name=model_name,
    tags=model_a_tags,
)

Successfully registered model 'test01.default.credit_default_model_ab'.
Created version '1' of model 'test01.default.credit_default_model_ab'.


In [0]:
# Show the version number of the model version returned by mlflow.register_model.
print(model_version.version)

1


In [0]:
## Assign Alias to registered Model A

# The alias gives later A/B-test code a stable handle on this version.
model_version_alias = "model_A"
client.set_registered_model_alias(
    name=model_name,
    alias=model_version_alias,
    version=str(model_version.version),
)

# Load the model back from the registry through its alias.
model_uri = f"models:/{model_name}@{model_version_alias}"
model_A = mlflow.sklearn.load_model(model_uri)

In [0]:
# Pipeline for variant B: shared preprocessor followed by a LightGBM
# classifier built from parameters_b.
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", LGBMClassifier(**parameters_b)),
    ]
)

In [0]:
# Start MLflow run to track training of Model B
with mlflow.start_run(tags={"model_class": "B", "branch": "serving"}) as run:
    run_id = run.info.run_id

    # Train the model
    pipeline.fit(X_train, y_train)

    # Hard class labels: used only as the output example for the model signature.
    y_pred = pipeline.predict(X_test)

    # Evaluate the model performance.
    # ROC AUC is a ranking metric, so it is computed from the predicted
    # probability of the positive class (second column of predict_proba).
    # Passing hard labels would score a single operating point only.
    y_score = pipeline.predict_proba(X_test)[:, 1]
    auc_test = roc_auc_score(y_test, y_score)

    # Log parameters, metrics, and the model to MLflow
    mlflow.log_param("model_type", "LightGBM with preprocessing")
    mlflow.log_params(parameters_b)
    mlflow.log_metric("AUC", auc_test)

    # Log the input dataset
    dataset = mlflow.data.from_spark(train_set_spark, table_name=f"{catalog_name}.{schema_name}.train_set", version="0")

    mlflow.log_input(dataset, context="training")

    # Log the model
    signature = infer_signature(model_input=X_train, model_output=y_pred)
    mlflow.sklearn.log_model(sk_model=pipeline, artifact_path="lightgbm-pipeline-model", signature=signature)

[LightGBM] [Info] Number of positive: 5323, number of negative: 18677
[LightGBM] [Info] Total Bins 3249
[LightGBM] [Info] Number of data points in the train set: 24000, number of used features: 23
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.221792 -> initscore=-1.255256
[LightGBM] [Info] Start training from score -1.255256


2025/03/28 21:26:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run useful-midge-562 at: https://adb-4478913909061743.3.azuredatabricks.net/ml/experiments/327870133523278/runs/98ea03367cb34fee99b91808f0216d13.
2025/03/28 21:26:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://adb-4478913909061743.3.azuredatabricks.net/ml/experiments/327870133523278.


In [0]:
# Register Model B from the artifact logged in the run above
model_b_tags = {"model_class": "B", "branch": "serving"}
model_version = mlflow.register_model(
    model_uri=f"runs:/{run_id}/lightgbm-pipeline-model",
    name=model_name,
    tags=model_b_tags,
)

Registered model 'test01.default.credit_default_model_ab' already exists. Creating a new version of this model...
Created version '2' of model 'test01.default.credit_default_model_ab'.


In [0]:
## Assign Alias to registered Model B

# The alias gives later A/B-test code a stable handle on this version.
model_version_alias = "model_B"
client.set_registered_model_alias(
    name=model_name,
    alias=model_version_alias,
    version=str(model_version.version),
)

# Load the model back from the registry through its alias.
model_uri = f"models:/{model_name}@{model_version_alias}"
model_B = mlflow.sklearn.load_model(model_uri)

In [0]:
class CreditDefaultModelWrapper(mlflow.pyfunc.PythonModel):
    """Pyfunc wrapper that routes a request to Model A or Model B.

    The variant is chosen from the MD5 hash of the ``Id`` value in the first
    row of the input: an odd hash selects Model A, an even hash Model B, so a
    given ``Id`` is always served by the same variant. Only the prediction for
    the first row is returned.
    """

    def __init__(self, models):
        """Store the two variants.

        Args:
            models: Indexable collection where ``models[0]`` is Model A and
                ``models[1]`` is Model B. Each must expose ``predict``.
        """
        self.models = models
        self.model_a = models[0]
        self.model_b = models[1]

    def predict(self, context, model_input):
        """Score the first row of ``model_input`` with the variant picked by its Id.

        Args:
            context: MLflow pyfunc context; not used.
            model_input: pandas DataFrame containing an ``Id`` column plus the
                feature columns expected by the wrapped models.

        Returns:
            dict with ``"Prediction"`` (the first prediction of the selected
            model) and ``"model"`` (``"Model A"`` or ``"Model B"``).

        Raises:
            ValueError: If the input is not a DataFrame, has no ``Id`` column,
                or has no rows.
        """
        # Validate up front so bad payloads fail with a clear message instead
        # of an opaque KeyError / IndexError deeper in the routing logic.
        if not isinstance(model_input, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame.")
        if "Id" not in model_input.columns:
            raise ValueError("Input must contain an 'Id' column.")
        if model_input.empty:
            raise ValueError("Input must contain at least one row.")

        credit_id = str(model_input["Id"].values[0])  # Id number
        hashed_id = hashlib.md5(credit_id.encode(encoding="UTF-8")).hexdigest()

        # Parse the hex digest as an integer; its parity selects the variant.
        if int(hashed_id, 16) % 2:
            selected_model, label = self.model_a, "Model A"
        else:
            selected_model, label = self.model_b, "Model B"

        # Id is only a routing key, not a model feature.
        predictions = selected_model.predict(model_input.drop(["Id"], axis=1))
        return {"Prediction": predictions[0], "model": label}

In [0]:
# Rebuild the feature frames from the config's "clean" column list.
# The wrapper cells below read an "Id" column from these frames, so the list
# presumably includes Id -- confirm against project_config.yml.
columns = config.features.clean

X_train, X_test = train_set[columns], test_set[columns]

In [0]:
# Run prediction on model A
models = [model_A, model_B]
wrapped_model = CreditDefaultModelWrapper(models)

# Single-row frame; per the recorded output this row's Id hashes to Model A.
example_input = X_test.iloc[:1]
example_prediction = wrapped_model.predict(None, example_input)

print("Example Prediction:", example_prediction)

Example Prediction: {'Prediction': 0, 'model': 'Model A'}


In [0]:
# Run prediction on model B
models = [model_A, model_B]
wrapped_model = CreditDefaultModelWrapper(models)

# Single-row frame; per the recorded output this row's Id hashes to Model B.
example_input = X_test.iloc[112:113]
example_prediction = wrapped_model.predict(None, example_input)

print("Example Prediction:", example_prediction)

Example Prediction: {'Prediction': 0, 'model': 'Model B'}


In [0]:
# Log the A/B wrapper as a pyfunc model in its own experiment, then register it.

mlflow.set_experiment(experiment_name="/Shared/credit_default-ab-testing")
model_name = f"{catalog_name}.{schema_name}.credit_default_model_pyfunc_ab_test"

wrapper_artifact_path = "pyfunc-credit_default-model-ab"

with mlflow.start_run() as run:
    run_id = run.info.run_id

    # Record the training table as the run's input dataset.
    dataset = mlflow.data.from_spark(train_set_spark, table_name=f"{catalog_name}.{schema_name}.train_set", version="0")
    mlflow.log_input(dataset, context="training")

    # The output example mirrors the dict shape returned by the wrapper.
    signature = infer_signature(model_input=X_train, model_output={"Prediction": 0, "model": "Model B"})
    mlflow.pyfunc.log_model(
        python_model=wrapped_model,
        artifact_path=wrapper_artifact_path,
        signature=signature,
    )

# Registration happens after the run has been closed.
model_version = mlflow.register_model(
    model_uri=f"runs:/{run_id}/{wrapper_artifact_path}",
    name=model_name,
    tags={"branch": "serving"},
)

2025/03/28 21:28:02 INFO mlflow.tracking.fluent: Experiment with name '/Shared/credit_default-ab-testing' does not exist. Creating a new experiment.
2025/03/28 21:28:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run gentle-hawk-773 at: https://adb-4478913909061743.3.azuredatabricks.net/ml/experiments/327870133523292/runs/ccdee4cbc5e44952a998f2b1f8c08e57.
2025/03/28 21:28:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://adb-4478913909061743.3.azuredatabricks.net/ml/experiments/327870133523292.
Successfully registered model 'test01.default.credit_default_model_pyfunc_ab_test'.
Created version '1' of model 'test01.default.credit_default_model_pyfunc_ab_test'.


In [0]:
# Load the registered wrapper back by explicit version number.
wrapped_model_uri = f"models:/{model_name}/{model_version.version}"
model = mlflow.pyfunc.load_model(model_uri=wrapped_model_uri)
print(model)

mlflow.pyfunc.loaded_model:
  artifact_path: pyfunc-credit_default-model-ab
  flavor: mlflow.pyfunc.model
  run_id: ccdee4cbc5e44952a998f2b1f8c08e57



In [0]:
# Score the same two single-row examples through the reloaded pyfunc model.
predictions_a = model.predict(X_test.iloc[0:1])
predictions_b = model.predict(X_test.iloc[112:113])

for variant_prediction in (predictions_a, predictions_b):
    print(variant_prediction)

{'Prediction': 0, 'model': 'Model A'}
{'Prediction': 0, 'model': 'Model B'}


In [0]:
## Create serving endpoint

workspace = WorkspaceClient()

# The single served entity: the registered pyfunc A/B wrapper at the version
# registered above.
ab_test_entity = ServedEntityInput(
    entity_name=f"{catalog_name}.{schema_name}.credit_default_model_pyfunc_ab_test",
    entity_version=model_version.version,
    workload_size="Small",
    scale_to_zero_enabled=True,
)

workspace.serving_endpoints.create(
    name="credit_default-model-serving-ab-test",
    config=EndpointCoreConfigInput(served_entities=[ab_test_entity]),
)

<databricks.sdk.service._internal.Wait at 0x7f482d25d850>

In [0]:
import time

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import EndpointStateReady

w = WorkspaceClient()


def wait_for_endpoint(endpoint_name, max_wait_sec=300, check_interval_sec=10):
    """Poll a serving endpoint until it reports READY or the timeout elapses.

    Args:
        endpoint_name: Name of the serving endpoint to poll.
        max_wait_sec: Maximum total time to keep polling, in seconds.
        check_interval_sec: Pause between two polls, in seconds.

    Returns:
        True if the endpoint reported READY within ``max_wait_sec``,
        False otherwise.
    """
    start_time = time.monotonic()
    while time.monotonic() - start_time < max_wait_sec:
        state = w.serving_endpoints.get(name=endpoint_name).state
        # `state.ready` is an EndpointStateReady enum. Every enum member is
        # truthy (NOT_READY included), so it must be compared against READY
        # explicitly; a plain truthiness test would succeed on the first poll.
        if state is not None and state.ready == EndpointStateReady.READY:
            print(f"Endpoint {endpoint_name} is ready!")
            return True
        print(f"Endpoint {endpoint_name} not ready yet. Waiting {check_interval_sec} seconds...")
        time.sleep(check_interval_sec)
    print(f"Endpoint {endpoint_name} did not become ready within {max_wait_sec} seconds")
    return False


wait_for_endpoint("credit_default-model-serving-ab-test")

Endpoint credit_default-model-serving-ab-test is ready!


In [0]:
# Short-lived API token of the current notebook session.
token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()  # noqa: F821

# Alternative: read a stored token from a secret scope.
# token = dbutils.secrets.get(scope="secret-scope", key="databricks-token")  # noqa: F821

host = spark.conf.get("spark.databricks.workspaceUrl")
train_set = spark.table(f"{catalog_name}.{schema_name}.train_set").toPandas()

# Build request payloads: each sampled row becomes its own one-record list,
# which is the shape sent under "dataframe_records".
sampled_records = train_set[columns].sample(n=1000, replace=True).to_dict(orient="records")
dataframe_records = [[record] for record in sampled_records]

model_serving_endpoint = f"https://{host}/serving-endpoints/credit_default-model-serving-ab-test/invocations"

# Make predictions

# Time only the HTTP round trip. The timeout keeps the cell from hanging
# forever if the endpoint never answers; it is generous because the endpoint
# is created with scale-to-zero enabled and may need to start up first.
start_time = time.time()

response = requests.post(
    model_serving_endpoint,
    headers={"Authorization": f"Bearer {token}"},
    json={"dataframe_records": dataframe_records[175]},
    timeout=300,
)

end_time = time.time()
execution_time = end_time - start_time

print("Response status:", response.status_code)
print("Reponse text:", response.text)
print("Execution time:", execution_time, "seconds")

Response status: 200
Reponse text: {"predictions": {"Prediction": 0, "model": "Model A"}}
Execution time: 0.2959156036376953 seconds
