#### Challenger model validation

#### 01. Fetch Model information

In [0]:
# Catalog and database used below to build the fully qualified model name
catalog, db = "workspace", "customer_churn"


In [0]:
from mlflow.tracking import MlflowClient

# Alias of the model version this notebook validates
model_alias = "Challenger"

# Registered model name, qualified as <catalog>.<db>.<model>
model_name = f"{catalog}.{db}.advanced_mlops_churn"

client = MlflowClient()

# Resolve the alias to a concrete model version, then fetch the run recorded on that version
model_details = client.get_model_version_by_alias(name=model_name, alias=model_alias)
model_version = int(model_details.version)
run_info = client.get_run(run_id=model_details.run_id)

print(f"Validating {model_alias} model for {model_name} on model version {model_version}")

#### 02. Model checks

#### Description check

In [0]:
# A description only counts when it is longer than this many characters.
# The threshold is named once so that the check and the message shown to the user cannot drift apart.
min_description_chars = 20

description = model_details.description

# Tag the model version according to whether a sufficiently detailed description exists
if not description:
  has_description = False
  print("Please add model description")

elif len(description) <= min_description_chars:
  has_description = False
  print(f"Please add detailed model description (more than {min_description_chars} characters).")

else:
  has_description = True

print(f'Model {model_name} version {model_details.version} has description: {has_description}')
client.set_model_version_tag(name=model_name, version=str(model_details.version), key="has_description", value=has_description)

#### Validate prediction

In [0]:
from databricks.feature_engineering import FeatureEngineeringClient
from pyspark.sql.types import StructType
import pandas as pd


fe = FeatureEngineeringClient()

# Name of the label column; its Spark data type is passed to score_batch as the result type
label_col = "churn"

# Model URI that addresses the registered model through its alias
model_uri = f"models:/{model_name}@{model_alias}"

# Arguments common to the success and failure tag written below
predicts_tag = dict(name=model_name, version=str(model_version), key="predicts")

try:
  # Table with the labels and IDs to score
  labelsDF = spark.read.table("advanced_churn_label_table")

  # Batch score, show the result, and record that the model can predict
  features_w_preds = fe.score_batch(
    df=labelsDF,
    model_uri=model_uri,
    result_type=labelsDF.schema[label_col].dataType,
  )
  display(features_w_preds)
  client.set_model_version_tag(value=True, **predicts_tag)

except Exception as e:
  # Any failure above is reported, replaced by an empty DataFrame, and recorded on the model version
  print(e)
  features_w_preds = spark.createDataFrame([], StructType([]))
  print("Unable to predict on features.")
  client.set_model_version_tag(value=False, **predicts_tag)

#### Artifact check

In [0]:
import os
import mlflow


# Local directory that receives the run's artifacts.
# makedirs(exist_ok=True) creates it when missing without a separate existence check.
# NOTE(review): this directory is reused across runs; files left over from an earlier
# download may still be present here - confirm whether it should be cleared first.
local_dir = "/tmp/model_artifacts"
os.makedirs(local_dir, exist_ok=True)

# Download the artifacts of the run behind the model version into local_dir
local_path = mlflow.artifacts.download_artifacts(run_id=run_info.info.run_id, dst_path=local_dir)

# List the download location once and reuse the listing for both the tag and the printout
artifact_names = os.listdir(local_path)
has_artifacts = bool(artifact_names)

# Tag model version as possessing artifacts or not (version passed as a string in both cases)
client.set_model_version_tag(name=model_name, version=str(model_version), key="has_artifacts", value=has_artifacts)

if has_artifacts:
  print("Artifacts downloaded in: {}".format(local_path))
  print("Artifacts: {}".format(artifact_names))

else:
  print("There are no artifacts associated with this model.  Please include some data visualization or data profiling.  MLflow supports HTML, .png, and more.")

#### Model performance metric

In [0]:
# F1 score logged on the run that produced the Challenger model version
model_run_id = model_details.run_id
f1_score = mlflow.get_run(model_run_id).data.metrics['test_f1_score']

# Only the alias lookup is guarded: a missing Champion is the one expected failure.
# Any other problem (for example a Champion run without 'test_f1_score') is left to
# raise, so that a broken comparison cannot silently pass this validation gate.
try:
    champion_model = client.get_model_version_by_alias(model_name, "Champion")

except mlflow.exceptions.MlflowException as e:
    print("No Champion found. Accept the model as it's the first one.")
    print(f"Champion lookup returned: {e}")
    metric_f1_passed = True

else:
    # Compare the challenger f1 score to the existing champion
    champion_f1 = mlflow.get_run(champion_model.run_id).data.metrics['test_f1_score']
    print(f'Champion f1 score: {champion_f1}. Challenger f1 score: {f1_score}.')
    metric_f1_passed = f1_score >= champion_f1

print(f'Model {model_name} version {model_details.version} metric_f1_passed: {metric_f1_passed}')

# Record the outcome of the F1 metric check on the model version
client.set_model_version_tag(name=model_name, version=model_details.version, key="metric_f1_passed", value=metric_f1_passed)

#### 03. Benchmark or business metrics on the eval dataset

In [0]:
import pyspark.sql.functions as F


# Validation dataset: the rows of the label table whose split is 'validate'
label_table = spark.table('advanced_churn_label_table')
validation_df = label_table.filter("split='validate'")

def predict_churn(validation_df, model_alias):
    """Batch-score ``validation_df`` with the model version behind ``model_alias``.

    The model is addressed as ``models:/<model_name>@<model_alias>`` and the
    result type passed to ``fe.score_batch`` is the Spark data type of the
    ``label_col`` column of ``validation_df``.

    Returns whatever ``fe.score_batch`` returns for those arguments.
    """
    aliased_model_uri = f"models:/{model_name}@{model_alias}"
    prediction_type = validation_df.schema[label_col].dataType
    return fe.score_batch(df=validation_df, model_uri=aliased_model_uri, result_type=prediction_type)

In [0]:
import pandas as pd
import plotly.express as px
from sklearn.metrics import confusion_matrix


# Note: This is over-simplified and depends on the use-case, but the idea is to evaluate our model against business metrics
cost_of_customer_churn = 2000  # in dollars
cost_of_discount = 500  # in dollars

# Each value is the signed revenue impact of one customer in that confusion-matrix cell:
# positive means revenue gained, negative means revenue lost.
cost_true_negative = 0  # did not churn, and we did not give a discount
# A missed churner is a lost customer, so it must count against the model. With a positive
# value here, a model that never predicts churn would score the highest "revenue gain".
cost_false_negative = -cost_of_customer_churn  # did churn, we lost the customer
cost_true_positive = cost_of_customer_churn - cost_of_discount  # we avoided churn at the price of the discount
cost_false_positive = -cost_of_discount  # would not have churned, the discount was given away

def get_model_value_in_dollar(model_alias):
    """Return the dollar value of the model behind ``model_alias`` on ``validation_df``.

    The validation set is scored with ``predict_churn`` and converted to pandas.
    The confusion matrix of the 'churn' column against the 'prediction' column is
    then weighted cell by cell with the module-level ``cost_*`` values and summed.
    """
    scored = predict_churn(validation_df, model_alias).toPandas()

    # The flattened matrix is unpacked as tn, fp, fn, tp
    matrix = confusion_matrix(scored['churn'], scored['prediction'])
    tn, fp, fn, tp = matrix.ravel()

    value_tn = tn * cost_true_negative
    value_fp = fp * cost_false_positive
    value_fn = fn * cost_false_negative
    value_tp = tp * cost_true_positive
    return value_tn + value_fp + value_fn + value_tp

# Default plot data, used when no comparison can be computed
data = {'Model Alias': ['Challenger', 'Champion'],
        'Potential Revenue Gain': [0, 0]}

# Only the alias lookup decides whether a Champion exists. Scoring problems are
# reported with their real error instead of being mislabelled as "No Champion found".
try:
    champion_model = client.get_model_version_by_alias(model_name, "Champion")

except mlflow.exceptions.MlflowException:
    print("No Champion found. Skipping business metrics evaluation.")
    print("You can return to re-run this cell after promoting the Challenger model as Champion in the rest of this notebook.")

else:
    # This cell is informational rather than a promotion gate, so a scoring failure is
    # printed and the zero-valued plot is kept instead of stopping the notebook.
    try:
        champion_potential_revenue_gain = get_model_value_in_dollar("Champion")
        challenger_potential_revenue_gain = get_model_value_in_dollar("Challenger")

        data = {'Model Alias': ['Challenger', 'Champion'],
                'Potential Revenue Gain': [challenger_potential_revenue_gain, champion_potential_revenue_gain]}

    except Exception as e:
        print(f"Business metrics evaluation failed: {e}")

# Create a bar plot using plotly express (last expression, so the notebook renders it)
px.bar(data, x='Model Alias', y='Potential Revenue Gain', color='Model Alias',
    labels={'Potential Revenue Gain': 'Revenue Impacted'},
    title='Business Metrics - Revenue Impacted')

In [0]:
# Re-read the model version so the tags written by the checks above are visible
results = client.get_model_version(name=model_name, version=model_version)
results.tags

#### 04. Promoting the Challenger to Champion

In [0]:
# MLflow stores tag values as strings, so the booleans written by the checks read back
# as "True" / "False". A plain truthiness test would accept "False" (a non-empty string),
# so each required tag is compared against "true" explicitly. A missing tag counts as failed.
required_checks = ("has_description", "metric_f1_passed", "predicts")
failed_checks = [
  check for check in required_checks
  if str(results.tags.get(check, "")).lower() != "true"
]

if not failed_checks:
  print(f"Registering model {model_name} Version {model_version} as Champion!")
  client.set_registered_model_alias(
    name=model_name,
    alias="Champion",
    version=model_version
  )

else:
  print(f"Failed or missing validation checks: {failed_checks}")
  raise Exception("Model not ready for promotion")