# Configure a drift detection alert

In [19]:
# Container image passed to the model-monitoring setup, the serving function,
# and the custom monitoring application further down in this notebook.
image = "mlrun/mlrun"

## Create the drift alert project

In [20]:
%config Completer.use_jedi = False
import os
import pandas as pd
from sklearn.datasets import load_iris
import mlrun
import mlrun.common.schemas.alert as alert_objects
from mlrun import import_function, get_dataitem, get_or_create_project
from mlrun.model_monitoring.helpers import get_result_instance_fqn

# Load the project (or create it if it does not exist yet), using the current
# directory as the project context.
project_name = "drift-alert-test-project"
project = get_or_create_project(project_name, context="./")

> 2024-08-04 14:05:03,774 [info] Project loaded successfully: {"project_name":"drift-alert-test-project"}


## Enable model monitoring
The default histogram data drift application is disabled, since you will use a user-defined monitoring app later on.

In [21]:
# Turn on model monitoring without the built-in histogram data drift app;
# a custom monitoring application is registered later in this notebook.
project.enable_model_monitoring(
    base_period=1,
    image=image,
    deploy_histogram_data_drift_app=False,
)

## Download a pre-trained model and load it

In [22]:
# Make sure the pre-trained Iris model file exists locally before logging it.
# The download is skipped when the file is already present, so an existing
# local copy is never overwritten.
model_file = "model-iris.pkl"
if not os.path.exists(model_file):
    get_dataitem("https://s3.wasabisys.com/iguazio/models/iris/model.pkl").download(
        model_file
    )

# Build the training set from the Iris data; it is logged together with the model.
iris = load_iris()
train_set = pd.DataFrame(
    iris["data"],
    columns=["sepal_length_cm", "sepal_width_cm", "petal_length_cm", "petal_width_cm"],
)

model_name = "RandomForestClassifier"

# Log the model through the projects API so that it is available through the feature store API
project.log_model(model_name, model_file=model_file, training_set=train_set)

<mlrun.artifacts.model.ModelArtifact at 0x7fca308edf40>

## Deploy the serving function

In [23]:
def deply_serv(image=None, key="my_model", model_name="RandomForestClassifier"):
    """Import the hub model server, attach the logged model, and deploy it.

    Args:
        image: Image assigned to both ``spec.build.image`` and ``spec.image``
            of the serving function.
        key: Not referenced in the function body.
        model_name: Name of the model to serve; also used to build the
            ``store://`` path of the model inside the project.

    Returns:
        The serving function object after ``deploy()`` has been called.
    """
    model_uri = f"store://models/{project_name}/{model_name}:latest"

    # Pull the generic v2 model server from the function hub under the name "serving".
    fn = import_function(
        "hub://v2_model_server",
        project=project_name,
        new_name="serving",
    )

    # Register the model on the server and switch on tracking.
    fn.add_model(model_name, model_path=model_uri)
    fn.set_tracking()

    # Image and build settings.
    fn.spec.build.image = image
    fn.spec.image = image
    fn.spec.build.requirements = ["scikit-learn"]

    fn.deploy()
    return fn

In [24]:
# Deploy the serving function using the image configured at the top of the notebook.
serving_fn = deply_serv(image=image)

> 2024-08-04 14:05:17,170 [info] Starting remote function deploy
2024-08-04 14:05:17  (info) Deploying function
2024-08-04 14:05:17  (info) Building
2024-08-04 14:05:17  (info) Staging files and preparing base images
2024-08-04 14:05:17  (warn) Using user provided base image, runtime interpreter version is provided by the base image
2024-08-04 14:05:17  (info) Building processor image
2024-08-04 14:06:22  (info) Build complete
2024-08-04 14:06:30  (info) Function deploy complete
> 2024-08-04 14:06:38,111 [info] Successfully deployed function: {"external_invocation_urls":["drift-alert-test-project-serving.default-tenant.app.vmdev94.lab.iguazeng.com/"],"internal_invocation_urls":["nuclio-drift-alert-test-project-serving.default-tenant.svc.cluster.local:8080"]}


## Configure the notification

In [25]:
# Slack notification that is attached to the alert.
# NOTE(review): the webhook below looks like a placeholder — supply a real Slack webhook URL.
notification = mlrun.model.Notification(
    kind="slack",
    name="slack_notification",
    message="A drift was detected",
    severity="warning",
    when=["now"],
    condition="failed",
    secret_params={"webhook": "https://hooks.slack.com/"},
).to_dict()

# Build the fully qualified name of the monitoring result the alert watches,
# using the first model endpoint listed for the project.
# NOTE(review): "myappv2" / "data_drift_test" presumably have to match the deployed
# monitoring application name and the result name it reports — confirm.
endpoints = mlrun.get_run_db().list_model_endpoints(project=project_name)
endpoint_id = endpoints[0].metadata.uid
result_endpoint = get_result_instance_fqn(endpoint_id, "myappv2", "data_drift_test")

notifications = [alert_objects.AlertNotification(notification=notification)]

alert_name = "drift_alert"
alert_summary = "A drift was detected"
entity_kind = alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT
event_name = alert_objects.EventKind.DATA_DRIFT_DETECTED

alert_entities = alert_objects.EventEntities(
    kind=entity_kind,
    project=project_name,
    ids=[result_endpoint],
)
alert_trigger = alert_objects.AlertTrigger(events=[event_name])

alert_data = mlrun.alerts.alert.AlertConfig(
    project=project_name,
    name=alert_name,
    summary=alert_summary,
    severity=alert_objects.AlertSeverity.LOW,
    entities=alert_entities,
    trigger=alert_trigger,
    criteria=None,
    notifications=notifications,
)

project.store_alert_config(alert_data)

<mlrun.alerts.alert.AlertConfig at 0x7fca305f5430>

## Invoke the model

In [26]:
import json
from time import sleep
from random import choice, uniform

# Use rows of the Iris dataset as inference inputs.
iris = load_iris()
iris_data = iris["data"].tolist()

model_name = "RandomForestClassifier"
serving_1 = project.get_function("serving")

# Send 200 requests, each carrying the same randomly chosen row twice,
# with a short random pause between requests.
infer_path = f"v2/models/{model_name}/infer"
for i in range(200):
    data_point = choice(iris_data)
    payload = json.dumps({"inputs": [data_point, data_point]})
    serving_1.invoke(infer_path, payload)
    sleep(choice([0.01, 0.04]))

> 2024-08-04 14:07:40,073 [info] Invoking function: {"method":"POST","path":"http://nuclio-drift-alert-test-project-serving.default-tenant.svc.cluster.local:8080/v2/models/RandomForestClassifier/infer"}
> 2024-08-04 14:07:40,316 [info] Invoking function: {"method":"POST","path":"http://nuclio-drift-alert-test-project-serving.default-tenant.svc.cluster.local:8080/v2/models/RandomForestClassifier/infer"}
> 2024-08-04 14:07:40,340 [info] Invoking function: {"method":"POST","path":"http://nuclio-drift-alert-test-project-serving.default-tenant.svc.cluster.local:8080/v2/models/RandomForestClassifier/infer"}
> 2024-08-04 14:07:40,394 [info] Invoking function: {"method":"POST","path":"http://nuclio-drift-alert-test-project-serving.default-tenant.svc.cluster.local:8080/v2/models/RandomForestClassifier/infer"}
> 2024-08-04 14:07:40,419 [info] Invoking function: {"method":"POST","path":"http://nuclio-drift-alert-test-project-serving.default-tenant.svc.cluster.local:8080/v2/models/RandomForestClas

## Application registration

In [27]:
# Register the custom monitoring application with the project.
# NOTE(review): "MyApp" presumably refers to the class defined in the
# "mlrun: start-code" section of this notebook — confirm.
app = project.set_model_monitoring_function(
    application_class="MyApp",
    name="myAppv2",
    image=image,
)

In [28]:
# Deploy the monitoring application registered in the previous cell.
project.deploy_function(app)

> 2024-08-04 14:08:15,299 [info] Starting remote function deploy
2024-08-04 14:08:15  (info) Deploying function
2024-08-04 14:08:15  (info) Building
2024-08-04 14:08:16  (info) Staging files and preparing base images
2024-08-04 14:08:16  (warn) Using user provided base image, runtime interpreter version is provided by the base image
2024-08-04 14:08:16  (info) Building processor image
2024-08-04 14:09:21  (info) Build complete
2024-08-04 14:09:39  (info) Function deploy complete
> 2024-08-04 14:09:48,342 [info] Successfully deployed function: {"external_invocation_urls":["drift-alert-test-project-myappv2.default-tenant.app.vmdev94.lab.iguazeng.com/"],"internal_invocation_urls":["nuclio-drift-alert-test-project-myappv2.default-tenant.svc.cluster.local:8080"]}


DeployStatus(state=ready, outputs={'endpoint': 'http://drift-alert-test-project-myappv2.default-tenant.app.vmdev94.lab.iguazeng.com/', 'name': 'drift-alert-test-project-myappv2'})

## Start tracking with the user-defined custom model monitoring application

In [29]:
# mlrun: start-code

In [30]:
import mlrun
from mlrun.model_monitoring.application import (
    ModelMonitoringApplicationBase,
    ModelMonitoringApplicationResult,
)
from mlrun.datastore.targets import ParquetTarget
import typing
import pandas as pd
import json
from mlrun.artifacts import (
    Artifact,
    DatasetArtifact,
    PlotlyArtifact,
    TableArtifact,
    update_dataset_meta,
)
import os
import random
from mlrun.artifacts.manager import ArtifactManager, extend_artifact_path

from mlrun.datastore import store_manager


class MyApp(ModelMonitoringApplicationBase):
    """Demo monitoring application that reports a fixed drift value.

    Every tracking call logs the received sample statistics as a table
    artifact and returns a ``data_drift_test`` result whose value is
    hard-coded to 4.5.
    """

    def __init__(self):
        self.name = "a"

    def do_tracking(
        self,
        application_name: str,
        sample_df_stats: pd.DataFrame,
        feature_stats: pd.DataFrame,
        sample_df: pd.DataFrame,
        start_infer_time: pd.Timestamp,
        end_infer_time: pd.Timestamp,
        latest_request: pd.Timestamp,
        endpoint_id: str,
        output_stream_uri: str,
    ) -> typing.Union[
        ModelMonitoringApplicationResult, list[ModelMonitoringApplicationResult]
    ]:
        """Log the current statistics and return the drift result.

        Only ``sample_df_stats`` is read; the remaining arguments are accepted
        but not used in this body.

        Returns:
            A single ``ModelMonitoringApplicationResult`` named
            ``data_drift_test`` of kind ``data_drift``, with the status chosen
            from the fixed drift value.
        """
        # NOTE(review): self.context is not set in this class — presumably
        # provided by the base class; confirm.
        stats_table = TableArtifact("current_stats", df=pd.DataFrame(sample_df_stats))
        self.context.log_artifact(stats_table)

        mm_constants = mlrun.common.schemas.model_monitoring.constants
        status_enum = mm_constants.ResultStatusApp

        # Fixed demo value; with 4.5 the final branch below is the one taken.
        drift_result = 4.5

        # Status bands: below 0, below 3, below 4, and everything else.
        if drift_result < 0:
            status = status_enum.irrelevant
        elif drift_result < 3:
            status = status_enum.no_detection
        elif drift_result < 4:
            status = status_enum.potential_detection
        else:
            status = status_enum.detected

        result = ModelMonitoringApplicationResult(
            name="data_drift_test",
            value=drift_result,
            kind=mm_constants.ResultKindApp.data_drift,
            status=status,
            extra_data={},
        )
        return result