In [1]:
!pip uninstall -y mlrun
!pip install git+https://github.com/davesh0812/mlrun.git@feature/monitoring-batch-02

Found existing installation: mlrun 0.0.0+unstable
Uninstalling mlrun-0.0.0+unstable:
  Successfully uninstalled mlrun-0.0.0+unstable
Collecting git+https://github.com/davesh0812/mlrun.git@feature/monitoring-batch-02
  Cloning https://github.com/davesh0812/mlrun.git (to revision feature/monitoring-batch-02) to /tmp/pip-req-build-c4ce2pvi
  Running command git clone --filter=blob:none --quiet https://github.com/davesh0812/mlrun.git /tmp/pip-req-build-c4ce2pvi
  Running command git checkout -b feature/monitoring-batch-02 --track origin/feature/monitoring-batch-02
  Switched to a new branch 'feature/monitoring-batch-02'
  Branch 'feature/monitoring-batch-02' set up to track remote branch 'feature/monitoring-batch-02' from 'origin'.
  Resolved https://github.com/davesh0812/mlrun.git to commit 85903bdc69fa412861b3a3d1533c5a9c393ad613
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25l

In [1]:
!pip install scikit-learn



In [1]:
%config Completer.use_jedi = False
import os
import pandas as pd
from sklearn.datasets import load_iris
import mlrun
from mlrun import import_function, get_dataitem, get_or_create_project

# MLRun project that owns the model, the serving function and the monitoring apps.
project_name = "new-iris-app-ev-v28"

# Load the project using the current directory as its context (or create it if missing).
project = get_or_create_project(name=project_name, context="./")

> 2023-09-06 11:22:15,538 [info] Loading project from path: {'project_name': 'new-iris-app-ev-v28', 'path': './'}
> 2023-09-06 11:22:31,520 [info] Project loaded successfully: {'project_name': 'new-iris-app-ev-v28', 'path': './', 'stored_in_db': True}


In [2]:
# Download the pre-trained Iris model only when it is not already on disk, so the
# notebook also runs from a fresh checkout (log_model below reads this local file).
model_file = "model.pkl"
if not os.path.exists(model_file):
    get_dataitem("https://s3.wasabisys.com/iguazio/models/iris/model.pkl").download(
        model_file
    )

# The Iris feature matrix, with explicit column names, is logged as the training set.
iris = load_iris()
train_set = pd.DataFrame(
    iris["data"],
    columns=["sepal_length_cm", "sepal_width_cm", "petal_length_cm", "petal_width_cm"],
)

model_name = "RandomForestClassifier"

# Log the model through the projects API so that it is available through the feature store API
project.log_model(model_name, model_file=model_file, training_set=train_set)

<mlrun.artifacts.model.ModelArtifact at 0x7f3528976550>

# Evidently Project Creation

In [3]:
import datetime

from sklearn import datasets

from evidently.metrics import ColumnDriftMetric
from evidently.metrics import ColumnSummaryMetric
from evidently.metrics import DatasetDriftMetric
from evidently.metrics import DatasetMissingValuesMetric
from evidently.report import Report
from evidently.test_preset import DataDriftTestPreset
from evidently.test_suite import TestSuite
from evidently.ui.dashboards import CounterAgg
from evidently.ui.dashboards import DashboardPanelCounter
from evidently.ui.dashboards import DashboardPanelPlot
from evidently.ui.dashboards import PanelValue
from evidently.ui.dashboards import PlotType
from evidently.ui.dashboards import ReportFilter
from evidently.ui.remote import RemoteWorkspace
from evidently.ui.workspace import Workspace
from evidently.ui.workspace import WorkspaceBase

# OpenML "adult" dataset, split into two subsets by education level.
# NOTE(review): adult_ref / adult_cur do not appear to be used anywhere else in this
# notebook — presumably left over from an Evidently example; confirm before removing.
adult_data = datasets.fetch_openml(name="adult", version=2, as_frame="auto")
adult = adult_data.frame

in_current_split = adult.education.isin(["Some-college", "HS-grad", "Bachelors"])
adult_ref = adult[~in_current_split]
adult_cur = adult[in_current_split]

# Location of the Evidently workspace inside the project's artifacts directory.
WORKSPACE = os.path.abspath(
    f"/v3io/projects/{project_name}/artifacts/evidently_workspace"
)

# Name and description given to the Evidently project created below.
YOUR_PROJECT_NAME = "iris monitoring"
YOUR_PROJECT_DESCRIPTION = "Test project using iris dataset."


def create_project(workspace: WorkspaceBase):
    """Create the Evidently project in ``workspace`` and set up its dashboard.

    Four panels are added, in this order: a title counter, a "Model Calls"
    counter (sum of current row counts), a "Share of Drifted Features" counter
    (last value) and a "Dataset Quality" line plot (drift share and missing
    values share). The project is saved before it is returned.

    :param workspace: Evidently workspace in which the project is created.
    :returns: the saved Evidently project.
    """

    def empty_filter():
        # A fresh filter with no metadata values and no tag values for each panel.
        return ReportFilter(metadata_values={}, tag_values=[])

    ev_project = workspace.create_project(YOUR_PROJECT_NAME)
    ev_project.description = YOUR_PROJECT_DESCRIPTION

    # NOTE(review): the title still says "Income Dataset" although the data is iris —
    # presumably inherited from an Evidently example; confirm the intended wording.
    title_panel = DashboardPanelCounter(
        filter=empty_filter(),
        agg=CounterAgg.NONE,
        title="Income Dataset (iris)",
    )
    ev_project.dashboard.add_panel(title_panel)

    model_calls_panel = DashboardPanelCounter(
        title="Model Calls",
        filter=empty_filter(),
        value=PanelValue(
            metric_id="DatasetMissingValuesMetric",
            field_path=DatasetMissingValuesMetric.fields.current.number_of_rows,
            legend="count",
        ),
        text="count",
        agg=CounterAgg.SUM,
        size=1,
    )
    ev_project.dashboard.add_panel(model_calls_panel)

    drift_share_panel = DashboardPanelCounter(
        title="Share of Drifted Features",
        filter=empty_filter(),
        value=PanelValue(
            metric_id="DatasetDriftMetric",
            field_path="share_of_drifted_columns",
            legend="share",
        ),
        text="share",
        agg=CounterAgg.LAST,
        size=1,
    )
    ev_project.dashboard.add_panel(drift_share_panel)

    drift_series = PanelValue(
        metric_id="DatasetDriftMetric",
        field_path="share_of_drifted_columns",
        legend="Drift Share",
    )
    missing_series = PanelValue(
        metric_id="DatasetMissingValuesMetric",
        field_path=DatasetMissingValuesMetric.fields.current.share_of_missing_values,
        legend="Missing Values Share",
    )
    quality_panel = DashboardPanelPlot(
        title="Dataset Quality",
        filter=empty_filter(),
        values=[drift_series, missing_series],
        plot_type=PlotType.LINE,
    )
    ev_project.dashboard.add_panel(quality_panel)

    ev_project.save()
    return ev_project


def create_demo_project(workspace: str):
    """Create an Evidently workspace at ``workspace`` and the demo project inside it.

    :param workspace: path passed to ``Workspace.create``.
    :returns: ``(workspace_object, evidently_project)``.
    """
    evidently_ws = Workspace.create(workspace)
    return evidently_ws, create_project(evidently_ws)



In [4]:
# Create the Evidently workspace and its dashboard project; `ws.path` and
# `project_ev.id` are passed on when the Evidently application is registered.
ws, project_ev = create_demo_project(WORKSPACE)

# Deploy

In [13]:
def deply_serv(image=None, monitoring=True):
    """Import the hub model server, attach the logged model and deploy it.

    :param image: container image applied to the serving function and, when
        ``monitoring`` is on, to the monitoring stream and batch entries of the
        tracking policy. When ``None``, no image is set anywhere, so whatever the
        imported function and the tracking policy already define stays in effect
        (previously those fields were overwritten with ``None``).
    :param monitoring: when True, enable tracking on the serving function.
    :returns: the deployed serving function.
    """
    # Import the serving function from the function hub
    serving_fn = import_function(
        "hub://v2_model_server", project=project_name, new_name="serving-3"
    )

    # Add the model to the serving function's routing spec
    serving_fn.add_model(
        model_name, model_path=f"store://models/{project_name}/{model_name}:latest"
    )

    if monitoring:
        # presumably a cron expression (minute 0 of every 2nd hour) — TODO confirm
        tracking_policy = {"default_batch_intervals": "0 */2 * * *"}
        if image is not None:
            tracking_policy["stream_image"] = image
            tracking_policy["default_batch_image"] = image
        tracking_policy["application_batch"] = True
        serving_fn.set_tracking(tracking_policy=tracking_policy)

    # Only override the images when the caller actually supplied one.
    if image is not None:
        serving_fn.spec.build.image = image
        serving_fn.spec.image = image
    serving_fn.spec.build.requirements = ["scikit-learn"]

    # Deploy the function
    serving_fn.deploy()
    return serving_fn

In [None]:
# Deploy the serving function (monitoring enabled by default) with the given image.
serving_fn = deply_serv(image="davesh0812/mlrun-api:1.5.0")

> 2023-09-06 11:55:40,333 [info] Starting remote function deploy
2023-09-06 11:55:41  (info) Deploying function
2023-09-06 11:55:41  (info) Building
2023-09-06 11:55:41  (info) Staging files and preparing base images
2023-09-06 11:55:41  (info) Building processor image


# Invoke the model

In [7]:
import json
from time import sleep
from random import choice, uniform

iris = load_iris()
iris_data = iris["data"].tolist()

model_name = "RandomForestClassifier"
serving_1 = project.get_function("serving-3")

# Send 150 inference requests, each with one randomly chosen Iris sample,
# pausing briefly between calls.
for _ in range(150):
    data_point = choice(iris_data)
    # data_point = [0.5,0.5,0.5,0.5]  # alternative: send one constant sample instead
    serving_1.invoke(
        f"v2/models/{model_name}/infer", json.dumps({"inputs": [data_point]})
    )
    sleep(choice([0.01, 0.04]))

> 2023-09-06 11:33:27,783 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-new-iris-app-ev-v28-serving-3.default-tenant.svc.cluster.local:8080/v2/models/RandomForestClassifier/infer'}
> 2023-09-06 11:33:28,199 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-new-iris-app-ev-v28-serving-3.default-tenant.svc.cluster.local:8080/v2/models/RandomForestClassifier/infer'}
> 2023-09-06 11:33:28,244 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-new-iris-app-ev-v28-serving-3.default-tenant.svc.cluster.local:8080/v2/models/RandomForestClassifier/infer'}
> 2023-09-06 11:33:28,327 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-new-iris-app-ev-v28-serving-3.default-tenant.svc.cluster.local:8080/v2/models/RandomForestClassifier/infer'}
> 2023-09-06 11:33:28,410 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-new-iris-app-ev-v28-serving-3.default-tenant.svc.cluster.local:8080/v2/models/RandomForestClas

# APPLICATION REGISTRATION

In [9]:
# Packages installed into the image built for the `MyApp` monitoring application.
my_app_requirements = [
    "git+https://github.com/davesh0812/mlrun.git@feature/monitoring-batch-02-evidently",
    "evidently~=0.4.3",
]

# Register the `MyApp` class as a monitoring application named "myApp".
project.set_model_monitoring_application(
    name="myApp",
    application_class="MyApp",
    requirements=my_app_requirements,
)

> 2023-09-06 11:41:24,509 [info] Starting remote function deploy
2023-09-06 11:41:24  (info) Deploying function
2023-09-06 11:41:24  (info) Building
2023-09-06 11:41:25  (info) Staging files and preparing base images
2023-09-06 11:41:25  (info) Building processor image
2023-09-06 11:43:09  (info) Build complete
2023-09-06 11:44:45  (info) Function deploy complete
> 2023-09-06 11:44:48,062 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-new-iris-app-ev-v28-myapp.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['new-iris-app-ev-v28-myapp-new-iris-app-ev-v28.default-tenant.app.vmdev41.lab.iguazeng.com/']}


<mlrun.runtimes.serving.ServingRuntime at 0x7f3502e93250>

In [10]:
# Packages installed into the image built for the Evidently monitoring application.
evidently_app_requirements = [
    "git+https://github.com/davesh0812/mlrun.git@feature/monitoring-batch-02-evidently",
    "evidently~=0.4.3",
]

# Register `MyEvidentlyApp`, pointing it at the Evidently workspace and project
# created earlier in the notebook.
project.set_model_monitoring_application(
    name="MyEvidentlyApp",
    application_class="MyEvidentlyApp",
    requirements=evidently_app_requirements,
    evidently_workspace_path=ws.path,
    evidently_project_id=str(project_ev.id),
)

> 2023-09-06 11:44:57,387 [info] Starting remote function deploy
2023-09-06 11:44:57  (info) Deploying function
2023-09-06 11:44:57  (info) Building
2023-09-06 11:44:57  (info) Staging files and preparing base images
2023-09-06 11:44:57  (info) Building processor image
2023-09-06 11:52:06  (info) Build complete
2023-09-06 11:54:00  (info) Function deploy complete
> 2023-09-06 11:54:04,864 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-new-iris-app-ev-v28-myevidentlyapp.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['new-iris-app-ev-v28-myevidentlyapp-new-iris-app-ev-v28.default-tenant.app.vmdev41.lab.iguazeng.com/']}


<mlrun.runtimes.serving.ServingRuntime at 0x7f352a7a5ee0>

# USER APPLICATION CODE

In [None]:
# mlrun: start-code

In [None]:
import mlrun
from mlrun.model_monitoring.application import (
    ModelMonitoringApplication,
    ModelMonitoringApplicationResult,
)
from mlrun.model_monitoring.evidently_application import (
    EvidentlyModelMonitoringApplication,
)
from mlrun.datastore.targets import ParquetTarget
import typing
import pandas as pd
import json
from mlrun.artifacts import (
    Artifact,
    DatasetArtifact,
    PlotlyArtifact,
    TableArtifact,
    update_dataset_meta,
)

from sklearn.datasets import load_iris
import datetime

from sklearn import datasets

from evidently.metrics import ColumnDriftMetric
from evidently.metrics import ColumnSummaryMetric
from evidently.metrics import DatasetDriftMetric
from evidently.metrics import DatasetMissingValuesMetric
from evidently.report import Report
from evidently.test_preset import DataDriftTestPreset
from evidently.test_suite import TestSuite
from evidently.ui.dashboards import CounterAgg
from evidently.ui.dashboards import DashboardPanelCounter
from evidently.ui.dashboards import DashboardPanelPlot
from evidently.ui.dashboards import PanelValue
from evidently.ui.dashboards import PlotType
from evidently.ui.dashboards import ReportFilter
from evidently.ui.remote import RemoteWorkspace
from evidently.ui.workspace import Workspace
from evidently.ui.workspace import WorkspaceBase


class MyEvidentlyApp(EvidentlyModelMonitoringApplication):
    """Demo monitoring application that runs Evidently drift checks on Iris features.

    Each run compares the sampled window against the scikit-learn Iris data,
    stores a report and a test suite in the Evidently workspace, logs both, and
    returns a fixed demo result.
    """

    # Single definition of the monitored feature columns; used for the reference
    # frame, for selecting columns from the sample and for the per-column metrics.
    _FEATURE_COLUMNS = (
        "sepal_length_cm",
        "sepal_width_cm",
        "petal_length_cm",
        "petal_width_cm",
    )

    def run_application(
        self,
        application_name: str,
        sample_df_stats: pd.DataFrame,
        feature_stats: pd.DataFrame,
        sample_df: pd.DataFrame,
        schedule_time: pd.Timestamp,
        latest_request: pd.Timestamp,
        endpoint_id: str,
        output_stream_uri: str,
    ) -> typing.Union[
        ModelMonitoringApplicationResult, typing.List[ModelMonitoringApplicationResult]
    ]:
        """Run the Evidently report and test suite for one monitoring window.

        :param application_name: not used by this implementation.
        :param sample_df_stats: only printed (first rows) for debugging.
        :param feature_stats: only printed (first rows) for debugging.
        :param sample_df: sampled data; must contain the four feature columns.
        :param schedule_time: timestamp attached to the report, the test suite,
            the logged object names and the returned result.
        :param latest_request: not used by this implementation.
        :param endpoint_id: endpoint identifier placed in the returned result.
        :param output_stream_uri: not used by this implementation.
        :returns: a single result with hard-coded value and status.
        """
        # Reference data: the Iris feature matrix; create_report / create_test_suite
        # read it from self.train_set.
        iris = load_iris()
        self.train_set = pd.DataFrame(
            iris["data"],
            columns=list(self._FEATURE_COLUMNS),
        )

        # Keep only the monitored features (a list is required for column selection).
        sample_df = sample_df[list(self._FEATURE_COLUMNS)]
        print("sample_df_stats.head()")
        print(sample_df_stats.head())
        print("feature_stats.head()")
        print(feature_stats.head())
        print("sample_df.head()")
        print(sample_df.head())
        print("schedule_time")
        print(schedule_time)

        data_drift_report = self.create_report(sample_df, schedule_time)
        self.evidently_workspace.add_report(
            self.evidently_project_id, data_drift_report
        )
        data_drift_test_suite = self.create_test_suite(sample_df, schedule_time)
        self.evidently_workspace.add_test_suite(
            self.evidently_project_id, data_drift_test_suite
        )

        self.log_evidently_object(data_drift_report, f"report_{str(schedule_time)}")
        self.log_evidently_object(data_drift_test_suite, f"suite_{str(schedule_time)}")
        self.log_project_dashboard(None, schedule_time + datetime.timedelta(minutes=1))

        # NOTE(review): the application name, value (0.5) and status (detected) are
        # hard-coded and do not depend on the report above — presumably demo
        # placeholders; confirm before relying on these results.
        return ModelMonitoringApplicationResult(
            "king-evedintly",
            endpoint_id,
            schedule_time,
            result_name="data_drift_test",
            result_value=0.5,
            result_kind=mlrun.common.schemas.model_monitoring.constants.ResultKindApp.data_drift,
            result_status=mlrun.common.schemas.model_monitoring.constants.ResultStatusApp.detected,
            result_extra_data={},
        )

    def create_report(self, sample_df, schedule_time):
        """Build and run an Evidently ``Report`` for ``sample_df``.

        The report holds dataset-level drift and missing-values metrics plus, for
        every feature column, a drift metric (``wasserstein`` stattest) and a
        summary metric. ``self.train_set`` is used as reference data, so it must
        already be set (``run_application`` sets it).

        :param sample_df: current data to evaluate.
        :param schedule_time: timestamp assigned to the report.
        :returns: the executed report.
        """
        metrics = [
            DatasetDriftMetric(),
            DatasetMissingValuesMetric(),
        ]
        for col_name in self._FEATURE_COLUMNS:
            metrics.extend(
                [
                    ColumnDriftMetric(column_name=col_name, stattest="wasserstein"),
                    ColumnSummaryMetric(column_name=col_name),
                ]
            )

        data_drift_report = Report(
            metrics=metrics,
            timestamp=schedule_time,
        )

        data_drift_report.run(reference_data=self.train_set, current_data=sample_df)
        return data_drift_report

    def create_test_suite(self, sample_df, schedule_time):
        """Build and run an Evidently ``TestSuite`` with the data-drift preset.

        ``self.train_set`` is used as reference data, so it must already be set
        (``run_application`` sets it).

        :param sample_df: current data to evaluate.
        :param schedule_time: timestamp assigned to the test suite.
        :returns: the executed test suite.
        """
        data_drift_test_suite = TestSuite(
            tests=[DataDriftTestPreset()],
            timestamp=schedule_time,
        )

        data_drift_test_suite.run(reference_data=self.train_set, current_data=sample_df)
        return data_drift_test_suite

In [None]:
import mlrun
from mlrun.model_monitoring.application import (
    ModelMonitoringApplication,
    ModelMonitoringApplicationResult,
)
from mlrun.datastore.targets import ParquetTarget
import typing
import pandas as pd
import json
from mlrun.artifacts import (
    Artifact,
    DatasetArtifact,
    PlotlyArtifact,
    TableArtifact,
    update_dataset_meta,
)
import os

from mlrun.artifacts.manager import ArtifactManager, extend_artifact_path

from mlrun.datastore import store_manager


class MyApp(ModelMonitoringApplication):
    """Minimal monitoring application.

    Prints its inputs, logs the current statistics as a table artifact and
    returns a fixed data-drift result.
    """

    def __init__(self):
        # NOTE(review): the base-class initializer is not called here — confirm
        # that this is intended.
        self.name = "a"

    def run_application(
        self,
        application_name: str,
        sample_df_stats: pd.DataFrame,
        feature_stats: pd.DataFrame,
        sample_df: pd.DataFrame,
        schedule_time: pd.Timestamp,
        latest_request: pd.Timestamp,
        endpoint_id: str,
        output_stream_uri: str,
    ) -> typing.Union[
        ModelMonitoringApplicationResult, typing.List[ModelMonitoringApplicationResult]
    ]:
        """Print the inputs, log ``sample_df_stats`` and return a fixed result.

        ``application_name``, ``latest_request`` and ``output_stream_uri`` are not
        used. The returned result carries ``self.name``, ``endpoint_id`` and
        ``schedule_time`` together with a hard-coded value (0.5) and status.
        """
        # Debug output: a label line followed by the first rows of each frame.
        for label, frame in (
            ("sample_df_stats.head()", sample_df_stats),
            ("feature_stats.head()", feature_stats),
            ("sample_df.head()", sample_df),
        ):
            print(label)
            print(frame.head())
        print("schedule_time")
        print(schedule_time)

        # self.context is not assigned in this class — presumably provided by the
        # base class; verify.
        stats_artifact = TableArtifact("current_stats", df=sample_df_stats)
        self.context.log_artifact(stats_artifact)

        monitoring_constants = mlrun.common.schemas.model_monitoring.constants
        return ModelMonitoringApplicationResult(
            self.name,
            endpoint_id,
            schedule_time,
            result_name="data_drift_test",
            result_value=0.5,
            result_kind=monitoring_constants.ResultKindApp.data_drift,
            result_status=monitoring_constants.ResultStatusApp.detected,
            result_extra_data={},
        )

In [None]:
# mlrun: end-code