From 72d5485d37f4503f77c6ac4e093e70bd4bd248d9 Mon Sep 17 00:00:00 2001 From: Daniel Perez <100069700+danielperezz@users.noreply.github.com> Date: Sun, 28 Sep 2025 15:43:07 +0300 Subject: [PATCH 01/17] replace author to Iguazio manually (#905) --- functions/src/aggregate/item.yaml | 2 +- functions/src/arc_to_parquet/item.yaml | 2 +- functions/src/auto_trainer/item.yaml | 2 +- functions/src/azureml_serving/function.yaml | 2 +- functions/src/azureml_serving/item.yaml | 2 +- functions/src/azureml_utils/item.yaml | 2 +- functions/src/batch_inference/item.yaml | 2 +- functions/src/batch_inference_v2/item.yaml | 2 +- functions/src/describe/item.yaml | 2 +- functions/src/describe_spark/item.yaml | 3 ++- functions/src/feature_selection/item.yaml | 2 +- functions/src/gen_class_data/item.yaml | 2 +- functions/src/github_utils/function.yaml | 2 +- functions/src/github_utils/item.yaml | 2 +- functions/src/hugging_face_serving/item.yaml | 2 +- functions/src/load_dataset/function.yaml | 2 +- functions/src/load_dataset/item.yaml | 2 +- functions/src/mlflow_utils/item.yaml | 2 +- functions/src/model_server/item.yaml | 2 +- functions/src/model_server_tester/function.yaml | 2 +- functions/src/model_server_tester/item.yaml | 2 +- functions/src/noise_reduction/item.yaml | 2 +- functions/src/onnx_utils/item.yaml | 2 +- functions/src/open_archive/item.yaml | 2 +- functions/src/pii_recognizer/item.yaml | 2 +- functions/src/pyannote_audio/item.yaml | 2 +- functions/src/question_answering/item.yaml | 2 +- functions/src/send_email/function.yaml | 2 +- functions/src/send_email/item.yaml | 2 +- functions/src/silero_vad/item.yaml | 2 +- functions/src/sklearn_classifier/item.yaml | 2 +- functions/src/sklearn_classifier_dask/function.yaml | 2 +- functions/src/sklearn_classifier_dask/item.yaml | 2 +- functions/src/structured_data_generator/item.yaml | 2 +- functions/src/test_classifier/function.yaml | 2 +- functions/src/test_classifier/item.yaml | 2 +- 
functions/src/text_to_audio_generator/item.yaml | 2 +- functions/src/tf2_serving/function.yaml | 2 +- functions/src/tf2_serving/item.yaml | 2 +- functions/src/transcribe/item.yaml | 2 +- functions/src/translate/item.yaml | 2 +- functions/src/v2_model_server/function.yaml | 2 +- functions/src/v2_model_server/item.yaml | 2 +- functions/src/v2_model_tester/function.yaml | 2 +- functions/src/v2_model_tester/item.yaml | 2 +- modules/src/count_events/item.yaml | 2 +- 46 files changed, 47 insertions(+), 46 deletions(-) diff --git a/functions/src/aggregate/item.yaml b/functions/src/aggregate/item.yaml index 75f7e74c5..43e87a4a2 100644 --- a/functions/src/aggregate/item.yaml +++ b/functions/src/aggregate/item.yaml @@ -8,7 +8,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: avia + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0 diff --git a/functions/src/arc_to_parquet/item.yaml b/functions/src/arc_to_parquet/item.yaml index 4bc2634ce..fe2925aef 100644 --- a/functions/src/arc_to_parquet/item.yaml +++ b/functions/src/arc_to_parquet/item.yaml @@ -8,7 +8,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: avi + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0 diff --git a/functions/src/auto_trainer/item.yaml b/functions/src/auto_trainer/item.yaml index 7e622db29..ba33f6a08 100755 --- a/functions/src/auto_trainer/item.yaml +++ b/functions/src/auto_trainer/item.yaml @@ -10,7 +10,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: yonish + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0 diff --git a/functions/src/azureml_serving/function.yaml b/functions/src/azureml_serving/function.yaml index b2242da1d..978806878 100644 --- a/functions/src/azureml_serving/function.yaml +++ b/functions/src/azureml_serving/function.yaml @@ -5,7 +5,7 @@ metadata: hash: c0f404820b8f0fe92d2d1cfe9dbcc068be1a13bf project: '' labels: - author: yonish + 
author: Iguazio categories: - machine-learning - model-serving diff --git a/functions/src/azureml_serving/item.yaml b/functions/src/azureml_serving/item.yaml index d20e636b0..93fb046b2 100644 --- a/functions/src/azureml_serving/item.yaml +++ b/functions/src/azureml_serving/item.yaml @@ -9,7 +9,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: yonish + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.1.0 diff --git a/functions/src/azureml_utils/item.yaml b/functions/src/azureml_utils/item.yaml index 342307643..ae33ad5b1 100644 --- a/functions/src/azureml_utils/item.yaml +++ b/functions/src/azureml_utils/item.yaml @@ -10,7 +10,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: yonish + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0 diff --git a/functions/src/batch_inference/item.yaml b/functions/src/batch_inference/item.yaml index 16a56cfe7..65b61431e 100644 --- a/functions/src/batch_inference/item.yaml +++ b/functions/src/batch_inference/item.yaml @@ -9,7 +9,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: guyl + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0 diff --git a/functions/src/batch_inference_v2/item.yaml b/functions/src/batch_inference_v2/item.yaml index 775579b9e..8b8f01df0 100644 --- a/functions/src/batch_inference_v2/item.yaml +++ b/functions/src/batch_inference_v2/item.yaml @@ -9,7 +9,7 @@ generationDate: 2023-08-07:12-25 hidden: false icon: '' labels: - author: eyald + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0-rc51 diff --git a/functions/src/describe/item.yaml b/functions/src/describe/item.yaml index 2c41a025f..da26f1501 100644 --- a/functions/src/describe/item.yaml +++ b/functions/src/describe/item.yaml @@ -8,7 +8,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: Davids + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 
1.7.0 diff --git a/functions/src/describe_spark/item.yaml b/functions/src/describe_spark/item.yaml index 6c4ad32d9..58e267d4a 100644 --- a/functions/src/describe_spark/item.yaml +++ b/functions/src/describe_spark/item.yaml @@ -7,7 +7,8 @@ example: describe_spark.ipynb generationDate: 2022-08-28:17-25 hidden: false icon: '' -labels: {} +labels: + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.1.0 diff --git a/functions/src/feature_selection/item.yaml b/functions/src/feature_selection/item.yaml index 5356024df..4f9a3a5dd 100644 --- a/functions/src/feature_selection/item.yaml +++ b/functions/src/feature_selection/item.yaml @@ -9,7 +9,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: orz + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.8.0-rc40 diff --git a/functions/src/gen_class_data/item.yaml b/functions/src/gen_class_data/item.yaml index a6dd94b61..30f5cd21c 100644 --- a/functions/src/gen_class_data/item.yaml +++ b/functions/src/gen_class_data/item.yaml @@ -8,7 +8,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: Daniel + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0 diff --git a/functions/src/github_utils/function.yaml b/functions/src/github_utils/function.yaml index fe60cff7a..2d5d93aab 100644 --- a/functions/src/github_utils/function.yaml +++ b/functions/src/github_utils/function.yaml @@ -5,7 +5,7 @@ metadata: hash: d8e639af306794ce6f59eb246f0b845c016c9da4 project: '' labels: - author: yaronh + author: Iguazio categories: - utils spec: diff --git a/functions/src/github_utils/item.yaml b/functions/src/github_utils/item.yaml index c00bf86b2..9c06d84a7 100644 --- a/functions/src/github_utils/item.yaml +++ b/functions/src/github_utils/item.yaml @@ -8,7 +8,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: yaronh + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.1.0 diff --git 
a/functions/src/hugging_face_serving/item.yaml b/functions/src/hugging_face_serving/item.yaml index 48b063e49..edad986be 100644 --- a/functions/src/hugging_face_serving/item.yaml +++ b/functions/src/hugging_face_serving/item.yaml @@ -9,7 +9,7 @@ generationDate: 2022-09-05:17-00 hidden: false icon: '' labels: - author: yonish + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.1.0 diff --git a/functions/src/load_dataset/function.yaml b/functions/src/load_dataset/function.yaml index 046bb5cc4..91775a802 100644 --- a/functions/src/load_dataset/function.yaml +++ b/functions/src/load_dataset/function.yaml @@ -5,7 +5,7 @@ metadata: hash: d05aa41d618533335eeaeab38aa434a14e3e3980 project: '' labels: - author: yjb + author: Iguazio framework: sklearn categories: - data-preparation diff --git a/functions/src/load_dataset/item.yaml b/functions/src/load_dataset/item.yaml index d9fcf8d61..fb6f69c40 100644 --- a/functions/src/load_dataset/item.yaml +++ b/functions/src/load_dataset/item.yaml @@ -8,7 +8,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: yjb + author: Iguazio framework: sklearn maintainers: [] marketplaceType: '' diff --git a/functions/src/mlflow_utils/item.yaml b/functions/src/mlflow_utils/item.yaml index 79304eb38..176a9dd95 100644 --- a/functions/src/mlflow_utils/item.yaml +++ b/functions/src/mlflow_utils/item.yaml @@ -9,7 +9,7 @@ generationDate: 2024-05-23:12-00 hidden: false icon: '' labels: - author: zeevr + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.8.0 diff --git a/functions/src/model_server/item.yaml b/functions/src/model_server/item.yaml index c85cf163d..65c6f09e7 100644 --- a/functions/src/model_server/item.yaml +++ b/functions/src/model_server/item.yaml @@ -9,7 +9,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: yaronh + author: Iguazio framework: sklearn maintainers: [] marketplaceType: '' diff --git a/functions/src/model_server_tester/function.yaml 
b/functions/src/model_server_tester/function.yaml index eda10459e..45934c444 100644 --- a/functions/src/model_server_tester/function.yaml +++ b/functions/src/model_server_tester/function.yaml @@ -5,7 +5,7 @@ metadata: hash: 3b203a2799e44992539eafd32a4b8979bbcc8001 project: '' labels: - author: yaronh + author: Iguazio categories: - monitoring - model-serving diff --git a/functions/src/model_server_tester/item.yaml b/functions/src/model_server_tester/item.yaml index 3e43a9297..b18e0082c 100644 --- a/functions/src/model_server_tester/item.yaml +++ b/functions/src/model_server_tester/item.yaml @@ -9,7 +9,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: yaronh + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.1.0 diff --git a/functions/src/noise_reduction/item.yaml b/functions/src/noise_reduction/item.yaml index f748d5587..d8f2cddd4 100644 --- a/functions/src/noise_reduction/item.yaml +++ b/functions/src/noise_reduction/item.yaml @@ -9,7 +9,7 @@ generationDate: 2024-03-04:17-30 hidden: false icon: '' labels: - author: yonatans + author: Iguazio maintainers: [] mlrunVersion: 1.7.0 name: noise-reduction diff --git a/functions/src/onnx_utils/item.yaml b/functions/src/onnx_utils/item.yaml index 02134f32d..81ad593d5 100644 --- a/functions/src/onnx_utils/item.yaml +++ b/functions/src/onnx_utils/item.yaml @@ -10,7 +10,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: guyl + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.2 diff --git a/functions/src/open_archive/item.yaml b/functions/src/open_archive/item.yaml index 0a2f4516c..c40a62e4a 100644 --- a/functions/src/open_archive/item.yaml +++ b/functions/src/open_archive/item.yaml @@ -8,7 +8,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: yaronh + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.8.0-rc50 diff --git a/functions/src/pii_recognizer/item.yaml 
b/functions/src/pii_recognizer/item.yaml index 8f3185b4c..dcd71c85c 100644 --- a/functions/src/pii_recognizer/item.yaml +++ b/functions/src/pii_recognizer/item.yaml @@ -9,7 +9,7 @@ generationDate: 2023-08-15:10-24 hidden: false icon: '' labels: - author: pgw + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0 diff --git a/functions/src/pyannote_audio/item.yaml b/functions/src/pyannote_audio/item.yaml index b6dbccddb..79a5a0f1b 100644 --- a/functions/src/pyannote_audio/item.yaml +++ b/functions/src/pyannote_audio/item.yaml @@ -9,7 +9,7 @@ generationDate: 2023-12-03:14-30 hidden: false icon: '' labels: - author: guyl + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0 diff --git a/functions/src/question_answering/item.yaml b/functions/src/question_answering/item.yaml index 741bab80c..b307a9877 100755 --- a/functions/src/question_answering/item.yaml +++ b/functions/src/question_answering/item.yaml @@ -8,7 +8,7 @@ generationDate: 2023-08-07:11-30 hidden: false icon: '' labels: - author: yonish + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0 diff --git a/functions/src/send_email/function.yaml b/functions/src/send_email/function.yaml index e895cddc9..1722fb586 100644 --- a/functions/src/send_email/function.yaml +++ b/functions/src/send_email/function.yaml @@ -5,7 +5,7 @@ metadata: hash: 5c4528084ea98992b77f65e29359bbcb4a0df8ab project: '' labels: - author: saarc + author: Iguazio categories: - utils spec: diff --git a/functions/src/send_email/item.yaml b/functions/src/send_email/item.yaml index 4c42cb73b..6caf1ab50 100644 --- a/functions/src/send_email/item.yaml +++ b/functions/src/send_email/item.yaml @@ -8,7 +8,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: saarc + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.4.1 diff --git a/functions/src/silero_vad/item.yaml b/functions/src/silero_vad/item.yaml index 49adfcd9f..7a1aeaee2 100644 --- 
a/functions/src/silero_vad/item.yaml +++ b/functions/src/silero_vad/item.yaml @@ -9,7 +9,7 @@ generationDate: 2023-12-03:14-30 hidden: false icon: '' labels: - author: guyl + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0 diff --git a/functions/src/sklearn_classifier/item.yaml b/functions/src/sklearn_classifier/item.yaml index 1b41e630a..b9726fb79 100644 --- a/functions/src/sklearn_classifier/item.yaml +++ b/functions/src/sklearn_classifier/item.yaml @@ -9,7 +9,7 @@ generationDate: 2022-08-28:17-25 hidden: true icon: '' labels: - author: yjb + author: Iguazio framework: sklearn maintainers: [] marketplaceType: '' diff --git a/functions/src/sklearn_classifier_dask/function.yaml b/functions/src/sklearn_classifier_dask/function.yaml index 98be06b8c..46f733886 100644 --- a/functions/src/sklearn_classifier_dask/function.yaml +++ b/functions/src/sklearn_classifier_dask/function.yaml @@ -5,7 +5,7 @@ metadata: hash: e542038fbb84f790b7144b529665f36d70d80906 project: '' labels: - author: yjb + author: Iguazio framework: sklearn categories: - machine-learning diff --git a/functions/src/sklearn_classifier_dask/item.yaml b/functions/src/sklearn_classifier_dask/item.yaml index 35e89b2dd..3264ec681 100644 --- a/functions/src/sklearn_classifier_dask/item.yaml +++ b/functions/src/sklearn_classifier_dask/item.yaml @@ -9,7 +9,7 @@ generationDate: 2022-08-28:17-25 hidden: true icon: '' labels: - author: yjb + author: Iguazio framework: sklearn maintainers: [] marketplaceType: '' diff --git a/functions/src/structured_data_generator/item.yaml b/functions/src/structured_data_generator/item.yaml index 6e01aefb9..f268f05e6 100755 --- a/functions/src/structured_data_generator/item.yaml +++ b/functions/src/structured_data_generator/item.yaml @@ -9,7 +9,7 @@ generationDate: 2023-12-14:10-50 hidden: false icon: '' labels: - author: zeevr + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.8.0 diff --git a/functions/src/test_classifier/function.yaml 
b/functions/src/test_classifier/function.yaml index d0e1b6067..f35446b51 100644 --- a/functions/src/test_classifier/function.yaml +++ b/functions/src/test_classifier/function.yaml @@ -5,7 +5,7 @@ metadata: hash: b4d447a2328975e90a0dbc7a28f82009924cc157 project: '' labels: - author: yjb + author: Iguazio framework: sklearn categories: - machine-learning diff --git a/functions/src/test_classifier/item.yaml b/functions/src/test_classifier/item.yaml index e9f4982a9..a38497a73 100644 --- a/functions/src/test_classifier/item.yaml +++ b/functions/src/test_classifier/item.yaml @@ -9,7 +9,7 @@ generationDate: 2022-08-28:17-25 hidden: true icon: '' labels: - author: yjb + author: Iguazio framework: sklearn maintainers: [] marketplaceType: '' diff --git a/functions/src/text_to_audio_generator/item.yaml b/functions/src/text_to_audio_generator/item.yaml index ff9ec379f..13beef4b9 100644 --- a/functions/src/text_to_audio_generator/item.yaml +++ b/functions/src/text_to_audio_generator/item.yaml @@ -9,7 +9,7 @@ generationDate: 2023-12-03:15-30 hidden: false icon: '' labels: - author: yonatans + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.1 diff --git a/functions/src/tf2_serving/function.yaml b/functions/src/tf2_serving/function.yaml index c755263ae..17cf2fbb9 100644 --- a/functions/src/tf2_serving/function.yaml +++ b/functions/src/tf2_serving/function.yaml @@ -4,7 +4,7 @@ metadata: hash: 134293b94996e74275d90546f8d4ef96198af679 project: '' labels: - author: yaronh + author: Iguazio categories: - model-serving - machine-learning diff --git a/functions/src/tf2_serving/item.yaml b/functions/src/tf2_serving/item.yaml index 88dac8478..d7c793364 100644 --- a/functions/src/tf2_serving/item.yaml +++ b/functions/src/tf2_serving/item.yaml @@ -9,7 +9,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: yaronh + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.1.0 diff --git a/functions/src/transcribe/item.yaml 
b/functions/src/transcribe/item.yaml index 6deaf710a..0bc9e5d0f 100644 --- a/functions/src/transcribe/item.yaml +++ b/functions/src/transcribe/item.yaml @@ -9,7 +9,7 @@ generationDate: 2023-07-13:11-20 hidden: false icon: '' labels: - author: yonatans + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0 diff --git a/functions/src/translate/item.yaml b/functions/src/translate/item.yaml index 839d1efaa..eb0e821e4 100644 --- a/functions/src/translate/item.yaml +++ b/functions/src/translate/item.yaml @@ -9,7 +9,7 @@ generationDate: 2023-12-05:17-20 hidden: false icon: '' labels: - author: guyl + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.7.0 diff --git a/functions/src/v2_model_server/function.yaml b/functions/src/v2_model_server/function.yaml index 45d261b6a..5ecfec9ba 100644 --- a/functions/src/v2_model_server/function.yaml +++ b/functions/src/v2_model_server/function.yaml @@ -5,7 +5,7 @@ metadata: hash: ad85919d3b9cf2acae43a3434ba56e01b005755e project: '' labels: - author: yaronh + author: Iguazio framework: sklearn categories: - model-serving diff --git a/functions/src/v2_model_server/item.yaml b/functions/src/v2_model_server/item.yaml index 7bde91a64..4beda6243 100644 --- a/functions/src/v2_model_server/item.yaml +++ b/functions/src/v2_model_server/item.yaml @@ -9,7 +9,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: yaronh + author: Iguazio framework: sklearn maintainers: [] marketplaceType: '' diff --git a/functions/src/v2_model_tester/function.yaml b/functions/src/v2_model_tester/function.yaml index 518bd1492..c9562b097 100644 --- a/functions/src/v2_model_tester/function.yaml +++ b/functions/src/v2_model_tester/function.yaml @@ -5,7 +5,7 @@ metadata: hash: 72d3f664ff2aa870109e44f52f975bda2ac13682 project: '' labels: - author: yaronh + author: Iguazio categories: - model-testing - machine-learning diff --git a/functions/src/v2_model_tester/item.yaml 
b/functions/src/v2_model_tester/item.yaml index ce1ecef5f..c3412fc5c 100644 --- a/functions/src/v2_model_tester/item.yaml +++ b/functions/src/v2_model_tester/item.yaml @@ -9,7 +9,7 @@ generationDate: 2022-08-28:17-25 hidden: false icon: '' labels: - author: yaronh + author: Iguazio maintainers: [] marketplaceType: '' mlrunVersion: 1.1.0 diff --git a/modules/src/count_events/item.yaml b/modules/src/count_events/item.yaml index e0eb09069..e5d796b62 100644 --- a/modules/src/count_events/item.yaml +++ b/modules/src/count_events/item.yaml @@ -6,7 +6,7 @@ example: count_events.ipynb generationDate: 2025-09-16:12-25 hidden: false labels: - author: iguazio + author: Iguazio mlrunVersion: 1.10.0-rc27 name: count_events spec: From bbcf638477567e762ab258b0127efa36135c834b Mon Sep 17 00:00:00 2001 From: Daniel Perez <100069700+danielperezz@users.noreply.github.com> Date: Sun, 5 Oct 2025 12:45:51 +0300 Subject: [PATCH 02/17] Organize CLI directory + new CLI for generating item.yaml files (#906) * create a CLI for generating item.yaml and organize the CLI directory * modify comments to module * PR fixes * Update cli/common/generate_item_yaml.py Co-authored-by: Eyal Danieli --------- Co-authored-by: Eyal Danieli --- cli/README.md | 66 +++++++++++++++++++++++ cli/cli.py | 7 +-- cli/common/generate_item_yaml.py | 55 +++++++++++++++++++ cli/common/item_yaml.py | 54 ------------------- cli/functions/new_function_item.py | 67 ------------------------ cli/utils/function_item_template.yaml.j2 | 22 ++++++++ cli/utils/item_template.yaml | 21 -------- cli/utils/module_item_template.yaml.j2 | 16 ++++++ requirements.txt | 1 + 9 files changed, 162 insertions(+), 147 deletions(-) create mode 100644 cli/README.md create mode 100644 cli/common/generate_item_yaml.py delete mode 100644 cli/common/item_yaml.py delete mode 100644 cli/functions/new_function_item.py create mode 100644 cli/utils/function_item_template.yaml.j2 delete mode 100644 cli/utils/item_template.yaml create mode 100644 
cli/utils/module_item_template.yaml.j2 diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 000000000..4a3cd3bfc --- /dev/null +++ b/cli/README.md @@ -0,0 +1,66 @@ +## Available Commands +(This is a basic usage demonstration; explore the code for more advanced options) + +### generate-item-yaml +Generate an `item.yaml` file (basic draft) in the appropriate directory from a Jinja2 template + +Usage: + `python -m cli.cli generate-item-yaml TYPE NAME` + +Example: + `python -m cli.cli generate-item-yaml function aggregate` + +--- + +### item-to-function +Creates a `function.yaml` file based on a provided `item.yaml` file. + +Usage: + `python -m cli.cli item-to-function --item-path PATH` + +Example: + `python -m cli.cli item-to-function --item-path functions/src/aggregate` + +--- + +### function-to-item +Creates an `item.yaml` file based on a provided `function.yaml` file. + +Usage: + `python -m cli.cli function-to-item --path PATH` + +Example: + `python -m cli.cli function-to-item --path functions/src/aggregate` + +--- + +### run-tests +Run assets test suite. + +Usage: + `python -m cli.cli run-tests -r PATH -s TYPE -fn NAME` + +Example: + `python -m cli.cli run-tests -r functions/src/aggregate -s py -fn aggregate` + +--- + +### build-marketplace +Build and push (create a PR) the updated marketplace/ directory (e.g., marketplace/functions) + +Usage: + `python -m cli.cli build-marketplace -s SOURCE-DIR -sn TYPE -m MARKETPLACE-DIR -c CHANNEL -v -f` + +Example: + `python -m cli.cli build-marketplace -s ./functions/src -sn functions -m marketplace -c master -v -f` + +--- + +### update-readme +Regenerate the `README.md` files in each of the asset directories (functions/modules). 
+ +Usage: + `python -m cli.cli update-readme --asset TYPE` + +Example: + `python -m cli.cli update-readme --asset functions --asset modules` \ No newline at end of file diff --git a/cli/cli.py b/cli/cli.py index 8fee9891a..e8e6922fe 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -17,22 +17,19 @@ from cli.functions.function_to_item import function_to_item_cli from cli.functions.item_to_function import item_to_function_cli from cli.marketplace.build import build_marketplace_cli -from cli.functions.new_function_item import new_item as new_function_item from cli.common.test_suite import test_suite -from cli.common.item_yaml import update_functions_yaml from cli.common.update_readme import update_readme +from cli.common.generate_item_yaml import generate_item_yaml @click.group() def cli(): pass - -cli.add_command(new_function_item) +cli.add_command(generate_item_yaml, name="generate-item-yaml") cli.add_command(item_to_function_cli, name="item-to-function") cli.add_command(function_to_item_cli, name="function-to-item") cli.add_command(test_suite, name="run-tests") cli.add_command(build_marketplace_cli, name="build-marketplace") -cli.add_command(update_functions_yaml, name="update-functions-yaml") cli.add_command(update_readme, name="update-readme") if __name__ == "__main__": diff --git a/cli/common/generate_item_yaml.py b/cli/common/generate_item_yaml.py new file mode 100644 index 000000000..9ce362c37 --- /dev/null +++ b/cli/common/generate_item_yaml.py @@ -0,0 +1,55 @@ +import sys +from pathlib import Path +from datetime import datetime +import click +from jinja2 import Environment, FileSystemLoader + +TEMPLATES = { + "function": "cli/utils/function_item_template.yaml.j2", + "module": "cli/utils/module_item_template.yaml.j2", +} + + +@click.command() +@click.argument("type", type=click.Choice(list(TEMPLATES.keys()))) +@click.argument("name") +@click.option("--overwrite", is_flag=True, help="Replace existing file instead of raising an error.") +def generate_item_yaml(type: 
str, name: str, overwrite: bool = False): + """ + Generate an item.yaml file from a template. + +type: one of the supported types (currently only `function` or `module`) +name: the function/module name (also used as the directory name) +overwrite: whether to overwrite existing item.yaml file + """ + # Construct the target path + path = Path(f"{type}s/src/{name}").resolve() + output_file = path / "item.yaml" + + if not overwrite and output_file.exists(): + click.echo(f"Error: {output_file} already exists.", err=True) + sys.exit(1) + + if not path.exists(): + click.echo(f"Error: {path} does not exist.", err=True) + sys.exit(1) + + # Render parameters + params = { + "example": f"{name}.ipynb", + "generationDate": datetime.utcnow().strftime("%Y-%m-%d"), + "name": name, + "filename": f"{name}.py", + } + + # Load and render template + env = Environment(loader=FileSystemLoader(".")) + template = env.get_template(TEMPLATES[type]) + rendered = template.render(params) + + output_file.write_text(rendered) + click.echo(f"Created {output_file}") + + +if __name__ == "__main__": + generate_item_yaml() \ No newline at end of file diff --git a/cli/common/item_yaml.py b/cli/common/item_yaml.py deleted file mode 100644 index a14ea48c2..000000000 --- a/cli/common/item_yaml.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2019 Iguazio -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import click -from cli.utils.path_iterator import PathIterator -from cli.utils.helpers import is_item_dir -import yaml -import datetime - - -@click.command() -@click.option("-r", "--root-directory", default=".", help="Path to root directory") -@click.option("-v", "--version", help="update version number in function item yaml") -@click.option("-mv", "--mlrun-version", help="update mlrun version in function item.yaml") -@click.option("-p", "--platform-version", help="update platform version in function item.yaml") -@click.option("-d", "--date-time", help="update date-time in function item.yaml") -def update_functions_yaml(root_directory: str, - version: str, - mlrun_version: str, - platform_version: str, - date_time: str): - if not root_directory: - click.echo("-r/--root-directory is required") - exit(1) - - item_iterator = PathIterator(root=root_directory, rule=is_item_dir, as_path=True) - for inner_dir in item_iterator: - item_yaml = "item.yaml" - if (inner_dir / item_yaml).exists(): - path = str(inner_dir)+"/"+item_yaml - stream = open(path, 'r') - data = yaml.load(stream=stream, Loader=yaml.FullLoader) - if version: - data['version'] = version - if mlrun_version: - data['mlrunVersion'] = mlrun_version - if platform_version: - data['platformVersion'] = platform_version - if date_time: - data['generationDate'] = datetime.datetime.now().strftime('%Y-%m-%d:%H-%M') - print(data) - with open(path, 'w') as yaml_file: - yaml_file.write(yaml.dump(data, default_flow_style=False)) diff --git a/cli/functions/new_function_item.py b/cli/functions/new_function_item.py deleted file mode 100644 index 70eb30d55..000000000 --- a/cli/functions/new_function_item.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2019 Iguazio -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from datetime import datetime -from pathlib import Path - -import click - - -@click.command() -@click.option( - "-p", "--path", help="Path to directory in which a new item.yaml will be created" -) -@click.option("-o", "--override", is_flag=True, help="Override if already exists") -def new_item(path: str, override: bool): - path = Path(path) / "item.yaml" - - if not path.parent.exists(): - path.parent.mkdir(parents=True) - elif path.exists() and not override: - click.echo( - f"{path / 'item.yaml'} already exists, set [-o, --override] to override" - ) - exit(1) - - with open(path, "w") as f: - f.write( - f""" -apiVersion: v1 -categories: [] # List of category names -description: '' # Short description -doc: '' # Path to README.md if exists -example: '' # Path to examole notebook -generationDate: {str(datetime.utcnow())} -icon: '' # Path to icon file -labels: {{}} # Key values label pairs -maintainers: [] # List of maintainers -mlrunVersion: '' # Function’s MLRun version requirement, should follow python’s versioning schema -name: '' # Function name -platformVersion: '' # Function’s Iguazio version requirement, should follow python’s versioning schema -spec: - filename: '' # Implementation file - handler: '' # Handler function name - image: '' # Base image name - kind: '' # Function kind - requirements: [] # List of Pythonic library requirements - customFields: {{}} # Custom spec fields - env: [] # Spec environment params -url: '' -version: 0.0.1 # Function version, should follow standard semantic versioning schema -""" - ) - - -if __name__ == "__main__": 
- new_item() diff --git a/cli/utils/function_item_template.yaml.j2 b/cli/utils/function_item_template.yaml.j2 new file mode 100644 index 000000000..da35ef819 --- /dev/null +++ b/cli/utils/function_item_template.yaml.j2 @@ -0,0 +1,22 @@ +apiVersion: v1 +categories: [] {# List of category names #} +description: '' {# Short description #} +doc: '' {# Path to README.md if exists #} +example: {{ example|default('') }} {# Path to example notebook #} +generationDate: {{ generationDate|default('') }} {# Automatically generated ISO 8601 date #} +hidden: false {# Hide function from the UI #} +icon: '' {# Path to icon file #} +labels: {# Key-value label pairs #} + author: Iguazio +maintainers: [] {# List of maintainers #} +mlrunVersion: '' {# Function’s MLRun version requirement, should follow python’s versioning schema #} +name: {{ name|default('') }} {# Function name #} +platformVersion: '' {# Function’s Iguazio version requirement, should follow python’s versioning schema #} +spec: + filename: {{ filename|default('') }} {# Implementation file #} + handler: '' {# Handler function name #} + image: mlrun/mlrun {# Base image name #} + kind: '' {# Function kind #} + requirements: [] {# List of Pythonic library requirements #} +url: '' +version: 1.0.0 {# Function version, should follow standard semantic versioning schema #} \ No newline at end of file diff --git a/cli/utils/item_template.yaml b/cli/utils/item_template.yaml deleted file mode 100644 index b1d38d334..000000000 --- a/cli/utils/item_template.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: v1 -categories: [] # List of category names -description: '' # Short description -doc: '' # Path to README.md if exists -example: '' # Path to examole notebook -generationDate: '' # Automatically generated ISO8086 datetime -hidden: false # Hide function from the UI -icon: '' # Path to icon file -labels: {} # Key values label pairs -maintainers: [] # List of maintainers -mlrunVersion: '' # Function’s MLRun version requirement, 
should follow python’s versioning schema -name: '' # Function name -platformVersion: '' # Function’s Iguazio version requirement, should follow python’s versioning schema -spec: - filename: '' # Implementation file - handler: '' # Handler function name - image: '' # Base image name - kind: '' # Function kind - requirements: [] # List of Pythonic library requirements -url: '' # ??? -version: '' # Function version, should follow standard semantic versioning schema \ No newline at end of file diff --git a/cli/utils/module_item_template.yaml.j2 b/cli/utils/module_item_template.yaml.j2 new file mode 100644 index 000000000..539cd6f0a --- /dev/null +++ b/cli/utils/module_item_template.yaml.j2 @@ -0,0 +1,16 @@ +apiVersion: v1 +categories: [] {# List of category names #} +description: '' {# Short description #} +example: {{ example|default('') }} {# Path to example notebook #} +generationDate: {{ generationDate|default('') }} {# Automatically generated ISO 8601 datetime #} +hidden: false {# Hide Module from the UI #} +labels: + author: Iguazio +mlrunVersion: '' {# Module’s MLRun version requirement, should follow python’s versioning schema #} +name: {{ name|default('') }} {# Module name #} +spec: + filename: {{ filename|default('') }} {# Implementation file #} + image: mlrun/mlrun {# Base image name #} + kind: '' {# Module kind #} + requirements: [] {# List of Pythonic library requirements #} +version: 1.0.0 {# Module version, should follow standard semantic versioning schema #} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e58ca8e98..c393fd552 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ wheel bs4 mlrun>=1.0.0 jinja2~=3.1.2 +click>=8.0 pipenv myst_nb black>=24.3.0 From 73b4423da22ef97b0725cf88fb190578c4555ba6 Mon Sep 17 00:00:00 2001 From: Eyal Danieli Date: Wed, 5 Nov 2025 18:52:55 +0200 Subject: [PATCH 03/17] fill count events notebook (#908) --- modules/src/count_events/count_events.ipynb | 812 
+++++++++++++++++++- modules/src/count_events/count_events.py | 13 +- modules/src/count_events/item.yaml | 4 +- modules/src/count_events/requirements.txt | 4 +- 4 files changed, 819 insertions(+), 14 deletions(-) diff --git a/modules/src/count_events/count_events.ipynb b/modules/src/count_events/count_events.ipynb index 54f657bb0..8a3cac849 100644 --- a/modules/src/count_events/count_events.ipynb +++ b/modules/src/count_events/count_events.ipynb @@ -1,35 +1,829 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "2f5aea66-03d3-4ba2-a0cb-3e74e8376ff0", + "metadata": {}, + "source": [ + "# Count Events Demo" + ] + }, + { + "cell_type": "markdown", + "id": "cdadd95e-d65f-4910-b72f-ef545c09c96b", + "metadata": {}, + "source": [ + "## Overview" + ] + }, + { + "cell_type": "markdown", + "id": "c336160a-3eba-40b3-8d02-7849ca74925b", + "metadata": {}, + "source": [ + "This notebook walks through a simple example of how to monitor a real-time serving function and how to add a custom monitoring application from the hub.\n", + "For simplicity, we’ll use the Count Events application, which calculates the number of requests in each time window.\n", + "If you’d like to create your own model monitoring application (which can later be added to the hub), follow these instructions: https://docs.mlrun.org/en/stable/model-monitoring/applications.html\n", + "\n", + "To add a model monitoring application to your project from the hub, you can choose one of two approaches:\n", + "1. **Set it directly** – the application will be deployed as is.\n", + "2. 
**Import it as a module** – this lets you test and modify the application code before deploying it.\n" + ] + }, + { + "cell_type": "markdown", + "id": "1bcc90b4-f3c3-46ea-8348-1e7239e4e6e0", + "metadata": {}, + "source": [ + "## Demo" + ] + }, + { + "cell_type": "markdown", + "id": "2761fb6c-2c9d-4e8c-8efd-e01762b3bb22", + "metadata": {}, + "source": [ + "### Create a project" + ] + }, { "cell_type": "code", - "execution_count": null, - "id": "initial_id", + "execution_count": 1, + "id": "e06ac3e1-8afd-45ab-9448-f664a4e54640", "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2025-11-05 15:33:39,611 [warning] Failed resolving version info. Ignoring and using defaults\n", + "> 2025-11-05 15:33:43,049 [warning] Server or client version is unstable. Assuming compatible: {\"client_version\":\"0.0.0+unstable\",\"server_version\":\"1.11.0\"}\n", + "> 2025-11-05 15:33:58,614 [info] Created and saved project: {\"context\":\"./\",\"from_template\":null,\"name\":\"count-events-demo\",\"overwrite\":false,\"save\":true}\n", + "> 2025-11-05 15:33:58,616 [info] Project created successfully: {\"project_name\":\"count-events-demo\",\"stored_in_db\":true}\n" + ] + } + ], + "source": [ + "import mlrun\n", + "project = mlrun.get_or_create_project(\"count-events-demo\",'./')" + ] + }, + { + "cell_type": "markdown", + "id": "cb0c365d-243f-447d-a693-38007d38329a", + "metadata": {}, + "source": [ + "### Generate datastore profiles for model monitoring\n", + "Before you enable model monitoring, you must configure datastore profiles for TSDB and streaming endpoints. 
A datastore profile holds all the information required to address an external data source, including credentials.\n", + "Model monitoring supports Kafka and V3IO as streaming platforms, and TDEngine and V3IO as TSDB platforms.\n", + "\n", + "In this example we will use V3IO for both streaming and TSDB platforms." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "10df799e-0e63-409c-a204-551635c90410", + "metadata": {}, + "outputs": [], + "source": [ + "from mlrun.datastore.datastore_profile import (\n", + " DatastoreProfileV3io\n", + ")\n", + "\n", + "v3io_profile = DatastoreProfileV3io(name=\"v3io_profile\", v3io_access_key=mlrun.mlconf.get_v3io_access_key())\n", + "\n", + "project.register_datastore_profile(v3io_profile)\n", + "project.set_model_monitoring_credentials(stream_profile_name=v3io_profile.name, tsdb_profile_name=v3io_profile.name)" + ] + }, + { + "cell_type": "markdown", + "id": "94af15ae-b250-4583-950d-b14876065b8a", + "metadata": {}, + "source": [ + "### Deploy model monitoring infrastructure" + ] + }, + { + "cell_type": "markdown", + "id": "56b2adf8-dd65-4ee1-bf18-cd97eeb129b8", + "metadata": {}, + "source": [ + "Once you’ve provided the model monitoring credentials, you can enable monitoring capabilities for your project. \n", + "Visit MLRun's [Model Monitoring Architecture](https://docs.mlrun.org/en/stable/model-monitoring/index.html#model-monitoring-des) to read more." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a83f95bc-e6b5-4184-84cd-d3117f394b1c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2025-11-05 15:41:01 (info) Deploying function\n", + "2025-11-05 15:41:01 (info) Building\n", + "2025-11-05 15:41:01 (info) Staging files and preparing base images\n", + "2025-11-05 15:41:01 (warn) Using user provided base image, runtime interpreter version is provided by the base image\n", + "2025-11-05 15:41:02 (info) Building processor image\n", + "2025-11-05 15:42:57 (info) Build complete\n", + "2025-11-05 15:43:07 (info) Function deploy complete\n", + "2025-11-05 15:40:57 (info) Deploying function\n", + "2025-11-05 15:40:57 (info) Building\n", + "2025-11-05 15:40:58 (info) Staging files and preparing base images\n", + "2025-11-05 15:40:58 (warn) Using user provided base image, runtime interpreter version is provided by the base image\n", + "2025-11-05 15:40:58 (info) Building processor image\n", + "2025-11-05 15:42:53 (info) Build complete\n", + "2025-11-05 15:43:12 (info) Function deploy complete\n", + "2025-11-05 15:40:59 (info) Deploying function\n", + "2025-11-05 15:40:59 (info) Building\n", + "2025-11-05 15:40:59 (info) Staging files and preparing base images\n", + "2025-11-05 15:40:59 (warn) Using user provided base image, runtime interpreter version is provided by the base image\n", + "2025-11-05 15:41:00 (info) Building processor image\n", + "2025-11-05 15:42:55 (info) Build complete\n", + "2025-11-05 15:43:03 (info) Function deploy complete\n" + ] + } + ], + "source": [ + "project.enable_model_monitoring(base_period=10, \n", + " deploy_histogram_data_drift_app=False, # built-in monitoring application for structured data \n", + " wait_for_deployment=True)" + ] + }, + { + "cell_type": "markdown", + "id": "e9f4186b-6f8f-479e-a603-d270397dd9ff", + "metadata": {}, + "source": [ + "### Log Models" + ] + }, + { + "cell_type": "markdown", + 
"id": "310fed55-3f62-4af8-800f-4fb2dccfe2fd", + "metadata": { + "tags": [] + }, + "source": [ + "We’ll generate some dummy classification models and log them to the project." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "fafcec2f-75d1-4af0-bbe0-b796367c48be", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import make_classification\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LinearRegression\n", + "import pickle\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6cabd9aa-87f2-4af7-a5c6-ea0417ceb33f", + "metadata": {}, + "outputs": [], + "source": [ + "# Prepare a model and generate training set\n", + "\n", + "X,y = make_classification(n_samples=200,n_features=5,random_state=42)\n", + "X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.8,test_size=0.2,random_state=42)\n", + "model = LinearRegression()\n", + "model.fit(X_train,y_train)\n", + "X_test = pd.DataFrame(X_test,columns=[f\"column_{i}\" for i in range(5)])\n", + "y_test = pd.DataFrame(y_test,columns=[\"label\"])\n", + "training_set = pd.concat([X_test,y_test],axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3afde46a-9f26-4438-bedb-acad15866b03", + "metadata": {}, + "outputs": [], + "source": [ + "# Log your models\n", + "for i in range(5):\n", + " project.log_model(key=f\"model_{i}\",body=pickle.dumps(model),model_file=f'model.pkl',training_set=training_set,label_column=\"label\")" + ] + }, + { + "cell_type": "markdown", + "id": "49d820b1-9fd7-4184-9005-25d69578c995", + "metadata": {}, + "source": [ + "### Deploy Serving Function" + ] + }, + { + "cell_type": "markdown", + "id": "19fd7570-3f91-45ff-ba2b-4aebce4a95b4", + "metadata": {}, + "source": [ + "We’ll use a basic serving function and enrich it with the logged models.\n", + "\n", + "\n", + "Note that if you want to monitor a serving function along with its 
associated models, you must enable tracking by calling `set_tracking()`. Otherwise, the serving function’s requests won’t be monitored." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cb806c5b-a0a0-4deb-a63d-f2ea72dc3e02", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the serving\n", + "serving = mlrun.new_function('serving-model-v1',kind='serving')\n", + "graph = serving.set_topology(\"router\", engine=\"sync\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "93ee54ec-0c4a-4eb1-8bc3-d065aec64c8f", + "metadata": {}, + "outputs": [], + "source": [ + "# Apply monitoring\n", + "serving.set_tracking()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f162a254-00ce-4c8a-89df-0cf5d25da5b1", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 5/5 [00:00<00:00, 22052.07it/s]\n" + ] + } + ], + "source": [ + "# Add models to your serving\n", + "models_uri = [model.uri for model in project.list_models(tag=\"latest\")]\n", + "i=0\n", + "from tqdm import tqdm\n", + "for uri in tqdm(models_uri):\n", + " serving.add_model(key=f'model_{i}',model_path=uri,class_name='mlrun.frameworks.sklearn.SKLearnModelServer')\n", + " i+=1" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ff91f360-5c85-4bc7-a3c3-80a31f1ebd3c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2025-11-05 15:55:08,989 [info] Starting remote function deploy\n", + "2025-11-05 15:55:09 (info) Deploying function\n", + "2025-11-05 15:55:09 (info) Building\n", + "2025-11-05 15:55:09 (info) Staging files and preparing base images\n", + "2025-11-05 15:55:09 (warn) Using user provided base image, runtime interpreter version is provided by the base image\n", + "2025-11-05 15:55:09 (info) Building processor image\n", + "2025-11-05 15:56:54 (info) Build complete\n", + "2025-11-05 15:57:06 (info) Function deploy 
complete\n", + "> 2025-11-05 15:57:10,181 [info] Model endpoint creation task completed with state succeeded\n", + "> 2025-11-05 15:57:10,181 [info] Successfully deployed function: {\"external_invocation_urls\":[\"count-events-demo-serving-model-v1.default-tenant.app.vmdev211.lab.iguazeng.com/\"],\"internal_invocation_urls\":[\"nuclio-count-events-demo-serving-model-v1.default-tenant.svc.cluster.local:8080\"]}\n" + ] + } + ], + "source": [ + "# Deploy serving\n", + "serving_function = project.deploy_function(serving)" + ] + }, + { + "cell_type": "markdown", + "id": "1652a010-e086-4c62-9493-1a82bc125ad4", + "metadata": {}, + "source": [ + "### Invoke Serving" + ] + }, + { + "cell_type": "markdown", + "id": "4c937193-27bc-4b6f-bc1d-cf7472045778", + "metadata": {}, + "source": [ + "Let’s generate some dummy data and invoke our serving function." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "66f469db-9f5b-4e3d-bc85-160a9c90bc8f", + "metadata": {}, "outputs": [], "source": [ - "" + "serving = project.get_function(\"serving-model-v1\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "50305c3e-bd1b-4240-9c63-9851173af75e", + "metadata": {}, + "outputs": [], + "source": [ + "inputs = [[-0.51,0.051,0.6287761723991921,-0.8751269647375463,-1.0660002219502747], [-0.51,0.051,0.6287761723991921,-0.8751269647375463,-1.0660002219502747], [-0.51,0.051,0.6287761723991921,-0.8751269647375463,-1.0660002219502747], [-0.51,0.051,0.6287761723991921,-0.8751269647375463,-1.0660002219502747], [-0.51,0.051,0.6287761723991921,-0.8751269647375463,-1.0660002219502747]]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "9e8372d6-4fa7-4b45-8932-1f690b55048c", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "for i in range(5):\n", + " for j in range(100):\n", + " serving.invoke(f\"/v2/models/model_{i}/infer\", {\"inputs\": inputs})" + ] + }, + { + "cell_type": "markdown", + "id": 
"4eeb44e1-9c1a-430a-b978-f58f1adeaa12", + "metadata": {}, + "source": [ + "# Evaluate App" + ] + }, + { + "cell_type": "markdown", + "id": "936afba8-c06b-4141-a85e-5cbc9d32aa45", + "metadata": {}, + "source": [ + "Before deploying the Count Events application, let’s first test it to make sure it works as expected. We’ll import it as a module, which downloads the module file to your local filesystem, and then run it as a job using the `evaluate` mechanism." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "213425d1-8470-483e-b325-14aaa991c8c5", + "metadata": {}, + "outputs": [], + "source": [ + "# Import count events from the hub\n", + "count_events_app = mlrun.import_module(\"hub://count_events\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "d91450e4-effb-4963-b913-dcd9829e78b9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2025-11-05 15:57:37,746 [info] Changing function name - adding `\"-batch\"` suffix: {\"func_name\":\"countapp-batch\"}\n", + "> 2025-11-05 15:57:37,927 [info] Storing function: {\"db\":\"http://mlrun-api:8080\",\"name\":\"countapp-batch--handler\",\"uid\":\"b7c240fd99ed4c9b940db6a587a53b80\"}\n", + "> 2025-11-05 15:57:38,202 [info] Job is running in the background, pod: countapp-batch--handler-469fm\n", + "> 2025-11-05 15:57:42,390 [info] Counted events for model endpoint window: {\"count\":4,\"end\":\"NaT\",\"model_endpoint_name\":\"model_0\",\"start\":\"NaT\"}\n", + "> 2025-11-05 15:57:42,498 [info] To track results use the CLI: {\"info_cmd\":\"mlrun get run b7c240fd99ed4c9b940db6a587a53b80 -p count-events-demo\",\"logs_cmd\":\"mlrun logs b7c240fd99ed4c9b940db6a587a53b80 -p count-events-demo\"}\n", + "> 2025-11-05 15:57:42,498 [info] Or click for UI: {\"ui_url\":\"https://dashboard.default-tenant.app.vmdev211.lab.iguazeng.com/mlprojects/count-events-demo/jobs/monitor-jobs/countapp-batch--handler/b7c240fd99ed4c9b940db6a587a53b80/overview\"}\n", 
+ "> 2025-11-05 15:57:42,499 [info] Run execution finished: {\"name\":\"countapp-batch--handler\",\"status\":\"completed\"}\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartendstatekindnamelabelsinputsparametersresults
count-events-demo0Nov 05 15:57:412025-11-05 15:57:42.474376+00:00completedruncountapp-batch--handler
v3io_user=iguazio
kind=job
owner=iguazio
mlrun/client_version=0.0.0+unstable
mlrun/client_python_version=3.11.12
host=countapp-batch--handler-469fm
sample_data
endpoints=['model_0']
write_output=False
existing_data_handling=fail_on_overlap
stream_profile=None
model_0-d25a6714a19b4027b9bccfe8adca8ddc_NaT_NaT={'metric_name': 'count', 'metric_value': 4.0}
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2025-11-05 15:57:46,373 [info] Run execution finished: {\"name\":\"countapp-batch--handler\",\"status\":\"completed\"}\n" + ] + } + ], + "source": [ + "# Run the app as a job\n", + "res = count_events_app.CountApp.evaluate(func_path=\"count_events.py\",\n", + " run_local=False,\n", + " sample_data=pd.DataFrame({\"col\": [1, 2, 3, 4]}),\n", + " image=image,\n", + " endpoints=[\"model_0\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "504adb0b-6ccf-421c-98fc-25ed1a8691e8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'model_0-d25a6714a19b4027b9bccfe8adca8ddc_NaT_NaT': {'metric_name': 'count',\n", + " 'metric_value': 4.0}}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res.outputs" + ] + }, + { + "cell_type": "markdown", + "id": "3a05a1c9-b62d-470a-9e18-4c3f5ca61b91", + "metadata": {}, + "source": [ + "Now that the application is available on your filesystem, you can register and deploy it just like any other custom application." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "28bc9645-69b2-418d-a5c5-7ba94f64745f", + "metadata": {}, + "outputs": [], + "source": [ + "fn = project.set_model_monitoring_function(\n", + " func=\"count_events.py\",\n", + " application_class=\"CountApp\",\n", + " name=\"CountEventsFromFile\",\n", + " image=image,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "f318f85f-76d8-4494-8029-870edf54df6b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2025-11-05 16:09:48,293 [info] Starting remote function deploy\n", + "2025-11-05 16:09:48 (info) Deploying function\n", + "2025-11-05 16:09:48 (info) Building\n", + "2025-11-05 16:09:48 (info) Staging files and preparing base images\n", + "2025-11-05 16:09:48 (warn) Using user provided base image, runtime interpreter version is provided by the base image\n", + "2025-11-05 16:09:48 (info) Building processor image\n", + "2025-11-05 16:11:33 (info) Build complete\n", + "2025-11-05 16:11:41 (info) Function deploy complete\n", + "> 2025-11-05 16:11:49,604 [info] Model endpoint creation task completed with state succeeded\n", + "> 2025-11-05 16:11:49,605 [info] Successfully deployed function: {\"external_invocation_urls\":[],\"internal_invocation_urls\":[\"nuclio-count-events-demo-counteventsfromfile.default-tenant.svc.cluster.local:8080\"]}\n" + ] + }, + { + "data": { + "text/plain": [ + "DeployStatus(state=ready, outputs={'endpoint': 'http://nuclio-count-events-demo-counteventsfromfile.default-tenant.svc.cluster.local:8080', 'name': 'count-events-demo-counteventsfromfile'})" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "project.deploy_function(fn)" + ] + }, + { + "cell_type": "markdown", + "id": "d2b527ee-19e6-4f89-9e51-702fa1707986", + "metadata": {}, + "source": [ + "## Set Application from Hub" + ] + }, + { + "cell_type": "markdown", + "id": 
"b8fa2433-535c-498b-a7ee-3d82d474d447", + "metadata": {}, + "source": [ + "As mentioned, you can set the application directly from the hub by providing a valid hub path (`hub://`)." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "61c50ac6-8dac-41a2-bb9c-705ab543e234", + "metadata": {}, + "outputs": [], + "source": [ + "fn = project.set_model_monitoring_function(\n", + " func=\"hub://count_events\",\n", + " application_class=\"CountApp\",\n", + " name=\"CountEvents\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "df313a94-d742-4ff6-8a28-8390322b8074", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2025-11-05 15:57:58,659 [info] Starting remote function deploy\n", + "2025-11-05 15:57:59 (info) Deploying function\n", + "2025-11-05 15:57:59 (info) Building\n", + "2025-11-05 15:57:59 (info) Staging files and preparing base images\n", + "2025-11-05 15:57:59 (warn) Using user provided base image, runtime interpreter version is provided by the base image\n", + "2025-11-05 15:57:59 (info) Building processor image\n", + "2025-11-05 15:59:34 (info) Build complete\n", + "2025-11-05 15:59:42 (info) Function deploy complete\n", + "> 2025-11-05 15:59:49,826 [info] Model endpoint creation task completed with state succeeded\n", + "> 2025-11-05 15:59:49,827 [info] Successfully deployed function: {\"external_invocation_urls\":[],\"internal_invocation_urls\":[\"nuclio-count-events-demo-countevents.default-tenant.svc.cluster.local:8080\"]}\n" + ] + }, + { + "data": { + "text/plain": [ + "DeployStatus(state=ready, outputs={'endpoint': 'http://nuclio-count-events-demo-countevents.default-tenant.svc.cluster.local:8080', 'name': 'count-events-demo-countevents'})" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "project.deploy_function(fn)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + 
"display_name": "mlrun-base-py311", "language": "python", - "name": "python3" + "name": "conda-env-mlrun-base-py311-py" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.11.12" } }, "nbformat": 4, diff --git a/modules/src/count_events/count_events.py b/modules/src/count_events/count_events.py index c2d6444e4..1c6d97621 100644 --- a/modules/src/count_events/count_events.py +++ b/modules/src/count_events/count_events.py @@ -20,9 +20,20 @@ class CountApp(ModelMonitoringApplicationBase): + """ + Model Monitoring Application that counts the number of events in the given time window. + """ def do_tracking( - self, monitoring_context: mm_context.MonitoringApplicationContext + self, + monitoring_context: mm_context.MonitoringApplicationContext ) -> ModelMonitoringApplicationMetric: + """ + The do_tracking method implementation for the CountApp class. + It counts the number of events in the sample data-frame and logs the count. + + :param monitoring_context: The monitoring application context. It includes the current window data as a + pandas data-frame: monitoring_context.sample_df. 
+ """ sample_df = monitoring_context.sample_df monitoring_context.logger.debug("Sample data-frame", sample_df=sample_df) count = len(sample_df) diff --git a/modules/src/count_events/item.yaml b/modules/src/count_events/item.yaml index e5d796b62..049651ddb 100644 --- a/modules/src/count_events/item.yaml +++ b/modules/src/count_events/item.yaml @@ -7,11 +7,11 @@ generationDate: 2025-09-16:12-25 hidden: false labels: author: Iguazio -mlrunVersion: 1.10.0-rc27 +mlrunVersion: 1.10.0-rc41 name: count_events spec: filename: count_events.py image: mlrun/mlrun kind: monitoring_application requirements: -version: 1.0.0 +version: 1.0.0 \ No newline at end of file diff --git a/modules/src/count_events/requirements.txt b/modules/src/count_events/requirements.txt index 89741402a..0c107c276 100644 --- a/modules/src/count_events/requirements.txt +++ b/modules/src/count_events/requirements.txt @@ -1,3 +1,3 @@ -mlrun==1.10.0-rc27 +mlrun==1.10.0-rc41 pandas==2.1.4 -pytest~=8.2 +pytest~=8.2 \ No newline at end of file From 333d4e70c285aed40e393d7e86274cd87d68ce56 Mon Sep 17 00:00:00 2001 From: Eyal Danieli Date: Thu, 6 Nov 2025 11:50:58 +0200 Subject: [PATCH 04/17] avoid noise reduction unit test (#909) --- functions/src/noise_reduction/item.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/functions/src/noise_reduction/item.yaml b/functions/src/noise_reduction/item.yaml index d8f2cddd4..c37b4ab39 100644 --- a/functions/src/noise_reduction/item.yaml +++ b/functions/src/noise_reduction/item.yaml @@ -26,4 +26,5 @@ spec: torchaudio>=2.1.2, ] url: '' -version: 1.1.0 \ No newline at end of file +version: 1.1.0 +test_valid: False \ No newline at end of file From 77e28ba35d535ca552908433350a9522f4094c4a Mon Sep 17 00:00:00 2001 From: Daniel Perez <100069700+danielperezz@users.noreply.github.com> Date: Sun, 9 Nov 2025 11:11:36 +0200 Subject: [PATCH 05/17] Add histogram-data-drift monitoring application module (without example) (#911) * histogram data drift module with 
empty example notebook * post review fixes --- .../assets/feature_stats.csv | 23 ++ .../assets/sample_df_stats.csv | 23 ++ .../histogram_data_drift.ipynb | 31 ++ .../histogram_data_drift.py | 388 ++++++++++++++++++ modules/src/histogram_data_drift/item.yaml | 20 + .../src/histogram_data_drift/requirements.txt | 3 + .../test_histogram_data_drift.py | 279 +++++++++++++ 7 files changed, 767 insertions(+) create mode 100644 modules/src/histogram_data_drift/assets/feature_stats.csv create mode 100644 modules/src/histogram_data_drift/assets/sample_df_stats.csv create mode 100644 modules/src/histogram_data_drift/histogram_data_drift.ipynb create mode 100644 modules/src/histogram_data_drift/histogram_data_drift.py create mode 100644 modules/src/histogram_data_drift/item.yaml create mode 100644 modules/src/histogram_data_drift/requirements.txt create mode 100644 modules/src/histogram_data_drift/test_histogram_data_drift.py diff --git a/modules/src/histogram_data_drift/assets/feature_stats.csv b/modules/src/histogram_data_drift/assets/feature_stats.csv new file mode 100644 index 000000000..de76ff176 --- /dev/null +++ b/modules/src/histogram_data_drift/assets/feature_stats.csv @@ -0,0 +1,23 @@ +,sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm +0,0.0,0.0,0.0,0.0 +1,0.02666666666666667,0.006666666666666667,0.02666666666666667,0.22666666666666666 +2,0.03333333333333333,0.02,0.22,0.04666666666666667 +3,0.04666666666666667,0.02666666666666667,0.07333333333333333,0.04666666666666667 +4,0.10666666666666667,0.02,0.013333333333333334,0.006666666666666667 +5,0.06,0.05333333333333334,0.0,0.006666666666666667 +6,0.03333333333333333,0.09333333333333334,0.0,0.0 +7,0.08666666666666667,0.09333333333333334,0.006666666666666667,0.0 +8,0.09333333333333334,0.06666666666666667,0.013333333333333334,0.04666666666666667 +9,0.06666666666666667,0.17333333333333334,0.02,0.02 +10,0.04,0.07333333333333333,0.03333333333333333,0.03333333333333333 
+11,0.06666666666666667,0.12666666666666668,0.08,0.14 +12,0.10666666666666667,0.08,0.09333333333333334,0.08 +13,0.04666666666666667,0.04,0.08,0.02666666666666667 +14,0.07333333333333333,0.02666666666666667,0.11333333333333333,0.013333333333333334 +15,0.02666666666666667,0.06,0.04,0.08 +16,0.013333333333333334,0.013333333333333334,0.08,0.07333333333333333 +17,0.02666666666666667,0.006666666666666667,0.04666666666666667,0.04 +18,0.006666666666666667,0.006666666666666667,0.02666666666666667,0.02 +19,0.03333333333333333,0.006666666666666667,0.013333333333333334,0.05333333333333334 +20,0.006666666666666667,0.006666666666666667,0.02,0.04 +21,0.0,0.0,0.0,0.0 diff --git a/modules/src/histogram_data_drift/assets/sample_df_stats.csv b/modules/src/histogram_data_drift/assets/sample_df_stats.csv new file mode 100644 index 000000000..dc02ef3ba --- /dev/null +++ b/modules/src/histogram_data_drift/assets/sample_df_stats.csv @@ -0,0 +1,23 @@ +,p0,petal_length_cm,petal_width_cm,sepal_length_cm,sepal_width_cm +0,0.0,1.0,1.0,1.0,1.0 +1,0.0,0.0,0.0,0.0,0.0 +2,0.0,0.0,0.0,0.0,0.0 +3,0.0,0.0,0.0,0.0,0.0 +4,0.0,0.0,0.0,0.0,0.0 +5,0.0,0.0,0.0,0.0,0.0 +6,0.0,0.0,0.0,0.0,0.0 +7,0.0,0.0,0.0,0.0,0.0 +8,0.0,0.0,0.0,0.0,0.0 +9,0.0,0.0,0.0,0.0,0.0 +10,0.0,0.0,0.0,0.0,0.0 +11,1.0,0.0,0.0,0.0,0.0 +12,0.0,0.0,0.0,0.0,0.0 +13,0.0,0.0,0.0,0.0,0.0 +14,0.0,0.0,0.0,0.0,0.0 +15,0.0,0.0,0.0,0.0,0.0 +16,0.0,0.0,0.0,0.0,0.0 +17,0.0,0.0,0.0,0.0,0.0 +18,0.0,0.0,0.0,0.0,0.0 +19,0.0,0.0,0.0,0.0,0.0 +20,0.0,0.0,0.0,0.0,0.0 +21,0.0,0.0,0.0,0.0,0.0 diff --git a/modules/src/histogram_data_drift/histogram_data_drift.ipynb b/modules/src/histogram_data_drift/histogram_data_drift.ipynb new file mode 100644 index 000000000..54a15016a --- /dev/null +++ b/modules/src/histogram_data_drift/histogram_data_drift.ipynb @@ -0,0 +1,31 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": "# Histogram Data Drift Demo", + "id": "2517d91b275da01d" + } + ], + "metadata": { + "kernelspec": { + 
"display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/modules/src/histogram_data_drift/histogram_data_drift.py b/modules/src/histogram_data_drift/histogram_data_drift.py new file mode 100644 index 000000000..b8cdcf299 --- /dev/null +++ b/modules/src/histogram_data_drift/histogram_data_drift.py @@ -0,0 +1,388 @@ +# Copyright 2024 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from dataclasses import dataclass +from typing import Final, Optional, Protocol, Union, cast + +import numpy as np +from pandas import DataFrame, Series + +import mlrun.artifacts +import mlrun.common.model_monitoring.helpers +import mlrun.model_monitoring.applications.context as mm_context +import mlrun.model_monitoring.applications.results as mm_results +import mlrun.model_monitoring.features_drift_table as mm_drift_table +from mlrun.common.schemas.model_monitoring.constants import ( + ResultKindApp, + ResultStatusApp, + StatsKind, +) +from mlrun.model_monitoring.applications import ( + ModelMonitoringApplicationBase, +) +from mlrun.model_monitoring.metrics.histogram_distance import ( + HellingerDistance, + HistogramDistanceMetric, + KullbackLeiblerDivergence, + TotalVarianceDistance, +) + + +class InvalidMetricValueError(ValueError): + pass + + +class InvalidThresholdValueError(ValueError): + pass + + +class ValueClassifier(Protocol): + def value_to_status(self, value: float) -> ResultStatusApp: ... + + +class HistogramDataDriftApplicationConstants: + NAME = "histogram-data-drift" + GENERAL_RESULT_NAME = "general_drift" + + +@dataclass +class DataDriftClassifier: + """ + Classify data drift numeric values into categorical status. + """ + + potential: float = 0.5 + detected: float = 0.7 + + def __post_init__(self) -> None: + """Catch erroneous threshold values""" + if not 0 < self.potential < self.detected < 1: + raise InvalidThresholdValueError( + "The provided thresholds do not comply with the rules" + ) + + def value_to_status(self, value: float) -> ResultStatusApp: + """ + Translate the numeric value into status category. + + :param value: The numeric value of the data drift metric, between 0 and 1. + :returns: `ResultStatusApp` according to the classification. + """ + if value > 1 or value < 0: + raise InvalidMetricValueError( + f"{value = } is invalid, must be in the range [0, 1]." 
+ ) + if value >= self.detected: + return ResultStatusApp.detected + if value >= self.potential: + return ResultStatusApp.potential_detection + return ResultStatusApp.no_detection + + +class HistogramDataDriftApplication(ModelMonitoringApplicationBase): + """ + MLRun's default data drift application for model monitoring. + + The application expects tabular numerical data, and calculates three metrics over the shared features' histograms. + The metrics are calculated on features that have reference data from the training dataset. When there is no + reference data (`feature_stats`), this application sends a warning log and does nothing. + The three metrics are: + + * Hellinger distance. + * Total variance distance. + * Kullback-Leibler divergence. + + Each metric is calculated over all the features individually and the mean is taken as the metric value. + The average of Hellinger and total variance distance is taken as the result. + + The application can log two artifacts (disabled by default due to performance issues): + + * JSON with the general drift value per feature. + * Plotly table with the various metrics and histograms per feature. + + If you want to change the application defaults, such as the classifier or which artifacts to produce, you + can either modify the downloaded source code file directly, or inherit from this class (in the same file), then + deploy it as any other model monitoring application. + Please make sure to keep the default application name. This ensures that the full functionality of the application, + including the statistics view in the UI, is available. 
+ """ + + NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME + + _REQUIRED_METRICS = {HellingerDistance, TotalVarianceDistance} + _STATS_TYPES: tuple[StatsKind, StatsKind] = ( + StatsKind.CURRENT_STATS, + StatsKind.DRIFT_MEASURES, + ) + + metrics: list[type[HistogramDistanceMetric]] = [ + HellingerDistance, + KullbackLeiblerDivergence, + TotalVarianceDistance, + ] + + def __init__( + self, + value_classifier: Optional[ValueClassifier] = None, + produce_json_artifact: bool = False, + produce_plotly_artifact: bool = False, + ) -> None: + """ + :param value_classifier: Classifier object that adheres to the :py:class:`~ValueClassifier` protocol. + If not provided, the default :py:class:`~DataDriftClassifier` is used. + :param produce_json_artifact: Whether to produce the JSON artifact or not, ``False`` by default. + :param produce_plotly_artifact: Whether to produce the Plotly artifact or not, ``False`` by default. + """ + self._value_classifier = value_classifier or DataDriftClassifier() + assert self._REQUIRED_METRICS <= set( + self.metrics + ), "TVD and Hellinger distance are required for the general data drift result" + + self._produce_json_artifact = produce_json_artifact + self._produce_plotly_artifact = produce_plotly_artifact + + def _compute_metrics_per_feature( + self, monitoring_context: mm_context.MonitoringApplicationContext + ) -> DataFrame: + """Compute the metrics for the different features and labels""" + metrics_per_feature = DataFrame( + columns=[metric_class.NAME for metric_class in self.metrics] + ) + feature_stats = monitoring_context.dict_to_histogram( + monitoring_context.feature_stats + ) + sample_df_stats = monitoring_context.dict_to_histogram( + monitoring_context.sample_df_stats + ) + for feature_name in feature_stats: + sample_hist = np.asarray(sample_df_stats[feature_name]) + reference_hist = np.asarray(feature_stats[feature_name]) + monitoring_context.logger.info( + "Computing metrics for feature", feature_name=feature_name + 
) + metrics_per_feature.loc[feature_name] = { # pyright: ignore[reportCallIssue,reportArgumentType] + metric.NAME: metric( + distrib_t=sample_hist, distrib_u=reference_hist + ).compute() + for metric in self.metrics + } + monitoring_context.logger.info("Finished computing the metrics") + + return metrics_per_feature + + def _get_general_drift_result( + self, metrics: list[mm_results.ModelMonitoringApplicationMetric] + ) -> mm_results.ModelMonitoringApplicationResult: + """Get the general drift result from the metrics list""" + value = cast( + float, + np.mean( + [ + metric.value + for metric in metrics + if metric.name + in [ + f"{HellingerDistance.NAME}_mean", + f"{TotalVarianceDistance.NAME}_mean", + ] + ] + ), + ) + + status = self._value_classifier.value_to_status(value) + + return mm_results.ModelMonitoringApplicationResult( + name=HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME, + value=value, + kind=ResultKindApp.data_drift, + status=status, + ) + + @staticmethod + def _get_metrics( + metrics_per_feature: DataFrame, + ) -> list[mm_results.ModelMonitoringApplicationMetric]: + """Average the metrics over the features and add the status""" + metrics: list[mm_results.ModelMonitoringApplicationMetric] = [] + + metrics_mean = metrics_per_feature.mean().to_dict() + + for name, value in metrics_mean.items(): + metrics.append( + mm_results.ModelMonitoringApplicationMetric( + name=f"{name}_mean", + value=value, + ) + ) + + return metrics + + @staticmethod + def _get_stats( + metrics: list[mm_results.ModelMonitoringApplicationMetric], + metrics_per_feature: DataFrame, + monitoring_context: mm_context.MonitoringApplicationContext, + ) -> list[mm_results._ModelMonitoringApplicationStats]: + """ + Return a list of the statistics. 
+ + :param metrics: the calculated metrics + :param metrics_per_feature: metric calculated per feature + :param monitoring_context: context object for current monitoring application + :returns: list of mm_results._ModelMonitoringApplicationStats for histogram data drift application + """ + stats = [] + for stats_type in HistogramDataDriftApplication._STATS_TYPES: + stats.append( + mm_results._ModelMonitoringApplicationStats( + name=stats_type, + stats=metrics_per_feature.T.to_dict() + | {metric.name: metric.value for metric in metrics} + if stats_type == StatsKind.DRIFT_MEASURES + else monitoring_context.sample_df_stats, + timestamp=monitoring_context.end_infer_time.isoformat( + sep=" ", timespec="microseconds" + ), + ) + ) + return stats + + @staticmethod + def _get_shared_features_sample_stats( + monitoring_context: mm_context.MonitoringApplicationContext, + ) -> mlrun.common.model_monitoring.helpers.FeatureStats: + """ + Filter out features without reference data in `feature_stats`, e.g. `timestamp`. 
+ """ + return mlrun.common.model_monitoring.helpers.FeatureStats( + { + key: monitoring_context.sample_df_stats[key] + for key in monitoring_context.feature_stats + } + ) + + @staticmethod + def _log_json_artifact( + drift_per_feature_values: Series, + monitoring_context: mm_context.MonitoringApplicationContext, + ) -> None: + """Log the drift values as a JSON artifact""" + monitoring_context.logger.debug("Logging drift value per feature JSON artifact") + monitoring_context.log_artifact( + mlrun.artifacts.Artifact( + body=drift_per_feature_values.to_json(), + format="json", + key="features_drift_results", + ) + ) + monitoring_context.logger.debug("Logged JSON artifact successfully") + + def _log_plotly_table_artifact( + self, + sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats, + inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats, + metrics_per_feature: DataFrame, + drift_per_feature_values: Series, + monitoring_context: mm_context.MonitoringApplicationContext, + ) -> None: + """Log the Plotly drift table artifact""" + monitoring_context.logger.debug( + "Feature stats", + sample_set_statistics=sample_set_statistics, + inputs_statistics=inputs_statistics, + ) + + monitoring_context.logger.debug("Computing drift results per feature") + drift_results = { + cast(str, key): (self._value_classifier.value_to_status(value), value) + for key, value in drift_per_feature_values.items() + } + monitoring_context.logger.debug("Producing plotly artifact") + artifact = mm_drift_table.FeaturesDriftTablePlot().produce( + sample_set_statistics=sample_set_statistics, + inputs_statistics=inputs_statistics, + metrics=metrics_per_feature.T.to_dict(), # pyright: ignore[reportArgumentType] + drift_results=drift_results, + ) + monitoring_context.logger.debug("Logging plotly artifact") + monitoring_context.log_artifact(artifact) + monitoring_context.logger.debug("Logged plotly artifact successfully") + + def _log_drift_artifacts( + self, + 
monitoring_context: mm_context.MonitoringApplicationContext, + metrics_per_feature: DataFrame, + ) -> None: + """Log JSON and Plotly drift data per feature artifacts""" + if not self._produce_json_artifact and not self._produce_plotly_artifact: + return + + drift_per_feature_values = metrics_per_feature[ + [HellingerDistance.NAME, TotalVarianceDistance.NAME] + ].mean(axis=1) + + if self._produce_json_artifact: + self._log_json_artifact(drift_per_feature_values, monitoring_context) + + if self._produce_plotly_artifact: + self._log_plotly_table_artifact( + sample_set_statistics=self._get_shared_features_sample_stats( + monitoring_context + ), + inputs_statistics=monitoring_context.feature_stats, + metrics_per_feature=metrics_per_feature, + drift_per_feature_values=drift_per_feature_values, + monitoring_context=monitoring_context, + ) + + def do_tracking( + self, monitoring_context: mm_context.MonitoringApplicationContext + ) -> list[ + Union[ + mm_results.ModelMonitoringApplicationResult, + mm_results.ModelMonitoringApplicationMetric, + mm_results._ModelMonitoringApplicationStats, + ] + ]: + """ + Calculate and return the data drift metrics, averaged over the features. + """ + monitoring_context.logger.debug("Starting to run the application") + if not monitoring_context.feature_stats: + monitoring_context.logger.warning( + "No feature statistics found, skipping the application. \n" + "In order to run the application, training set must be provided when logging the model." 
+ ) + return [] + metrics_per_feature = self._compute_metrics_per_feature( + monitoring_context=monitoring_context + ) + monitoring_context.logger.debug("Saving artifacts") + self._log_drift_artifacts( + monitoring_context=monitoring_context, + metrics_per_feature=metrics_per_feature, + ) + monitoring_context.logger.debug("Computing average per metric") + metrics = self._get_metrics(metrics_per_feature) + result = self._get_general_drift_result(metrics=metrics) + stats = self._get_stats( + metrics=metrics, + monitoring_context=monitoring_context, + metrics_per_feature=metrics_per_feature, + ) + metrics_result_and_stats = metrics + [result] + stats + monitoring_context.logger.debug( + "Finished running the application", results=metrics_result_and_stats + ) + return metrics_result_and_stats diff --git a/modules/src/histogram_data_drift/item.yaml b/modules/src/histogram_data_drift/item.yaml new file mode 100644 index 000000000..e439e1699 --- /dev/null +++ b/modules/src/histogram_data_drift/item.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +categories: +- model-serving +- structured-ML +description: Model-monitoring application for detecting and visualizing data drift +example: histogram_data_drift.ipynb +generationDate: 2025-11-06 +hidden: false +labels: + author: Iguazio +mlrunVersion: 1.10.0-rc41 +name: histogram_data_drift +spec: + filename: histogram_data_drift.py + image: mlrun/mlrun + kind: monitoring_application + requirements: + - plotly~=5.23 + - pandas +version: 1.0.0 \ No newline at end of file diff --git a/modules/src/histogram_data_drift/requirements.txt b/modules/src/histogram_data_drift/requirements.txt new file mode 100644 index 000000000..4c3614d2b --- /dev/null +++ b/modules/src/histogram_data_drift/requirements.txt @@ -0,0 +1,3 @@ +hypothesis[numpy]~=6.103 +plotly~=5.23 +pandas \ No newline at end of file diff --git a/modules/src/histogram_data_drift/test_histogram_data_drift.py b/modules/src/histogram_data_drift/test_histogram_data_drift.py new file mode 
100644 index 000000000..018edaa86 --- /dev/null +++ b/modules/src/histogram_data_drift/test_histogram_data_drift.py @@ -0,0 +1,279 @@ +# Copyright 2024 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from pathlib import Path +from unittest.mock import Mock + +import pandas as pd +import pytest +from hypothesis import given +from hypothesis import strategies as st + +import mlrun.common.model_monitoring.helpers +import mlrun.model_monitoring.applications +import mlrun.model_monitoring.applications.context as mm_context +import mlrun.utils +from mlrun.common.schemas.model_monitoring.constants import ( + ResultKindApp, + ResultStatusApp, +) +from histogram_data_drift import ( + DataDriftClassifier, + HistogramDataDriftApplication, + InvalidMetricValueError, + InvalidThresholdValueError, +) + +assets_folder = Path(__file__).parent / "assets" + + +@pytest.fixture +def project(tmp_path: Path) -> mlrun.MlrunProject: + project = mlrun.get_or_create_project("temp", allow_cross_project=True) + project.artifact_path = str(tmp_path) + return project + + +@pytest.fixture +def application() -> HistogramDataDriftApplication: + app = HistogramDataDriftApplication( + produce_json_artifact=True, produce_plotly_artifact=True + ) + return app + + +@pytest.fixture +def logger() -> mlrun.utils.Logger: + return mlrun.utils.Logger(level=logging.DEBUG, name="test_histogram_data_drift_app") + + +class TestDataDriftClassifier: + @staticmethod + 
@pytest.mark.parametrize( + ("potential", "detected"), [(0.4, 0.2), (0.0, 0.5), (0.7, 1.0), (-1, 2)] + ) + def test_invalid_threshold(potential: float, detected: float) -> None: + with pytest.raises(InvalidThresholdValueError): + DataDriftClassifier(potential=potential, detected=detected) + + @staticmethod + @given( + st.one_of( + st.floats(max_value=0, exclude_max=True), + st.floats(min_value=1, exclude_min=True), + ) + ) + def test_invalid_metric(value: float) -> None: + with pytest.raises(InvalidMetricValueError): + DataDriftClassifier().value_to_status(value) + + @staticmethod + @pytest.fixture + def classifier() -> DataDriftClassifier: + return DataDriftClassifier(potential=0.5, detected=0.7) + + @staticmethod + @pytest.mark.parametrize( + ("value", "expected_status"), + [ + (0, ResultStatusApp.no_detection), + (0.2, ResultStatusApp.no_detection), + (0.5, ResultStatusApp.potential_detection), + (0.6, ResultStatusApp.potential_detection), + (0.71, ResultStatusApp.detected), + (1, ResultStatusApp.detected), + ], + ) + def test_status( + classifier: DataDriftClassifier, value: float, expected_status: ResultStatusApp + ) -> None: + assert ( + classifier.value_to_status(value) == expected_status + ), "The status is different than expected" + + +class TestApplication: + COUNT = 12 # the sample df size + + @classmethod + @pytest.fixture + def sample_df_stats(cls) -> mlrun.common.model_monitoring.helpers.FeatureStats: + return mlrun.common.model_monitoring.helpers.FeatureStats( + { + "timestamp": { + "count": cls.COUNT, + "25%": "2024-03-11 09:31:39.152301+00:00", + "50%": "2024-03-11 09:31:39.152301+00:00", + "75%": "2024-03-11 09:31:39.152301+00:00", + "max": "2024-03-11 09:31:39.152301+00:00", + "mean": "2024-03-11 09:31:39.152301+00:00", + "min": "2024-03-11 09:31:39.152301+00:00", + }, + "ticker": { + "count": cls.COUNT, + "unique": 1, + "top": "AAPL", + "freq": cls.COUNT, + }, + "f1": { + "count": cls.COUNT, + "hist": [[2, 3, 0, 3, 1, 3], [-10, -5, 0, 5, 10, 15, 
20]], + }, + "f2": { + "count": cls.COUNT, + "hist": [[0, 6, 0, 2, 1, 3], [66, 67, 68, 69, 70, 71, 72]], + }, + "l": { + "count": cls.COUNT, + "hist": [ + [10, 0, 0, 0, 0, 2], + [0.0, 0.16, 0.33, 0.5, 0.67, 0.83, 1.0], + ], + }, + } + ) + + @staticmethod + @pytest.fixture + def feature_stats() -> mlrun.common.model_monitoring.helpers.FeatureStats: + return mlrun.common.model_monitoring.helpers.FeatureStats( + { + "f1": { + "count": 100, + "hist": [[0, 0, 0, 30, 70, 0], [-10, -5, 0, 5, 10, 15, 20]], + }, + "f2": { + "count": 100, + "hist": [[0, 45, 5, 15, 35, 0], [66, 67, 68, 69, 70, 71, 72]], + }, + "l": { + "count": 100, + "hist": [ + [30, 0, 0, 0, 0, 70], + [0.0, 0.16, 0.33, 0.5, 0.67, 0.83, 1.0], + ], + }, + } + ) + + @staticmethod + @pytest.fixture + def monitoring_context( + sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats, + feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats, + application: HistogramDataDriftApplication, + logger: mlrun.utils.Logger, + project: mlrun.MlrunProject, + ) -> mm_context.MonitoringApplicationContext: + monitoring_context = mm_context.MonitoringApplicationContext( + application_name=application.NAME, + event={}, + artifacts_logger=project, + logger=logger, + project=project, + nuclio_logger=logger, # the wrong type but works here + ) + monitoring_context._sample_df_stats = sample_df_stats + monitoring_context._feature_stats = feature_stats + + return monitoring_context + + @classmethod + def test( + cls, + application: HistogramDataDriftApplication, + monitoring_context: mm_context.MonitoringApplicationContext, + project: mlrun.MlrunProject, + ) -> None: + results = application.do_tracking(monitoring_context) + metrics = [] + assert len(results) == 6, "Expected four results & metrics % stats" + for res in results: + if isinstance( + res, + mlrun.model_monitoring.applications.ModelMonitoringApplicationResult, + ): + assert ( + res.kind == ResultKindApp.data_drift + ), "The kind should be data drift" 
+ assert ( + res.name == "general_drift" + ), "The result name should be general_drift" + assert ( + res.status == ResultStatusApp.potential_detection + ), "Expected potential detection in the general drift" + elif isinstance( + res, + mlrun.model_monitoring.applications.ModelMonitoringApplicationMetric, + ): + metrics.append(res) + assert len(metrics) == 3, "Expected three metrics" + + # Check the artifacts + assert project._artifact_manager.artifact_uris.keys() == { + "features_drift_results", + "drift_table_plot", + }, "The artifacts in the artifact manager are different than expected" + assert {f.name for f in Path(project.artifact_path).glob("*")} == { + "drift_table_plot.html", + "features_drift_results.json", + }, "The artifact files were not found or are different than expected" + + +class TestMetricsPerFeature: + @staticmethod + @pytest.fixture + def monitoring_context( + logger: mlrun.utils.Logger, + ) -> mm_context.MonitoringApplicationContext: + ctx = Mock() + + def dict_to_histogram(df: pd.DataFrame) -> pd.DataFrame: + return df + + ctx.dict_to_histogram = dict_to_histogram + ctx.logger = logger + return ctx + + @staticmethod + @pytest.mark.parametrize( + ("sample_df_stats", "feature_stats"), + [ + pytest.param(pd.DataFrame(), pd.DataFrame(), id="empty-dfs"), + pytest.param( + pd.read_csv(assets_folder / "sample_df_stats.csv", index_col=0), + pd.read_csv(assets_folder / "feature_stats.csv", index_col=0), + id="real-world-csv-dfs", + ), + ], + ) + def test_compute_metrics_per_feature( + application: HistogramDataDriftApplication, + monitoring_context: Mock, + sample_df_stats: pd.DataFrame, + feature_stats: pd.DataFrame, + ) -> None: + monitoring_context.sample_df_stats = sample_df_stats + monitoring_context.feature_stats = feature_stats + + metrics_per_feature = application._compute_metrics_per_feature( + monitoring_context=monitoring_context + ) + assert set(metrics_per_feature.columns) == { + metric.NAME for metric in application.metrics + }, 
"Different metrics than expected" + assert set(metrics_per_feature.index) == set( + feature_stats.columns + ), "The features are different than expected" From 608112c442c8bcaf87626251ac454d17f1ea986f Mon Sep 17 00:00:00 2001 From: iguazio-cicd Date: Sun, 9 Nov 2025 09:12:43 +0000 Subject: [PATCH 06/17] chore(readme): auto-update asset tables [skip ci] --- modules/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/README.md b/modules/README.md index a14dbf5bb..05e7cfefd 100644 --- a/modules/README.md +++ b/modules/README.md @@ -7,4 +7,5 @@ | Name | Description | Kind | Categories | | --- | --- | --- | --- | | [count_events](/home/runner/work/functions/functions/modules/src/count_events) | Count events in each time window | monitoring_application | model-serving | +| [histogram_data_drift](/home/runner/work/functions/functions/modules/src/histogram_data_drift) | Model-monitoring application for detecting and visualizing data drift | monitoring_application | model-serving, structured-ML | From c56ef485ff021f658c7b6b2384bd6d5ff9f2246e Mon Sep 17 00:00:00 2001 From: Daniel Perez <100069700+danielperezz@users.noreply.github.com> Date: Sun, 9 Nov 2025 19:54:20 +0200 Subject: [PATCH 07/17] Fill histogram-data-drift example notebook (#912) * fill data-drift nb * post review fixes --- .../histogram_data_drift.ipynb | 292 +++++++++++++++++- 1 file changed, 285 insertions(+), 7 deletions(-) diff --git a/modules/src/histogram_data_drift/histogram_data_drift.ipynb b/modules/src/histogram_data_drift/histogram_data_drift.ipynb index 54a15016a..eceb28ca3 100644 --- a/modules/src/histogram_data_drift/histogram_data_drift.ipynb +++ b/modules/src/histogram_data_drift/histogram_data_drift.ipynb @@ -1,29 +1,307 @@ { "cells": [ { + "cell_type": "markdown", + "id": "283b6000-4acd-4eb3-bf51-25ee79e9e5dc", + "metadata": {}, + "source": [ + "# Histogram Data Drift Demo\n", + "The Histogram Data Drift monitoring app is MLRun’s default data drift application for model 
monitoring. It’s considered a built-in app within the model monitoring flow and is deployed by default when model monitoring is enabled for a project. For more information, see the [MLRun documentation](https://docs.mlrun.org/en/latest/model-monitoring/index.html#model-monitoring-applications).\n", + "\n", + "This notebook walks through a simple example of using this app from the hub to monitor data drift between a baseline dataset and a new dataset, using the `evaluate()` method." + ] + }, + { + "cell_type": "markdown", + "id": "da432405-e8bb-400c-b1e0-45e31b0571f1", + "metadata": {}, + "source": [ + "## Set up a project and prepare the data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "62fcc7a4-4df5-4f2e-bd97-6aa831bbf958", + "metadata": {}, + "outputs": [], + "source": [ + "import mlrun\n", + "project = mlrun.get_or_create_project(\"histogram-data-drift-demo\",'./histogram-data-drift-demo')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d7ec1628-0303-4bbb-ba34-5cd96eaef304", + "metadata": {}, + "outputs": [], + "source": [ + "sample_data = mlrun.get_sample_path(\"data/batch-predict/training_set.parquet\")\n", + "reference_data = mlrun.get_sample_path(\"data/batch-predict/prediction_set.parquet\")" + ] + }, + { + "cell_type": "markdown", + "id": "072f1411-33a2-444e-88bf-76d9394d7877", + "metadata": {}, + "source": [ + "## Get the module from the hub and edit its defaults" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5c04dec9-ea6e-410e-a36d-42a71a223caa", + "metadata": {}, + "outputs": [], + "source": [ + "hub_mod = mlrun.get_hub_module(\"hub://histogram_data_drift\", download_files=True)\n", + "src_file_path = hub_mod.get_module_file_path()" + ] + }, + { + "cell_type": "markdown", + "id": "ce26e487-bfe5-442c-9d5a-04a8d75407a6", + "metadata": {}, + "source": [ + "Since the histogram data drift application doesn’t produce artifacts by default, we need to modify the class defaults. 
This can be done in one of two ways: either by editing the downloaded source file directly and then evaluating with the standard class, or - as we’ll do now - by adding an inheriting class to the same file and evaluating using that new class." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "055a31d8-00fd-4f55-b07c-1169db6af919", + "metadata": {}, + "outputs": [], + "source": [ + "# add a declaration of an inheriting class to change the default parameters\n", + "wrapper_code = \"\"\"\n", + "class HistogramDataDriftApplicationWithArtifacts(HistogramDataDriftApplication):\n", + " # The same histogram application but with artifacts\n", + "\n", + " def __init__(self) -> None:\n", + " super().__init__(produce_json_artifact=True, produce_plotly_artifact=True)\n", + "\"\"\"\n", + "with open(src_file_path, \"a\") as f:\n", + " f.write(wrapper_code)" + ] + }, + { + "cell_type": "markdown", + "id": "c17b176b-f838-472f-aaeb-7cedaeb66b56", + "metadata": {}, + "source": [ + "Now we can actually import it as a module, using the `module()` method" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6f57d3c9-9e7e-4fde-b78b-2daf799893e1", + "metadata": {}, + "outputs": [], + "source": [ + "app_module = hub_mod.module()\n", + "hist_app = app_module.HistogramDataDriftApplicationWithArtifacts # or the standard class if you chose to modify its code" + ] + }, + { + "cell_type": "markdown", + "id": "a017bc5a-4935-456b-8648-57c11e11df27", + "metadata": {}, + "source": [ + "And we are ready to call `evaluate()` (notice that the run is linked to the current (active) project)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c20fc990-d0e6-4aab-a576-29cea322bfb5", "metadata": {}, + "outputs": [], + "source": [ + "run_result = hist_app.evaluate(\n", + " func_path=hub_mod.get_module_file_path(),\n", + " sample_data=sample_data,\n", + " reference_data=reference_data,\n", + " run_local=True\n", + ")" + ] + }, + { + "cell_type": "markdown", 
+ "id": "661cdf4d-ee2a-4156-8a71-59f2a1e3b9eb", + "metadata": {}, + "source": [ + "## Examine the results" + ] + }, + { "cell_type": "markdown", - "source": "# Histogram Data Drift Demo", - "id": "2517d91b275da01d" + "id": "e715b6aa-75c0-4352-b98f-bd5a790e1d06", + "metadata": {}, + "source": [ + "First, we'll print the average results:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3688d6a0-6cae-4141-8851-dfd12842c484", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hellinger_mean : 0.34211088243167637\n", + "kld_mean : 2.2839485090490426\n", + "tvd_mean : 0.30536\n", + "general_drift : 0.3237354412158382\n" + ] + } + ], + "source": [ + "for i in range (3):\n", + " metric = run_result.status.results[\"return\"][i]\n", + " print(metric[\"metric_name\"], \": \", metric[\"metric_value\"])\n", + "result = run_result.status.results[\"return\"][3]\n", + "print(result[\"result_name\"], \": \", result[\"result_value\"])" + ] + }, + { + "cell_type": "markdown", + "id": "0422ca13-661b-4574-ad51-d1665be6acdb", + "metadata": {}, + "source": [ + "And we can also examine these metrics per feature, along with other metrics, using the artifacts the app generated for us.\n", + "\n", + "The rightmost column indicates whether the feature has drifted or not. The drift decision rule is based on the per-feature mean of the Total Variance Distance (TVD) and Hellinger distance scores. In the histogram-data-drift application, the \"Drift detected\" threshold is 0.7 and the \"Drift suspected\" threshold is 0.5." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d9e7e688-6a71-4b9b-8b99-b2d7f42077e0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# The artifact is logged with the run's name\n", + "artifact_key = f\"{run_result.metadata.name}_drift_table_plot\"\n", + "artifact = project.get_artifact(artifact_key)\n", + "artifact.to_dataitem().show()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f8a17a07-6cc4-4bf3-abd8-187042b1973a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Drift value per feature:\n" + ] + }, + { + "data": { + "application/json": { + "feature_0": 0.034754757, + "feature_1": 0.0409220715, + "feature_10": 0.0529929347, + "feature_11": 0.7582778852, + "feature_12": 0.7680105477, + "feature_13": 0.0359189896, + "feature_14": 0.0388433161, + "feature_15": 0.6959895187, + "feature_16": 0.7682657628, + "feature_17": 0.0381781891, + "feature_18": 0.032682812, + "feature_19": 0.7400673333, + "feature_2": 0.7365591239, + "feature_3": 0.0492651761, + "feature_4": 0.0373909913, + "feature_5": 0.0374548709, + "feature_6": 0.7788618285, + "feature_7": 0.7443223594, + "feature_8": 0.0381141123, + "feature_9": 0.0478362439 + }, + "text/plain": [ + "" + ] + }, + "metadata": { + "application/json": { + "expanded": false, + "root": "root" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "print(\"Drift value per feature:\")\n", + "artifact_key = f\"{run_result.metadata.name}_features_drift_results\"\n", + "artifact = project.get_artifact(artifact_key)\n", + "artifact.to_dataitem().show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a8767ca-8a65-4841-9ced-4f36e86bb789", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "mlrun-base-py311", "language": "python", - "name": "python3" + "name": "conda-env-mlrun-base-py311-py" }, "language_info": { "codemirror_mode": { "name": 
"ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.11.12" } }, "nbformat": 4, From 9884e8556f348d5b9ea39ef7942c7667af599598 Mon Sep 17 00:00:00 2001 From: Daniel Perez <100069700+danielperezz@users.noreply.github.com> Date: Tue, 11 Nov 2025 15:26:32 +0200 Subject: [PATCH 08/17] Add evidently demo app monitoring application module (without example) (#913) * sphinx build docs bug fix * add evidently demo app module (empty example notebook) * post review changes --- cli/marketplace/conf.template | 10 +- modules/src/evidently/evidently_iris.ipynb | 37 ++++++ modules/src/evidently/evidently_iris.py | 117 +++++++++++++++++++ modules/src/evidently/item.yaml | 21 ++++ modules/src/evidently/requirements.txt | 3 + modules/src/evidently/test_evidently_iris.py | 72 ++++++++++++ 6 files changed, 258 insertions(+), 2 deletions(-) create mode 100644 modules/src/evidently/evidently_iris.ipynb create mode 100644 modules/src/evidently/evidently_iris.py create mode 100644 modules/src/evidently/item.yaml create mode 100644 modules/src/evidently/requirements.txt create mode 100644 modules/src/evidently/test_evidently_iris.py diff --git a/cli/marketplace/conf.template b/cli/marketplace/conf.template index 93c83c9d3..e26f065aa 100644 --- a/cli/marketplace/conf.template +++ b/cli/marketplace/conf.template @@ -15,8 +15,14 @@ import re import sys import os -sys.path.insert(0, "{{sphinx_docs_target}}") -sys.path.insert(0, os.path.abspath(os.path.join("{{sphinx_docs_target}}", "../functions"))) +import pathlib + +DOCS_DIR = pathlib.Path(__file__).resolve().parent +REPO_ROOT = DOCS_DIR.parent + +# Add both source trees +sys.path.insert(0, str(REPO_ROOT / "functions")) +sys.path.insert(0, str(REPO_ROOT / "modules")) # -- Project information ----------------------------------------------------- diff 
--git a/modules/src/evidently/evidently_iris.ipynb b/modules/src/evidently/evidently_iris.ipynb new file mode 100644 index 000000000..54f657bb0 --- /dev/null +++ b/modules/src/evidently/evidently_iris.ipynb @@ -0,0 +1,37 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/modules/src/evidently/evidently_iris.py b/modules/src/evidently/evidently_iris.py new file mode 100644 index 000000000..e7a9f3ef9 --- /dev/null +++ b/modules/src/evidently/evidently_iris.py @@ -0,0 +1,117 @@ +# Copyright 2025 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Optional + +import pandas as pd +from sklearn.datasets import load_iris + +import mlrun.model_monitoring.applications.context as mm_context +from mlrun.common.schemas.model_monitoring.constants import ( + ResultKindApp, + ResultStatusApp, +) +from mlrun.feature_store.api import norm_column_name +from mlrun.model_monitoring.applications import ModelMonitoringApplicationResult +from mlrun.model_monitoring.applications.evidently import EvidentlyModelMonitoringApplicationBase + +from evidently.core.report import Report, Snapshot +from evidently.metrics import DatasetMissingValueCount, ValueDrift +from evidently.presets import DataDriftPreset, DataSummaryPreset +from evidently.ui.workspace import ( + STR_UUID, + OrgID, +) + +_PROJECT_NAME = "Iris Monitoring" +_PROJECT_DESCRIPTION = "Test project using iris dataset" + + +class EvidentlyIrisMonitoringApp(EvidentlyModelMonitoringApplicationBase): + """ + This model monitoring application is a simple example of integrating MLRun with Evidently for data monitoring, + which you can adapt to fit your own project needs or use as a reference implementation. 
+ """ + NAME = "Evidently-App-Test" + + def __init__( + self, + evidently_project_id: Optional["STR_UUID"] = None, + evidently_workspace_path: Optional[str] = None, + cloud_workspace: bool = False, + evidently_organization_id: Optional["OrgID"] = None, + ) -> None: + self.org_id = evidently_organization_id + self._init_iris_data() + super().__init__( + evidently_project_id=evidently_project_id, + evidently_workspace_path=evidently_workspace_path, + cloud_workspace=cloud_workspace, + ) + + def _init_iris_data(self) -> None: + iris = load_iris() + self.columns = [norm_column_name(col) for col in iris.feature_names] + self.train_set = pd.DataFrame(iris.data, columns=self.columns) + + def do_tracking( + self, monitoring_context: mm_context.MonitoringApplicationContext + ) -> ModelMonitoringApplicationResult: + monitoring_context.logger.info("Running evidently app") + + sample_df = monitoring_context.sample_df[self.columns] + + data_drift_report_run = self.create_report_run( + sample_df, monitoring_context.end_infer_time + ) + self.evidently_workspace.add_run( + self.evidently_project_id, data_drift_report_run + ) + + self.log_evidently_object( + monitoring_context, data_drift_report_run, "evidently_report" + ) + monitoring_context.logger.info("Logged evidently object") + + return ModelMonitoringApplicationResult( + name="data_drift_test", + value=0.5, + kind=ResultKindApp.data_drift, + status=ResultStatusApp.potential_detection, + ) + + def create_report_run( + self, sample_df: pd.DataFrame, schedule_time: pd.Timestamp + ) -> "Snapshot": + metrics = [ + DataDriftPreset(), + DatasetMissingValueCount(), + DataSummaryPreset(), + ] + metrics.extend( + [ + ValueDrift(column=col_name, method="wasserstein") + for col_name in self.columns + ] + ) + + data_drift_report = Report( + metrics=metrics, + metadata={"timestamp": str(schedule_time)}, + include_tests=True, + ) + + return data_drift_report.run( + current_data=sample_df, reference_data=self.train_set + ) diff --git 
a/modules/src/evidently/item.yaml b/modules/src/evidently/item.yaml new file mode 100644 index 000000000..c6a2abc2c --- /dev/null +++ b/modules/src/evidently/item.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +categories: +- model-serving +- structured-ML +description: Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset +example: evidently_iris.ipynb +generationDate: 2025-11-09 +hidden: false +labels: + author: Iguazio +mlrunVersion: 1.10.0-rc41 +name: evidently_iris +spec: + filename: evidently_iris.py + image: mlrun/mlrun + kind: monitoring_application + requirements: + - scikit-learn~=1.5.2 + - evidently~=0.7.6 + - pandas +version: 1.0.0 \ No newline at end of file diff --git a/modules/src/evidently/requirements.txt b/modules/src/evidently/requirements.txt new file mode 100644 index 000000000..bd4abb36f --- /dev/null +++ b/modules/src/evidently/requirements.txt @@ -0,0 +1,3 @@ +scikit-learn~=1.5.2 +evidently~=0.7.6 +pandas \ No newline at end of file diff --git a/modules/src/evidently/test_evidently_iris.py b/modules/src/evidently/test_evidently_iris.py new file mode 100644 index 000000000..6488768fd --- /dev/null +++ b/modules/src/evidently/test_evidently_iris.py @@ -0,0 +1,72 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import warnings +from contextlib import AbstractContextManager +from contextlib import nullcontext as does_not_raise +from pathlib import Path +from uuid import uuid4 + +import pytest +import semver + +from mlrun.errors import MLRunIncompatibleVersionError +from mlrun.model_monitoring.applications.evidently.base import ( + _check_evidently_version, +) + +from evidently_iris import EvidentlyIrisMonitoringApp + + +@pytest.mark.parametrize( + ("cur", "ref", "expectation"), + [ + ("0.4.11", "0.4.11", does_not_raise()), + ("0.4.12", "0.4.11", does_not_raise()), + ("1.23.0", "1.1.32", does_not_raise()), + ("0.4.11", "0.4.12", pytest.raises(MLRunIncompatibleVersionError)), + ("0.4.11", "0.4.12", pytest.raises(MLRunIncompatibleVersionError)), + ("1.0.3", "0.9.9", pytest.raises(MLRunIncompatibleVersionError)), + ("0.6.0", "0.3.0", pytest.warns(UserWarning)), + pytest.param("0.6.0", "0.3.0", does_not_raise(), marks=pytest.mark.xfail), + ], +) +def test_version_check( + cur: str, + ref: str, + expectation: AbstractContextManager, +) -> None: + with warnings.catch_warnings(): + warnings.simplefilter("error") + with expectation: + _check_evidently_version( + cur=semver.Version.parse(cur), ref=semver.Version.parse(ref) + ) + + +def test_demo_evidently_app(tmpdir: Path) -> None: + """Test that the workspace and the project's dashboards are created""" + evidently_app = EvidentlyIrisMonitoringApp( + evidently_project_id=uuid4(), evidently_workspace_path=str(tmpdir) + ) + run = evidently_app.create_report_run( + sample_df=evidently_app.train_set, schedule_time=None + ) + added_run_uid = evidently_app.evidently_workspace.add_run( + project_id=evidently_app.evidently_project_id, + run=run, + ).id + assert evidently_app.evidently_workspace.list_runs( + project_id=evidently_app.evidently_project_id + ) == [added_run_uid], "Different project runs than expected" From 659b7910f1100a1807d2878a2e1d602d87001e72 Mon Sep 17 00:00:00 2001 From: iguazio-cicd Date: Tue, 11 Nov 2025 13:29:54 
+0000 Subject: [PATCH 09/17] chore(readme): auto-update asset tables [skip ci] --- modules/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/README.md b/modules/README.md index 05e7cfefd..f49c8472f 100644 --- a/modules/README.md +++ b/modules/README.md @@ -7,5 +7,6 @@ | Name | Description | Kind | Categories | | --- | --- | --- | --- | | [count_events](/home/runner/work/functions/functions/modules/src/count_events) | Count events in each time window | monitoring_application | model-serving | +| [evidently](/home/runner/work/functions/functions/modules/src/evidently) | Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset | monitoring_application | model-serving, structured-ML | | [histogram_data_drift](/home/runner/work/functions/functions/modules/src/histogram_data_drift) | Model-monitoring application for detecting and visualizing data drift | monitoring_application | model-serving, structured-ML | From ce1999315d5e1fb1e2e4dc317ed0cec0849fab2e Mon Sep 17 00:00:00 2001 From: Daniel Perez <100069700+danielperezz@users.noreply.github.com> Date: Sun, 16 Nov 2025 12:59:17 +0200 Subject: [PATCH 10/17] [Translate] Require torch>=2.6 for the translate function to work properly (#915) * lock torch valid version * edit the item.yaml and generated function.yaml * update mlrun version --- functions/src/translate/function.yaml | 41 ++++++++++++------------ functions/src/translate/item.yaml | 6 ++-- functions/src/translate/requirements.txt | 2 +- 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/functions/src/translate/function.yaml b/functions/src/translate/function.yaml index 9595b77a3..eb1ffd345 100644 --- a/functions/src/translate/function.yaml +++ b/functions/src/translate/function.yaml @@ -1,4 +1,8 @@ +verbose: false spec: + description: Translate text files from one language to another + filename: /Users/Daniel_Perez/PycharmProjects/functions/functions/src/translate/translate.py + 
command: '' entry_points: open_mpi_handler: lineno: 56 @@ -8,24 +12,24 @@ spec: - name: root_worker_inputs type: Dict[str, Any] default: null - name: open_mpi_handler - has_kwargs: false doc: '' + has_kwargs: false has_varargs: false + name: open_mpi_handler decorator: lineno: 68 parameters: - name: handler - name: decorator - has_kwargs: false doc: '' + has_kwargs: false has_varargs: false + name: decorator wrapper: lineno: 73 - name: wrapper - has_kwargs: true doc: '' + has_kwargs: true has_varargs: false + name: wrapper translate: outputs: - doc: 'A tuple of:' @@ -75,8 +79,6 @@ spec: type: bool doc: 'Whether to present logs of a progress bar and errors. Default: True.' default: false - name: translate - has_kwargs: false doc: 'Translate text files using a transformer model from Huggingface''s hub according to the source and target languages @@ -89,27 +91,26 @@ spec: * text_file - The text file path. * translation_file - The translation text file name in the output directory.' + has_kwargs: false has_varargs: false + name: translate + disable_auto_mount: false + image: '' + default_handler: translate build: + functionSourceCode:  + origin_filename: '' + base_image: mlrun/mlrun requirements: - transformers - sentencepiece - - torch + - torch>=2.6 - tqdm code_origin: '' - functionSourceCode:  - base_image: mlrun/mlrun - origin_filename: '' - image: '' - default_handler: translate - disable_auto_mount: false - command: '' - description: Translate text files from one language to another -verbose: false +kind: job metadata: + tag: '' categories: - genai - NLP - tag: '' name: translate -kind: job diff --git a/functions/src/translate/item.yaml b/functions/src/translate/item.yaml index eb0e821e4..68f176ac2 100644 --- a/functions/src/translate/item.yaml +++ b/functions/src/translate/item.yaml @@ -12,7 +12,7 @@ labels: author: Iguazio maintainers: [] marketplaceType: '' -mlrunVersion: 1.7.0 +mlrunVersion: 1.10.0-rc41 name: translate platformVersion: 3.5.3 spec: @@ -23,8 
+23,8 @@ spec: requirements: - transformers - sentencepiece - - torch + - torch>=2.6 - tqdm url: '' -version: 0.2.0 +version: 0.3.0 test_valid: True diff --git a/functions/src/translate/requirements.txt b/functions/src/translate/requirements.txt index 94e548463..746da576c 100644 --- a/functions/src/translate/requirements.txt +++ b/functions/src/translate/requirements.txt @@ -1,4 +1,4 @@ transformers tqdm -torch +torch>=2.6 sentencepiece \ No newline at end of file From f2ec9318edb32abf60aa4492f0b613fed7a93ddd Mon Sep 17 00:00:00 2001 From: Daniel Perez <100069700+danielperezz@users.noreply.github.com> Date: Mon, 17 Nov 2025 14:27:25 +0200 Subject: [PATCH 11/17] [CLI] Generated READMEs are produced with broken links to the items (#918) * fix * test fix * test fix * test fix * test fix * final workflow --- .github/workflows/test-all.yaml | 8 +++++++- cli/README.md | 4 ++-- cli/common/update_readme.py | 17 ++++++++++------- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test-all.yaml b/.github/workflows/test-all.yaml index 162804863..d8eb6c6ed 100644 --- a/.github/workflows/test-all.yaml +++ b/.github/workflows/test-all.yaml @@ -116,6 +116,10 @@ jobs: permissions: contents: write steps: + - name: Get the current branch name + shell: bash + run: echo "branch=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT + id: branch - uses: actions/checkout@v4 with: fetch-depth: 0 @@ -128,7 +132,9 @@ jobs: pip install --upgrade pip pip install -r requirements.txt - name: Regenerate README tables - run: python -m cli.cli update-readme --asset functions --asset modules + env: + CHANNEL: ${{ steps.branch.outputs.branch }} + run: python -m cli.cli update-readme -c $CHANNEL --asset functions --asset modules - name: Commit & push (if changed) env: USERNAME: ${{ secrets.USERNAME }} diff --git a/cli/README.md b/cli/README.md index 4a3cd3bfc..31443b132 100644 --- a/cli/README.md +++ b/cli/README.md @@ -60,7 +60,7 @@ Example: Regenerate the `README.md` files in 
each of the asset directories (functions/modules). Usage: - `python -m cli.cli update-readme --asset TYPE` + `python -m cli.cli update-readme -c CHANNEL --asset TYPE` Example: - `python -m cli.cli update-readme --asset functions --asset modules` \ No newline at end of file + `python -m cli.cli update-readme -c master --asset functions --asset modules` \ No newline at end of file diff --git a/cli/common/update_readme.py b/cli/common/update_readme.py index 6bcab8d33..89b6aa094 100644 --- a/cli/common/update_readme.py +++ b/cli/common/update_readme.py @@ -25,6 +25,7 @@ COLUMNS = ("Name", "Description", "Kind", "Categories") @click.command("update-readme") +@click.option("-c", "--channel", default="master", help="Name of build channel") @click.option( "--asset", multiple=True, @@ -34,7 +35,7 @@ ) @click.option("--check", is_flag=True, help="Do not write; exit non‑zero if README(s) would change.") -def update_readme(asset: Iterable[str], +def update_readme(channel: str, asset: Iterable[str], check: bool) -> None: """ Regenerate the README tables for asset types from their item.yaml files. 
@@ -50,7 +51,7 @@ def update_readme(asset: Iterable[str], root = Path(".").resolve() asset_dir = root / t readme = asset_dir / "README.md" - rows = _rows_for_asset_type(asset_dir) + rows = _rows_for_asset_type(channel, asset_dir) table_md = _build_table_md(rows) old = readme.read_text() if readme.exists() else f"# {t.title()}\n\n" new = _replace_block(old, table_md) @@ -58,7 +59,7 @@ def update_readme(asset: Iterable[str], changed_any = True touched.append(str(readme)) else: - if _update_one(t): + if _update_one(channel, t): changed_any = True touched.append(str((Path(t) / "README.md").as_posix())) @@ -78,7 +79,7 @@ def update_readme(asset: Iterable[str], click.echo("No README changes.") -def _rows_for_asset_type(asset_dir: Path) -> List[Tuple[str, str, str, str]]: +def _rows_for_asset_type(channel: str, asset_dir: Path) -> List[Tuple[str, str, str, str]]: """Scan /src/*/item.yaml and return table rows.""" src = asset_dir / "src" if not src.exists(): @@ -97,7 +98,9 @@ def _rows_for_asset_type(asset_dir: Path) -> List[Tuple[str, str, str, str]]: cats = data.get("categories") or [] cats_str = ", ".join(c.strip() for c in cats) if isinstance(cats, list) else str(cats).strip() # Link the name to its source directory - link = f"[{asset_name}]({(asset_dir / 'src' / asset_name).as_posix()})" + # Construct the relative path from the repo root for the asset + rel_path = asset_dir.relative_to(Path(".").resolve()) + link = f"[{asset_name}](https://github.com/mlrun/functions/tree/{channel}/{rel_path}/src/{asset_name})" rows.append((link, desc, kind, cats_str)) rows.sort(key=lambda r: r[0].lower()) @@ -140,13 +143,13 @@ def _replace_block(readme_text: str, new_block: str) -> str: return readme_text[:start_close] + "\n" + new_block + "\n" + readme_text[ei:] -def _update_one(asset_type: str) -> bool: +def _update_one(channel: str, asset_type: str) -> bool: """Generate/replace the table in /README.md. 
Return True if changed.""" root = Path(".").resolve() asset_dir = root / asset_type readme = asset_dir / "README.md" - rows = _rows_for_asset_type(asset_dir) + rows = _rows_for_asset_type(channel, asset_dir) table_md = _build_table_md(rows) old = readme.read_text() if readme.exists() else f"# {asset_type.title()}\n\n" new = _replace_block(old, table_md) From 5c013ba18d25e6a840874575ac4aa71212e16397 Mon Sep 17 00:00:00 2001 From: iguazio-cicd Date: Mon, 17 Nov 2025 12:31:34 +0000 Subject: [PATCH 12/17] chore(readme): auto-update asset tables [skip ci] --- functions/README.md | 72 ++++++++++++++++++++++----------------------- modules/README.md | 6 ++-- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/functions/README.md b/functions/README.md index 08b1c7ad9..3618833a5 100644 --- a/functions/README.md +++ b/functions/README.md @@ -9,40 +9,40 @@ it is expected that contributors follow certain guidelines/protocols (please chi | Name | Description | Kind | Categories | | --- | --- | --- | --- | -| [aggregate](/home/runner/work/functions/functions/functions/src/aggregate) | Rolling aggregation over Metrics and Lables according to specifications | job | data-preparation | -| [arc_to_parquet](/home/runner/work/functions/functions/functions/src/arc_to_parquet) | retrieve remote archive, open and save as parquet | job | utils | -| [auto_trainer](/home/runner/work/functions/functions/functions/src/auto_trainer) | Automatic train, evaluate and predict functions for the ML frameworks - Scikit-Learn, XGBoost and LightGBM. | job | machine-learning, model-training | -| [azureml_serving](/home/runner/work/functions/functions/functions/src/azureml_serving) | AzureML serving function | serving | machine-learning, model-serving | -| [azureml_utils](/home/runner/work/functions/functions/functions/src/azureml_utils) | Azure AutoML integration in MLRun, including utils functions for training models on Azure AutoML platfrom. 
| job | model-serving, utils | -| [batch_inference](/home/runner/work/functions/functions/functions/src/batch_inference) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. | job | model-serving | -| [batch_inference_v2](/home/runner/work/functions/functions/functions/src/batch_inference_v2) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. | job | model-serving | -| [describe](/home/runner/work/functions/functions/functions/src/describe) | describe and visualizes dataset stats | job | data-analysis | -| [describe_dask](/home/runner/work/functions/functions/functions/src/describe_dask) | describe and visualizes dataset stats | job | data-analysis | -| [describe_spark](/home/runner/work/functions/functions/functions/src/describe_spark) | | job | data-analysis | -| [feature_selection](/home/runner/work/functions/functions/functions/src/feature_selection) | Select features through multiple Statistical and Model filters | job | data-preparation, machine-learning | -| [gen_class_data](/home/runner/work/functions/functions/functions/src/gen_class_data) | Create a binary classification sample dataset and save. | job | data-generation | -| [github_utils](/home/runner/work/functions/functions/functions/src/github_utils) | add comments to github pull request | job | utils | -| [hugging_face_serving](/home/runner/work/functions/functions/functions/src/hugging_face_serving) | Generic Hugging Face model server. | serving | genai, model-serving | -| [load_dataset](/home/runner/work/functions/functions/functions/src/load_dataset) | load a toy dataset from scikit-learn | job | data-preparation | -| [mlflow_utils](/home/runner/work/functions/functions/functions/src/mlflow_utils) | Mlflow model server, and additional utils. 
| serving | model-serving, utils | -| [model_server](/home/runner/work/functions/functions/functions/src/model_server) | generic sklearn model server | nuclio:serving | model-serving, machine-learning | -| [model_server_tester](/home/runner/work/functions/functions/functions/src/model_server_tester) | test model servers | job | monitoring, model-serving | -| [noise_reduction](/home/runner/work/functions/functions/functions/src/noise_reduction) | Reduce noise from audio files | job | data-preparation, audio | -| [onnx_utils](/home/runner/work/functions/functions/functions/src/onnx_utils) | ONNX intigration in MLRun, some utils functions for the ONNX framework, optimizing and converting models from different framework to ONNX using MLRun. | job | utils, deep-learning | -| [open_archive](/home/runner/work/functions/functions/functions/src/open_archive) | Open a file/object archive into a target directory | job | utils | -| [pii_recognizer](/home/runner/work/functions/functions/functions/src/pii_recognizer) | This function is used to recognize PII in a directory of text files | job | data-preparation, NLP | -| [pyannote_audio](/home/runner/work/functions/functions/functions/src/pyannote_audio) | pyannote's speech diarization of audio files | job | deep-learning, audio | -| [question_answering](/home/runner/work/functions/functions/functions/src/question_answering) | GenAI approach of question answering on a given data | job | genai | -| [send_email](/home/runner/work/functions/functions/functions/src/send_email) | Send Email messages through SMTP server | job | utils | -| [silero_vad](/home/runner/work/functions/functions/functions/src/silero_vad) | Silero VAD (Voice Activity Detection) functions. 
| job | deep-learning, audio | -| [sklearn_classifier](/home/runner/work/functions/functions/functions/src/sklearn_classifier) | train any classifier using scikit-learn's API | job | machine-learning, model-training | -| [sklearn_classifier_dask](/home/runner/work/functions/functions/functions/src/sklearn_classifier_dask) | train any classifier using scikit-learn's API over Dask | job | machine-learning, model-training | -| [structured_data_generator](/home/runner/work/functions/functions/functions/src/structured_data_generator) | GenAI approach of generating structured data according to a given schema | job | data-generation, genai | -| [test_classifier](/home/runner/work/functions/functions/functions/src/test_classifier) | test a classifier using held-out or new data | job | machine-learning, model-testing | -| [text_to_audio_generator](/home/runner/work/functions/functions/functions/src/text_to_audio_generator) | Generate audio file from text using different speakers | job | data-generation, audio | -| [tf2_serving](/home/runner/work/functions/functions/functions/src/tf2_serving) | tf2 image classification server | nuclio:serving | model-serving, machine-learning | -| [transcribe](/home/runner/work/functions/functions/functions/src/transcribe) | Transcribe audio files into text files | job | audio, genai | -| [translate](/home/runner/work/functions/functions/functions/src/translate) | Translate text files from one language to another | job | genai, NLP | -| [v2_model_server](/home/runner/work/functions/functions/functions/src/v2_model_server) | generic sklearn model server | serving | model-serving, machine-learning | -| [v2_model_tester](/home/runner/work/functions/functions/functions/src/v2_model_tester) | test v2 model servers | job | model-testing, machine-learning | +| [aggregate](https://github.com/mlrun/functions/tree/development/functions/src/aggregate) | Rolling aggregation over Metrics and Lables according to specifications | job | data-preparation | 
+| [arc_to_parquet](https://github.com/mlrun/functions/tree/development/functions/src/arc_to_parquet) | retrieve remote archive, open and save as parquet | job | utils | +| [auto_trainer](https://github.com/mlrun/functions/tree/development/functions/src/auto_trainer) | Automatic train, evaluate and predict functions for the ML frameworks - Scikit-Learn, XGBoost and LightGBM. | job | machine-learning, model-training | +| [azureml_serving](https://github.com/mlrun/functions/tree/development/functions/src/azureml_serving) | AzureML serving function | serving | machine-learning, model-serving | +| [azureml_utils](https://github.com/mlrun/functions/tree/development/functions/src/azureml_utils) | Azure AutoML integration in MLRun, including utils functions for training models on Azure AutoML platfrom. | job | model-serving, utils | +| [batch_inference](https://github.com/mlrun/functions/tree/development/functions/src/batch_inference) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. | job | model-serving | +| [batch_inference_v2](https://github.com/mlrun/functions/tree/development/functions/src/batch_inference_v2) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. 
| job | model-serving | +| [describe](https://github.com/mlrun/functions/tree/development/functions/src/describe) | describe and visualizes dataset stats | job | data-analysis | +| [describe_dask](https://github.com/mlrun/functions/tree/development/functions/src/describe_dask) | describe and visualizes dataset stats | job | data-analysis | +| [describe_spark](https://github.com/mlrun/functions/tree/development/functions/src/describe_spark) | | job | data-analysis | +| [feature_selection](https://github.com/mlrun/functions/tree/development/functions/src/feature_selection) | Select features through multiple Statistical and Model filters | job | data-preparation, machine-learning | +| [gen_class_data](https://github.com/mlrun/functions/tree/development/functions/src/gen_class_data) | Create a binary classification sample dataset and save. | job | data-generation | +| [github_utils](https://github.com/mlrun/functions/tree/development/functions/src/github_utils) | add comments to github pull request | job | utils | +| [hugging_face_serving](https://github.com/mlrun/functions/tree/development/functions/src/hugging_face_serving) | Generic Hugging Face model server. | serving | genai, model-serving | +| [load_dataset](https://github.com/mlrun/functions/tree/development/functions/src/load_dataset) | load a toy dataset from scikit-learn | job | data-preparation | +| [mlflow_utils](https://github.com/mlrun/functions/tree/development/functions/src/mlflow_utils) | Mlflow model server, and additional utils. 
| serving | model-serving, utils | +| [model_server](https://github.com/mlrun/functions/tree/development/functions/src/model_server) | generic sklearn model server | nuclio:serving | model-serving, machine-learning | +| [model_server_tester](https://github.com/mlrun/functions/tree/development/functions/src/model_server_tester) | test model servers | job | monitoring, model-serving | +| [noise_reduction](https://github.com/mlrun/functions/tree/development/functions/src/noise_reduction) | Reduce noise from audio files | job | data-preparation, audio | +| [onnx_utils](https://github.com/mlrun/functions/tree/development/functions/src/onnx_utils) | ONNX intigration in MLRun, some utils functions for the ONNX framework, optimizing and converting models from different framework to ONNX using MLRun. | job | utils, deep-learning | +| [open_archive](https://github.com/mlrun/functions/tree/development/functions/src/open_archive) | Open a file/object archive into a target directory | job | utils | +| [pii_recognizer](https://github.com/mlrun/functions/tree/development/functions/src/pii_recognizer) | This function is used to recognize PII in a directory of text files | job | data-preparation, NLP | +| [pyannote_audio](https://github.com/mlrun/functions/tree/development/functions/src/pyannote_audio) | pyannote's speech diarization of audio files | job | deep-learning, audio | +| [question_answering](https://github.com/mlrun/functions/tree/development/functions/src/question_answering) | GenAI approach of question answering on a given data | job | genai | +| [send_email](https://github.com/mlrun/functions/tree/development/functions/src/send_email) | Send Email messages through SMTP server | job | utils | +| [silero_vad](https://github.com/mlrun/functions/tree/development/functions/src/silero_vad) | Silero VAD (Voice Activity Detection) functions. 
| job | deep-learning, audio | +| [sklearn_classifier](https://github.com/mlrun/functions/tree/development/functions/src/sklearn_classifier) | train any classifier using scikit-learn's API | job | machine-learning, model-training | +| [sklearn_classifier_dask](https://github.com/mlrun/functions/tree/development/functions/src/sklearn_classifier_dask) | train any classifier using scikit-learn's API over Dask | job | machine-learning, model-training | +| [structured_data_generator](https://github.com/mlrun/functions/tree/development/functions/src/structured_data_generator) | GenAI approach of generating structured data according to a given schema | job | data-generation, genai | +| [test_classifier](https://github.com/mlrun/functions/tree/development/functions/src/test_classifier) | test a classifier using held-out or new data | job | machine-learning, model-testing | +| [text_to_audio_generator](https://github.com/mlrun/functions/tree/development/functions/src/text_to_audio_generator) | Generate audio file from text using different speakers | job | data-generation, audio | +| [tf2_serving](https://github.com/mlrun/functions/tree/development/functions/src/tf2_serving) | tf2 image classification server | nuclio:serving | model-serving, machine-learning | +| [transcribe](https://github.com/mlrun/functions/tree/development/functions/src/transcribe) | Transcribe audio files into text files | job | audio, genai | +| [translate](https://github.com/mlrun/functions/tree/development/functions/src/translate) | Translate text files from one language to another | job | genai, NLP | +| [v2_model_server](https://github.com/mlrun/functions/tree/development/functions/src/v2_model_server) | generic sklearn model server | serving | model-serving, machine-learning | +| [v2_model_tester](https://github.com/mlrun/functions/tree/development/functions/src/v2_model_tester) | test v2 model servers | job | model-testing, machine-learning | diff --git a/modules/README.md b/modules/README.md 
index f49c8472f..38cb474d3 100644 --- a/modules/README.md +++ b/modules/README.md @@ -6,7 +6,7 @@ | Name | Description | Kind | Categories | | --- | --- | --- | --- | -| [count_events](/home/runner/work/functions/functions/modules/src/count_events) | Count events in each time window | monitoring_application | model-serving | -| [evidently](/home/runner/work/functions/functions/modules/src/evidently) | Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset | monitoring_application | model-serving, structured-ML | -| [histogram_data_drift](/home/runner/work/functions/functions/modules/src/histogram_data_drift) | Model-monitoring application for detecting and visualizing data drift | monitoring_application | model-serving, structured-ML | +| [count_events](https://github.com/mlrun/functions/tree/development/modules/src/count_events) | Count events in each time window | monitoring_application | model-serving | +| [evidently](https://github.com/mlrun/functions/tree/development/modules/src/evidently) | Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset | monitoring_application | model-serving, structured-ML | +| [histogram_data_drift](https://github.com/mlrun/functions/tree/development/modules/src/histogram_data_drift) | Model-monitoring application for detecting and visualizing data drift | monitoring_application | model-serving, structured-ML | From 2f3397477cd0ebdf24bee0e63a0e82db761ed9c5 Mon Sep 17 00:00:00 2001 From: guylei-code Date: Mon, 17 Nov 2025 15:06:43 +0200 Subject: [PATCH 13/17] OpenAI Module without notebook (#917) * First commit OpenAI Module * First commit OpenAI Module * Update example filename in item.yaml * Delete modules/src/openai_proxy/requirements.txt No need due to no unitest * Update item.yaml for OpenAI application configuration * Update modules/src/openai_proxy/openai.py Co-authored-by: Daniel Perez 
<100069700+danielperezz@users.noreply.github.com> * Change category name from 'GenAI' to 'genai' * Update package requirements with version constraints * Second commit adding notebook * Refactor OpenAI proxy to use base64 encoded script Refactor OpenAI proxy implementation to use base64 encoded script and update FastAPI app configuration. * Change deployment method to OpenAIModule * Third commit adding notebook * Third commit adding notebook * Remove package requirements from item.yaml Removed specific requirements for fastapi and requests. * Rename item and update kind in YAML * Update openai.py * Third commit adding notebook * Fix after review * Fix after review --------- Co-authored-by: Daniel Perez <100069700+danielperezz@users.noreply.github.com> --- modules/src/openai_proxy_app/item.yaml | 19 +++++ .../openai_proxy_app/openai_proxy_app.ipynb | 72 +++++++++++++++++++ .../src/openai_proxy_app/openai_proxy_app.py | 56 +++++++++++++++ 3 files changed, 147 insertions(+) create mode 100644 modules/src/openai_proxy_app/item.yaml create mode 100644 modules/src/openai_proxy_app/openai_proxy_app.ipynb create mode 100644 modules/src/openai_proxy_app/openai_proxy_app.py diff --git a/modules/src/openai_proxy_app/item.yaml b/modules/src/openai_proxy_app/item.yaml new file mode 100644 index 000000000..bf295cf2a --- /dev/null +++ b/modules/src/openai_proxy_app/item.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +categories: +- genai +description: OpenAI application runtime based on fastapi +example: openai_proxy_app.ipynb +generationDate: 2025-11-11:12-25 +hidden: false +labels: + author: Iguazio +mlrunVersion: 1.10.0 +name: openai_proxy_app +spec: + filename: openai_proxy_app.py + image: mlrun/mlrun + requirements: + - fastapi>=0.110,<1.0 + - requests>=2.31,<3.0 + kind: generic +version: 1.0.0 diff --git a/modules/src/openai_proxy_app/openai_proxy_app.ipynb b/modules/src/openai_proxy_app/openai_proxy_app.ipynb new file mode 100644 index 000000000..123934fbd --- /dev/null +++ 
b/modules/src/openai_proxy_app/openai_proxy_app.ipynb @@ -0,0 +1,72 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "220629c8-17aa-45f6-ac81-0ca31e165412", + "metadata": {}, + "source": [ + "# OpenAI Module Demo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "967b4d5d-7250-40bf-8149-de11e1e3244c", + "metadata": {}, + "outputs": [], + "source": [ + "import mlrun\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17d208f4-a00a-42ef-a849-0fa79bed10cb", + "metadata": {}, + "outputs": [], + "source": [ + "project = mlrun.get_or_create_project(\"fastapi-openai\",user_project=True,context=\"./src\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67c93a0d-8240-48b8-808e-9cd0af418309", + "metadata": {}, + "outputs": [], + "source": [ + "app = mlrun.import_module(\"hub://openai_proxy_app\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93e67d6a-5f53-4bda-b0b5-4e2977088139", + "metadata": {}, + "outputs": [], + "source": "app.OpenAIModule(project).fastapi_app.deploy()" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/modules/src/openai_proxy_app/openai_proxy_app.py b/modules/src/openai_proxy_app/openai_proxy_app.py new file mode 100644 index 000000000..a0e9df7ac --- /dev/null +++ b/modules/src/openai_proxy_app/openai_proxy_app.py @@ -0,0 +1,56 @@ +# Copyright 2025 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#This module acts as a lightweight gateway to OpenAI-compatible APIs. +#You can send chat prompts, create embeddings, or get model responses without worrying about authentication or endpoint differences. +#It simplifies access so you can test, analyze, or integrate AI features directly into your projects or notebooks with minimal setup. + + +BASE64 = "IyBvcGVuYWlfcHJveHkvb3BlbmFpLnB5CgppbXBvcnQgb3MKaW1wb3J0IGpzb24KZnJvbSB1cmxsaWIucGFyc2UgaW1wb3J0IHVybGpvaW4KZnJvbSB0eXBpbmcgaW1wb3J0IEFueSwgRGljdCwgTGlzdCwgT3B0aW9uYWwKCmltcG9ydCByZXF1ZXN0cwpmcm9tIGZhc3RhcGkgaW1wb3J0IEZhc3RBUEksIFJlcXVlc3QsIFJlc3BvbnNlLCBCb2R5CgphcHAgPSBGYXN0QVBJKAogICAgdGl0bGU9Ik9wZW5BSSBQcm94eSBBcHAiLAogICAgZGVzY3JpcHRpb249IkxvY2FsIEZhc3RBUEkgcHJveHkgZm9yIE9wZW5BSSBzdHlsZSBlbmRwb2ludHMiLAogICAgdmVyc2lvbj0iMS4wLjAiLAopCgpPUEVOQUlfQkFTRV9VUkwgPSBvcy5nZXRlbnYoIk9QRU5BSV9CQVNFX1VSTCIsICJodHRwczovL2FwaS5vcGVuYWkuY29tIikucnN0cmlwKCIvIikKT1BFTkFJX0FQSV9LRVkgPSBvcy5nZXRlbnYoIk9QRU5BSV9BUElfS0VZIiwgIiIpCk9QRU5BSV9ERUZBVUxUX01PREVMID0gb3MuZ2V0ZW52KCJPUEVOQUlfREVGQVVMVF9NT0RFTCIsICJncHQtNG8tbWluaSIpCgoKZGVmIGJ1aWxkX2hlYWRlcnMoaW5jb21pbmc6IGRpY3QpIC0+IGRpY3Q6CiAgICBoZWFkZXJzID0ge30KICAgIGF1dGggPSBpbmNvbWluZy5nZXQoImF1dGhvcml6YXRpb24iKSBvciBpbmNvbWluZy5nZXQoIkF1dGhvcml6YXRpb24iKQogICAgaWYgYXV0aDoKICAgICAgICBoZWFkZXJzWyJBdXRob3JpemF0aW9uIl0gPSBhdXRoCiAgICBlbGlmIE9QRU5BSV9BUElfS0VZOgogICAgICAgIGhlYWRlcnNbIkF1dGhvcml6YXRpb24iXSA9IGYiQmVhcmVyIHtPUEVOQUlfQVBJX0tFWX0iCiAgICBjdHlwZSA9IGluY29taW5nLmdldCgiY29udGVudC10eXBlIikgb3IgaW5jb21pbmcuZ2V0KCJDb250ZW50LVR5cGUiKSBvciAiYXBwbGljYXRpb24vanNvbiIKICAgIGhlYWRlcn
NbIkNvbnRlbnQtVHlwZSJdID0gY3R5cGUKICAgIHJldHVybiBoZWFkZXJzCgoKZGVmIGJ1aWxkX3RhcmdldChwYXRoOiBzdHIpIC0+IHN0cjoKICAgIGJhc2UgPSBPUEVOQUlfQkFTRV9VUkwKICAgIGlmIGJhc2UuZW5kc3dpdGgoIi92MSIpIG9yIGJhc2UuZW5kc3dpdGgoIi92MS8iKToKICAgICAgICBiYXNlID0gYmFzZVs6LTNdIGlmIGJhc2UuZW5kc3dpdGgoIi92MSIpIGVsc2UgYmFzZVs6LTRdCiAgICByZXR1cm4gdXJsam9pbihiYXNlICsgIi8iLCBwYXRoLmxzdHJpcCgiLyIpKQoKCmRlZiBmb3J3YXJkX2pzb24ocGF0aDogc3RyLCBib2R5OiBkaWN0LCBoZWFkZXJzOiBkaWN0LCBxdWVyeTogZGljdCk6CiAgICB0YXJnZXQgPSBidWlsZF90YXJnZXQocGF0aCkKICAgIHJlc3AgPSByZXF1ZXN0cy5wb3N0KAogICAgICAgIHRhcmdldCwKICAgICAgICBoZWFkZXJzPWhlYWRlcnMsCiAgICAgICAgcGFyYW1zPXF1ZXJ5LAogICAgICAgIGpzb249Ym9keSwKICAgICAgICB0aW1lb3V0PTYwLAogICAgKQogICAgcmV0dXJuIHJlc3AKCkBhcHAuZ2V0KCIvIikKZGVmIGhlYWx0aCgpOgogICAgcmV0dXJuIHsic3RhdHVzIjogIm9rIn0KCgojIHJlbGF4ZWQgY2hhdCBlbmRwb2ludCwgYWNjZXB0cyBhbnkgSlNPTiB0aGF0IGluY2x1ZGVzIG1lc3NhZ2VzCkBhcHAucG9zdCgiL3YxL2NoYXQvY29tcGxldGlvbnMiKQphc3luYyBkZWYgY2hhdF9jb21wbGV0aW9ucygKICAgIHJlcXVlc3Q6IFJlcXVlc3QsCiAgICBwYXlsb2FkOiBEaWN0W3N0ciwgQW55XSA9IEJvZHkoLi4uKSwKKToKICAgIGlmICJtZXNzYWdlcyIgbm90IGluIHBheWxvYWQgb3Igbm90IGlzaW5zdGFuY2UocGF5bG9hZFsibWVzc2FnZXMiXSwgbGlzdCk6CiAgICAgICAgcmV0dXJuIFJlc3BvbnNlKAogICAgICAgICAgICBjb250ZW50PWpzb24uZHVtcHMoeyJlcnJvciI6ICJtZXNzYWdlcyBtdXN0IGJlIGEgbGlzdCBvZiBjaGF0IG1lc3NhZ2VzIn0pLAogICAgICAgICAgICBzdGF0dXNfY29kZT00MDAsCiAgICAgICAgICAgIG1lZGlhX3R5cGU9ImFwcGxpY2F0aW9uL2pzb24iLAogICAgICAgICkKCiAgICBpZiAibW9kZWwiIG5vdCBpbiBwYXlsb2FkIG9yIHBheWxvYWRbIm1vZGVsIl0gaXMgTm9uZToKICAgICAgICBwYXlsb2FkWyJtb2RlbCJdID0gT1BFTkFJX0RFRkFVTFRfTU9ERUwKCiAgICBoZWFkZXJzID0gYnVpbGRfaGVhZGVycyhkaWN0KHJlcXVlc3QuaGVhZGVycykpCiAgICByZXNwID0gZm9yd2FyZF9qc29uKCIvdjEvY2hhdC9jb21wbGV0aW9ucyIsIHBheWxvYWQsIGhlYWRlcnMsIGRpY3QocmVxdWVzdC5xdWVyeV9wYXJhbXMpKQogICAgcmV0dXJuIFJlc3BvbnNlKAogICAgICAgIGNvbnRlbnQ9cmVzcC5jb250ZW50LAogICAgICAgIHN0YXR1c19jb2RlPXJlc3Auc3RhdHVzX2NvZGUsCiAgICAgICAgbWVkaWFfdHlwZT1yZXNwLmhlYWRlcnMuZ2V0KCJDb250ZW50LVR5cGUiLCAiYXBwbGljYXRpb24vanNvbiIpLAogICAgKQoKCkBhcHAucG9zdCgiL3YxL2VtYmVkZG
luZ3MiKQphc3luYyBkZWYgZW1iZWRkaW5ncygKICAgIHJlcXVlc3Q6IFJlcXVlc3QsCiAgICBwYXlsb2FkOiBEaWN0W3N0ciwgQW55XSA9IEJvZHkoLi4uKSwKKToKICAgIGlmICJtb2RlbCIgbm90IGluIHBheWxvYWQgb3Igbm90IHBheWxvYWRbIm1vZGVsIl06CiAgICAgICAgcGF5bG9hZFsibW9kZWwiXSA9ICJ0ZXh0LWVtYmVkZGluZy0zLXNtYWxsIgogICAgaGVhZGVycyA9IGJ1aWxkX2hlYWRlcnMoZGljdChyZXF1ZXN0LmhlYWRlcnMpKQogICAgcmVzcCA9IGZvcndhcmRfanNvbigiL3YxL2VtYmVkZGluZ3MiLCBwYXlsb2FkLCBoZWFkZXJzLCBkaWN0KHJlcXVlc3QucXVlcnlfcGFyYW1zKSkKICAgIHJldHVybiBSZXNwb25zZSgKICAgICAgICBjb250ZW50PXJlc3AuY29udGVudCwKICAgICAgICBzdGF0dXNfY29kZT1yZXNwLnN0YXR1c19jb2RlLAogICAgICAgIG1lZGlhX3R5cGU9cmVzcC5oZWFkZXJzLmdldCgiQ29udGVudC1UeXBlIiwgImFwcGxpY2F0aW9uL2pzb24iKSwKICAgICkKCgpAYXBwLnBvc3QoIi92MS9yZXNwb25zZXMiKQphc3luYyBkZWYgcmVzcG9uc2VzX2FwaSgKICAgIHJlcXVlc3Q6IFJlcXVlc3QsCiAgICBwYXlsb2FkOiBEaWN0W3N0ciwgQW55XSA9IEJvZHkoLi4uKSwKKToKICAgIGlmICJtb2RlbCIgbm90IGluIHBheWxvYWQgb3IgcGF5bG9hZFsibW9kZWwiXSBpcyBOb25lOgogICAgICAgIHBheWxvYWRbIm1vZGVsIl0gPSBPUEVOQUlfREVGQVVMVF9NT0RFTAogICAgaGVhZGVycyA9IGJ1aWxkX2hlYWRlcnMoZGljdChyZXF1ZXN0LmhlYWRlcnMpKQogICAgcmVzcCA9IGZvcndhcmRfanNvbigiL3YxL3Jlc3BvbnNlcyIsIHBheWxvYWQsIGhlYWRlcnMsIGRpY3QocmVxdWVzdC5xdWVyeV9wYXJhbXMpKQogICAgcmV0dXJuIFJlc3BvbnNlKAogICAgICAgIGNvbnRlbnQ9cmVzcC5jb250ZW50LAogICAgICAgIHN0YXR1c19jb2RlPXJlc3Auc3RhdHVzX2NvZGUsCiAgICAgICAgbWVkaWFfdHlwZT1yZXNwLmhlYWRlcnMuZ2V0KCJDb250ZW50LVR5cGUiLCAiYXBwbGljYXRpb24vanNvbiIpLAogICAgKQoKCiMgLS0tLS0tLS0tLS0tLS0tLSBjbGllbnQgLS0tLS0tLS0tLS0tLS0tLQpjbGFzcyBPcGVuQUlQcm94eUNsaWVudDoKICAgICIiIgogICAgU2ltcGxlIGNsaWVudCBmb3IgdGhlIGxvY2FsIHByb3h5LgogICAgRGVmYXVsdCBiYXNlIHVybCBpcyBodHRwOi8vbG9jYWxob3N0OjgwMDAKICAgIElmIGFwaV9rZXkgaXMgbm90IHByb3ZpZGVkLCBpdCB1c2VzIE9QRU5BSV9BUElfS0VZIGZyb20gZW52aXJvbm1lbnQuCiAgICAiIiIKCiAgICBkZWYgX19pbml0X18oc2VsZiwgYmFzZV91cmw6IHN0ciA9ICJodHRwOi8vbG9jYWxob3N0OjgwMDAiLCBhcGlfa2V5OiBPcHRpb25hbFtzdHJdID0gTm9uZSk6CiAgICAgICAgc2VsZi5iYXNlX3VybCA9IGJhc2VfdXJsLnJzdHJpcCgiLyIpCiAgICAgICAgc2VsZi5hcGlfa2V5ID0gYXBpX2tleQoKICAgIGRlZiBfaGVhZGVycyhzZWxmKSAtPiBEaWN0W3N0ciwgc3RyXT
oKICAgICAgICBoZWFkZXJzID0geyJDb250ZW50LVR5cGUiOiAiYXBwbGljYXRpb24vanNvbiJ9CiAgICAgICAga2V5ID0gc2VsZi5hcGlfa2V5IG9yIG9zLmdldGVudigiT1BFTkFJX0FQSV9LRVkiLCAiIikKICAgICAgICBpZiBrZXk6CiAgICAgICAgICAgIGhlYWRlcnNbIkF1dGhvcml6YXRpb24iXSA9IGYiQmVhcmVyIHtrZXl9IgogICAgICAgIHJldHVybiBoZWFkZXJzCgogICAgZGVmIGNoYXQoc2VsZiwgbWVzc2FnZXM6IExpc3RbRGljdFtzdHIsIHN0cl1dLCBtb2RlbDogT3B0aW9uYWxbc3RyXSA9IE5vbmUpIC0+IERpY3Rbc3RyLCBBbnldOgogICAgICAgIGJvZHk6IERpY3Rbc3RyLCBBbnldID0geyJtZXNzYWdlcyI6IG1lc3NhZ2VzfQogICAgICAgIGlmIG1vZGVsOgogICAgICAgICAgICBib2R5WyJtb2RlbCJdID0gbW9kZWwKICAgICAgICByZXNwID0gcmVxdWVzdHMucG9zdCgKICAgICAgICAgICAgZiJ7c2VsZi5iYXNlX3VybH0vdjEvY2hhdC9jb21wbGV0aW9ucyIsCiAgICAgICAgICAgIGhlYWRlcnM9c2VsZi5faGVhZGVycygpLAogICAgICAgICAgICBqc29uPWJvZHksCiAgICAgICAgICAgIHRpbWVvdXQ9NjAsCiAgICAgICAgKQogICAgICAgIHJlc3AucmFpc2VfZm9yX3N0YXR1cygpCiAgICAgICAgcmV0dXJuIHJlc3AuanNvbigpCgogICAgZGVmIGVtYmVkZGluZ3Moc2VsZiwgdGV4dDogQW55LCBtb2RlbDogT3B0aW9uYWxbc3RyXSA9IE5vbmUpIC0+IERpY3Rbc3RyLCBBbnldOgogICAgICAgIGJvZHk6IERpY3Rbc3RyLCBBbnldID0geyJpbnB1dCI6IHRleHR9CiAgICAgICAgaWYgbW9kZWw6CiAgICAgICAgICAgIGJvZHlbIm1vZGVsIl0gPSBtb2RlbAogICAgICAgIHJlc3AgPSByZXF1ZXN0cy5wb3N0KAogICAgICAgICAgICBmIntzZWxmLmJhc2VfdXJsfS92MS9lbWJlZGRpbmdzIiwKICAgICAgICAgICAgaGVhZGVycz1zZWxmLl9oZWFkZXJzKCksCiAgICAgICAgICAgIGpzb249Ym9keSwKICAgICAgICAgICAgdGltZW91dD02MCwKICAgICAgICApCiAgICAgICAgcmVzcC5yYWlzZV9mb3Jfc3RhdHVzKCkKICAgICAgICByZXR1cm4gcmVzcC5qc29uKCkKCiAgICBkZWYgcmVzcG9uc2VzKHNlbGYsIGlucHV0X3RleHQ6IEFueSwgbW9kZWw6IE9wdGlvbmFsW3N0cl0gPSBOb25lKSAtPiBEaWN0W3N0ciwgQW55XToKICAgICAgICBib2R5OiBEaWN0W3N0ciwgQW55XSA9IHsiaW5wdXQiOiBpbnB1dF90ZXh0fQogICAgICAgIGlmIG1vZGVsOgogICAgICAgICAgICBib2R5WyJtb2RlbCJdID0gbW9kZWwKICAgICAgICByZXNwID0gcmVxdWVzdHMucG9zdCgKICAgICAgICAgICAgZiJ7c2VsZi5iYXNlX3VybH0vdjEvcmVzcG9uc2VzIiwKICAgICAgICAgICAgaGVhZGVycz1zZWxmLl9oZWFkZXJzKCksCiAgICAgICAgICAgIGpzb249Ym9keSwKICAgICAgICAgICAgdGltZW91dD02MCwKICAgICAgICApCiAgICAgICAgcmVzcC5yYWlzZV9mb3Jfc3RhdHVzKCkKICAgICAgICByZXR1cm4gcmVzcC5qc29uKCkKCgojIG9wdGlvbm
FsIHF1aWNrIHNlbGYgdGVzdCB3aGVuIHJ1bm5pbmcgdGhpcyBmaWxlIGRpcmVjdGx5CmlmIF9fbmFtZV9fID09ICJfX21haW5fXyI6CiAgICAjIHN0YXJ0IHRoZSBzZXJ2ZXIgaW4gYW5vdGhlciB0ZXJtaW5hbCBmaXJzdDoKICAgICMgdXZpY29ybiBvcGVuYWlfcHJveHkub3BlbmFpOmFwcCAtLWhvc3QgMC4wLjAuMCAtLXBvcnQgODAwMCAtLXJlbG9hZAogICAgYyA9IE9wZW5BSVByb3h5Q2xpZW50KCkKICAgIHRyeToKICAgICAgICBwcmludCgiSGVhbHRoOiIsIHJlcXVlc3RzLmdldChmIntjLmJhc2VfdXJsfS8iKS5qc29uKCkpCiAgICBleGNlcHQgRXhjZXB0aW9uIGFzIGU6CiAgICAgICAgcHJpbnQoIlNlcnZlciBub3QgcnVubmluZzoiLCBlKQo=" +CMD = r''' +set -e +python - <<'PY' +import os, base64, pathlib +code = os.environ["BASE64"] +pathlib.Path("/opt/app").mkdir(parents=True, exist_ok=True) +with open("/opt/app/openai_proxy_app.py","wb") as f: + f.write(base64.b64decode(code)) +print("Wrote /opt/app/openai_proxy_app.py") +PY + +exec gunicorn openai_proxy_app:app \ + --chdir /opt/app \ + --bind 0.0.0.0:8000 \ + --worker-class uvicorn.workers.UvicornWorker \ + --log-level info +'''.strip() +class OpenAIModule: + def __init__(self,project): + self.project = project + self.fastapi_app = self.project.set_function(name="openai",kind="application",image="python:3.11") + self.fastapi_app.with_requirements([ + "fastapi>=0.110,<1.0", + "uvicorn[standard]>=0.29,<1.0", + "gunicorn>=21.2,<22.0", + "requests>=2.31,<3.0", + ]) + self.fastapi_app.set_env("BASE64",BASE64) + self.fastapi_app.set_internal_application_port(8000) + self.fastapi_app.spec.command = "/bin/sh" + self.fastapi_app.spec.args = ["-c", CMD] + + + + + From 277e11d32a969d32502517a3fa2876db1b391d41 Mon Sep 17 00:00:00 2001 From: iguazio-cicd Date: Mon, 17 Nov 2025 13:07:54 +0000 Subject: [PATCH 14/17] chore(readme): auto-update asset tables [skip ci] --- modules/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/README.md b/modules/README.md index 38cb474d3..79da6c416 100644 --- a/modules/README.md +++ b/modules/README.md @@ -9,4 +9,5 @@ | [count_events](https://github.com/mlrun/functions/tree/development/modules/src/count_events) | Count events in each 
time window | monitoring_application | model-serving | | [evidently](https://github.com/mlrun/functions/tree/development/modules/src/evidently) | Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset | monitoring_application | model-serving, structured-ML | | [histogram_data_drift](https://github.com/mlrun/functions/tree/development/modules/src/histogram_data_drift) | Model-monitoring application for detecting and visualizing data drift | monitoring_application | model-serving, structured-ML | +| [openai_proxy_app](https://github.com/mlrun/functions/tree/development/modules/src/openai_proxy_app) | OpenAI application runtime based on fastapi | generic | genai | From 356cb3841990d37978cda2fc49592aaddd18f839 Mon Sep 17 00:00:00 2001 From: Daniel Perez <100069700+danielperezz@users.noreply.github.com> Date: Mon, 17 Nov 2025 15:51:52 +0200 Subject: [PATCH 15/17] [Evidently] Fill example notebook (#919) * add notebook + rename directory + correct evidently version * remove extra cell --- modules/src/evidently/evidently_iris.ipynb | 37 - .../src/evidently_iris/evidently_iris.ipynb | 1295 +++++++++++++++++ .../evidently_iris.py | 0 .../{evidently => evidently_iris}/item.yaml | 2 +- .../requirements.txt | 2 +- .../test_evidently_iris.py | 0 6 files changed, 1297 insertions(+), 39 deletions(-) delete mode 100644 modules/src/evidently/evidently_iris.ipynb create mode 100644 modules/src/evidently_iris/evidently_iris.ipynb rename modules/src/{evidently => evidently_iris}/evidently_iris.py (100%) rename modules/src/{evidently => evidently_iris}/item.yaml (96%) rename modules/src/{evidently => evidently_iris}/requirements.txt (60%) rename modules/src/{evidently => evidently_iris}/test_evidently_iris.py (100%) diff --git a/modules/src/evidently/evidently_iris.ipynb b/modules/src/evidently/evidently_iris.ipynb deleted file mode 100644 index 54f657bb0..000000000 --- a/modules/src/evidently/evidently_iris.ipynb +++ /dev/null @@ -1,37 
+0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "initial_id", - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/modules/src/evidently_iris/evidently_iris.ipynb b/modules/src/evidently_iris/evidently_iris.ipynb new file mode 100644 index 000000000..c3299f82f --- /dev/null +++ b/modules/src/evidently_iris/evidently_iris.ipynb @@ -0,0 +1,1295 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8f92a6bb-e4b4-4b5d-91c7-2e99c97798c6", + "metadata": {}, + "source": [ + "# Evidently Iris Demo\n", + "\n", + "In this notebook, we’ll import the hub’s Evidently demo app, which monitors data quality and drift on Scikit-Learn’s Iris dataset. We’ll run it using the `evaluate()` method with a slightly modified dataset as the monitored data.\n", + "\n", + "The Evidently Iris module demonstrates a simple example of integrating MLRun with Evidently for data monitoring, which you can adapt to fit your own project needs or use as a reference implementation." 
+ ] + }, + { + "cell_type": "markdown", + "id": "a6775277-5f4f-4261-9a06-5c6d87cb85c7", + "metadata": {}, + "source": [ + "## Set up an MLRun project and prepare the data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d7a8c256-035f-4261-b494-f3f3cbd8c77c", + "metadata": {}, + "outputs": [], + "source": [ + "import mlrun\n", + "project = mlrun.get_or_create_project(\"evidently-demo\",'./evidently-demo')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1e89667f-f84e-492a-a886-61104bc5ce49", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_iris\n", + "import pandas as pd\n", + "from mlrun.feature_store.api import norm_column_name\n", + "\n", + "iris = load_iris()\n", + "columns = [norm_column_name(col) for col in iris.feature_names]\n", + "current_df = pd.DataFrame(iris.data, columns=columns)\n", + "current_df[\"sepal_length_cm\"] += 0.3 # simulate drift" + ] + }, + { + "cell_type": "markdown", + "id": "af6e56af-c99d-481e-a32e-f7e5eac4ae3a", + "metadata": {}, + "source": [ + "## Get the module from the hub and edit its defaults" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "35a4bb6b-d15e-4bfd-8d04-2fa188cb36cc", + "metadata": {}, + "outputs": [], + "source": [ + "hub_mod = mlrun.get_hub_module(\"hub://evidently_iris\", download_files=True)\n", + "src_file_path = hub_mod.get_module_file_path()" + ] + }, + { + "cell_type": "markdown", + "id": "ba0c043b-7356-44da-b6d2-84eb02718482", + "metadata": {}, + "source": [ + "We need to modify the class defaults to include the Evidently workspace path and project ID parameters. This can be done in one of two ways: either by editing the downloaded source file directly and then evaluating with the standard class, or - as we’ll do now - by adding an inheriting class to the same file and evaluating using that new class.\n", + "\n", + "(Note: this is only needed when running the app using `evaluate()`. 
When setting it as a real-time function we can simply pass the parameters)." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4e9253a9-58bd-4732-8eb1-80a7d15b2e7a", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import uuid\n", + "\n", + "ws = Path(\"./evidently_workspace\")\n", + "ws.mkdir(parents=True, exist_ok=True) # will create if missing\n", + "evidently_project_id = str(uuid.uuid4())\n", + "\n", + "wrapper_code = f\"\"\"\n", + "class EvidentlyIrisMonitoringAppWithWorkspaceSet(EvidentlyIrisMonitoringApp):\n", + " def __init__(self) -> None:\n", + " super().__init__(evidently_workspace_path=\"{ws}\", evidently_project_id=\"{evidently_project_id}\")\n", + " \"\"\"\n", + "\n", + "with open(src_file_path, \"a\") as f:\n", + " f.write(wrapper_code)" + ] + }, + { + "cell_type": "markdown", + "id": "5776541f-2d6f-4c10-9246-75fe14e1bbea", + "metadata": {}, + "source": [ + "Now we can actually import it as a module, using the `module()` method" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3742576d-6da2-423d-8c1c-2861712a698f", + "metadata": {}, + "outputs": [], + "source": [ + "app_module = hub_mod.module()\n", + "evidently_app = app_module.EvidentlyIrisMonitoringAppWithWorkspaceSet" + ] + }, + { + "cell_type": "markdown", + "id": "57a81ea8-f203-4152-9492-a0f7b916d02b", + "metadata": {}, + "source": [ + "## Run the app\n", + "We are ready to call `evaluate()` (notice that the run is linked to the current (active) project that we created at the beginning of the notebook)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d8103577-8523-4b64-bd67-e93bbde8dd06", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2025-11-17 09:14:43,241 [info] Changing function name - adding `\"-batch\"` suffix: {\"func_name\":\"evidentlyirismonitoringappwithworkspaceset-batch\"}\n", + "> 2025-11-17 09:14:43,580 [info] Storing function: 
{\"db\":\"http://mlrun-api:8080\",\"name\":\"evidentlyirismonitoringappwithworkspaceset-batch--handler\",\"uid\":\"9ecf72a1bd82498c92d5897809b6a438\"}\n", + "> 2025-11-17 09:14:43,856 [info] downloading v3io:///projects/evidently-demo/artifacts/evidentlyirismonitoringappwithworkspaceset-batch_sample_data.parquet to local temp file\n", + "> 2025-11-17 09:14:43,890 [info] Running evidently app\n", + "> 2025-11-17 09:14:46,214 [info] Logged evidently object\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartendstatekindnamelabelsinputsparametersresultsartifact_uris
evidently-demo0Nov 17 09:14:43NaTcompletedrunevidentlyirismonitoringappwithworkspaceset-batch--handler
v3io_user=iguazio
kind=local
owner=iguazio
host=jupyter-97c64f97b-8qtcv
sample_data
write_output=False
existing_data_handling=fail_on_overlap
stream_profile=None
return={result_name: 'data_drift_test', result_value: 0.5, result_kind: 0, result_status: 1, result_extra_data: '{}'}
evidently_report=store://artifacts/evidently-demo/evidentlyirismonitoringappwithworkspaceset-batch--handler_evidently_report#0@9ecf72a1bd82498c92d5897809b6a438^2f82c069b396f23b4daae81540ffa386b44f165c
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2025-11-17 09:14:46,354 [info] Run execution finished: {\"name\":\"evidentlyirismonitoringappwithworkspaceset-batch--handler\",\"status\":\"completed\"}\n" + ] + } + ], + "source": [ + "# Evaluate directly on the sample data\n", + "run_result = evidently_app.evaluate(\n", + " func_path=hub_mod.get_module_file_path(),\n", + " sample_data=current_df,\n", + " run_local=True)" + ] + }, + { + "cell_type": "markdown", + "id": "2c6843cd-70d4-4e1a-8aa2-52b6ef5b0ec9", + "metadata": {}, + "source": [ + "## Examine the results\n", + "Notice that the 0.5 value in the demo run result is not derived from Evidently’s drift metrics, but is a constant placeholder added for demonstration only.\n", + "\n", + "Let's take a look at the artifact the app generated for us:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7f1680f5-0ee7-4a82-a351-f8348bf398cc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "artifact_key = f\"{run_result.metadata.name}_evidently_report\"\n", + "artifact = project.get_artifact(artifact_key)\n", + "artifact.to_dataitem().show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mlrun-base-py311", + "language": "python", + "name": "conda-env-mlrun-base-py311-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/modules/src/evidently/evidently_iris.py b/modules/src/evidently_iris/evidently_iris.py similarity index 100% rename from modules/src/evidently/evidently_iris.py rename to modules/src/evidently_iris/evidently_iris.py diff --git a/modules/src/evidently/item.yaml b/modules/src/evidently_iris/item.yaml similarity index 96% rename from modules/src/evidently/item.yaml rename to modules/src/evidently_iris/item.yaml index c6a2abc2c..262b7e1b7 100644 --- a/modules/src/evidently/item.yaml +++ b/modules/src/evidently_iris/item.yaml @@ -16,6 +16,6 @@ spec: kind: monitoring_application requirements: - scikit-learn~=1.5.2 - - evidently~=0.7.6 + - evidently~=0.7.5 - pandas version: 1.0.0 \ No newline at end of file diff --git a/modules/src/evidently/requirements.txt b/modules/src/evidently_iris/requirements.txt similarity index 60% rename from modules/src/evidently/requirements.txt rename to modules/src/evidently_iris/requirements.txt index bd4abb36f..6bd12d901 100644 --- a/modules/src/evidently/requirements.txt +++ b/modules/src/evidently_iris/requirements.txt @@ -1,3 +1,3 @@ scikit-learn~=1.5.2 -evidently~=0.7.6 +evidently~=0.7.5 pandas \ No newline at end of file diff --git 
a/modules/src/evidently/test_evidently_iris.py b/modules/src/evidently_iris/test_evidently_iris.py similarity index 100% rename from modules/src/evidently/test_evidently_iris.py rename to modules/src/evidently_iris/test_evidently_iris.py From 284fb2a6d7c42c9da48034c801b708505cd8439d Mon Sep 17 00:00:00 2001 From: iguazio-cicd Date: Mon, 17 Nov 2025 13:53:07 +0000 Subject: [PATCH 16/17] chore(readme): auto-update asset tables [skip ci] --- modules/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/README.md b/modules/README.md index 79da6c416..d22a576ef 100644 --- a/modules/README.md +++ b/modules/README.md @@ -7,7 +7,7 @@ | Name | Description | Kind | Categories | | --- | --- | --- | --- | | [count_events](https://github.com/mlrun/functions/tree/development/modules/src/count_events) | Count events in each time window | monitoring_application | model-serving | -| [evidently](https://github.com/mlrun/functions/tree/development/modules/src/evidently) | Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset | monitoring_application | model-serving, structured-ML | +| [evidently_iris](https://github.com/mlrun/functions/tree/development/modules/src/evidently_iris) | Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset | monitoring_application | model-serving, structured-ML | | [histogram_data_drift](https://github.com/mlrun/functions/tree/development/modules/src/histogram_data_drift) | Model-monitoring application for detecting and visualizing data drift | monitoring_application | model-serving, structured-ML | | [openai_proxy_app](https://github.com/mlrun/functions/tree/development/modules/src/openai_proxy_app) | OpenAI application runtime based on fastapi | generic | genai | From cdbcc2c5df14aa9b00b4635af3eb74dd83e1f8ba Mon Sep 17 00:00:00 2001 From: iguazio-cicd Date: Tue, 18 Nov 2025 08:20:04 +0000 Subject: [PATCH 17/17] 
chore(readme): auto-update asset tables [skip ci] --- modules/README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/README.md b/modules/README.md index ed2ade6d1..c3ed1c597 100644 --- a/modules/README.md +++ b/modules/README.md @@ -10,3 +10,15 @@ | [evidently_iris](https://github.com/mlrun/functions/tree/development/modules/src/evidently_iris) | Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset | monitoring_application | model-serving, structured-ML | | [histogram_data_drift](https://github.com/mlrun/functions/tree/development/modules/src/histogram_data_drift) | Model-monitoring application for detecting and visualizing data drift | monitoring_application | model-serving, structured-ML | | [openai_proxy_app](https://github.com/mlrun/functions/tree/development/modules/src/openai_proxy_app) | OpenAI application runtime based on fastapi | generic | genai | + + +## Catalog + + +| Name | Description | Kind | Categories | +| --- | --- | --- | --- | +| [count_events](https://github.com/mlrun/functions/tree/development/modules/src/count_events) | Count events in each time window | monitoring_application | model-serving | +| [evidently_iris](https://github.com/mlrun/functions/tree/development/modules/src/evidently_iris) | Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset | monitoring_application | model-serving, structured-ML | +| [histogram_data_drift](https://github.com/mlrun/functions/tree/development/modules/src/histogram_data_drift) | Model-monitoring application for detecting and visualizing data drift | monitoring_application | model-serving, structured-ML | +| [openai_proxy_app](https://github.com/mlrun/functions/tree/development/modules/src/openai_proxy_app) | OpenAI application runtime based on fastapi | generic | genai | +