From 01267fdb7a454d7e55c335af0a8aa1ec5384343a Mon Sep 17 00:00:00 2001 From: Zhengfei Wang <38847871+zhengfeiwang@users.noreply.github.com> Date: Mon, 17 Jun 2024 17:44:01 +0800 Subject: [PATCH 1/2] [fundamental][bugfix] Pin `tenacity`<8.4.0 to fix import-linter CI (#3424) # Description Pin `tenacity` to avoid issue described in https://github.com/langchain-ai/langchain/issues/22972. # All Promptflow Contribution checklist: - [x] **The pull request does not introduce [breaking changes].** - [ ] **CHANGELOG is updated for new features, bug fixes or other significant changes.** - [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).** - [ ] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).** ## General Guidelines and Best Practices - [x] Title of the pull request is clear and informative. - [x] There are a small number of commits, each of which have an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md). ### Testing Guidelines - [x] Pull request includes test coverage for the included changes. --- .github/workflows/build_doc_ci.yml | 4 ++-- .github/workflows/promptflow-executor-e2e-test.yml | 2 +- .github/workflows/promptflow-executor-unit-test.yml | 2 +- .github/workflows/promptflow-import-linter.yml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_doc_ci.yml b/.github/workflows/build_doc_ci.yml index a2176567a03..c9f48e02669 100644 --- a/.github/workflows/build_doc_ci.yml +++ b/.github/workflows/build_doc_ci.yml @@ -52,7 +52,7 @@ jobs: shell: powershell working-directory: scripts/docs/ run: |- - pip install langchain + pip install langchain tenacity<8.4.0 ./doc_generation.ps1 -WithReferenceDoc:$true -WarningAsError:$true # Note: We have this job separately because some error may missing when build link check exists. @@ -85,5 +85,5 @@ jobs: shell: powershell working-directory: scripts/docs/ run: |- - pip install langchain + pip install langchain tenacity<8.4.0 ./doc_generation.ps1 -WithReferenceDoc:$true -WarningAsError:$true -BuildLinkCheck diff --git a/.github/workflows/promptflow-executor-e2e-test.yml b/.github/workflows/promptflow-executor-e2e-test.yml index 93af8cdb637..705b02a117d 100644 --- a/.github/workflows/promptflow-executor-e2e-test.yml +++ b/.github/workflows/promptflow-executor-e2e-test.yml @@ -148,7 +148,7 @@ jobs: run: | gci env:* | sort-object name az account show - pip install langchain-community + pip install langchain-community tenacity<8.4.0 # numexpr is required by langchain in e2e tests. 
pip install numexpr python scripts/building/run_coverage_tests.py ` diff --git a/.github/workflows/promptflow-executor-unit-test.yml b/.github/workflows/promptflow-executor-unit-test.yml index 214a82495db..969fcff2757 100644 --- a/.github/workflows/promptflow-executor-unit-test.yml +++ b/.github/workflows/promptflow-executor-unit-test.yml @@ -150,7 +150,7 @@ jobs: run: | gci env:* | sort-object name az account show - pip install langchain-community + pip install langchain-community tenacity<8.4.0 python scripts/building/run_coverage_tests.py ` -p ${{ env.testWorkingDirectory }}/promptflow ` -t ${{ env.testWorkingDirectory }}/tests/executor/unittests ` diff --git a/.github/workflows/promptflow-import-linter.yml b/.github/workflows/promptflow-import-linter.yml index cafa3bfc81d..51a2a07bd29 100644 --- a/.github/workflows/promptflow-import-linter.yml +++ b/.github/workflows/promptflow-import-linter.yml @@ -67,5 +67,5 @@ jobs: echo "=== promptflow-azure full lints ===" poetry run pip install langchain + poetry run pip install "tenacity<8.4.0" poetry run python ${{ github.workspace }}/scripts/import_linter/import_linter.py - From 32115d397b888be86eaea9b54039f5dd20f55dc0 Mon Sep 17 00:00:00 2001 From: Billy Hu Date: Mon, 17 Jun 2024 09:39:59 -0700 Subject: [PATCH 2/2] [Perf] Evaluate API: Support parallelized evaluator batch run through pf.run (#3380) # Description The change in this PR addresses the performance issues we were seeing with the Evaluate API. This is the first step in optimizing performance. The improvements include: - Parallelizing the pf.run for evaluators. Previously, it ran sequentially, contributing to most of the latency. - Addressing the slowness of the import Evaluate API, which was due to the import of MLClient. - Using threads to infer signatures for eval batch runs instead of processes. (This change has been moved to a seperated PR: https://github.com/microsoft/promptflow/pull/3412) **Improvements from this change:** Windows OS, remote tracking disabled - 4 evaluators, 100 rows: - Previous (pf.run without threadpool): 320 secs - Current (pf.run with threadpool): 78 secs (~75% improvement) - 1 evaluator, 1 row: - Previous: 53 secs - Current: 17 secs (~68% improvement) Investigation details can be found [here](https://microsoft-my.sharepoint.com/:w:/p/ninhu/ETB_zdMkFrdAuf3Lcg9ssrUB6RVmyuFs5Un1G74O1HlwSA?e=23ngPm) # All Promptflow Contribution checklist: - [ ] **The pull request does not introduce [breaking changes].** - [ ] **CHANGELOG is updated for new features, bug fixes or other significant changes.** - [ ] **I have read the [contribution guidelines](../CONTRIBUTING.md).** - [ ] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).** ## General Guidelines and Best Practices - [ ] Title of the pull request is clear and informative. - [ ] There are a small number of commits, each of which have an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md). ### Testing Guidelines - [ ] Pull request includes test coverage for the included changes. 
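For reference, below is a minimal sketch of the thread-pool pattern this change introduces for evaluator batch runs (the real implementation is the `ProxyClient`/`BatchRunContext` classes added further down in this patch). It is not the merged code: it assumes only a `PFClient`-like object exposing `run()` and `get_details()`, uses the stdlib `concurrent.futures` pool instead of promptflow's context-propagating executor, and the `evaluators`/`data_file` names in the usage note are illustrative.

```python
# Minimal sketch of the parallelized evaluator batch run pattern (assumption-based,
# not the merged implementation). Each evaluator's pf.run is submitted to a thread
# pool up front and only resolved when results are needed, so runs overlap instead
# of executing sequentially.
from concurrent.futures import ThreadPoolExecutor

BATCH_RUN_TIMEOUT = 3600  # seconds; mirrors the constant added in this patch


class ProxyRunSketch:
    """Wraps the future returned when a batch run is submitted to the pool."""

    def __init__(self, future):
        self.future = future


class ProxyClientSketch:
    """Submits each evaluator's batch run to a thread pool instead of blocking on it."""

    def __init__(self, pf_client):
        # pf_client is assumed to behave like promptflow.client.PFClient
        self._pf_client = pf_client
        self._pool = ThreadPoolExecutor(thread_name_prefix="evaluators_thread")

    def run(self, flow, data, column_mapping=None, **kwargs):
        # Schedule the run and return immediately; nothing blocks here.
        future = self._pool.submit(
            self._pf_client.run, flow, data=data, column_mapping=column_mapping, **kwargs
        )
        return ProxyRunSketch(future)

    def get_details(self, proxy_run, all_results=False):
        # Block only when the caller actually needs the results.
        run = proxy_run.future.result(timeout=BATCH_RUN_TIMEOUT)
        return self._pf_client.get_details(run, all_results=all_results)


# Illustrative usage (names are hypothetical): submit all evaluator runs first,
# then collect results, so the runs execute concurrently.
# client = ProxyClientSketch(pf_client)
# runs = {name: client.run(ev, data=data_file) for name, ev in evaluators.items()}
# details = {name: client.get_details(run) for name, run in runs.items()}
```

Submitting every evaluator run before resolving any of them is what removes the sequential latency reported above; the timeout is applied only at the point where results are awaited.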
--------- Co-authored-by: Clement Wang <47586720+wangchao1230@users.noreply.github.com> --- .../promptflow/_sdk/_constants.py | 1 + .../_sdk/_utilities/general_utils.py | 13 +- .../promptflow/evals/_constants.py | 8 +- .../evaluate/_batch_run_client/__init__.py | 8 + .../_batch_run_client/batch_run_context.py | 32 + .../code_client.py | 51 +- .../_batch_run_client/proxy_client.py | 40 ++ .../evals/evaluate/_code_client/__init__.py | 3 - .../promptflow/evals/evaluate/_eval_run.py | 152 ++--- .../promptflow/evals/evaluate/_evaluate.py | 55 +- .../tests/evals/e2etests/test_evaluate.py | 30 +- .../evals/e2etests/test_metrics_upload.py | 68 +- .../evals/unittests/test_batch_run_context.py | 2 +- .../tests/evals/unittests/test_evaluate.py | 2 +- ...upload_TestMetricsUpload_test_e2e_run.yaml | 596 ------------------ 15 files changed, 255 insertions(+), 806 deletions(-) create mode 100644 src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/__init__.py create mode 100644 src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py rename src/promptflow-evals/promptflow/evals/evaluate/{_code_client => _batch_run_client}/code_client.py (80%) create mode 100644 src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py delete mode 100644 src/promptflow-evals/promptflow/evals/evaluate/_code_client/__init__.py delete mode 100644 src/promptflow-evals/tests/recordings/azure/test_metrics_upload_TestMetricsUpload_test_e2e_run.yaml diff --git a/src/promptflow-devkit/promptflow/_sdk/_constants.py b/src/promptflow-devkit/promptflow/_sdk/_constants.py index 6bddceb87fb..16ff8996a36 100644 --- a/src/promptflow-devkit/promptflow/_sdk/_constants.py +++ b/src/promptflow-devkit/promptflow/_sdk/_constants.py @@ -98,6 +98,7 @@ def _prepare_home_dir() -> Path: PF_TRACE_CONTEXT_ATTR = "attributes" PF_SERVICE_DEBUG = "PF_SERVICE_DEBUG" PF_SYSTEM_METRICS_PREFIX = "__pf__" +PF_FLOW_ENTRY_IN_TMP = "PF_FLOW_ENTRY_IN_TMP" LOCAL_MGMT_DB_PATH = (HOME_PROMPT_FLOW_DIR / "pf.sqlite").resolve() LOCAL_MGMT_DB_SESSION_ACQUIRE_LOCK_PATH = (HOME_PROMPT_FLOW_DIR / "pf.sqlite.lock").resolve() diff --git a/src/promptflow-devkit/promptflow/_sdk/_utilities/general_utils.py b/src/promptflow-devkit/promptflow/_sdk/_utilities/general_utils.py index 51e88a0f3cf..4adacdd9b78 100644 --- a/src/promptflow-devkit/promptflow/_sdk/_utilities/general_utils.py +++ b/src/promptflow-devkit/promptflow/_sdk/_utilities/general_utils.py @@ -53,6 +53,7 @@ NODE, NODE_VARIANTS, NODES, + PF_FLOW_ENTRY_IN_TMP, PROMPT_FLOW_DIR_NAME, REFRESH_CONNECTIONS_DIR_LOCK_PATH, REGISTRY_URI_PREFIX, @@ -1019,8 +1020,16 @@ def create_temp_flex_flow_yaml_core(entry: Union[str, PathLike, Callable], code: logger.warning(f"Found existing {flow_yaml_path.as_posix()}, will not respect it in runtime.") with open(flow_yaml_path, "r", encoding=DEFAULT_ENCODING) as f: existing_content = f.read() - if not is_local_module(entry_string=entry, code=code): - logger.debug(f"Entry {entry} is not found in local, it's snapshot will be empty.") + + create_yaml_in_tmp = False + if os.environ.get(PF_FLOW_ENTRY_IN_TMP, "False").lower() == "true": + logger.debug("PF_FLOW_ENTRY_IN_TMP is set to true, its snapshot will be empty.") + create_yaml_in_tmp = True + elif not is_local_module(entry_string=entry, code=code): + logger.debug(f"Entry {entry} is not found in local, its snapshot will be empty.") + create_yaml_in_tmp = True + + if create_yaml_in_tmp: # make sure run name is from entry instead of random folder name temp_dir = 
tempfile.mkdtemp(prefix=_sanitize_python_variable_name(entry) + "_") flow_yaml_path = Path(temp_dir) / FLOW_FLEX_YAML diff --git a/src/promptflow-evals/promptflow/evals/_constants.py b/src/promptflow-evals/promptflow/evals/_constants.py index 02d48d1f9b9..c31b88322e3 100644 --- a/src/promptflow-evals/promptflow/evals/_constants.py +++ b/src/promptflow-evals/promptflow/evals/_constants.py @@ -14,11 +14,13 @@ class EvaluationMetrics: class Prefixes: - _INPUTS = 'inputs.' - _OUTPUTS = 'outputs.' - _TGT_OUTPUTS = '__outputs.' + _INPUTS = "inputs." + _OUTPUTS = "outputs." + _TGT_OUTPUTS = "__outputs." DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json" CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT = 4 + +BATCH_RUN_TIMEOUT = 3600 diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/__init__.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/__init__.py new file mode 100644 index 00000000000..5f811e4513a --- /dev/null +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/__init__.py @@ -0,0 +1,8 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +from .batch_run_context import BatchRunContext +from .code_client import CodeClient +from .proxy_client import ProxyClient + +__all__ = ["CodeClient", "ProxyClient", "BatchRunContext"] diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py new file mode 100644 index 00000000000..f9b55f288ff --- /dev/null +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py @@ -0,0 +1,32 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- +import os + +from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP +from promptflow._utils.user_agent_utils import ClientUserAgentUtil +from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api + +from ..._user_agent import USER_AGENT +from .code_client import CodeClient +from .proxy_client import ProxyClient + + +class BatchRunContext: + def __init__(self, client): + self.client = client + + def __enter__(self): + if isinstance(self.client, CodeClient): + ClientUserAgentUtil.append_user_agent(USER_AGENT) + inject_openai_api() + + if isinstance(self.client, ProxyClient): + os.environ[PF_FLOW_ENTRY_IN_TMP] = "true" + + def __exit__(self, exc_type, exc_val, exc_tb): + if isinstance(self.client, CodeClient): + recover_openai_api() + + if isinstance(self.client, ProxyClient): + os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None) diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_code_client/code_client.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py similarity index 80% rename from src/promptflow-evals/promptflow/evals/evaluate/_code_client/code_client.py rename to src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py index 0c72380c434..10336f80ecb 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_code_client/code_client.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py @@ -7,31 +7,15 @@ import pandas as pd -from promptflow._utils.user_agent_utils import ClientUserAgentUtil -from promptflow.evals.evaluate._utils import _apply_column_mapping, load_jsonl, _has_aggregator -from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor -from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api from promptflow.contracts.types import AttrDict +from promptflow.evals.evaluate._utils import _apply_column_mapping, _has_aggregator, load_jsonl +from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor -from ..._user_agent import USER_AGENT +from ..._constants import BATCH_RUN_TIMEOUT LOGGER = logging.getLogger(__name__) -class BatchRunContext: - def __init__(self, client): - self.client = client - - def __enter__(self): - if isinstance(self.client, CodeClient): - ClientUserAgentUtil.append_user_agent(USER_AGENT) - inject_openai_api() - - def __exit__(self, exc_type, exc_val, exc_tb): - if isinstance(self.client, CodeClient): - recover_openai_api() - - class CodeRun: def __init__(self, run, input_data, evaluator_name=None, aggregated_metrics=None, **kwargs): self.run = run @@ -40,22 +24,27 @@ def __init__(self, run, input_data, evaluator_name=None, aggregated_metrics=None self.aggregated_metrics = aggregated_metrics def get_result_df(self, exclude_inputs=False): - result_df = self.run.result(timeout=60 * 60) + result_df = self.run.result(timeout=BATCH_RUN_TIMEOUT) if exclude_inputs: result_df = result_df.drop(columns=[col for col in result_df.columns if col.startswith("inputs.")]) return result_df def get_aggregated_metrics(self): try: - aggregated_metrics = self.aggregated_metrics.result(timeout=60 * 60) \ - if self.aggregated_metrics is not None else None + aggregated_metrics = ( + self.aggregated_metrics.result(timeout=BATCH_RUN_TIMEOUT) + if self.aggregated_metrics is not None + else None + ) except Exception as ex: LOGGER.debug(f"Error calculating metrics for evaluator {self.evaluator_name}, 
failed with error {str(ex)}") aggregated_metrics = None if not isinstance(aggregated_metrics, dict): - LOGGER.warning(f"Aggregated metrics for evaluator {self.evaluator_name}" - f" is not a dictionary will not be logged as metrics") + LOGGER.warning( + f"Aggregated metrics for evaluator {self.evaluator_name}" + f" is not a dictionary will not be logged as metrics" + ) aggregated_metrics = aggregated_metrics if isinstance(aggregated_metrics, dict) else {} @@ -71,8 +60,11 @@ def _calculate_metric(self, evaluator, input_df, column_mapping, evaluator_name) row_metric_results = [] input_df = _apply_column_mapping(input_df, column_mapping) # Ignoring args and kwargs from the signature since they are usually catching extra arguments - parameters = {param.name for param in inspect.signature(evaluator).parameters.values() - if param.name not in ['args', 'kwargs']} + parameters = { + param.name + for param in inspect.signature(evaluator).parameters.values() + if param.name not in ["args", "kwargs"] + } for value in input_df.to_dict("records"): # Filter out only the parameters that are present in the input data # if no parameters then pass data as is @@ -83,7 +75,7 @@ def _calculate_metric(self, evaluator, input_df, column_mapping, evaluator_name) try: result = row_metric_future.result() if not isinstance(result, dict): - result = {'output': result} + result = {"output": result} row_metric_results.append(result) except Exception as ex: # pylint: disable=broad-except msg_1 = f"Error calculating value for row {row_number} for metric {evaluator_name}, " @@ -114,8 +106,9 @@ def _calculate_aggregations(self, evaluator, run): aggregated_output = aggr_func(aggregate_input) return aggregated_output except Exception as ex: - LOGGER.warning(f"Error calculating aggregations for evaluator {run.evaluator_name}," - f" failed with error {str(ex)}") + LOGGER.warning( + f"Error calculating aggregations for evaluator {run.evaluator_name}," f" failed with error {str(ex)}" + ) return None def run(self, flow, data, evaluator_name=None, column_mapping=None, **kwargs): diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py new file mode 100644 index 00000000000..4c58b729483 --- /dev/null +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py @@ -0,0 +1,40 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- +import logging + +import numpy as np + +from promptflow.client import PFClient +from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor + +from ..._constants import BATCH_RUN_TIMEOUT + +LOGGER = logging.getLogger(__name__) + + +class ProxyRun: + def __init__(self, run, **kwargs): + self.run = run + + +class ProxyClient: + def __init__(self, pf_client: PFClient): + self._pf_client = pf_client + self._thread_pool = ThreadPoolExecutor(thread_name_prefix="evaluators_thread") + + def run(self, flow, data, column_mapping=None, **kwargs): + eval_future = self._thread_pool.submit( + self._pf_client.run, flow, data=data, column_mapping=column_mapping, **kwargs + ) + return ProxyRun(run=eval_future) + + def get_details(self, proxy_run, all_results=False): + run = proxy_run.run.result(timeout=BATCH_RUN_TIMEOUT) + result_df = self._pf_client.get_details(run, all_results=all_results) + result_df.replace("(Failed)", np.nan, inplace=True) + return result_df + + def get_metrics(self, proxy_run): + run = proxy_run.run.result(timeout=BATCH_RUN_TIMEOUT) + return self._pf_client.get_metrics(run) diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_code_client/__init__.py b/src/promptflow-evals/promptflow/evals/evaluate/_code_client/__init__.py deleted file mode 100644 index 1adb9bbdf71..00000000000 --- a/src/promptflow-evals/promptflow/evals/evaluate/_code_client/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .code_client import BatchRunContext, CodeClient - -__all__ = ["CodeClient", "BatchRunContext"] diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_eval_run.py b/src/promptflow-evals/promptflow/evals/evaluate/_eval_run.py index a456fd45258..7ba60c72c50 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_eval_run.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_eval_run.py @@ -1,38 +1,37 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- -from typing import Any, Dict, Optional, Type - import dataclasses import json import logging import os import posixpath -import requests +import time import uuid +from typing import Any, Dict, Optional, Type +from urllib.parse import urlparse -from azure.ai.ml import MLClient +import requests from azure.storage.blob import BlobClient from requests.adapters import HTTPAdapter -from urllib.parse import urlparse from urllib3.util.retry import Retry from promptflow.evals._version import VERSION -import time LOGGER = logging.getLogger(__name__) @dataclasses.dataclass -class RunInfo(): +class RunInfo: """ A holder for run info, needed for logging. """ + run_id: str experiment_id: str @staticmethod - def generate() -> 'RunInfo': + def generate() -> "RunInfo": """ Generate the new RunInfo instance with the RunID and Experiment ID. """ @@ -44,6 +43,7 @@ def generate() -> 'RunInfo': class Singleton(type): """Singleton class, which will be used as a metaclass.""" + _instances = {} def __call__(cls, *args, **kwargs): @@ -63,7 +63,7 @@ def destroy(cls: Type) -> None: class EvalRun(metaclass=Singleton): - ''' + """ The simple singleton run class, used for accessing artifact store. :param run_name: The name of the run. @@ -78,21 +78,22 @@ class EvalRun(metaclass=Singleton): :type workspace_name: str :param ml_client: The ml client used for authentication into Azure. 
:type ml_client: MLClient - ''' + """ _MAX_RETRIES = 5 _BACKOFF_FACTOR = 2 _TIMEOUT = 5 _SCOPE = "https://management.azure.com/.default" - def __init__(self, - run_name: Optional[str], - tracking_uri: str, - subscription_id: str, - group_name: str, - workspace_name: str, - ml_client: MLClient - ): + def __init__( + self, + run_name: Optional[str], + tracking_uri: str, + subscription_id: str, + group_name: str, + workspace_name: str, + ml_client, + ): """ Constructor """ @@ -101,7 +102,7 @@ def __init__(self, self._subscription_id: str = subscription_id self._resource_group_name: str = group_name self._workspace_name: str = workspace_name - self._ml_client: MLClient = ml_client + self._ml_client = ml_client self._url_base = urlparse(self._tracking_uri).netloc self._is_broken = self._start_run() self._is_terminated = False @@ -117,9 +118,7 @@ def _get_scope(self): :rtype: str """ return ( - "/subscriptions/{}/resourceGroups/{}/providers" - "/Microsoft.MachineLearningServices" - "/workspaces/{}" + "/subscriptions/{}/resourceGroups/{}/providers" "/Microsoft.MachineLearningServices" "/workspaces/{}" ).format( self._subscription_id, self._resource_group_name, @@ -133,34 +132,25 @@ def _start_run(self) -> bool: marked as broken and the logging will be switched off. :returns: True if the run has started and False otherwise. """ - url = ( - f"https://{self._url_base}/mlflow/v2.0" - f"{self._get_scope()}/api/2.0/mlflow/runs/create") + url = f"https://{self._url_base}/mlflow/v2.0" f"{self._get_scope()}/api/2.0/mlflow/runs/create" body = { "experiment_id": "0", "user_id": "promptflow-evals", "start_time": int(time.time() * 1000), - "tags": [ - { - "key": "mlflow.user", - "value": "promptflow-evals" - } - ] + "tags": [{"key": "mlflow.user", "value": "promptflow-evals"}], } - response = self.request_with_retry( - url=url, - method='POST', - json_dict=body - ) + response = self.request_with_retry(url=url, method="POST", json_dict=body) if response.status_code != 200: self.info = RunInfo.generate() - LOGGER.error(f"The run failed to start: {response.status_code}: {response.text}." - "The results will be saved locally, but will not be logged to Azure.") + LOGGER.error( + f"The run failed to start: {response.status_code}: {response.text}." + "The results will be saved locally, but will not be logged to Azure." + ) return True parsed_response = response.json() self.info = RunInfo( - run_id=parsed_response['run']['info']['run_id'], - experiment_id=parsed_response['run']['info']['experiment_id'], + run_id=parsed_response["run"]["info"]["run_id"], + experiment_id=parsed_response["run"]["info"]["experiment_id"], ) return False @@ -174,28 +164,22 @@ def end_run(self, status: str) -> None: """ if status not in ("FINISHED", "FAILED", "KILLED"): raise ValueError( - f"Incorrect terminal status {status}. " - "Valid statuses are \"FINISHED\", \"FAILED\" and \"KILLED\".") + f"Incorrect terminal status {status}. " 'Valid statuses are "FINISHED", "FAILED" and "KILLED".' 
+ ) if self._is_terminated: LOGGER.warning("Unable to stop run because it was already terminated.") return if self._is_broken: LOGGER.error("Unable to stop run because the run failed to start.") return - url = ( - f"https://{self._url_base}/mlflow/v2.0" - f"{self._get_scope()}/api/2.0/mlflow/runs/update") + url = f"https://{self._url_base}/mlflow/v2.0" f"{self._get_scope()}/api/2.0/mlflow/runs/update" body = { "run_uuid": self.info.run_id, "status": status, "end_time": int(time.time() * 1000), - "run_id": self.info.run_id + "run_id": self.info.run_id, } - response = self.request_with_retry( - url=url, - method='POST', - json_dict=body - ) + response = self.request_with_retry(url=url, method="POST", json_dict=body) if response.status_code != 200: LOGGER.error("Unable to terminate the run.") Singleton.destroy(EvalRun) @@ -209,25 +193,20 @@ def get_run_history_uri(self) -> str: f"https://{self._url_base}" "/history/v1.0" f"{self._get_scope()}" - f'/experimentids/{self.info.experiment_id}/runs/{self.info.run_id}' + f"/experimentids/{self.info.experiment_id}/runs/{self.info.run_id}" ) def get_artifacts_uri(self) -> str: """ Returns the url to upload the artifacts. """ - return self.get_run_history_uri() + '/artifacts/batch/metadata' + return self.get_run_history_uri() + "/artifacts/batch/metadata" def get_metrics_url(self): """ Return the url needed to track the mlflow metrics. """ - return ( - f"https://{self._url_base}" - "/mlflow/v2.0" - f"{self._get_scope()}" - f'/api/2.0/mlflow/runs/log-metric' - ) + return f"https://{self._url_base}" "/mlflow/v2.0" f"{self._get_scope()}" f"/api/2.0/mlflow/runs/log-metric" def _get_token(self): """The simple method to get token from the MLClient.""" @@ -237,11 +216,7 @@ def _get_token(self): return self._ml_client._credential.get_token(EvalRun._SCOPE) def request_with_retry( - self, - url: str, - method: str, - json_dict: Dict[str, Any], - headers: Optional[Dict[str, str]] = None + self, url: str, method: str, json_dict: Dict[str, Any], headers: Optional[Dict[str, str]] = None ) -> requests.Response: """ Send the request with retries. @@ -258,8 +233,8 @@ def request_with_retry( """ if headers is None: headers = {} - headers['User-Agent'] = f'promptflow/{VERSION}' - headers['Authorization'] = f'Bearer {self._get_token().token}' + headers["User-Agent"] = f"promptflow/{VERSION}" + headers["Authorization"] = f"Bearer {self._get_token().token}" retry = Retry( total=EvalRun._MAX_RETRIES, connect=EvalRun._MAX_RETRIES, @@ -268,18 +243,12 @@ def request_with_retry( status=EvalRun._MAX_RETRIES, status_forcelist=(408, 429, 500, 502, 503, 504), backoff_factor=EvalRun._BACKOFF_FACTOR, - allowed_methods=None + allowed_methods=None, ) adapter = HTTPAdapter(max_retries=retry) session = requests.Session() session.mount("https://", adapter) - return session.request( - method, - url, - headers=headers, - json=json_dict, - timeout=EvalRun._TIMEOUT - ) + return session.request(method, url, headers=headers, json=json_dict, timeout=EvalRun._TIMEOUT) def _log_error(self, failed_op: str, response: requests.Response) -> None: """ @@ -318,42 +287,39 @@ def log_artifact(self, artifact_folder: str) -> None: return # First we will list the files and the appropriate remote paths for them. 
upload_path = os.path.basename(os.path.normpath(artifact_folder)) - remote_paths = {'paths': []} + remote_paths = {"paths": []} local_paths = [] for (root, _, filenames) in os.walk(artifact_folder): if root != artifact_folder: rel_path = os.path.relpath(root, artifact_folder) - if rel_path != '.': + if rel_path != ".": upload_path = posixpath.join(upload_path, rel_path) for f in filenames: remote_file_path = posixpath.join(upload_path, f) - remote_paths['paths'].append({'path': remote_file_path}) + remote_paths["paths"].append({"path": remote_file_path}) local_file_path = os.path.join(root, f) local_paths.append(local_file_path) # Now we need to reserve the space for files in the artifact store. headers = { - 'Content-Type': "application/json", - 'Accept': "application/json", - 'Content-Length': str(len(json.dumps(remote_paths))), - 'x-ms-client-request-id': str(uuid.uuid1()), + "Content-Type": "application/json", + "Accept": "application/json", + "Content-Length": str(len(json.dumps(remote_paths))), + "x-ms-client-request-id": str(uuid.uuid1()), } response = self.request_with_retry( - url=self.get_artifacts_uri(), - method='POST', - json_dict=remote_paths, - headers=headers + url=self.get_artifacts_uri(), method="POST", json_dict=remote_paths, headers=headers ) if response.status_code != 200: self._log_error("allocate Blob for the artifact", response) return - empty_artifacts = response.json()['artifactContentInformation'] + empty_artifacts = response.json()["artifactContentInformation"] # The response from Azure contains the URL with SAS, that allows to upload file to the # artifact store. - for local, remote in zip(local_paths, remote_paths['paths']): - artifact_loc = empty_artifacts[remote['path']] - blob_client = BlobClient.from_blob_url(artifact_loc['contentUri'], max_single_put_size=32 * 1024 * 1024) - with open(local, 'rb') as fp: + for local, remote in zip(local_paths, remote_paths["paths"]): + artifact_loc = empty_artifacts[remote["path"]] + blob_client = BlobClient.from_blob_url(artifact_loc["contentUri"], max_single_put_size=32 * 1024 * 1024) + with open(local, "rb") as fp: blob_client.upload_blob(fp) def log_metric(self, key: str, value: float) -> None: @@ -374,15 +340,15 @@ def log_metric(self, key: str, value: float) -> None: "value": value, "timestamp": int(time.time() * 1000), "step": 0, - "run_id": self.info.run_id + "run_id": self.info.run_id, } response = self.request_with_retry( url=self.get_metrics_url(), - method='POST', + method="POST", json_dict=body, ) if response.status_code != 200: - self._log_error('save metrics', response) + self._log_error("save metrics", response) @staticmethod def get_instance(*args, **kwargs) -> "EvalRun": diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py b/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py index b0261cf3bbc..74496440cf8 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py @@ -9,19 +9,19 @@ import pandas as pd from promptflow._sdk._constants import LINE_NUMBER +from promptflow._sdk._telemetry import ActivityType, log_activity +from promptflow._sdk._telemetry.telemetry import get_telemetry_logger from promptflow.client import PFClient from .._constants import CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT, EvaluationMetrics, Prefixes from .._user_agent import USER_AGENT -from ._code_client import BatchRunContext, CodeClient +from ._batch_run_client import BatchRunContext, CodeClient, ProxyClient from ._utils 
import ( _apply_column_mapping, _log_metrics_and_instance_results, _trace_destination_from_project_scope, _write_output, ) -from promptflow._sdk._telemetry import ActivityType, log_activity -from promptflow._sdk._telemetry.telemetry import get_telemetry_logger def _aggregate_metrics(df, evaluators) -> Dict[str, float]: @@ -42,10 +42,10 @@ def _aggregate_metrics(df, evaluators) -> Dict[str, float]: # Check the namespace of the evaluator module = inspect.getmodule(evaluators[evaluator_name]) if ( - module and - module.__name__.startswith("promptflow.evals.evaluators.") and - metric_name.endswith("_score") and - metric_name.replace("_score", "") in content_safety_metrics + module + and module.__name__.startswith("promptflow.evals.evaluators.") + and metric_name.endswith("_score") + and metric_name.replace("_score", "") in content_safety_metrics ): content_safety_cols.append(col) @@ -53,10 +53,10 @@ def _aggregate_metrics(df, evaluators) -> Dict[str, float]: defect_rates = {} for col in content_safety_df.columns: defect_rate_name = col.replace("_score", "_defect_rate") - col_with_numeric_values = pd.to_numeric(content_safety_df[col], errors='coerce') + col_with_numeric_values = pd.to_numeric(content_safety_df[col], errors="coerce") defect_rates[defect_rate_name] = round( - np.sum(col_with_numeric_values >= CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT) / - col_with_numeric_values.count(), + np.sum(col_with_numeric_values >= CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT) + / col_with_numeric_values.count(), 2, ) @@ -115,8 +115,7 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_proj try: initial_data_df = pd.read_json(data, lines=True) except Exception as e: - raise ValueError( - f"Failed to load data from {data}. Please validate it is a valid jsonl data. Error: {str(e)}.") + raise ValueError(f"Failed to load data from {data}. Please validate it is a valid jsonl data. Error: {str(e)}.") return initial_data_df @@ -155,13 +154,14 @@ def _validate_columns( _validate_input_data_for_evaluator(evaluator, evaluator_name, new_df) -def _apply_target_to_data(target: Callable, - data: str, - pf_client: PFClient, - initial_data: pd.DataFrame, - evaluation_name: Optional[str] = None, - _run_name: Optional[str] = None) -> Tuple[pd.DataFrame, - Set[str]]: +def _apply_target_to_data( + target: Callable, + data: str, + pf_client: PFClient, + initial_data: pd.DataFrame, + evaluation_name: Optional[str] = None, + _run_name: Optional[str] = None, +) -> Tuple[pd.DataFrame, Set[str]]: """ Apply the target function to the data set and return updated data and generated columns. 
@@ -187,12 +187,12 @@ def _apply_target_to_data(target: Callable, data=data, properties={"runType": "eval_run", "isEvaluatorRun": "true"}, stream=True, - name=_run_name + name=_run_name, ) target_output = pf_client.runs.get_details(run, all_results=True) # Remove input and output prefix generated_columns = { - col[len(Prefixes._OUTPUTS):] for col in target_output.columns if col.startswith(Prefixes._OUTPUTS) + col[len(Prefixes._OUTPUTS) :] for col in target_output.columns if col.startswith(Prefixes._OUTPUTS) } # Sort output by line numbers target_output.set_index(f"inputs.{LINE_NUMBER}", inplace=True) @@ -358,8 +358,7 @@ def evaluate( target_generated_columns = set() if data is not None and target is not None: input_data_df, target_generated_columns, target_run = _apply_target_to_data( - target, data, pf_client, input_data_df, evaluation_name, - _run_name=kwargs.get('_run_name') + target, data, pf_client, input_data_df, evaluation_name, _run_name=kwargs.get("_run_name") ) # Make sure, the default is always in the configuration. @@ -386,8 +385,8 @@ def evaluate( # Batch Run evaluators_info = {} - use_thread_pool = kwargs.get("_use_thread_pool", True) - batch_run_client = CodeClient() if use_thread_pool else pf_client + use_pf_client = kwargs.get("_use_pf_client", True) + batch_run_client = ProxyClient(pf_client) if use_pf_client else CodeClient() with BatchRunContext(batch_run_client): for evaluator_name, evaluator in evaluators.items(): @@ -397,7 +396,7 @@ def evaluate( run=target_run, evaluator_name=evaluator_name, column_mapping=evaluator_config.get(evaluator_name, evaluator_config.get("default", None)), - data=input_data_df if use_thread_pool else data, + data=input_data_df if isinstance(batch_run_client, CodeClient) else data, stream=True, ) @@ -444,9 +443,7 @@ def evaluate( metrics = _aggregate_metrics(evaluators_result_df, evaluators) metrics.update(evaluators_metric) - studio_url = _log_metrics_and_instance_results( - metrics, result_df, trace_destination, target_run - ) + studio_url = _log_metrics_and_instance_results(metrics, result_df, trace_destination, target_run) result = {"rows": result_df.to_dict("records"), "metrics": metrics, "studio_url": studio_url} diff --git a/src/promptflow-evals/tests/evals/e2etests/test_evaluate.py b/src/promptflow-evals/tests/evals/e2etests/test_evaluate.py index 4842026fafa..08403e21c98 100644 --- a/src/promptflow-evals/tests/evals/e2etests/test_evaluate.py +++ b/src/promptflow-evals/tests/evals/e2etests/test_evaluate.py @@ -71,7 +71,7 @@ def _get_run_from_run_history(flow_run_id, runs_operation): raise Exception(f"Failed to get run from service. 
Code: {response.status_code}, text: {response.text}") -@pytest.mark.usefixtures("recording_injection", "vcr_recording") +@pytest.mark.usefixtures("recording_injection") @pytest.mark.e2etest class TestEvaluate: def test_evaluate_with_groundedness_evaluator(self, model_config, data_file): @@ -145,16 +145,18 @@ def test_evaluate_with_content_safety_evaluator(self, project_scope, data_file, assert 0 <= metrics.get("content_safety.self_harm_defect_rate") <= 1 assert 0 <= metrics.get("content_safety.hate_unfairness_defect_rate") <= 1 - @pytest.mark.parametrize('use_thread_pool,function,column', [ - (True, answer_evaluator, 'length'), - (False, answer_evaluator, 'length'), - (True, answer_evaluator_int, 'output'), - (False, answer_evaluator_int, 'output'), - (True, answer_evaluator_int_dict, "42"), - (False, answer_evaluator_int_dict, "42"), - ]) - def test_evaluate_python_function(self, data_file, use_thread_pool, - function, column): + @pytest.mark.parametrize( + "use_thread_pool,function,column", + [ + (True, answer_evaluator, "length"), + (False, answer_evaluator, "length"), + (True, answer_evaluator_int, "output"), + (False, answer_evaluator_int, "output"), + (True, answer_evaluator_int_dict, "42"), + (False, answer_evaluator_int_dict, "42"), + ], + ) + def test_evaluate_python_function(self, data_file, use_thread_pool, function, column): # data input_data = pd.read_json(data_file, lines=True) @@ -393,8 +395,7 @@ def test_evaluate_aggregation_with_threadpool(self, data_file, return_json, aggr result = evaluate( data=data_file, evaluators={ - "answer_length": AnswerLength( - return_json=return_json, aggregate_return_json=aggregate_return_json), + "answer_length": AnswerLength(return_json=return_json, aggregate_return_json=aggregate_return_json), "f1_score": F1ScoreEvaluator(), }, ) @@ -418,8 +419,7 @@ def test_evaluate_aggregation(self, data_file, return_json, aggregate_return_jso result = evaluate( data=data_file, evaluators={ - "answer_length": AnswerLength( - return_json=return_json, aggregate_return_json=aggregate_return_json), + "answer_length": AnswerLength(return_json=return_json, aggregate_return_json=aggregate_return_json), "f1_score": F1ScoreEvaluator(), }, _use_thread_pool=False, diff --git a/src/promptflow-evals/tests/evals/e2etests/test_metrics_upload.py b/src/promptflow-evals/tests/evals/e2etests/test_metrics_upload.py index 0f5914583ba..bc0555b17da 100644 --- a/src/promptflow-evals/tests/evals/e2etests/test_metrics_upload.py +++ b/src/promptflow-evals/tests/evals/e2etests/test_metrics_upload.py @@ -2,13 +2,14 @@ import logging import os import pathlib +from unittest.mock import MagicMock, patch + import pytest -from unittest.mock import patch, MagicMock from promptflow.evals.evaluate import _utils as ev_utils from promptflow.evals.evaluate._eval_run import EvalRun -from promptflow.evals.evaluators._f1_score._f1_score import F1ScoreEvaluator from promptflow.evals.evaluate._evaluate import evaluate +from promptflow.evals.evaluators._f1_score._f1_score import F1ScoreEvaluator from promptflow.recording.record_mode import is_live @@ -33,17 +34,18 @@ def questions_file(): @pytest.fixture def setup_data(azure_pf_client, project_scope): run = EvalRun( - run_name='test', + run_name="test", tracking_uri=( - 'https://eastus2.api.azureml.ms/mlflow/v2.0' + "https://eastus2.api.azureml.ms/mlflow/v2.0" f'/subscriptions{project_scope["subscription_id"]}' f'/resourceGroups/{project_scope["resource_group_name"]}' - '/providers/Microsoft.MachineLearningServices' - 
f'/workspaces/{project_scope["project_name"]}'), + "/providers/Microsoft.MachineLearningServices" + f'/workspaces/{project_scope["project_name"]}' + ), subscription_id=project_scope["subscription_id"], group_name=project_scope["resource_group_name"], workspace_name=project_scope["project_name"], - ml_client=azure_pf_client._ml_client + ml_client=azure_pf_client._ml_client, ) yield run.end_run("FINISHED") @@ -59,9 +61,11 @@ def _assert_no_errors_for_module(self, records, module_names): error_messages = [] if records: error_messages = [ - lg_rec.message for lg_rec in records if lg_rec.levelno == logging.ERROR and ( - lg_rec.name in module_names)] - assert not error_messages, '\n'.join(error_messages) + lg_rec.message + for lg_rec in records + if lg_rec.levelno == logging.ERROR and (lg_rec.name in module_names) + ] + assert not error_messages, "\n".join(error_messages) @pytest.mark.usefixtures("vcr_recording") def test_writing_to_run_history(self, setup_data, caplog): @@ -74,11 +78,11 @@ def test_writing_to_run_history(self, setup_data, caplog): # Just for sanity check let us make sure that the logging actually works mock_response = MagicMock() mock_response.status_code = 418 - with patch('promptflow.evals.evaluate._eval_run.EvalRun.request_with_retry', return_value=mock_response): - ev_utils._write_properties_to_run_history({'test': 42}) - assert any(lg_rec.levelno == logging.ERROR for lg_rec in caplog.records), 'The error log was not captured!' + with patch("promptflow.evals.evaluate._eval_run.EvalRun.request_with_retry", return_value=mock_response): + ev_utils._write_properties_to_run_history({"test": 42}) + assert any(lg_rec.levelno == logging.ERROR for lg_rec in caplog.records), "The error log was not captured!" caplog.clear() - ev_utils._write_properties_to_run_history({'test': 42}) + ev_utils._write_properties_to_run_history({"test": 42}) self._assert_no_errors_for_module(caplog.records, [ev_utils.__name__]) @pytest.mark.usefixtures("vcr_recording") @@ -92,11 +96,11 @@ def test_logging_metrics(self, setup_data, caplog): ev_run = EvalRun.get_instance() mock_response = MagicMock() mock_response.status_code = 418 - with patch('promptflow.evals.evaluate._eval_run.EvalRun.request_with_retry', return_value=mock_response): - ev_run.log_metric('f1', 0.54) - assert any(lg_rec.levelno == logging.ERROR for lg_rec in caplog.records), 'The error log was not captured!' + with patch("promptflow.evals.evaluate._eval_run.EvalRun.request_with_retry", return_value=mock_response): + ev_run.log_metric("f1", 0.54) + assert any(lg_rec.levelno == logging.ERROR for lg_rec in caplog.records), "The error log was not captured!" 
caplog.clear() - ev_run.log_metric('f1', 0.54) + ev_run.log_metric("f1", 0.54) self._assert_no_errors_for_module(caplog.records, EvalRun.__module__) @pytest.mark.usefixtures("vcr_recording") @@ -110,20 +114,21 @@ def test_log_artifact(self, setup_data, caplog, tmp_path): ev_run = EvalRun.get_instance() mock_response = MagicMock() mock_response.status_code = 418 - with open(os.path.join(tmp_path, 'test.json'), 'w') as fp: - json.dump({'f1': 0.5}, fp) - os.makedirs(os.path.join(tmp_path, 'internal_dir'), exist_ok=True) - with open(os.path.join(tmp_path, 'internal_dir', 'test.json'), 'w') as fp: - json.dump({'internal_f1': 0.6}, fp) - with patch('promptflow.evals.evaluate._eval_run.EvalRun.request_with_retry', return_value=mock_response): + with open(os.path.join(tmp_path, "test.json"), "w") as fp: + json.dump({"f1": 0.5}, fp) + os.makedirs(os.path.join(tmp_path, "internal_dir"), exist_ok=True) + with open(os.path.join(tmp_path, "internal_dir", "test.json"), "w") as fp: + json.dump({"internal_f1": 0.6}, fp) + with patch("promptflow.evals.evaluate._eval_run.EvalRun.request_with_retry", return_value=mock_response): ev_run.log_artifact(tmp_path) - assert any(lg_rec.levelno == logging.ERROR for lg_rec in caplog.records), 'The error log was not captured!' + assert any(lg_rec.levelno == logging.ERROR for lg_rec in caplog.records), "The error log was not captured!" caplog.clear() ev_run.log_artifact(tmp_path) self._assert_no_errors_for_module(caplog.records, EvalRun.__module__) - @pytest.mark.skipif(condition=not is_live(), - reason="promptflow run create files with random names, which cannot be recorded.") + @pytest.mark.skipif( + condition=not is_live(), reason="promptflow run create files with random names, which cannot be recorded." + ) @pytest.mark.usefixtures("vcr_recording") def test_e2e_run_target_fn(self, caplog, project_scope, questions_answers_file): """Test evaluation run logging.""" @@ -145,11 +150,10 @@ def test_e2e_run_target_fn(self, caplog, project_scope, questions_answers_file): target=target_fn, evaluators={"f1": f1_score_eval}, azure_ai_project=project_scope, - _run_name='eval_test_run2' + _run_name="eval_test_run2", ) self._assert_no_errors_for_module(caplog.records, (ev_utils.__name__, EvalRun.__module__)) - @pytest.mark.usefixtures("vcr_recording") def test_e2e_run(self, caplog, project_scope, questions_answers_file): """Test evaluation run logging.""" # Make sure that the URL ending in TraceSessions is in the recording, it is not always being recorded. 
@@ -162,9 +166,5 @@ def test_e2e_run(self, caplog, project_scope, questions_answers_file): # resourceGroups/00000000-0000-0000-0000-000000000000/providers/Microsoft.MachineLearningServices/ # workspaces/00000 f1_score_eval = F1ScoreEvaluator() - evaluate( - data=questions_answers_file, - evaluators={"f1": f1_score_eval}, - azure_ai_project=project_scope - ) + evaluate(data=questions_answers_file, evaluators={"f1": f1_score_eval}, azure_ai_project=project_scope) self._assert_no_errors_for_module(caplog.records, (ev_utils.__name__, EvalRun.__module__)) diff --git a/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py b/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py index 9b1f73660da..b67bba15c30 100644 --- a/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py +++ b/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py @@ -4,7 +4,7 @@ from promptflow.client import PFClient from promptflow.evals._user_agent import USER_AGENT -from promptflow.evals.evaluate._code_client import BatchRunContext, CodeClient +from promptflow.evals.evaluate._batch_run_client import BatchRunContext, CodeClient @pytest.fixture diff --git a/src/promptflow-evals/tests/evals/unittests/test_evaluate.py b/src/promptflow-evals/tests/evals/unittests/test_evaluate.py index d28384f5dc6..17dfd141daf 100644 --- a/src/promptflow-evals/tests/evals/unittests/test_evaluate.py +++ b/src/promptflow-evals/tests/evals/unittests/test_evaluate.py @@ -371,7 +371,7 @@ def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_t def test_evaluate_with_errors(self): """Test evaluate_handle_errors""" data = _get_file("yeti_questions.jsonl") - result = evaluate(data=data, evaluators={"yeti": _yeti_evaluator}, _use_thread_pool=True) + result = evaluate(data=data, evaluators={"yeti": _yeti_evaluator}) result_df = pd.DataFrame(result["rows"]) expected = pd.read_json(data, lines=True) expected.rename(columns={"question": "inputs.question", "answer": "inputs.answer"}, inplace=True) diff --git a/src/promptflow-evals/tests/recordings/azure/test_metrics_upload_TestMetricsUpload_test_e2e_run.yaml b/src/promptflow-evals/tests/recordings/azure/test_metrics_upload_TestMetricsUpload_test_e2e_run.yaml deleted file mode 100644 index 68e4435c612..00000000000 --- a/src/promptflow-evals/tests/recordings/azure/test_metrics_upload_TestMetricsUpload_test_e2e_run.yaml +++ /dev/null @@ -1,596 +0,0 @@ -interactions: -- request: - body: null - headers: - Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - azure-ai-ml/1.16.1 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.11.5 - (Windows-10-10.0.22631-SP0) - method: GET - uri: https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000 - response: - body: - string: '{"id": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000", - "name": "00000", "type": "Microsoft.MachineLearningServices/workspaces", "location": - "eastus2", "tags": {}, "etag": null, "kind": "Default", "sku": {"name": "Basic", - "tier": "Basic"}, "properties": {"discoveryUrl": "https://eastus2.api.azureml.ms/discovery", "mlFlowTrackingUri": 
"azureml://eastus2.api.azureml.ms/mlflow/v1.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000000-0000-0000-0000-000000000000/providers/Microsoft.MachineLearningServices/workspaces/00000"}}' - headers: - cache-control: - - no-cache - content-length: - - '2978' - content-type: - - application/json; charset=utf-8 - expires: - - '-1' - pragma: - - no-cache - strict-transport-security: - - max-age=31536000; includeSubDomains - vary: - - Accept-Encoding - x-cache: - - CONFIG_NOCACHE - x-content-type-options: - - nosniff - x-request-time: - - '0.031' - status: - code: 200 - message: OK -- request: - body: '[{"ver": 1, "name": "Microsoft.ApplicationInsights.Event", "time": "2024-06-06T23:20:59.838896Z", - "sampleRate": 100.0, "iKey": "00000000-0000-0000-0000-000000000000", "tags": - {"foo": "bar"}}]' - headers: - Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '927' - Content-Type: - - application/json - User-Agent: - - azsdk-python-azuremonitorclient/unknown Python/3.11.5 (Windows-10-10.0.22631-SP0) - method: POST - uri: https://dc.services.visualstudio.com/v2.1/track - response: - body: - string: '{"itemsReceived": 1, "itemsAccepted": 0, "appId": null, "errors": [{"index": - 0, "statusCode": 307, "message": "Ingestion is allowed only from stamp specific - endpoint - Location: https://eastus-8.in.applicationinsights.azure.com/v2.1/track"}]}' - headers: - cache-control: - - max-age=604800 - content-type: - - application/json; charset=utf-8 - location: - - https://eastus-8.in.applicationinsights.azure.com/v2.1/track - server: - - Microsoft-HTTPAPI/2.0 - strict-transport-security: - - max-age=31536000 - transfer-encoding: - - chunked - x-content-type-options: - - nosniff - status: - code: 307 - message: Temporary Redirect -- request: - body: null - headers: - Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - azure-ai-ml/1.16.1 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.11.5 - (Windows-10-10.0.22631-SP0) - method: GET - uri: https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000 - response: - body: - string: '{"id": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000", - "name": "00000", "type": "Microsoft.MachineLearningServices/workspaces", "location": - "eastus2", "tags": {}, "etag": null, "kind": "Default", "sku": {"name": "Basic", - "tier": "Basic"}, "properties": {"discoveryUrl": "https://eastus2.api.azureml.ms/discovery", "mlFlowTrackingUri": "azureml://eastus2.api.azureml.ms/mlflow/v1.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000000-0000-0000-0000-000000000000/providers/Microsoft.MachineLearningServices/workspaces/00000"}}' - headers: - cache-control: - - no-cache - content-length: - - '2978' - content-type: - - application/json; charset=utf-8 - expires: - - '-1' - pragma: - - no-cache - strict-transport-security: - - max-age=31536000; includeSubDomains - vary: - - Accept-Encoding - x-cache: - - CONFIG_NOCACHE - x-content-type-options: - - nosniff - x-request-time: - - '0.023' - status: - code: 200 - message: OK -- request: - body: '[{"ver": 1, "name": "Microsoft.ApplicationInsights.Event", "time": "2024-06-06T23:20:59.838896Z", - "sampleRate": 100.0, "iKey": "00000000-0000-0000-0000-000000000000", "tags": - 
{"foo": "bar"}}]' - headers: - Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '927' - Content-Type: - - application/json - User-Agent: - - azsdk-python-azuremonitorclient/unknown Python/3.11.5 (Windows-10-10.0.22631-SP0) - method: POST - uri: https://eastus-8.in.applicationinsights.azure.com/v2.1/track - response: - body: - string: '{"itemsReceived": 1, "itemsAccepted": 1, "appId": null, "errors": []}' - headers: - content-type: - - application/json; charset=utf-8 - server: - - Microsoft-HTTPAPI/2.0 - strict-transport-security: - - max-age=31536000 - transfer-encoding: - - chunked - x-content-type-options: - - nosniff - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - promptflow-azure-sdk/0.0.1.dev0 azsdk-python-azuremachinelearningdesignerserviceclient/unknown - Python/3.11.5 (Windows-10-10.0.22631-SP0) - method: GET - uri: https://eastus2.api.azureml.ms/flow/api/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/TraceSessions - response: - body: - string: '{"entityId": "a1391af3-9f38-48e8-862f-db3846e81dd4", "traceCosmosConfiguration": - "None", "traceCosmosStatus": "Initialized", "accountEndpoint": "", "databaseName": - "PromptFlowTraceSession", "resourceArmId": "", "resourceType": 1}' - headers: - connection: - - keep-alive - content-length: - - '486' - content-type: - - application/json; charset=utf-8 - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - transfer-encoding: - - chunked - vary: - - Accept-Encoding - x-content-type-options: - - nosniff - x-request-time: - - '0.533' - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - azure-ai-ml/1.16.1 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.11.5 - (Windows-10-10.0.22631-SP0) - method: GET - uri: https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000 - response: - body: - string: '{"id": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000", - "name": "00000", "type": "Microsoft.MachineLearningServices/workspaces", "location": - "eastus2", "tags": {}, "etag": null, "kind": "Default", "sku": {"name": "Basic", - "tier": "Basic"}, "properties": {"discoveryUrl": "https://eastus2.api.azureml.ms/discovery", "mlFlowTrackingUri": "azureml://eastus2.api.azureml.ms/mlflow/v1.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000000-0000-0000-0000-000000000000/providers/Microsoft.MachineLearningServices/workspaces/00000"}}' - headers: - cache-control: - - no-cache - content-length: - - '2978' - content-type: - - application/json; charset=utf-8 - expires: - - '-1' - pragma: - - no-cache - strict-transport-security: - - max-age=31536000; includeSubDomains - vary: - - Accept-Encoding - x-cache: - - CONFIG_NOCACHE - x-content-type-options: - - nosniff - x-request-time: - - '0.027' - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - azure-ai-ml/1.16.1 
azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.11.5 - (Windows-10-10.0.22631-SP0) - method: GET - uri: https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000 - response: - body: - string: '{"id": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000", - "name": "00000", "type": "Microsoft.MachineLearningServices/workspaces", "location": - "eastus2", "tags": {}, "etag": null, "kind": "Default", "sku": {"name": "Basic", - "tier": "Basic"}, "properties": {"discoveryUrl": "https://eastus2.api.azureml.ms/discovery", "mlFlowTrackingUri": "azureml://eastus2.api.azureml.ms/mlflow/v1.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000000-0000-0000-0000-000000000000/providers/Microsoft.MachineLearningServices/workspaces/00000"}}' - headers: - cache-control: - - no-cache - content-length: - - '2978' - content-type: - - application/json; charset=utf-8 - expires: - - '-1' - pragma: - - no-cache - strict-transport-security: - - max-age=31536000; includeSubDomains - vary: - - Accept-Encoding - x-cache: - - CONFIG_NOCACHE - x-content-type-options: - - nosniff - x-request-time: - - '0.031' - status: - code: 200 - message: OK -- request: - body: '{"experiment_id": "0", "user_id": "promptflow-evals", "start_time": "1717563256142", - "tags": [{"key": "mlflow.user", "value": "promptflow-evals"}]}' - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '145' - Content-Type: - - application/json - User-Agent: - - promptflow/0.0.1.dev0 - method: POST - uri: https://eastus2.api.azureml.ms/mlflow/v2.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/api/2.0/mlflow/runs/create - response: - body: - string: '{"run": {"info": {"run_uuid": "b797dd76-c228-4a0c-8ba4-0d41f4dce7ad", - "experiment_id": "5ea666e7-ae55-4060-a8a3-71e6e03fab9c", "run_name": "nice_floor_hb1b1h9t", - "user_id": "00000000-0000-0000-0000-000000000000", "status": "RUNNING", "start_time": - "1717733443454", "artifact_uri": "azureml://eastus2.api.azureml.ms/mlflow/v2.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/experiments/0/runs/b797dd76-c228-4a0c-8ba4-0d41f4dce7ad/artifacts", - "lifecycle_stage": "active", "run_id": "b797dd76-c228-4a0c-8ba4-0d41f4dce7ad"}, - "data": {"tags": [{"key": "mlflow.user", "value": "promptflow-evals"}, {"key": - "mlflow.rootRunId", "value": "b797dd76-c228-4a0c-8ba4-0d41f4dce7ad"}, {"key": - "mlflow.runName", "value": "nice_floor_hb1b1h9t"}, {"key": "mlflow.user", - "value": "Nikolay Rovinskiy"}]}, "inputs": {}}}' - headers: - connection: - - keep-alive - content-length: - - '931' - content-type: - - application/json; charset=utf-8 - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - transfer-encoding: - - chunked - vary: - - Accept-Encoding - x-content-type-options: - - nosniff - x-request-time: - - '0.351' - status: - code: 200 - message: OK -- request: - body: '{"paths": [{"path": "evaluation_results/eval_results.jsonl"}]}' - headers: - Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '62' - Content-Type: - - application/json - User-Agent: - - promptflow/0.0.1.dev0 - method: POST 
- uri: https://eastus2.api.azureml.ms/history/v1.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/experimentids/5ea666e7-ae55-4060-a8a3-71e6e03fab9c/runs/b797dd76-c228-4a0c-8ba4-0d41f4dce7ad/artifacts/batch/metadata - response: - body: - string: '{"artifacts": {"evaluation_results/eval_results.jsonl": {"artifactId": - "ExperimentRun/dcid.b797dd76-c228-4a0c-8ba4-0d41f4dce7ad/evaluation_results/eval_results.jsonl", - "origin": "ExperimentRun", "container": "dcid.b797dd76-c228-4a0c-8ba4-0d41f4dce7ad", - "path": "evaluation_results/eval_results.jsonl", "etag": null, "createdTime": - "2024-06-07T04:10:50.0015468+00:00", "dataPath": null, "tags": {}}}, "artifactContentInformation": - {"evaluation_results/eval_results.jsonl": {"contentUri": "https://nirovinswseast5028951403.blob.core.windows.net/azureml/ExperimentRun/dcid.b797dd76-c228-4a0c-8ba4-0d41f4dce7ad/evaluation_results/eval_results.jsonl?sv=2019-07-07&sr=b&sig=dRHHxYUsJlve7j73P4xhC2zP4lGoae7e0O%2FBSwjm8vE%3D&skoid=f4777969-da99-42e7-a95c-5f6396e15e83&sktid=00000000-0000-0000-0000-000000000000&skt=2024-06-07T04%3A00%3A49Z&ske=2024-06-08T12%3A10%3A49Z&sks=b&skv=2019-07-07&st=2024-06-07T04%3A00%3A49Z&se=2024-06-08T04%3A10%3A49Z&sp=rcw", - "origin": "ExperimentRun", "container": "dcid.b797dd76-c228-4a0c-8ba4-0d41f4dce7ad", - "path": "evaluation_results/eval_results.jsonl", "tags": {}}}, "errors": {}}' - headers: - connection: - - keep-alive - content-length: - - '1222' - content-type: - - application/json; charset=utf-8 - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - transfer-encoding: - - chunked - vary: - - Accept-Encoding - x-content-type-options: - - nosniff - x-request-time: - - '1.682' - status: - code: 200 - message: OK -- request: - body: "{\"inputs.question\":\"How long is flight from Earth to LV-426?\",\"inputs.answer\":\"There - is nothing good there.\",\"inputs.ground_truth\":\"39 light years\",\"outputs.f1.f1_score\":0.0,\"line_number\":0}\r\n" - headers: - Accept: - - application/xml - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '192' - Content-Type: - - application/octet-stream - If-None-Match: - - '*' - User-Agent: - - azsdk-python-storage-blob/12.20.0 Python/3.11.5 (Windows-10-10.0.22631-SP0) - x-ms-blob-type: - - BlockBlob - x-ms-date: - - Fri, 07 Jun 2024 04:10:50 GMT - x-ms-version: - - '2024-05-04' - method: PUT - uri: https://nirovinswseast5028951403.blob.core.windows.net/azureml/ExperimentRun/dcid.b797dd76-c228-4a0c-8ba4-0d41f4dce7ad/evaluation_results/eval_results.jsonl?se=2024-06-08T04%3A10%3A49Z&sig=dRHHxYUsJlve7j73P4xhC2zP4lGoae7e0O%2FBSwjm8vE%3D&ske=2024-06-08T12%3A10%3A49Z&skoid=f4777969-da99-42e7-a95c-5f6396e15e83&sks=b&skt=2024-06-07T04%3A00%3A49Z&sktid=00000000-0000-0000-0000-000000000000&skv=2019-07-07&sp=rcw&sr=b&st=2024-06-07T04%3A00%3A49Z&sv=2019-07-07 - response: - body: - string: '' - headers: - content-length: - - '0' - content-md5: - - G3n2KMFFaqFbUGDeX34xXw== - last-modified: - - Fri, 07 Jun 2024 04:10:51 GMT - server: - - Windows-Azure-Blob/1.0 Microsoft-HTTPAPI/2.0 - x-ms-content-crc64: - - 0Myc02lzbG0= - x-ms-request-server-encrypted: - - 'true' - x-ms-version: - - '2024-05-04' - status: - code: 201 - message: Created -- request: - body: '{"runId": "evals_e2etests_target_fn_wqo0_peh_20240606_102622_386974", "properties": - {"_azureml.evaluation_run": "azure-ai-generative-parent", "_azureml.evaluate_artifacts": - "[{\"path\": 
\"eval_results.jsonl\", \"type\": \"table\"}]", "isEvaluatorRun": - "true"}}' - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '240' - Content-Type: - - application/json - User-Agent: - - promptflow/0.0.1.dev0 - method: PATCH - uri: https://eastus2.api.azureml.ms/history/v1.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/experimentids/5ea666e7-ae55-4060-a8a3-71e6e03fab9c/runs/b797dd76-c228-4a0c-8ba4-0d41f4dce7ad - response: - body: - string: '{"runNumber": 1717733445, "rootRunId": "b797dd76-c228-4a0c-8ba4-0d41f4dce7ad", - "createdUtc": "2024-06-07T04:10:43.454+00:00", "createdBy": {"userObjectId": - "00000000-0000-0000-0000-000000000000", "userPuId": "1003BFFDA8A4D0E7", "userIdp": - null, "userAltSecId": null, "userIss": "https://sts.windows.net/00000000-0000-0000-0000-000000000000/", - "userTenantId": "00000000-0000-0000-0000-000000000000", "userName": "Nikolay - Rovinskiy", "upn": "username@microsoft.com"}, "userId": "00000000-0000-0000-0000-000000000000", - "token": "eyJhbGciOiJSUzI1NiIsImtpZCI6Ijk3QTcyRUQyOUNFMjMwMTQwQjVGNzFEOTkyODk2NzBDRDRGNEJFMzUiLCJ0eXAiOiJKV1QifQ.eyJyb2xlIjoiQ29udHJpYnV0b3IiLCJzY29wZSI6Ii9zdWJzY3JpcHRpb25zLzZhNmZmZjAwLTQ0NjQtNGVhYi1hNmIxLTBiNTMzYzcyMDJlMC9yZXNvdXJjZUdyb3Vwcy9uaXJvdmlucy1yZy1lYXN0dXMvcHJvdmlkZXJzL01pY3Jvc29mdC5NYWNoaW5lTGVhcm5pbmdTZXJ2aWNlcy93b3Jrc3BhY2VzL25pcm92aW5zLXdzLWVhc3R1cyIsImFjY291bnRpZCI6IjAwMDAwMDAwLTAwMDAtMDAwMC0wMDAwLTAwMDAwMDAwMDAwMCIsIndvcmtzcGFjZUlkIjoiYTEzOTFhZjMtOWYzOC00OGU4LTg2MmYtZGIzODQ2ZTgxZGQ0IiwicHJvamVjdGlkIjoiMDAwMDAwMDAtMDAwMC0wMDAwLTAwMDAtMDAwMDAwMDAwMDAwIiwiZGlzY292ZXJ5IjoidXJpOi8vZGlzY292ZXJ5dXJpLyIsInRpZCI6IjcyZjk4OGJmLTg2ZjEtNDFhZi05MWFiLTJkN2NkMDExZGI0NyIsIm9pZCI6Ijc4NzViYWE2LTAyOGQtNDljOC1iZDRkLWE3NzJhYTRkYzAxMyIsInB1aWQiOiIxMDAzQkZGREE4QTREMEU3IiwiaXNzIjoiYXp1cmVtbCIsImFwcGlkIjoiTmlrb2xheSBSb3ZpbnNraXkiLCJleHAiOjE3MTk1NTUwNDUsImF1ZCI6ImF6dXJlbWwifQ.kaxuNrkEz4ARPy6HrRPEacX_xSZtHq4yBavn8TzVcP5UgaOCPpMPDWP20kddt2heommexMarDJ22MNzCB_S4x6ZbAfbg8TqBc7eohTq67Dirl2G5hfU0LEbgGxBPsaHSgdwPcVpnV6BGb1EfFctKJk9AegfY9hBNnZjDWJxTwdDxkNnM0PrwZAHqNqEyDPCVe2BGeogIN2n9IVPf80uMkcpTJQr_SyV1KV6uhUS-8975Xj9-wK9-1_XVBOJQcWX3-kkRRl-dfGFk59w7eFJ7eku6h9mm0G7Ft2qzMiR4Zmu3yG8T5cCQYDX8k2OaxIhHqJi1awjux_BvYJuwoXfg5A", - "tokenExpiryTimeUtc": "2024-06-28T06:10:45.8846867+00:00", "error": null, - "warnings": null, "revision": 2, "statusRevision": 0, "runUuid": "9f45dee8-d84a-49b7-8b2c-e93f685c6237", - "parentRunUuid": null, "rootRunUuid": "9f45dee8-d84a-49b7-8b2c-e93f685c6237", - "lastStartTimeUtc": "2024-06-07T04:10:43.454+00:00", "currentComputeTime": - "00:00:00", "computeDuration": null, "effectiveStartTimeUtc": "2024-06-07T04:10:43.454+00:00", - "lastModifiedBy": {"userObjectId": "00000000-0000-0000-0000-000000000000", - "userPuId": "1003BFFDA8A4D0E7", "userIdp": null, "userAltSecId": null, "userIss": - "https://sts.windows.net/00000000-0000-0000-0000-000000000000/", "userTenantId": - "00000000-0000-0000-0000-000000000000", "userName": "Nikolay Rovinskiy", "upn": - "username@microsoft.com"}, "lastModifiedUtc": "2024-06-07T04:10:53.3381884+00:00", - "duration": null, "cancelationReason": null, "currentAttemptId": 1, "runId": - "b797dd76-c228-4a0c-8ba4-0d41f4dce7ad", "parentRunId": null, "experimentId": - "00000000-0000-0000-0000-000000000000", "status": "Running", "startTimeUtc": - "2024-06-07T04:10:43.454+00:00", "endTimeUtc": null, "scheduleId": null, "displayName": - 
"nice_floor_hb1b1h9t", "name": null, "dataContainerId": "dcid.b797dd76-c228-4a0c-8ba4-0d41f4dce7ad", - "description": null, "hidden": false, "runType": null, "runTypeV2": {"orchestrator": - null, "traits": ["mlflow"], "attribution": null, "computeType": null}, "properties": - {"mlflow.artifactUri": "azureml://eastus2.api.azureml.ms/mlflow/v2.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/experiments/0/runs/b797dd76-c228-4a0c-8ba4-0d41f4dce7ad/artifacts", - "_azureml.evaluation_run": "azure-ai-generative-parent", "_azureml.evaluate_artifacts": - "[{\"path\": \"eval_results.jsonl\", \"type\": \"table\"}]", "isEvaluatorRun": - "true"}, "parameters": {}, "actionUris": {}, "scriptName": null, "target": - null, "uniqueChildRunComputeTargets": [], "tags": {"mlflow.user": "promptflow-evals"}, - "settings": {}, "services": {}, "inputDatasets": [], "outputDatasets": [], - "runDefinition": null, "jobSpecification": null, "primaryMetricName": null, - "createdFrom": null, "cancelUri": null, "completeUri": null, "diagnosticsUri": - null, "computeRequest": null, "compute": null, "retainForLifetimeOfWorkspace": - false, "queueingInfo": null, "inputs": null, "outputs": null}' - headers: - connection: - - keep-alive - content-length: - - '4410' - content-type: - - application/json; charset=utf-8 - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - transfer-encoding: - - chunked - vary: - - Accept-Encoding - x-content-type-options: - - nosniff - x-request-time: - - '0.039' - status: - code: 200 - message: OK -- request: - body: '{"run_uuid": "b797dd76-c228-4a0c-8ba4-0d41f4dce7ad", "key": "f1.f1_score", - "value": 0.0, "timestamp": "1717563256242", "step": 0, "run_id": "b797dd76-c228-4a0c-8ba4-0d41f4dce7ad"}' - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '177' - Content-Type: - - application/json - User-Agent: - - promptflow/0.0.1.dev0 - method: POST - uri: https://eastus2.api.azureml.ms/mlflow/v2.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/api/2.0/mlflow/runs/log-metric - response: - body: - string: '{}' - headers: - connection: - - keep-alive - content-length: - - '3' - content-type: - - application/json; charset=utf-8 - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-content-type-options: - - nosniff - x-request-time: - - '0.717' - status: - code: 200 - message: OK -- request: - body: '{"run_uuid": "b797dd76-c228-4a0c-8ba4-0d41f4dce7ad", "status": "FINISHED", - "end_time": "1717563261483", "run_id": "b797dd76-c228-4a0c-8ba4-0d41f4dce7ad"}' - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '151' - Content-Type: - - application/json - User-Agent: - - promptflow/0.0.1.dev0 - method: POST - uri: https://eastus2.api.azureml.ms/mlflow/v2.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/api/2.0/mlflow/runs/update - response: - body: - string: '{"run_info": {"run_uuid": "b797dd76-c228-4a0c-8ba4-0d41f4dce7ad", "experiment_id": - "5ea666e7-ae55-4060-a8a3-71e6e03fab9c", "run_name": "nice_floor_hb1b1h9t", - "user_id": "00000000-0000-0000-0000-000000000000", "status": "FINISHED", "start_time": - "1717733443454", "end_time": "1717733458668", "lifecycle_stage": 
"active", - "run_id": "b797dd76-c228-4a0c-8ba4-0d41f4dce7ad"}}' - headers: - connection: - - keep-alive - content-length: - - '374' - content-type: - - application/json; charset=utf-8 - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - transfer-encoding: - - chunked - vary: - - Accept-Encoding - x-content-type-options: - - nosniff - x-request-time: - - '0.551' - status: - code: 200 - message: OK -version: 1