
Commit

Merge branch 'main' into users/ninhu/token_cache
ninghu committed Jun 18, 2024
2 parents 35be3cc + dc84948 commit 206099c
Showing 6 changed files with 69 additions and 10 deletions.
@@ -1,10 +1,11 @@
import os
import re
from pathlib import Path
from typing import Any, Dict, List

from promptflow._constants import FlowEntryRegex
from promptflow._core.entry_meta_generator import _generate_flow_meta
from promptflow._sdk._constants import FLOW_META_JSON_GEN_TIMEOUT
from promptflow._sdk._constants import FLOW_META_JSON_GEN_TIMEOUT, PF_FLOW_META_LOAD_IN_SUBPROCESS
from promptflow._utils.flow_utils import resolve_python_entry_file

from ._base_inspector_proxy import AbstractInspectorProxy
@@ -35,7 +36,7 @@ def get_entry_meta(
**kwargs,
) -> Dict[str, Any]:
timeout = kwargs.get("timeout", FLOW_META_JSON_GEN_TIMEOUT)
load_in_subprocess = kwargs.get("load_in_subprocess", True)
load_in_subprocess = os.environ.get(PF_FLOW_META_LOAD_IN_SUBPROCESS, "True").lower() == "true"

flow_dag = {"entry": entry}
# generate flow.json only for eager flow for now
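The hunk above replaces the `load_in_subprocess` keyword argument with an environment-variable check, so callers outside the function signature can disable subprocess loading of flow metadata. A minimal standalone sketch of the same toggle logic, assuming only the variable name from this diff (the helper `_should_load_in_subprocess` is illustrative, not part of the library):

import os

PF_FLOW_META_LOAD_IN_SUBPROCESS = "PF_FLOW_META_LOAD_IN_SUBPROCESS"

def _should_load_in_subprocess() -> bool:
    # Defaults to True when the variable is unset; any value other than
    # "true" (case-insensitive) disables subprocess loading.
    return os.environ.get(PF_FLOW_META_LOAD_IN_SUBPROCESS, "True").lower() == "true"

if __name__ == "__main__":
    print(_should_load_in_subprocess())  # True while the variable is unset
    os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false"
    print(_should_load_in_subprocess())  # False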
1 change: 1 addition & 0 deletions src/promptflow-devkit/promptflow/_sdk/_constants.py
@@ -99,6 +99,7 @@ def _prepare_home_dir() -> Path:
PF_SERVICE_DEBUG = "PF_SERVICE_DEBUG"
PF_SYSTEM_METRICS_PREFIX = "__pf__"
PF_FLOW_ENTRY_IN_TMP = "PF_FLOW_ENTRY_IN_TMP"
PF_FLOW_META_LOAD_IN_SUBPROCESS = "PF_FLOW_META_LOAD_IN_SUBPROCESS"

LOCAL_MGMT_DB_PATH = (HOME_PROMPT_FLOW_DIR / "pf.sqlite").resolve()
LOCAL_MGMT_DB_SESSION_ACQUIRE_LOCK_PATH = (HOME_PROMPT_FLOW_DIR / "pf.sqlite.lock").resolve()
@@ -3,7 +3,7 @@
# ---------------------------------------------------------
import os

from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
from promptflow._utils.user_agent_utils import ClientUserAgentUtil
from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api

@@ -23,10 +23,12 @@ def __enter__(self):

if isinstance(self.client, ProxyClient):
os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false"

def __exit__(self, exc_type, exc_val, exc_tb):
if isinstance(self.client, CodeClient):
recover_openai_api()

if isinstance(self.client, ProxyClient):
os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
os.environ.pop(PF_FLOW_META_LOAD_IN_SUBPROCESS, None)
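For the proxy-client path, the batch run context above now also disables subprocess meta loading on entry and removes both variables on exit. A self-contained sketch of that set-then-clean-up pattern, under the assumption that a plain context manager is acceptable (the `temporary_environ` helper is hypothetical, not promptflow API):

import os
from contextlib import contextmanager

@contextmanager
def temporary_environ(flags: dict):
    # Set process-wide flags on entry and remove them on exit, even on error.
    os.environ.update(flags)
    try:
        yield
    finally:
        for key in flags:
            os.environ.pop(key, None)

# Usage: mirrors what the batch run context does for ProxyClient runs.
with temporary_environ({"PF_FLOW_ENTRY_IN_TMP": "true",
                        "PF_FLOW_META_LOAD_IN_SUBPROCESS": "false"}):
    pass  # run the proxy-client batch here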
39 changes: 39 additions & 0 deletions src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
@@ -337,7 +337,46 @@ def evaluate(
)
"""
    try:
        return _evaluate(
            evaluation_name=evaluation_name,
            target=target,
            data=data,
            evaluators=evaluators,
            evaluator_config=evaluator_config,
            azure_ai_project=azure_ai_project,
            output_path=output_path,
            **kwargs,
        )
    except Exception as e:
        # Handle multiprocess bootstrap error
        bootstrap_error = (
            "An attempt has been made to start a new process before the\n "
            "current process has finished its bootstrapping phase."
        )
        if bootstrap_error in str(e):
            error_message = (
                "The evaluation failed due to an error during multiprocess bootstrapping. "
                "Please ensure the evaluate API is properly guarded with the '__main__' block:\n\n"
                " if __name__ == '__main__':\n"
                " evaluate(...)"
            )
            raise RuntimeError(error_message)

        raise e


def _evaluate(
    *,
    evaluation_name: Optional[str] = None,
    target: Optional[Callable] = None,
    data: Optional[str] = None,
    evaluators: Optional[Dict[str, Callable]] = None,
    evaluator_config: Optional[Dict[str, Dict[str, str]]] = None,
    azure_ai_project: Optional[Dict] = None,
    output_path: Optional[str] = None,
    **kwargs,
):
    trace_destination = _trace_destination_from_project_scope(azure_ai_project) if azure_ai_project else None

    input_data_df = _validate_and_load_data(target, data, evaluators, output_path, azure_ai_project, evaluation_name)
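The wrapper added above turns the raw multiprocessing bootstrap error into actionable guidance. A hedged usage sketch of the calling pattern the message recommends; the file name, data path, and evaluator are illustrative, and the import path is assumed from the module layout shown in this diff:

# evaluate_script.py -- illustrative only; the point is the __main__ guard.
# With spawn-based multiprocessing, module-level code re-runs in every worker
# process, so the evaluate() call must not execute at import time.
from promptflow.evals.evaluate import evaluate

def answer_length(*, answer: str, **kwargs):
    # Simple row-level evaluator; receives mapped columns as keyword arguments.
    return {"length": len(answer)}

def main():
    return evaluate(
        data="data.jsonl",  # assumed JSONL file with an "answer" column
        evaluators={"answer_length": answer_length},
    )

if __name__ == "__main__":
    result = main()
    print(result["metrics"])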
7 changes: 3 additions & 4 deletions src/promptflow-evals/tests/evals/e2etests/test_evaluate.py
@@ -146,7 +146,7 @@ def test_evaluate_with_content_safety_evaluator(self, project_scope, data_file,
assert 0 <= metrics.get("content_safety.hate_unfairness_defect_rate") <= 1

@pytest.mark.parametrize(
"use_thread_pool,function,column",
"use_pf_client,function,column",
[
(True, answer_evaluator, "length"),
(False, answer_evaluator, "length"),
@@ -156,12 +156,12 @@ def test_evaluate_with_content_safety_evaluator(self, project_scope, data_file,
(False, answer_evaluator_int_dict, "42"),
],
)
def test_evaluate_python_function(self, data_file, use_thread_pool, function, column):
def test_evaluate_python_function(self, data_file, use_pf_client, function, column):
# data
input_data = pd.read_json(data_file, lines=True)

# run the evaluation
result = evaluate(data=data_file, evaluators={"answer": function}, _use_thread_pool=use_thread_pool)
result = evaluate(data=data_file, evaluators={"answer": function}, _use_pf_client=use_pf_client)

row_result_df = pd.DataFrame(result["rows"])
metrics = result["metrics"]
@@ -422,7 +422,6 @@ def test_evaluate_aggregation(self, data_file, return_json, aggregate_return_json
"answer_length": AnswerLength(return_json=return_json, aggregate_return_json=aggregate_return_json),
"f1_score": F1ScoreEvaluator(),
},
_use_thread_pool=False,
)
assert result is not None
assert "metrics" in result
23 changes: 20 additions & 3 deletions src/promptflow-evals/tests/evals/unittests/test_evaluate.py
@@ -1,6 +1,7 @@
import json
import os
import pathlib
from unittest.mock import patch

import numpy as np
import pandas as pd
@@ -338,14 +339,14 @@ def test_renaming_column(self):
df_actuals = _rename_columns_conditionally(df)
assert_frame_equal(df_actuals.sort_index(axis=1), df_expected.sort_index(axis=1))

@pytest.mark.parametrize("use_thread_pool", [True, False])
def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_thread_pool):
@pytest.mark.parametrize("use_pf_client", [True, False])
def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_pf_client):
output_path = os.path.join(tmpdir, "eval_test_results.jsonl")
result = evaluate(
data=evaluate_test_data_jsonl_file,
evaluators={"g": F1ScoreEvaluator()},
output_path=output_path,
_use_thread_pool=use_thread_pool,
_use_pf_client=use_pf_client,
)

assert result is not None
@@ -381,3 +382,19 @@ def test_evaluate_with_errors(self):
expected.at[2, "outputs.yeti.result"] = np.nan
expected.at[3, "outputs.yeti.result"] = np.nan
assert_frame_equal(expected, result_df)

    @patch("promptflow.evals.evaluate._evaluate._evaluate")
    def test_evaluate_main_entry_guard(self, mock_evaluate, evaluate_test_data_jsonl_file):
        err_msg = (
            "An attempt has been made to start a new process before the\n "
            "current process has finished its bootstrapping phase."
        )
        mock_evaluate.side_effect = RuntimeError(err_msg)

        with pytest.raises(RuntimeError) as exc_info:
            evaluate(
                data=evaluate_test_data_jsonl_file,
                evaluators={"f1_score": F1ScoreEvaluator()},
            )

        assert "Please ensure the evaluate API is properly guarded with the '__main__' block" in exc_info.value.args[0]

