
Commit

Merge branch 'main' into users/ninhu/token_cache
ninghu committed Jun 18, 2024
2 parents 35be3cc + dc84948 commit 206099c
Showing 6 changed files with 69 additions and 10 deletions.
@@ -1,10 +1,11 @@
import os
import re
from pathlib import Path
from typing import Any, Dict, List

from promptflow._constants import FlowEntryRegex
from promptflow._core.entry_meta_generator import _generate_flow_meta
from promptflow._sdk._constants import FLOW_META_JSON_GEN_TIMEOUT
from promptflow._sdk._constants import FLOW_META_JSON_GEN_TIMEOUT, PF_FLOW_META_LOAD_IN_SUBPROCESS
from promptflow._utils.flow_utils import resolve_python_entry_file

from ._base_inspector_proxy import AbstractInspectorProxy
@@ -35,7 +36,7 @@ def get_entry_meta(
**kwargs,
) -> Dict[str, Any]:
timeout = kwargs.get("timeout", FLOW_META_JSON_GEN_TIMEOUT)
load_in_subprocess = kwargs.get("load_in_subprocess", True)
load_in_subprocess = os.environ.get(PF_FLOW_META_LOAD_IN_SUBPROCESS, "True").lower() == "true"

flow_dag = {"entry": entry}
# generate flow.json only for eager flow for now
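The hunk above replaces the `load_in_subprocess` keyword argument with an environment-variable check, so callers outside the function signature can disable subprocess loading of flow metadata. A minimal standalone sketch of the same toggle logic, assuming only the variable name from this diff (the helper `_should_load_in_subprocess` is illustrative, not part of the library):

import os

PF_FLOW_META_LOAD_IN_SUBPROCESS = "PF_FLOW_META_LOAD_IN_SUBPROCESS"

def _should_load_in_subprocess() -> bool:
    # Defaults to True when the variable is unset; any value other than
    # "true" (case-insensitive) disables subprocess loading.
    return os.environ.get(PF_FLOW_META_LOAD_IN_SUBPROCESS, "True").lower() == "true"

if __name__ == "__main__":
    print(_should_load_in_subprocess())  # True while the variable is unset
    os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false"
    print(_should_load_in_subprocess())  # False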
1 change: 1 addition & 0 deletions src/promptflow-devkit/promptflow/_sdk/_constants.py
@@ -99,6 +99,7 @@ def _prepare_home_dir() -> Path:
PF_SERVICE_DEBUG = "PF_SERVICE_DEBUG"
PF_SYSTEM_METRICS_PREFIX = "__pf__"
PF_FLOW_ENTRY_IN_TMP = "PF_FLOW_ENTRY_IN_TMP"
PF_FLOW_META_LOAD_IN_SUBPROCESS = "PF_FLOW_META_LOAD_IN_SUBPROCESS"

LOCAL_MGMT_DB_PATH = (HOME_PROMPT_FLOW_DIR / "pf.sqlite").resolve()
LOCAL_MGMT_DB_SESSION_ACQUIRE_LOCK_PATH = (HOME_PROMPT_FLOW_DIR / "pf.sqlite.lock").resolve()
@@ -3,7 +3,7 @@
# ---------------------------------------------------------
import os

from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
from promptflow._utils.user_agent_utils import ClientUserAgentUtil
from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api

@@ -23,10 +23,12 @@ def __enter__(self):

if isinstance(self.client, ProxyClient):
os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false"

def __exit__(self, exc_type, exc_val, exc_tb):
if isinstance(self.client, CodeClient):
recover_openai_api()

if isinstance(self.client, ProxyClient):
os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
os.environ.pop(PF_FLOW_META_LOAD_IN_SUBPROCESS, None)
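For the proxy-client path, the batch run context above now also disables subprocess meta loading on entry and removes both variables on exit. A self-contained sketch of that set-then-clean-up pattern, under the assumption that a plain context manager is acceptable (the `temporary_environ` helper is hypothetical, not promptflow API):

import os
from contextlib import contextmanager

@contextmanager
def temporary_environ(flags: dict):
    # Set process-wide flags on entry and remove them on exit, even on error.
    os.environ.update(flags)
    try:
        yield
    finally:
        for key in flags:
            os.environ.pop(key, None)

# Usage: mirrors what the batch run context does for ProxyClient runs.
with temporary_environ({"PF_FLOW_ENTRY_IN_TMP": "true",
                        "PF_FLOW_META_LOAD_IN_SUBPROCESS": "false"}):
    pass  # run the proxy-client batch here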
39 changes: 39 additions & 0 deletions src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
@@ -337,7 +337,46 @@ def evaluate(
)
"""
    try:
        return _evaluate(
            evaluation_name=evaluation_name,
            target=target,
            data=data,
            evaluators=evaluators,
            evaluator_config=evaluator_config,
            azure_ai_project=azure_ai_project,
            output_path=output_path,
            **kwargs,
        )
    except Exception as e:
        # Handle multiprocess bootstrap error
        bootstrap_error = (
            "An attempt has been made to start a new process before the\n "
            "current process has finished its bootstrapping phase."
        )
        if bootstrap_error in str(e):
            error_message = (
                "The evaluation failed due to an error during multiprocess bootstrapping. "
                "Please ensure the evaluate API is properly guarded with the '__main__' block:\n\n"
                " if __name__ == '__main__':\n"
                " evaluate(...)"
            )
            raise RuntimeError(error_message)

        raise e


def _evaluate(
    *,
    evaluation_name: Optional[str] = None,
    target: Optional[Callable] = None,
    data: Optional[str] = None,
    evaluators: Optional[Dict[str, Callable]] = None,
    evaluator_config: Optional[Dict[str, Dict[str, str]]] = None,
    azure_ai_project: Optional[Dict] = None,
    output_path: Optional[str] = None,
    **kwargs,
):
    trace_destination = _trace_destination_from_project_scope(azure_ai_project) if azure_ai_project else None

    input_data_df = _validate_and_load_data(target, data, evaluators, output_path, azure_ai_project, evaluation_name)
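The wrapper added above turns the raw multiprocessing bootstrap error into actionable guidance. A hedged usage sketch of the calling pattern the message recommends; the file name, data path, and evaluator are illustrative, and the import path is assumed from the module layout shown in this diff:

# evaluate_script.py -- illustrative only; the point is the __main__ guard.
# With spawn-based multiprocessing, module-level code re-runs in every worker
# process, so the evaluate() call must not execute at import time.
from promptflow.evals.evaluate import evaluate

def answer_length(*, answer: str, **kwargs):
    # Simple row-level evaluator; receives mapped columns as keyword arguments.
    return {"length": len(answer)}

def main():
    return evaluate(
        data="data.jsonl",  # assumed JSONL file with an "answer" column
        evaluators={"answer_length": answer_length},
    )

if __name__ == "__main__":
    result = main()
    print(result["metrics"])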
7 changes: 3 additions & 4 deletions src/promptflow-evals/tests/evals/e2etests/test_evaluate.py
@@ -146,7 +146,7 @@ def test_evaluate_with_content_safety_evaluator(self, project_scope, data_file,
assert 0 <= metrics.get("content_safety.hate_unfairness_defect_rate") <= 1

@pytest.mark.parametrize(
"use_thread_pool,function,column",
"use_pf_client,function,column",
[
(True, answer_evaluator, "length"),
(False, answer_evaluator, "length"),
@@ -156,12 +156,12 @@ def test_evaluate_with_content_safety_evaluator(self, project_scope, data_file,
(False, answer_evaluator_int_dict, "42"),
],
)
def test_evaluate_python_function(self, data_file, use_thread_pool, function, column):
def test_evaluate_python_function(self, data_file, use_pf_client, function, column):
# data
input_data = pd.read_json(data_file, lines=True)

# run the evaluation
result = evaluate(data=data_file, evaluators={"answer": function}, _use_thread_pool=use_thread_pool)
result = evaluate(data=data_file, evaluators={"answer": function}, _use_pf_client=use_pf_client)

row_result_df = pd.DataFrame(result["rows"])
metrics = result["metrics"]
@@ -422,7 +422,6 @@ def test_evaluate_aggregation(self, data_file, return_json, aggregate_return_json
"answer_length": AnswerLength(return_json=return_json, aggregate_return_json=aggregate_return_json),
"f1_score": F1ScoreEvaluator(),
},
_use_thread_pool=False,
)
assert result is not None
assert "metrics" in result
23 changes: 20 additions & 3 deletions src/promptflow-evals/tests/evals/unittests/test_evaluate.py
@@ -1,6 +1,7 @@
import json
import os
import pathlib
from unittest.mock import patch

import numpy as np
import pandas as pd
@@ -338,14 +339,14 @@ def test_renaming_column(self):
df_actuals = _rename_columns_conditionally(df)
assert_frame_equal(df_actuals.sort_index(axis=1), df_expected.sort_index(axis=1))

@pytest.mark.parametrize("use_thread_pool", [True, False])
def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_thread_pool):
@pytest.mark.parametrize("use_pf_client", [True, False])
def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_pf_client):
output_path = os.path.join(tmpdir, "eval_test_results.jsonl")
result = evaluate(
data=evaluate_test_data_jsonl_file,
evaluators={"g": F1ScoreEvaluator()},
output_path=output_path,
_use_thread_pool=use_thread_pool,
_use_pf_client=use_pf_client,
)

assert result is not None
@@ -381,3 +382,19 @@ def test_evaluate_with_errors(self):
expected.at[2, "outputs.yeti.result"] = np.nan
expected.at[3, "outputs.yeti.result"] = np.nan
assert_frame_equal(expected, result_df)

    @patch("promptflow.evals.evaluate._evaluate._evaluate")
    def test_evaluate_main_entry_guard(self, mock_evaluate, evaluate_test_data_jsonl_file):
        err_msg = (
            "An attempt has been made to start a new process before the\n "
            "current process has finished its bootstrapping phase."
        )
        mock_evaluate.side_effect = RuntimeError(err_msg)

        with pytest.raises(RuntimeError) as exc_info:
            evaluate(
                data=evaluate_test_data_jsonl_file,
                evaluators={"f1_score": F1ScoreEvaluator()},
            )

        assert "Please ensure the evaluate API is properly guarded with the '__main__' block" in exc_info.value.args[0]

