Fix docstrings (#3449)
# Description

Fix the problem where docstrings were not shown for the evaluators on the reference page:
https://microsoft.github.io/promptflow/reference/python-library-reference/promptflow-evals/promptflow.evals.evaluators.html.
See work item 3305596.
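
The core of the fix is moving each evaluator's docstring from `__init__` onto the class body, since Sphinx autodoc renders the class-level docstring on the generated reference page. A minimal sketch of the before/after pattern (the class names below are hypothetical, not the real evaluators):

```python
# Sketch of the pattern applied across the evaluators in this PR; the class
# names are hypothetical -- see the real diffs below for the actual evaluators.


class ExampleEvaluatorBefore:
    def __init__(self, model_config):
        """Docstring only on __init__: autodoc did not surface it on the reference page."""
        self._model_config = model_config


class ExampleEvaluatorAfter:
    """
    Initialize an evaluator configured for a specific model.

    :param model_config: Configuration for the model.
    :type model_config: dict
    """

    def __init__(self, model_config):
        self._model_config = model_config
```

Each real class docstring also gains **Usage** and **Output format** code blocks, and `scripts/docs/conf.py` is updated so that `__call__` docstrings are rendered as well (see the conf.py hunk and the sketch that follows it).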

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [x] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [x] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which has an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [x] Pull request includes test coverage for the included changes.
nick863 committed Jun 25, 2024
1 parent 181952a commit 2eb9cb4
Showing 18 changed files with 531 additions and 308 deletions.
6 changes: 6 additions & 0 deletions scripts/docs/conf.py
@@ -152,6 +152,12 @@
myst_heading_anchors = 5


# include __call__ (and its docstring) when autodoc documents a class
autodoc_default_options = {
'special-members': '__call__',
}


def setup(app):
# Add the gallery directive
app.add_directive("gallery-grid", GalleryDirective)
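
Autodoc skips special members such as `__call__` by default, so without this option the `__call__` docstrings touched in this PR would still be omitted from the generated reference. A rough sketch of the effect, using a hypothetical evaluator rather than one of the real classes:

```python
# Hypothetical evaluator used only to illustrate the effect of
# autodoc_default_options = {'special-members': '__call__'}:
# Sphinx now documents __call__ (signature + docstring) under the class entry.


class AnswerLengthEvaluator:
    """Toy evaluator that scores an answer by its length."""

    def __call__(self, *, answer: str, **kwargs):
        """
        Evaluate answer length.

        :param answer: The answer to be evaluated.
        :type answer: str
        :return: A dict with the computed metric, e.g. {"answer_length": 24}.
        :rtype: dict
        """
        return {"answer_length": len(answer)}
```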
86 changes: 54 additions & 32 deletions src/promptflow-evals/promptflow/evals/evaluators/_chat/_chat.py
@@ -22,39 +22,59 @@


class ChatEvaluator:
"""
Initialize a chat evaluator configured for a specific Azure OpenAI model.
:param model_config: Configuration for the Azure OpenAI model.
:type model_config: AzureOpenAIModelConfiguration
:param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
:type eval_last_turn: bool
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
Default is True.
:type parallel: bool
:return: A function that evaluates and generates metrics for "chat" scenario.
:rtype: function
**Usage**
.. code-block:: python
chat_eval = ChatEvaluator(model_config)
conversation = [
{"role": "user", "content": "What is the value of 2 + 2?"},
{"role": "assistant", "content": "2 + 2 = 4", "context": {
"citations": [
{"id": "math_doc.md", "content": "Information about additions: 1 + 2 = 3, 2 + 2 = 4"}
]
}
}
]
result = chat_eval(conversation=conversation)
**Output format**
.. code-block:: python
{
"evaluation_per_turn": {
"gpt_retrieval": [1.0, 2.0],
"gpt_groundedness": [5.0, 2.0],
"gpt_relevance": [3.0, 5.0],
"gpt_coherence": [1.0, 2.0],
"gpt_fluency": [3.0, 5.0]
},
"gpt_retrieval": 1.5,
"gpt_groundedness": 3.5,
"gpt_relevance": 4.0,
"gpt_coherence": 1.5,
"gpt_fluency": 4.0
}
"""

def __init__(
self, model_config: AzureOpenAIModelConfiguration, eval_last_turn: bool = False, parallel: bool = True
):
"""
Initialize an evaluator configured for a specific Azure OpenAI model.
:param model_config: Configuration for the Azure OpenAI model.
:type model_config: AzureOpenAIModelConfiguration
:param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
:type eval_last_turn: bool
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
Default is True.
:type parallel: bool
:return: A function that evaluates and generates metrics for "chat" scenario.
:rtype: function
**Usage**
.. code-block:: python
chat_eval = ChatEvaluator(model_config)
conversation = [
{"role": "user", "content": "What is the value of 2 + 2?"},
{"role": "assistant", "content": "2 + 2 = 4", "context": {
"citations": [
{"id": "math_doc.md", "content": "Information about additions: 1 + 2 = 3, 2 + 2 = 4"}
]
}
}
]
result = chat_eval(conversation=conversation)
"""
self._eval_last_turn = eval_last_turn
self._parallel = parallel

@@ -73,7 +93,8 @@ def __init__(
self._retrieval_chat_evaluator = RetrievalChatEvaluator(model_config)

def __call__(self, *, conversation, **kwargs):
"""Evaluates chat scenario.
"""
Evaluates chat scenario.
:param conversation: The conversation to be evaluated. Each turn should have "role" and "content" keys.
"context" key is optional for assistant's turn and should have "citations" key with list of citations.
@@ -222,7 +243,8 @@ def _validate_conversation(self, conversation: List[Dict]):
one_based_turn_num = turn_num + 1

if not isinstance(turn, dict):
raise ValueError(f"Each turn in 'conversation' must be a dictionary. Turn number: {one_based_turn_num}")
raise ValueError(
f"Each turn in 'conversation' must be a dictionary. Turn number: {one_based_turn_num}")

if "role" not in turn or "content" not in turn:
raise ValueError(
@@ -16,30 +16,44 @@


class RetrievalChatEvaluator:
def __init__(self, model_config: AzureOpenAIModelConfiguration):
"""
Initialize an evaluator configured for a specific Azure OpenAI model.
:param model_config: Configuration for the Azure OpenAI model.
:type model_config: AzureOpenAIModelConfiguration
:return: A function that evaluates and generates metrics for "chat" scenario.
:rtype: function
**Usage**
.. code-block:: python
chat_eval = RetrievalChatEvaluator(model_config)
conversation = [
{"role": "user", "content": "What is the value of 2 + 2?"},
{"role": "assistant", "content": "2 + 2 = 4", "context": {
"citations": [
{"id": "math_doc.md", "content": "Information about additions: 1 + 2 = 3, 2 + 2 = 4"}
]
}
"""
Initialize an evaluator configured for a specific Azure OpenAI model.
:param model_config: Configuration for the Azure OpenAI model.
:type model_config: AzureOpenAIModelConfiguration
:return: A function that evaluates and generates metrics for "chat" scenario.
:rtype: function
**Usage**
.. code-block:: python
chat_eval = RetrievalChatEvaluator(model_config)
conversation = [
{"role": "user", "content": "What is the value of 2 + 2?"},
{"role": "assistant", "content": "2 + 2 = 4", "context": {
"citations": [
{"id": "math_doc.md", "content": "Information about additions: 1 + 2 = 3, 2 + 2 = 4"}
]
}
]
result = chat_eval(conversation=conversation)
"""
}
]
result = chat_eval(conversation=conversation)
**Output format**
.. code-block:: python
{
"gpt_retrieval": 3.0
"evaluation_per_turn": {
"gpt_retrieval": {
"score": [1.0, 2.0, 3.0]
}
}
}
"""

def __init__(self, model_config: AzureOpenAIModelConfiguration):
# TODO: Remove this block once the bug is fixed
# https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
if model_config.api_version is None:
@@ -12,22 +12,31 @@


class CoherenceEvaluator:
def __init__(self, model_config: AzureOpenAIModelConfiguration):
"""
Initialize an evaluator configured for a specific Azure OpenAI model.
"""
Initialize a coherence evaluator configured for a specific Azure OpenAI model.
:param model_config: Configuration for the Azure OpenAI model.
:type model_config: AzureOpenAIModelConfiguration
:param model_config: Configuration for the Azure OpenAI model.
:type model_config: AzureOpenAIModelConfiguration
**Usage**
**Usage**
.. code-block:: python
.. code-block:: python
eval_fn = CoherenceEvaluator(model_config)
result = eval_fn(
question="What is the capital of Japan?",
answer="The capital of Japan is Tokyo.")
"""
eval_fn = CoherenceEvaluator(model_config)
result = eval_fn(
question="What is the capital of Japan?",
answer="The capital of Japan is Tokyo.")
**Output format**
.. code-block:: python
{
"gpt_coherence": 1.0
}
"""

def __init__(self, model_config: AzureOpenAIModelConfiguration):
# TODO: Remove this block once the bug is fixed
# https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
if model_config.api_version is None:
@@ -39,7 +48,9 @@ def __init__(self, model_config: AzureOpenAIModelConfiguration):
self._flow = load_flow(source=prompty_path, model=prompty_model_config)

def __call__(self, *, question: str, answer: str, **kwargs):
"""Evaluate coherence.
"""
Evaluate coherence.
:param question: The question to be evaluated.
:type question: str
:param answer: The answer to be evaluated.
@@ -7,35 +7,55 @@


class ContentSafetyEvaluator:
def __init__(self, project_scope: dict, parallel: bool = True, credential=None):
"""
Initialize an evaluator configured to evaluate content safety metrics for QA scenario.
"""
Initialize a content safety evaluator configured to evaluate content safety metrics for QA scenario.
:param project_scope: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type project_scope: dict
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
Default is True.
:param credential: The credential for connecting to Azure AI project.
:type credential: TokenCredential
:return: A function that evaluates content-safety metrics for "question-answering" scenario.
:rtype: function
:param project_scope: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type project_scope: dict
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
Default is True.
:param credential: The credential for connecting to Azure AI project.
:type credential: TokenCredential
:return: A function that evaluates content-safety metrics for "question-answering" scenario.
:rtype: function
**Usage**
**Usage**
.. code-block:: python
.. code-block:: python
project_scope = {
"subscription_id": "<subscription_id>",
"resource_group_name": "<resource_group_name>",
"project_name": "<project_name>",
}
eval_fn = ContentSafetyEvaluator(project_scope)
result = eval_fn(
question="What is the capital of France?",
answer="Paris.",
)
"""
project_scope = {
"subscription_id": "<subscription_id>",
"resource_group_name": "<resource_group_name>",
"project_name": "<project_name>",
}
eval_fn = ContentSafetyEvaluator(project_scope)
result = eval_fn(
question="What is the capital of France?",
answer="Paris.",
)
**Output format**
.. code-block:: python
{
"violence": "Medium",
"violence_score": 5.0,
"violence_reason": "Some reason",
"sexual": "Medium",
"sexual_score": 5.0,
"sexual_reason": "Some reason",
"self_harm": "Medium",
"self_harm_score": 5.0,
"self_harm_reason": "Some reason",
"hate_unfairness": "Medium",
"hate_unfairness_score": 5.0,
"hate_unfairness_reason": "Some reason"
}
"""

def __init__(self, project_scope: dict, parallel: bool = True, credential=None):
self._parallel = parallel
self._evaluators = [
ViolenceEvaluator(project_scope, credential),
@@ -45,7 +65,8 @@ def __init__(self, project_scope: dict, parallel: bool = True, credential=None):
]

def __call__(self, *, question: str, answer: str, **kwargs):
"""Evaluates content-safety metrics for "question-answering" scenario.
"""
Evaluates content-safety metrics for "question-answering" scenario.
:param question: The question to be evaluated.
:type question: str