Python: Add Logit-Bias to TextCompletion abstractions #1880

Merged Jul 14, 2023

Commits (73)
d02d268
added token_selection_biases as added in #1647
sneha-afk Jul 5, 2023
1a12f85
Merge branch 'microsoft:main' into issue1854
am831 Jul 5, 2023
ab18185
added token_selection_biases
am831 Jul 5, 2023
b46f3d3
Merge branch 'issue1854' of https://github.com/am831/semantic-kernel …
am831 Jul 5, 2023
2f203d0
Merge branch 'microsoft:main' into issue1854
am831 Jul 6, 2023
99d3cad
added for loop for updating token_selection_biases
am831 Jul 6, 2023
cf3137a
Merge branch 'issue1854' of https://github.com/am831/semantic-kernel …
am831 Jul 6, 2023
190b239
use Dict and default_factory for desired behavior, compatible with Py…
sneha-afk Jul 6, 2023
78e2dd9
Merge branch 'issue1854' of github.com:am831/semantic-kernel into iss…
sneha-afk Jul 6, 2023
04a4f7a
Merge branch 'main' into issue1854
sneha-afk Jul 6, 2023
9e0e8d9
added logit bias example file
am831 Jul 6, 2023
6fe64ae
Merge branch 'microsoft:main' into issue1854
am831 Jul 6, 2023
5f1d407
Merge branch 'issue1854' of https://github.com/am831/semantic-kernel …
am831 Jul 6, 2023
47b8445
Added loop to check token_selection_biases
SebasX5 Jul 6, 2023
149bb31
Merge branch 'issue1854' of github.com:am831/semantic-kernel into iss…
sneha-afk Jul 6, 2023
512f63b
working on example file
am831 Jul 6, 2023
628b2f9
Merge branch 'issue1854' of github.com:am831/semantic-kernel into iss…
sneha-afk Jul 6, 2023
35e6730
minor changes w/ getting api_key
sneha-afk Jul 6, 2023
11d2815
working on example file
am831 Jul 6, 2023
3cea14c
Merge branch 'microsoft:main' into issue1854
am831 Jul 6, 2023
4ad8a15
working on example
am831 Jul 7, 2023
88f1098
Merge branch 'issue1854' of https://github.com/am831/semantic-kernel …
am831 Jul 7, 2023
317b015
Merge branch 'microsoft:main' into issue1854
am831 Jul 7, 2023
1affc17
logit_bias example file in progress
am831 Jul 7, 2023
cc71848
Merge branch 'issue1854' of https://github.com/am831/semantic-kernel …
am831 Jul 7, 2023
9e5eee2
logit_bias example progress
am831 Jul 7, 2023
5ccc763
logit_bias example in progress
am831 Jul 7, 2023
e218cb3
finished example file
sneha-afk Jul 7, 2023
26f5583
resolve merge conflicts
sneha-afk Jul 7, 2023
ed6cfc0
Merge branch 'main' of github.com:am831/semantic-kernel into issue1854
sneha-afk Jul 7, 2023
9b5cef1
mistakenly had .env.example deleted
sneha-afk Jul 7, 2023
f8dd569
fixed naming of .env.example
sneha-afk Jul 7, 2023
843d007
Merge branch 'microsoft:main' into issue1854
am831 Jul 8, 2023
a69173c
Merge branch 'microsoft:main' into issue1854
am831 Jul 10, 2023
abbb73f
check for banned words
am831 Jul 10, 2023
6c397ad
refactored to logit_bias, got more accurate banning of tokens
sneha-afk Jul 10, 2023
1fe3baf
text_complete_request function
am831 Jul 10, 2023
284d7ef
Merge branch 'issue1854' of https://github.com/am831/semantic-kernel …
am831 Jul 10, 2023
ee648b7
fixed output format
am831 Jul 10, 2023
551e0ee
text completion
am831 Jul 10, 2023
f4b2b1b
Added another example to logit_bias.py
SebasX5 Jul 10, 2023
fe6fbe4
text completion logic
am831 Jul 10, 2023
96e348d
Merge branch 'issue1854' of https://github.com/am831/semantic-kernel …
SebasX5 Jul 10, 2023
b634033
added more tokens to ban
am831 Jul 10, 2023
be31797
Merge branch 'main' of github.com:am831/semantic-kernel into issue1854
sneha-afk Jul 10, 2023
d01d099
Merge branch 'issue1854' of https://github.com/am831/semantic-kernel …
am831 Jul 10, 2023
9d309e9
Merge branch 'microsoft:main' into issue1854
am831 Jul 10, 2023
419341f
Merge branch 'issue1854' of github.com:am831/semantic-kernel into iss…
sneha-afk Jul 10, 2023
8d84a9a
fixed format
am831 Jul 10, 2023
a30ca94
Merge branch 'issue1854' of https://github.com/am831/semantic-kernel …
am831 Jul 10, 2023
42f5fba
Merge branch 'issue1854' of github.com:am831/semantic-kernel into iss…
sneha-afk Jul 10, 2023
3b06615
added clarity to logit_bias.py, removed some redundancies in the comp…
sneha-afk Jul 11, 2023
73ef0a7
fix unit tests to work with added logit_bias field
sneha-afk Jul 11, 2023
b0aaf0f
Merge branch 'main' into issue1854
awharrison-28 Jul 11, 2023
d0e107b
using complete_chat_async in logit_bias, adding back ChatRequestSettings
sneha-afk Jul 11, 2023
7ff0f54
Merge branch 'issue1854' of github.com:am831/semantic-kernel into iss…
sneha-afk Jul 11, 2023
7a24209
added unit test with logit_bias != none for test_azure_text_completion
am831 Jul 11, 2023
c65d13d
Merge branch 'microsoft:main' into issue1854
am831 Jul 11, 2023
5dd457d
added unit test with logit_bias != none
am831 Jul 11, 2023
874572a
Merge branch 'issue1854' of https://github.com/am831/semantic-kernel …
am831 Jul 11, 2023
878f03b
Added Unit test to check a call with logit_bias != None
SebasX5 Jul 11, 2023
9fdf969
Merge branch 'microsoft:main' into issue1854
am831 Jul 11, 2023
77ece2f
Merge branch 'microsoft:main' into issue1854
am831 Jul 11, 2023
a50664c
Merge branch 'main' into issue1854
sneha-afk Jul 11, 2023
620e1b3
Merge branch 'main' into issue1854
sneha-afk Jul 12, 2023
9f412d9
run pre-commit command for proper linting
sneha-afk Jul 12, 2023
c85eba0
Merge branch 'main' into issue1854
sneha-afk Jul 12, 2023
7e99a58
Merge branch 'issue1854' of github.com:am831/semantic-kernel into iss…
sneha-afk Jul 12, 2023
5bdde62
Merge branch 'microsoft:main' into issue1854
am831 Jul 13, 2023
42c5027
Rename logit_bias.py to openai_logit_bias.py
SebasX5 Jul 13, 2023
622326e
Merge branch 'main' into issue1854
SebasX5 Jul 13, 2023
aa41ea9
Merge branch 'main' into issue1854
awharrison-28 Jul 13, 2023
a0b2b60
Merge branch 'main' into issue1854
SebasX5 Jul 13, 2023
206 changes: 206 additions & 0 deletions python/samples/kernel-syntax-examples/logit_bias.py
@@ -0,0 +1,206 @@
# Copyright (c) Microsoft. All rights reserved.

import asyncio

import semantic_kernel as sk
import semantic_kernel.connectors.ai.open_ai as sk_oai
from semantic_kernel.connectors.ai.chat_request_settings import ChatRequestSettings
from semantic_kernel.connectors.ai.complete_request_settings import (
    CompleteRequestSettings,
)

"""
Logit bias enables prioritizing certain tokens within a given output.
To utilize the logit bias function, you will need to know the token ids of the words you are using.
See the GPT Tokenizer to obtain token ids: https://platform.openai.com/tokenizer
Read more about logit bias and how to configure output: https://help.openai.com/en/articles/5247780-using-logit-bias-to-define-token-probability
"""


def _config_ban_tokens(settings_type, keys):
    settings = (
        ChatRequestSettings() if settings_type == "chat" else CompleteRequestSettings()
    )

    # Bias values range from -100 (effectively bans the token) to 100
    # (effectively forces its selection). Here every token id in `keys`
    # is mapped to -100 to ban it.
    for k in keys:
        settings.token_selection_biases[k] = -100
    return settings
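
# Illustration (not part of the original sample): after
#     settings = _config_ban_tokens("chat", [2032, 680])
# the resulting mapping is settings.token_selection_biases == {2032: -100, 680: -100},
# which the OpenAI connectors forward to the API as the `logit_bias` parameter.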


async def chat_request_example(kernel, api_key, org_id):
    openai_chat_completion = sk_oai.OpenAIChatCompletion(
        "gpt-3.5-turbo", api_key, org_id
    )
    kernel.add_chat_service("chat_service", openai_chat_completion)

    # Spaces and capitalization affect the token ids.
    # The following are the token ids of basketball-related words.
    keys = [
        2032,
        680,
        9612,
        26675,
        3438,
        42483,
        21265,
        6057,
        11230,
        1404,
        2484,
        12494,
        35,
        822,
        11108,
    ]
    banned_words = [
        "swish",
        "screen",
        "score",
        "dominant",
        "basketball",
        "game",
        "GOAT",
        "Shooting",
        "Dribbling",
    ]

    # The model will try its best to avoid using any of the above words
    settings = _config_ban_tokens("chat", keys)

    prompt_config = sk.PromptTemplateConfig.from_completion_parameters(
        max_tokens=2000, temperature=0.7, top_p=0.8
    )
    prompt_template = sk.ChatPromptTemplate(
        "{{$user_input}}", kernel.prompt_template_engine, prompt_config
    )

    # Set up the chat with the prompt
    prompt_template.add_system_message("You are a basketball expert")
    user_mssg = "I love the LA Lakers, tell me an interesting fact about LeBron James."
    prompt_template.add_user_message(user_mssg)
    function_config = sk.SemanticFunctionConfig(prompt_config, prompt_template)
    kernel.register_semantic_function("ChatBot", "Chat", function_config)

    chat_messages = list()
    chat_messages.append(("user", user_mssg))
    answer = await openai_chat_completion.complete_chat_async(chat_messages, settings)
    chat_messages.append(("assistant", str(answer)))

    user_mssg = "What are his best all-time stats?"
    chat_messages.append(("user", user_mssg))
    answer = await openai_chat_completion.complete_chat_async(chat_messages, settings)
    chat_messages.append(("assistant", str(answer)))

    context_vars = sk.ContextVariables()
    context_vars["chat_history"] = ""
    context_vars["chat_bot_ans"] = ""
    for role, mssg in chat_messages:
        if role == "user":
            context_vars["chat_history"] += f"User:> {mssg}\n"
        elif role == "assistant":
            context_vars["chat_history"] += f"ChatBot:> {mssg}\n"
            context_vars["chat_bot_ans"] += f"{mssg}\n"

    kernel.remove_chat_service("chat_service")
    return context_vars, banned_words


async def text_complete_request_example(kernel, api_key, org_id):
    openai_text_completion = sk_oai.OpenAITextCompletion(
        "text-davinci-002", api_key, org_id
    )
    kernel.add_text_completion_service("text_service", openai_text_completion)

    # Spaces and capitalization affect the token ids.
    # The following are the token ids of pie-related words.
    keys = [
        18040,
        17180,
        16108,
        4196,
        79,
        931,
        5116,
        30089,
        36724,
        47,
        931,
        5116,
        431,
        5171,
        613,
        5171,
        350,
        721,
        272,
        47,
        721,
        272,
    ]
    banned_words = [
        "apple",
        " apple",
        "Apple",
        " Apple",
        "pumpkin",
        " pumpkin",
        " Pumpkin",
        "pecan",
        " pecan",
        " Pecan",
        "Pecan",
    ]

    # The model will try its best to avoid using any of the above words
    settings = _config_ban_tokens("complete", keys)

    user_mssg = "The best pie flavor to have in autumn is"
    answer = await openai_text_completion.complete_async(user_mssg, settings)

    context_vars = sk.ContextVariables()
    context_vars["chat_history"] = f"User:> {user_mssg}\nChatBot:> {answer}\n"
    context_vars["chat_bot_ans"] = str(answer)

    kernel.remove_text_completion_service("text_service")
    return context_vars, banned_words


def _check_banned_words(banned_list, actual_list) -> bool:
    """Return True if none of the banned words appear among the answer's words."""
    passed = True
    for word in banned_list:
        if word in actual_list:
            print(f'The banned word "{word}" was found in the answer')
            passed = False
    return passed


def _format_output(context, banned_words) -> None:
    print(context["chat_history"])
    chat_bot_ans_words = context["chat_bot_ans"].split()
    if _check_banned_words(banned_words, chat_bot_ans_words):
        print("None of the banned words were found in the answer")


async def main() -> None:
    kernel = sk.Kernel()
    api_key, org_id = sk.openai_settings_from_dot_env()

    print("Chat completion example:")
    print("------------------------")
    chat, banned_words = await chat_request_example(kernel, api_key, org_id)
    _format_output(chat, banned_words)

    print("------------------------")

    print("\nText completion example:")
    print("------------------------")
    chat, banned_words = await text_complete_request_example(kernel, api_key, org_id)
    _format_output(chat, banned_words)

    return


if __name__ == "__main__":
    asyncio.run(main())
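
The token ids hard-coded in the sample above can also be generated programmatically. A minimal sketch using the tiktoken package (an assumption on our part; the sample itself relies on the web tokenizer linked in its docstring):

import tiktoken

# Token ids depend on the model's encoding; spaces and capitalization matter,
# so each variant of a word must be encoded separately.
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
for word in ["basketball", " basketball", "Basketball"]:
    print(word, "->", enc.encode(word))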
python/semantic_kernel/connectors/ai/chat_request_settings.py
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft. All rights reserved.

-from dataclasses import dataclass
-from typing import TYPE_CHECKING
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Dict

 if TYPE_CHECKING:
     from semantic_kernel.semantic_functions.prompt_template_config import (
@@ -17,6 +17,7 @@ class ChatRequestSettings:
     frequency_penalty: float = 0.0
     number_of_responses: int = 1
     max_tokens: int = 256
+    token_selection_biases: Dict[int, int] = field(default_factory=dict)

     def update_from_completion_config(
         self, completion_config: "PromptTemplateConfig.CompletionConfig"
python/semantic_kernel/connectors/ai/complete_request_settings.py
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft. All rights reserved.

 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, List
+from typing import TYPE_CHECKING, Dict, List

 if TYPE_CHECKING:
     from semantic_kernel.semantic_functions.prompt_template_config import (
@@ -19,6 +19,7 @@ class CompleteRequestSettings:
     stop_sequences: List[str] = field(default_factory=list)
     number_of_responses: int = 1
     logprobs: int = 0
+    token_selection_biases: Dict[int, int] = field(default_factory=dict)

     def update_from_completion_config(
         self, completion_config: "PromptTemplateConfig.CompletionConfig"
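
As a quick usage sketch (assuming only what the diffs above show), the new field can be populated directly on either settings class before a request is made:

from semantic_kernel.connectors.ai.complete_request_settings import (
    CompleteRequestSettings,
)

settings = CompleteRequestSettings()
# -100 effectively bans token id 18040; positive values up to 100 favor a token.
settings.token_selection_biases[18040] = -100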
python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py
@@ -108,6 +108,7 @@ async def complete_async(
             frequency_penalty=request_settings.frequency_penalty,
             max_tokens=request_settings.max_tokens,
             number_of_responses=request_settings.number_of_responses,
+            token_selection_biases=request_settings.token_selection_biases,
         )
         response = await self._send_chat_request(
             prompt_to_message, chat_settings, False
@@ -129,6 +130,7 @@ async def complete_stream_async(
             frequency_penalty=request_settings.frequency_penalty,
             max_tokens=request_settings.max_tokens,
             number_of_responses=request_settings.number_of_responses,
+            token_selection_biases=request_settings.token_selection_biases,
         )
         response = await self._send_chat_request(prompt_to_message, chat_settings, True)

@@ -208,6 +210,12 @@ async def _send_chat_request(
                 max_tokens=request_settings.max_tokens,
                 n=request_settings.number_of_responses,
                 stream=stream,
+                logit_bias=(
+                    request_settings.token_selection_biases
+                    if request_settings.token_selection_biases is not None
+                    and len(request_settings.token_selection_biases) > 0
+                    else None
+                ),
             )
         except Exception as ex:
             raise AIException(
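
A small side note on the guard above: because an empty dict is falsy in Python, an equivalent and slightly terser form would be logit_bias=(request_settings.token_selection_biases or None). That is a stylistic sketch only, not what the PR ships.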
python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion.py
@@ -142,6 +142,12 @@ async def _send_completion_request(
                     and len(request_settings.stop_sequences) > 0
                     else None
                 ),
+                logit_bias=(
+                    request_settings.token_selection_biases
+                    if request_settings.token_selection_biases is not None
+                    and len(request_settings.token_selection_biases) > 0
+                    else None
+                ),
             )
         except Exception as ex:
             raise AIException(
python/tests/unit/ai/open_ai/services/test_azure_chat_completion.py
@@ -154,4 +154,54 @@ async def test_azure_chat_completion_call_with_parameters() -> None:
             frequency_penalty=complete_request_settings.frequency_penalty,
             n=complete_request_settings.number_of_responses,
             stream=False,
+            logit_bias=None,
         )


@pytest.mark.asyncio
async def test_azure_chat_completion_call_with_parameters_and_Logit_Bias_Defined() -> None:
    mock_openai = AsyncMock()
    with patch(
        "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion.openai",
        new=mock_openai,
    ):
        deployment_name = "test_deployment"
        endpoint = "https://test-endpoint.com"
        api_key = "test_api_key"
        api_type = "azure"
        api_version = "2023-03-15-preview"
        logger = Logger("test_logger")
        prompt = "hello world"
        messages = [{"role": "user", "content": prompt}]
        complete_request_settings = CompleteRequestSettings()

        token_bias = {1: -100}
        complete_request_settings.token_selection_biases = token_bias

        azure_chat_completion = AzureChatCompletion(
            deployment_name=deployment_name,
            endpoint=endpoint,
            api_key=api_key,
            api_version=api_version,
            logger=logger,
        )

        await azure_chat_completion.complete_async(prompt, complete_request_settings)

        mock_openai.ChatCompletion.acreate.assert_called_once_with(
            engine=deployment_name,
            api_key=api_key,
            api_type=api_type,
            api_base=endpoint,
            api_version=api_version,
            organization=None,
            messages=messages,
            temperature=complete_request_settings.temperature,
            max_tokens=complete_request_settings.max_tokens,
            top_p=complete_request_settings.top_p,
            presence_penalty=complete_request_settings.presence_penalty,
            frequency_penalty=complete_request_settings.frequency_penalty,
            n=complete_request_settings.number_of_responses,
            stream=False,
            logit_bias=token_bias,
        )
python/tests/unit/ai/open_ai/services/test_azure_text_completion.py
@@ -153,4 +153,54 @@ async def test_azure_text_completion_call_with_parameters() -> None:
             stop=None,
             n=complete_request_settings.number_of_responses,
             stream=False,
+            logit_bias=None,
         )


@pytest.mark.asyncio
async def test_azure_text_completion_call_with_parameters_logit_bias_not_none() -> None:
    mock_openai = AsyncMock()
    with patch(
        "semantic_kernel.connectors.ai.open_ai.services.open_ai_text_completion.openai",
        new=mock_openai,
    ):
        deployment_name = "test_deployment"
        endpoint = "https://test-endpoint.com"
        api_key = "test_api_key"
        api_type = "azure"
        api_version = "2023-03-15-preview"
        logger = Logger("test_logger")
        prompt = "hello world"
        complete_request_settings = CompleteRequestSettings()

        token_bias = {200: 100}
        complete_request_settings.token_selection_biases = token_bias

        azure_text_completion = AzureTextCompletion(
            deployment_name=deployment_name,
            endpoint=endpoint,
            api_key=api_key,
            api_version=api_version,
            logger=logger,
        )

        await azure_text_completion.complete_async(prompt, complete_request_settings)

        mock_openai.Completion.acreate.assert_called_once_with(
            engine=deployment_name,
            api_key=api_key,
            api_type=api_type,
            api_base=endpoint,
            api_version=api_version,
            organization=None,
            prompt=prompt,
            temperature=complete_request_settings.temperature,
            max_tokens=complete_request_settings.max_tokens,
            top_p=complete_request_settings.top_p,
            presence_penalty=complete_request_settings.presence_penalty,
            frequency_penalty=complete_request_settings.frequency_penalty,
            stop=None,
            n=complete_request_settings.number_of_responses,
            stream=False,
            logit_bias=token_bias,
        )
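
A note on running the new tests locally: from the repository's python/ directory (the tests/unit path is assumed from the repo layout), the added cases can be selected by keyword, e.g. pytest tests/unit -k "logit_bias or Logit_Bias_Defined".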